Exemple #1
0
 def forward_test(self, img_features, all_class_attributes):
     '''
     Predict one class index per sample.

     img_features: input handed to self.projection
     all_class_attributes: matrix the normalized projections are multiplied
         with (inline notes in this file give [num_attributes, num_classes])
     returns the argmax over dim 1 of the score matrix, shape [B]
     '''
     # project into attribute space, then rescale with normalizeFeaturesL2
     projected = normalizeFeaturesL2(
         self.projection(img_features))  # shape [B, num_attributes]
     # batched matmul against every class column at once
     batched = projected.unsqueeze(1)
     class_scores = torch.matmul(
         batched, all_class_attributes).squeeze(1)  # shape [B, num_classes]
     return class_scores.argmax(1)
Exemple #2
0
 def forward_train(self, img_features, all_class_attributes,
                   class_attributes, labels):
     '''
     Compute the max-margin ranking loss for one batch.

     img_features: torch.Tensor of shape [B, img_feature_size]; a 4-D input
         is first reduced with self.avg_pool
     class_attributes: torch.Tensor of shape [B, num_attributes]
         (not read by this method)
     labels: torch.Tensor of shape [B]
     all_class_attributes: torch.Tensor of shape [num_attributes, num_classes]
     returns scalar loss
     '''
     pooled = img_features
     if img_features.dim() == 4:
         # collapse the two trailing spatial dimensions
         pooled = self.avg_pool(img_features).squeeze(2).squeeze(2)

     embedded = torch.matmul(pooled.unsqueeze(1), self.W).squeeze(1)
     embedded = normalizeFeaturesL2(embedded)  # shape [B, num_attributes]
     scores = torch.matmul(embedded.unsqueeze(1),
                           all_class_attributes).squeeze(1)  # [B, num_classes]

     rows = torch.arange(len(scores))
     target_scores = scores[rows, labels].unsqueeze(1)  # shape [B, 1]

     # margin violation of every class relative to the ground-truth class
     violations = self.margin + scores - target_scores
     # the ground-truth class must not compete against itself
     violations[rows, labels] = 0.0
     worst_violation, _ = violations.max(dim=1)  # shape [B]
     return worst_violation.clamp(0).mean()
Exemple #3
0
 def forward_train(self, img_features, all_class_attributes,
                   class_attributes, labels):
     '''
     Compute the max-margin ranking loss for one batch of spatial features.

     img_features: torch.Tensor of shape [B, img_feature_size, H, W]
     class_attributes: torch.Tensor of shape [B, num_attributes]
         (not read by this method)
     labels: torch.Tensor of shape [B]
     all_class_attributes: torch.Tensor of shape [num_attributes, num_classes]
     returns scalar loss
     '''
     # contract the channel axis with W, then move attributes to dim 1
     attr_maps = torch.tensordot(img_features, self.W, [[1], [0]])
     attr_maps = attr_maps.permute(0, 3, 1, 2)  # [B, num_attributes, H, W]
     embedded = self.apply_gmpool(attr_maps)  # shape [B, num_attributes]
     has_nan = torch.any(embedded.isnan())
     if has_nan:
         # debugging aid: flags NaNs coming out of the pooling step
         print("YIKES")
     embedded = normalizeFeaturesL2(embedded)
     scores = torch.matmul(embedded.unsqueeze(1),
                           all_class_attributes).squeeze(1)  # [B, num_classes]

     rows = torch.arange(len(scores))
     target_scores = scores[rows, labels].unsqueeze(1)  # shape [B, 1]

     # margin violation of every class relative to the ground-truth class
     violations = self.margin + scores - target_scores
     # the ground-truth class must not compete against itself
     violations[rows, labels] = 0.0
     worst_violation, _ = violations.max(dim=1)  # shape [B]
     return worst_violation.clamp(0).mean()
Exemple #4
0
    def __init__(self, img_feature_size, num_attributes, margin):
        '''
        Store the margin and create the learnable projection matrix W.

        img_feature_size: number of rows of W
        num_attributes: number of columns of W
        margin: value stored on self.margin
        '''
        super(SJE_MHA, self).__init__()
        self.margin = margin

        # copying initialization technique from original code:
        # uniform random values, normalized on the transposed matrix
        initial = torch.rand(img_feature_size,
                             num_attributes,
                             requires_grad=True)
        transposed = initial.permute(1, 0)
        initial = normalizeFeaturesL2(transposed).permute(1, 0)
        self.W = nn.Parameter(initial, requires_grad=True)
Exemple #5
0
    def __init__(self, dataset_name, split, norm_type='none', norm_info=None):
        '''
        Load the res101 features and attribute splits of one dataset split.

        dataset_name: one of 'APY', 'AWA1', 'AWA2', 'CUB', 'SUN'
        split: 'train', 'val' or 'test' ('test' reads 'test_unseen_loc')
        norm_type: 'std', 'L2' or 'none' (the spelling 'None' is accepted too)
        norm_info: dict with 'std' and 'mean' entries; required when
            norm_type is 'std' and split is not 'train'
        '''
        assert dataset_name in ('APY', 'AWA1', 'AWA2', 'CUB', 'SUN')
        loc_dict = {
            'train': 'train_loc',
            'val': 'val_loc',
            'test': 'test_unseen_loc'
        }
        assert split in loc_dict
        loc = loc_dict[split]

        # load data
        data_folder = '../xlsa17/data/' + dataset_name + '/'
        res101 = io.loadmat(data_folder + 'res101.mat')
        att_splits = io.loadmat(data_folder + 'att_splits.mat')

        # the .mat indices are 1-based; compute the 0-based selection once
        sample_idx = np.squeeze(att_splits[loc]) - 1

        # filter based on split
        self.img_features = torch.Tensor(
            res101['features'][:, sample_idx]).permute(1, 0)  # shape [N,d]
        self.img_names = [
            entry[0].split('/')[-1]
            for entry in res101['image_files'][sample_idx, 0]
        ]
        self.labels = torch.LongTensor(
            np.squeeze(res101['labels'][sample_idx]))  # shape [N]
        unique_labels = np.unique(self.labels)
        # NOTE(review): the AWA2 constants are used whatever dataset_name is
        self.attributes = AWA2_ATTRIBUTES
        self.classes = np.array(AWA2_CLASSES)[unique_labels - 1]
        # remap the original 1-based labels to 0..num_classes-1; the unique
        # labels are sorted, so a new id never collides with a pending label
        for new_label, label in enumerate(unique_labels):
            self.labels[self.labels == label] = new_label
        self.class_attributes = torch.Tensor(
            att_splits['att'][:, unique_labels -
                              1])  # shape [num_attributes, num_classes]

        self.length = len(self.labels)
        assert self.length == self.img_features.shape[0]

        # 'none' is the declared default and has to pass this check; the
        # legacy spelling 'None' stays valid for existing callers
        assert norm_type in ('std', 'L2', 'none', 'None')
        if norm_type == 'std':
            if split == 'train':
                self.norm_info = {
                    'std': self.img_features.std(0),
                    'mean': self.img_features.mean(0),
                }
            else:
                assert norm_info is not None
                self.norm_info = norm_info
            # unsqueeze returns a view, so zero entries are also replaced
            # inside self.norm_info['std'] (avoids division by zero)
            std = self.norm_info['std'].unsqueeze(0)
            std[std == 0] = 1
            mean = self.norm_info['mean'].unsqueeze(0)
            self.img_features = (self.img_features - mean) / std
        elif norm_type == 'L2':
            self.img_features = normalizeFeaturesL2(self.img_features)
Exemple #6
0
 def forward_test(self, img_features, all_class_attributes):
     '''
     Predict one class index per sample.

     img_features: image features; a 4-D input is first reduced with
         self.avg_pool
     all_class_attributes: matrix the normalized projections are multiplied
         with (inline notes in this file give [num_attributes, num_classes])
     returns the argmax over dim 1 of the score matrix, shape [B]
     '''
     pooled = img_features
     if img_features.dim() == 4:
         # collapse the two trailing spatial dimensions
         pooled = self.avg_pool(img_features).squeeze(2).squeeze(2)

     embedded = torch.matmul(pooled.unsqueeze(1), self.W).squeeze(1)
     embedded = normalizeFeaturesL2(embedded)  # shape [B, num_attributes]
     class_scores = torch.matmul(
         embedded.unsqueeze(1),
         all_class_attributes).squeeze(1)  # shape [B, num_classes]
     return class_scores.argmax(1)
Exemple #7
0
 def forward_test(self, img_features, all_class_attributes):
     '''
     Predict one class index per sample from spatial feature maps.

     img_features: tensor whose dim 1 is contracted with dim 0 of self.W
         (the permute below requires a 4-D result)
     all_class_attributes: matrix the normalized projections are multiplied
         with (inline notes in this file give [num_attributes, num_classes])
     returns the argmax over dim 1 of the score matrix, shape [B]
     '''
     # contract the channel axis with W, then move attributes to dim 1
     attr_maps = torch.tensordot(img_features, self.W, [[1], [0]])
     attr_maps = attr_maps.permute(0, 3, 1, 2)  # [B, num_attributes, H, W]
     embedded = self.apply_gmpool(attr_maps)  # shape [B, num_attributes]
     has_nan = torch.any(embedded.isnan())
     if has_nan:
         # debugging aid: flags NaNs coming out of the pooling step
         print("YIKES")
     embedded = normalizeFeaturesL2(embedded)
     class_scores = torch.matmul(
         embedded.unsqueeze(1),
         all_class_attributes).squeeze(1)  # shape [B, num_classes]
     return class_scores.argmax(1)
Exemple #8
0
    def __init__(self, img_feature_size, num_attributes, margin):
        '''
        Store the margin and create the learnable parameters and pooling.

        img_feature_size: number of rows of W
        num_attributes: number of columns of W and length of self.weights
        margin: value stored on self.margin
        '''
        super(SJE_WeightedCosine, self).__init__()
        self.margin = margin

        # copying initialization technique from original code:
        # uniform random values, normalized on the transposed matrix
        initial = torch.rand(img_feature_size,
                             num_attributes,
                             requires_grad=True)
        transposed = initial.permute(1, 0)
        initial = normalizeFeaturesL2(transposed).permute(1, 0)
        self.W = nn.Parameter(initial, requires_grad=True)

        # one learnable value per attribute, starting at zero
        self.weights = nn.Parameter(torch.zeros(num_attributes,
                                                requires_grad=True),
                                    requires_grad=True)

        # reduces each feature map to a single 1x1 cell
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
Exemple #9
0
    def __init__(self, img_feature_size, num_attributes, margin):
        '''
        Store the margin and create the learnable parameters.

        img_feature_size: number of rows of W
        num_attributes: number of columns of W and length of self.power
        margin: value stored on self.margin
        '''
        super(SJE_GMPool, self).__init__()
        self.margin = margin

        # copying initialization technique from original code:
        # uniform random values, normalized on the transposed matrix
        initial = torch.rand(img_feature_size,
                             num_attributes,
                             requires_grad=True)
        transposed = initial.permute(1, 0)
        initial = normalizeFeaturesL2(transposed).permute(1, 0)
        self.W = nn.Parameter(initial, requires_grad=True)

        # one learnable value per attribute, starting at zero
        self.power = nn.Parameter(torch.zeros(num_attributes,
                                              requires_grad=True),
                                  requires_grad=True)

        # this is a hack: two random indices drawn from 0..999
        candidate_indices = range(1000)
        self.example_indices = random.choices(candidate_indices, k=2)
Exemple #10
0
 def __getitem__(self, idx):
     '''
     Return one sample as a dict.

     idx: index into self.img_features and self.labels
     returns dict with keys 'img' (normalized according to self.norm_type),
         'label' and 'class_attributes' (the label's column of
         self.class_attributes)
     '''
     img = torch.Tensor(self.img_features[idx, ...])  # shape [C,H,W]
     # normalize
     if self.norm_type == 'std':
         # unsqueeze returns a view, so zero entries are also replaced inside
         # self.norm_info['std'] (avoids division by zero)
         std = self.norm_info['std'].unsqueeze(0)
         std[std == 0] = 1
         mean = self.norm_info['mean'].unsqueeze(0)
         img = (img - mean) / std
     elif self.norm_type == 'L2':
         # read the sizes from the tensor instead of assuming a 2048x7x7 map
         channels, height, width = img.shape
         # one row per spatial location, normalized, then back to [C,H,W]
         per_location = img.permute(1, 2, 0).reshape(height * width, channels)
         img = normalizeFeaturesL2(per_location).reshape(
             height, width, channels).permute(2, 0, 1)
     label = self.labels[idx]
     class_attributes = self.class_attributes[:, label]
     return {
         'img': img,
         'label': label,
         'class_attributes': class_attributes
     }
Exemple #11
0
 def __init__(self, img_feature_size, num_attributes, margin):
     '''
     Store the margin and build the linear projection layer.

     img_feature_size: input size of the nn.Linear projection
     num_attributes: output size of the nn.Linear projection
     margin: value stored on self.margin
     '''
     super(SJE_Linear, self).__init__()
     self.margin = margin
     self.projection = nn.Linear(img_feature_size, num_attributes)
     # replace the default weight init with its normalizeFeaturesL2 version
     default_weights = self.projection.weight.data
     self.projection.weight.data = normalizeFeaturesL2(default_weights)