def forward_test(self, img_features, all_class_attributes):
    '''
    Predict one class index per image.

    img_features: torch.Tensor fed to self.projection
        (presumably of shape [B, img_feature_size] -- confirm against caller)
    all_class_attributes: torch.Tensor of shape [num_attributes, num_classes]

    returns torch.Tensor of shape [B] with the index of the best-scoring class
    '''
    # project the image features into attribute space: [B, num_attributes]
    projected = self.projection(img_features)
    # normalize each projected vector to have unit length
    projected = normalizeFeaturesL2(projected)

    # compatibility of every image with every class: [B, num_classes]
    batched = projected.unsqueeze(1)  # [B, 1, num_attributes]
    compat = torch.matmul(batched, all_class_attributes).squeeze(1)

    return compat.argmax(1)  # shape [B]
def forward_train(self, img_features, all_class_attributes, class_attributes,
                  labels):
    '''
    Compute the mean max-margin ranking loss of a batch.

    img_features: torch.Tensor of shape [B, img_feature_size]; a 4-D input
        is first passed through self.avg_pool and has dims 2 and 3 squeezed
    class_attributes: torch.Tensor of shape [B, num_attributes]
        (accepted for interface compatibility, not read here)
    labels: torch.Tensor of shape [B]
    all_class_attributes: torch.Tensor of shape [num_attributes, num_classes]

    returns scalar loss
    '''
    if img_features.ndim == 4:
        # remove h, w dimensions
        pooled = self.avg_pool(img_features)
        img_features = pooled.squeeze(2).squeeze(2)

    # project into attribute space: [B, num_attributes]
    projected = torch.matmul(img_features.unsqueeze(1), self.W).squeeze(1)
    # normalize each projected vector to have unit length
    projected = normalizeFeaturesL2(projected)

    # compatibility of every image with every class: [B, num_classes]
    compat = torch.matmul(projected.unsqueeze(1),
                          all_class_attributes).squeeze(1)

    # score of the ground-truth class of each sample: [B, 1]
    true_scores = compat[torch.arange(len(compat)), labels].unsqueeze(1)

    # add margin to scores: [B, num_classes]
    violations = self.margin + compat - true_scores
    # the ground-truth class never counts as a violation of itself
    violations[torch.arange(len(violations)), labels] = 0.0

    # keep only the worst offending class per sample: [B]
    worst = violations.max(dim=1)[0]
    return worst.clamp(0).mean()
def forward_train(self, img_features, all_class_attributes, class_attributes,
                  labels):
    '''
    Compute the mean max-margin ranking loss of a batch of feature maps.

    img_features: torch.Tensor of shape [B, img_feature_size, H, W]
    class_attributes: torch.Tensor of shape [B, num_attributes]
        (accepted for interface compatibility, not read here)
    labels: torch.Tensor of shape [B]
    all_class_attributes: torch.Tensor of shape [num_attributes, num_classes]

    returns scalar loss
    '''
    # contract the channel axis of the feature map with the rows of W, then
    # bring the attribute axis forward: [B, num_attributes, H, W]
    attr_maps = torch.tensordot(img_features, self.W, [[1], [0]])
    attr_maps = attr_maps.permute(0, 3, 1, 2)

    # pool away the spatial axes: [B, num_attributes]
    projected = self.apply_gmpool(attr_maps)
    if torch.any(projected.isnan()):
        print("YIKES")

    # normalize each projected vector to have unit length
    projected = normalizeFeaturesL2(projected)

    # compatibility of every image with every class: [B, num_classes]
    compat = torch.matmul(projected.unsqueeze(1),
                          all_class_attributes).squeeze(1)

    # score of the ground-truth class of each sample: [B, 1]
    true_scores = compat[torch.arange(len(compat)), labels].unsqueeze(1)

    # add margin to scores: [B, num_classes]
    violations = self.margin + compat - true_scores
    # the ground-truth class never counts as a violation of itself
    violations[torch.arange(len(violations)), labels] = 0.0

    # keep only the worst offending class per sample: [B]
    worst = violations.max(dim=1)[0]
    return worst.clamp(0).mean()
def __init__(self, img_feature_size, num_attributes, margin):
    '''
    Store the margin and create the learnable projection matrix W.

    img_feature_size: number of rows of W
    num_attributes: number of columns of W
    margin: value kept as self.margin
    '''
    super(SJE_MHA, self).__init__()
    self.margin = margin

    # copying initialization technique from original code: draw uniform
    # random values, run normalizeFeaturesL2 on the transposed matrix and
    # transpose back (presumably leaving unit-length columns -- confirm
    # against normalizeFeaturesL2)
    initial = torch.rand(img_feature_size, num_attributes, requires_grad=True)
    transposed = initial.permute(1, 0)
    initial = normalizeFeaturesL2(transposed).permute(1, 0)

    self.W = nn.Parameter(initial, requires_grad=True)
def __init__(self, dataset_name, split, norm_type='none', norm_info=None):
    '''
    Load the pre-extracted features, labels and class attributes of one
    split and optionally normalize the features.

    dataset_name: one of 'APY', 'AWA1', 'AWA2', 'CUB', 'SUN'
    split: one of 'train', 'val', 'test' ('test' reads 'test_unseen_loc')
    norm_type: 'std' (standardize with per-feature mean/std), 'L2'
        (apply normalizeFeaturesL2) or 'none' / 'None' (leave untouched)
    norm_info: dict with 'std' and 'mean' tensors; required when
        norm_type == 'std' and split != 'train', ignored otherwise

    raises AssertionError on an unknown dataset_name, split or norm_type, or
        when norm_info is missing for a non-train split with norm_type 'std'
    '''
    assert dataset_name in ('APY', 'AWA1', 'AWA2', 'CUB', 'SUN')
    loc_dict = {
        'train': 'train_loc',
        'val': 'val_loc',
        'test': 'test_unseen_loc',
    }
    assert split in loc_dict
    loc = loc_dict[split]

    # load data
    data_folder = '../xlsa17/data/' + dataset_name + '/'
    res101 = io.loadmat(data_folder + 'res101.mat')
    att_splits = io.loadmat(data_folder + 'att_splits.mat')

    # filter based on split; the stored locations are 1-based
    sample_idx = np.squeeze(att_splits[loc] - 1)

    self.img_features = torch.Tensor(
        res101['features'][:, sample_idx]).permute(1, 0)  # shape [N,d]
    self.img_names = [
        entry[0].split('/')[-1]
        for entry in res101['image_files'][sample_idx, 0]
    ]
    self.labels = torch.LongTensor(
        np.squeeze(res101['labels'][sample_idx]))  # shape [N]

    unique_labels = np.unique(self.labels)
    # NOTE(review): attribute and class names come from the AWA2 constants
    # whatever dataset_name is -- confirm this is intended for other datasets.
    self.attributes = AWA2_ATTRIBUTES
    self.classes = np.array(AWA2_CLASSES)[unique_labels - 1]

    # Remap the original 1-based labels to 0..num_classes-1. unique_labels
    # is sorted and every remaining original label is larger than the index
    # being written, so a remapped value never collides with a later label.
    for new_label, old_label in enumerate(unique_labels):
        self.labels[self.labels == old_label] = new_label

    self.class_attributes = torch.Tensor(
        att_splits['att'][:, unique_labels - 1]
    )  # shape [num_attributes, num_classes]

    self.length = len(self.labels)
    assert self.length == self.img_features.shape[0]

    # The default value is 'none' but only 'None' used to be accepted, so
    # constructing the dataset with defaults always failed here. Both
    # spellings are accepted to stay compatible with existing callers.
    assert norm_type in ('std', 'L2', 'None', 'none')
    if norm_type == 'std':
        if split == 'train':
            self.norm_info = {
                'std': self.img_features.std(0),
                'mean': self.img_features.mean(0),
            }
        else:
            assert norm_info is not None
            self.norm_info = norm_info
        # unsqueeze returns a view, so zero entries of the stored std are
        # overwritten in place to avoid dividing by zero
        std = self.norm_info['std'].unsqueeze(0)
        std[std == 0] = 1
        mean = self.norm_info['mean'].unsqueeze(0)
        self.img_features = (self.img_features - mean) / std
    elif norm_type == 'L2':
        self.img_features = normalizeFeaturesL2(self.img_features)
def forward_test(self, img_features, all_class_attributes):
    '''
    Predict one class index per image.

    img_features: torch.Tensor multiplied with self.W; a 4-D input is first
        passed through self.avg_pool and has dims 2 and 3 squeezed
    all_class_attributes: torch.Tensor of shape [num_attributes, num_classes]

    returns torch.Tensor of shape [B] with the index of the best-scoring class
    '''
    if img_features.ndim == 4:
        # remove h, w dimensions
        pooled = self.avg_pool(img_features)
        img_features = pooled.squeeze(2).squeeze(2)

    # project into attribute space: [B, num_attributes]
    projected = torch.matmul(img_features.unsqueeze(1), self.W).squeeze(1)
    # normalize each projected vector to have unit length
    projected = normalizeFeaturesL2(projected)

    # compatibility of every image with every class: [B, num_classes]
    compat = torch.matmul(projected.unsqueeze(1),
                          all_class_attributes).squeeze(1)

    return compat.argmax(1)  # shape [B]
def forward_test(self, img_features, all_class_attributes):
    '''
    Predict one class index per feature map.

    img_features: torch.Tensor whose dim 1 is contracted with dim 0 of
        self.W (presumably [B, img_feature_size, H, W] -- confirm against
        caller)
    all_class_attributes: torch.Tensor of shape [num_attributes, num_classes]

    returns torch.Tensor of shape [B] with the index of the best-scoring class
    '''
    # contract the channel axis of the feature map with the rows of W, then
    # bring the attribute axis forward: [B, num_attributes, H, W]
    attr_maps = torch.tensordot(img_features, self.W, [[1], [0]])
    attr_maps = attr_maps.permute(0, 3, 1, 2)

    # pool away the spatial axes: [B, num_attributes]
    projected = self.apply_gmpool(attr_maps)
    if torch.any(projected.isnan()):
        print("YIKES")

    # normalize each projected vector to have unit length
    projected = normalizeFeaturesL2(projected)

    # compatibility of every image with every class: [B, num_classes]
    compat = torch.matmul(projected.unsqueeze(1),
                          all_class_attributes).squeeze(1)

    return compat.argmax(1)  # shape [B]
def __init__(self, img_feature_size, num_attributes, margin):
    '''
    Store the margin and create the learnable tensors of the model.

    img_feature_size: number of rows of W
    num_attributes: number of columns of W and length of self.weights
    margin: value kept as self.margin
    '''
    super(SJE_WeightedCosine, self).__init__()
    self.margin = margin

    # copying initialization technique from original code: draw uniform
    # random values, run normalizeFeaturesL2 on the transposed matrix and
    # transpose back
    initial = torch.rand(img_feature_size, num_attributes, requires_grad=True)
    transposed = initial.permute(1, 0)
    initial = normalizeFeaturesL2(transposed).permute(1, 0)
    self.W = nn.Parameter(initial, requires_grad=True)

    # one learnable scalar per attribute, all starting at zero
    self.weights = nn.Parameter(
        torch.zeros(num_attributes, requires_grad=True), requires_grad=True)

    # collapses any spatial grid to a single 1x1 cell
    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
def __init__(self, img_feature_size, num_attributes, margin):
    '''
    Store the margin and create the learnable tensors of the model.

    img_feature_size: number of rows of W
    num_attributes: number of columns of W and length of self.power
    margin: value kept as self.margin
    '''
    super(SJE_GMPool, self).__init__()
    self.margin = margin

    # copying initialization technique from original code: draw uniform
    # random values, run normalizeFeaturesL2 on the transposed matrix and
    # transpose back
    initial = torch.rand(img_feature_size, num_attributes, requires_grad=True)
    transposed = initial.permute(1, 0)
    initial = normalizeFeaturesL2(transposed).permute(1, 0)
    self.W = nn.Parameter(initial, requires_grad=True)

    # one learnable scalar per attribute, all starting at zero
    self.power = nn.Parameter(
        torch.zeros(num_attributes, requires_grad=True), requires_grad=True)

    # this is a hack: two indices drawn with replacement from 0..999
    self.example_indices = random.choices(range(1000), k=2)
def __getitem__(self, idx):
    '''
    Return one sample as a dict.

    idx: index into self.img_features / self.labels

    returns dict with
        'img': the (optionally normalized) feature map, shape [C,H,W]
        'label': entry idx of self.labels
        'class_attributes': column `label` of self.class_attributes
    '''
    img = torch.Tensor(self.img_features[idx, ...])  # shape [C,H,W]

    # normalize
    if self.norm_type == 'std':
        # unsqueeze returns a view, so zero entries of the stored std are
        # overwritten in place to avoid dividing by zero
        std = self.norm_info['std'].unsqueeze(0)
        std[std == 0] = 1
        mean = self.norm_info['mean'].unsqueeze(0)
        img = (img - mean) / std
    elif self.norm_type == 'L2':
        # Sizes are read from the tensor instead of being hard-coded to a
        # 2048x7x7 map, so other [C,H,W] shapes work as well.
        channels, height, width = img.shape
        # one row per spatial location: [H*W, C]
        per_location = img.permute(1, 2, 0).view(height * width, channels)
        per_location = normalizeFeaturesL2(per_location)
        # restore the [C,H,W] layout
        img = per_location.view(height, width, channels).permute(2, 0, 1)

    label = self.labels[idx]
    class_attributes = self.class_attributes[:, label]
    return {
        'img': img,
        'label': label,
        'class_attributes': class_attributes,
    }
def __init__(self, img_feature_size, num_attributes, margin):
    '''
    Store the margin and create the linear projection layer.

    img_feature_size: input size of the projection
    num_attributes: output size of the projection
    margin: value kept as self.margin
    '''
    super(SJE_Linear, self).__init__()
    self.margin = margin

    projection = nn.Linear(img_feature_size, num_attributes)
    # replace the default initial weights by their normalizeFeaturesL2
    # version; the bias keeps its default initialization
    projection.weight.data = normalizeFeaturesL2(projection.weight.data)
    self.projection = projection