def get_metric_learning_loss_funcs(
    matches: List[DictConfig],
):
    """
    Build one pytorch metric learning loss per entry of the matcher config.

    Parameters
    ----------
    matches
        A list of matches from the matcher config. For every entry the fields
        ``loss.type``, ``loss.pos_margin``, ``loss.neg_margin`` and
        ``distance.type`` are read.

    Returns
    -------
    A list of loss functions, in the same order as ``matches``.

    Raises
    ------
    ValueError
        If the lower-cased ``loss.type`` of an entry is not ``CONTRASTIVE_LOSS``.
    """
    loss_funcs = []
    for match_cfg in matches:
        # Guard clause: the contrastive loss is the only type handled here.
        if match_cfg.loss.type.lower() != CONTRASTIVE_LOSS:
            raise ValueError(
                f"Unknown metric learning loss: {match_cfg.loss.type}")

        contrastive = losses.ContrastiveLoss(
            pos_margin=match_cfg.loss.pos_margin,
            neg_margin=match_cfg.loss.neg_margin,
            distance=get_metric_learning_distance_func(
                match_cfg.distance.type),
        )
        loss_funcs.append(contrastive)

    return loss_funcs
def __init__(
    self,
    model,
    margin=0.2,
    lr=1e-3,
    lr_patience=2,
    lr_decay_ratio=0.5,
    memory_batch_max_num=2048,
):
    """Store the hyper-parameters and build the cross-batch-memory loss.

    Args:
        model: Embedding network; its ``feature_dim`` attribute is passed to
            ``losses.CrossBatchMemory`` as the second positional argument.
        margin: Forwarded to ``miners.MultiSimilarityMiner`` as ``epsilon``.
        lr: Stored on the instance; not used inside this constructor.
        lr_patience: Stored on the instance; not used inside this constructor.
        lr_decay_ratio: Stored on the instance; not used inside this constructor.
        memory_batch_max_num: Forwarded to ``losses.CrossBatchMemory`` as
            ``memory_size``.
    """
    super().__init__()
    # NOTE(review): presumably the Lightning helper that records the
    # constructor arguments - it is deliberately left as the first statement
    # after super().__init__(), before any extra local is created.
    self.save_hyperparameters()

    self.model = model
    self.margin = margin
    self.lr = lr
    self.lr_patience = lr_patience
    self.lr_decay_ratio = lr_decay_ratio
    self.memory_batch_max_num = memory_batch_max_num

    # Contrastive loss measured with cosine similarity (pos_margin=1,
    # neg_margin=0), wrapped in a cross-batch memory with a
    # multi-similarity miner.
    pair_loss = losses.ContrastiveLoss(
        pos_margin=1,
        neg_margin=0,
        distance=CosineSimilarity(),
    )
    embedding_size = self.model.feature_dim
    memory_size = self.memory_batch_max_num
    pair_miner = miners.MultiSimilarityMiner(epsilon=self.margin)
    self.loss_func = losses.CrossBatchMemory(
        pair_loss,
        embedding_size,
        memory_size=memory_size,
        miner=pair_miner,
    )
def __init__(self, pos_margin, neg_margin, normalize_embeddings):
    """Create the distance, the hard-example miner and the contrastive loss.

    Args:
        pos_margin: Positive-pair margin handed to ``losses.ContrastiveLoss``.
        neg_margin: Negative-pair margin handed to ``losses.ContrastiveLoss``.
        normalize_embeddings: Forwarded to ``LpDistance``.
    """
    self.pos_margin, self.neg_margin = pos_margin, neg_margin

    # One distance object is shared by the miner and the loss.
    # NOTE(review): no ``power`` argument is passed, so whether this distance
    # is squared depends on LpDistance's default - confirm against the
    # intended (squared vs. plain Euclidean) formulation.
    lp_distance = LpDistance(normalize_embeddings=normalize_embeddings)
    self.distance = lp_distance

    self.miner_fn = HardTripletMinerWithMasks(distance=lp_distance)
    self.loss_fn = losses.ContrastiveLoss(
        pos_margin=pos_margin,
        neg_margin=neg_margin,
        distance=lp_distance,
    )
def __init_loss_fn(self, hparams):
    """Attach the miner and the loss selected by ``hparams.loss_kind``.

    Args:
        hparams: Object exposing ``loss_kind`` and, depending on its value,
            ``contrastive_neg_margin`` ("contrastive") or ``margin``
            ("triplet").

    Raises:
        ValueError: If ``hparams.loss_kind`` is neither "contrastive" nor
            "triplet".
    """
    # The miner is created regardless of which loss is chosen.
    self.miner = miners.DistanceWeightedMiner(cutoff=0.5,
                                              nonzero_loss_cutoff=1.4)

    loss_kind = hparams.loss_kind
    if loss_kind == "contrastive":
        chosen_loss = losses.ContrastiveLoss(
            neg_margin=hparams.contrastive_neg_margin)
    elif loss_kind == "triplet":
        chosen_loss = losses.TripletMarginLoss(margin=hparams.margin)
    else:
        raise ValueError(f"unknown loss: {loss_kind}")

    self.loss_fn = chosen_loss
def test_distributed_loss(self):
    """Spawn 1 to 4 processes and hand them a single-process reference result.

    For each world size, a reference loss and one SGD step are computed on the
    concatenated batch with ``ContrastiveLoss`` + ``MultiSimilarityMiner``.
    ``single_process_function`` is then spawned once per rank with the
    per-rank data, fresh loss/miner copies and the reference objects.
    """
    # Constant across all world sizes.
    batch_size = 20
    lr = 1

    for world_size in range(1, 5):
        # One batch of inputs / binary labels per simulated rank.
        inputs = [torch.randn(batch_size, 10) for _ in range(world_size)]
        labels = [
            torch.randint(low=0, high=2, size=(batch_size,))
            for _ in range(world_size)
        ]

        # Two models starting from identical weights: the reference one is
        # stepped here, the other one is passed to the spawned processes.
        reference_model = ToyMpModel().to(self.device)
        model = ToyMpModel().to(self.device)
        model.load_state_dict(reference_model.state_dict())

        reference_optimizer = optim.SGD(reference_model.parameters(), lr=lr)
        reference_optimizer.zero_grad()

        # Reference pass over the full (concatenated) batch.
        full_inputs = torch.cat(inputs, dim=0).to(self.device)
        full_labels = torch.cat(labels, dim=0).to(self.device)
        full_outputs = reference_model(full_inputs)

        reference_loss_fn = losses.ContrastiveLoss()
        reference_miner_fn = miners.MultiSimilarityMiner()
        reference_indices = reference_miner_fn(full_outputs, full_labels)
        reference_loss = reference_loss_fn(full_outputs, full_labels,
                                           reference_indices)
        reference_loss.backward()
        reference_optimizer.step()

        # need to make separate copy to do test properly
        loss_fn = losses.ContrastiveLoss()
        miner_fn = miners.MultiSimilarityMiner()

        spawn_args = (
            world_size,
            lr,
            model,
            inputs,
            labels,
            loss_fn,
            miner_fn,
            reference_model,
            reference_loss_fn,
            reference_miner_fn,
            reference_loss.detach(),
            reference_indices,
            self.device,
        )
        mp.spawn(single_process_function,
                 args=spawn_args,
                 nprocs=world_size,
                 join=True)
def contrastive_loss(trial, pos_margin_range=(0.0, 2.0), neg_margin_range=(0.0, 2.0), **kwargs):
    """Sample contrastive-loss margins from ``trial`` and build the loss.

    Args:
        trial: Hyper-parameter trial object (presumably an Optuna ``Trial``);
            must provide ``suggest_float`` or ``suggest_uniform``.
        pos_margin_range: ``(low, high)`` bounds for the "pos_margin" sample.
        neg_margin_range: ``(low, high)`` bounds for the "neg_margin" sample.
        **kwargs: Accepted and ignored.

    Returns:
        A dict with the single key ``"loss"`` mapping to the configured
        ``losses.ContrastiveLoss``.
    """
    # Optuna deprecated ``suggest_uniform`` in favour of ``suggest_float``.
    # Prefer the newer method, but keep working with trial objects that only
    # expose the old one.
    suggest = getattr(trial, "suggest_float", None)
    if suggest is None:
        suggest = trial.suggest_uniform

    pos_margin = suggest("pos_margin", *pos_margin_range)
    neg_margin = suggest("neg_margin", *neg_margin_range)

    # Regularizer keyword arguments come from the same trial.
    loss = losses.ContrastiveLoss(pos_margin=pos_margin,
                                  neg_margin=neg_margin,
                                  **sample_regularizer(trial))
    return {"loss": loss}
def __init__(self, pos_margin, neg_margin, normalize_embeddings):
    """Create a stats-collecting distance, miner, reducer and contrastive loss.

    Args:
        pos_margin: Positive-pair margin handed to ``losses.ContrastiveLoss``.
        neg_margin: Negative-pair margin handed to ``losses.ContrastiveLoss``.
        normalize_embeddings: Forwarded to ``LpDistance``.
    """
    self.pos_margin, self.neg_margin = pos_margin, neg_margin

    # The distance, the reducer and the loss are all created with
    # collect_stats=True; the same distance object feeds miner and loss.
    # NOTE(review): no ``power`` argument is passed, so whether this distance
    # is squared depends on LpDistance's default - confirm against the
    # intended (squared vs. plain Euclidean) formulation.
    lp_distance = LpDistance(normalize_embeddings=normalize_embeddings,
                             collect_stats=True)
    self.distance = lp_distance
    self.miner_fn = HardTripletMinerWithMasks(distance=lp_distance)

    non_zero_mean = reducers.AvgNonZeroReducer(collect_stats=True)
    self.loss_fn = losses.ContrastiveLoss(
        pos_margin=pos_margin,
        neg_margin=neg_margin,
        distance=lp_distance,
        reducer=non_zero_mean,
        collect_stats=True,
    )
def __init__(self, args):
    """Build a ResNet-18 extractor/classifier pair and pick the training loss.

    Args:
        args: Configuration object; ``args.loss`` selects the loss and must be
            one of 'CrossEntropy', 'ArcFace', 'ContrastiveLoss',
            'TripletMargin' or 'CircleLoss'.

    Raises:
        ValueError: If ``args.loss`` is none of the supported names.
    """
    super().__init__()

    self.model = torchvision.models.resnet18(pretrained=True)
    # Replace the final fully connected layer.
    # NOTE(review): the layer has 2378 outputs - confirm this matches the
    # number of classes in the dataset.
    self.model.fc = nn.Linear(512, 2378, True)
    self.args = args

    # Split the backbone: everything but the last child is the extractor,
    # the last child (the fc layer) is the classifier.
    children = list(self.model.named_children())
    self.extractor = torch.nn.Sequential(OrderedDict(children[:-1]))
    self.classifier = torch.nn.Sequential(OrderedDict(children[-1:]))

    # NOTE(review): an earlier remark stated that only the CrossEntropy loss
    # works at the moment - verify the other branches before relying on them.
    loss_name = self.args.loss
    if loss_name == 'CrossEntropy':
        chosen_loss = torch.nn.CrossEntropyLoss()
        needs_classifier = True
    elif loss_name == 'ArcFace':
        chosen_loss = losses.ArcFaceLoss(
            margin=0.5,
            embedding_size=self.classifier.fc.in_features,
            num_classes=self.classifier.fc.out_features)
        needs_classifier = False
    elif loss_name == 'ContrastiveLoss':
        chosen_loss = losses.ContrastiveLoss(pos_margin=0, neg_margin=1)
        needs_classifier = False
        # TODO: add a sampler
    elif loss_name == 'TripletMargin':
        chosen_loss = losses.TripletMarginLoss(margin=0.1)
        needs_classifier = False
    elif loss_name == 'CircleLoss':
        chosen_loss = losses.CircleLoss(m=0.4, gamma=80)
        needs_classifier = False
    else:
        raise ValueError(f'Unsupported loss: {loss_name}')

    self.loss = chosen_loss
    self.loss_requires_classifier = needs_classifier
def __init__(self, margin=0.5, **kwargs):
    """Wrap ``losses.ContrastiveLoss`` using ``margin`` as its negative margin.

    Args:
        margin: Stored as ``self.margin`` and passed as ``neg_margin``.
        **kwargs: Accepted and ignored.
    """
    super(ContrastiveLoss, self).__init__()
    self.margin = margin

    # Only the negative margin is configured; everything else is left to the
    # library defaults.
    wrapped_loss = losses.ContrastiveLoss(neg_margin=margin)
    self.loss_func = wrapped_loss
def train_model(model, model_test, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` on the satellite/street/drone(/google) loaders.

    Besides its parameters the function reads several names from the
    enclosing scope: ``opt``, ``dataloaders``, ``dataset_sizes``,
    ``start_epoch``, ``use_gpu``, ``fp16``, ``amp``, ``version``, ``y_loss``,
    ``y_err``, ``save_network`` and ``update_average``.

    Args:
        model: Network being trained; called with 2, 3 or 4 input batches
            depending on ``opt.views`` / ``opt.extra_Google``.
        model_test: Passed to ``update_average`` together with ``model`` when
            ``opt.moving_avg < 1.0``.
        criterion: Classification criterion applied to logits and labels.
        optimizer: Optimizer stepped once per processed batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Epochs run from ``start_epoch`` up to ``num_epochs - 1``.

    Returns:
        The ``model`` object that was passed in, after training.
    """
    since = time.time()

    # best_model_wts = model.state_dict()
    # best_acc = 0.0
    warm_up = 0.1  # We start from the 0.1*lrRate
    warm_iteration = round(dataset_sizes['satellite'] / opt.batchsize) * opt.warm_epoch  # first 5 epoch
    # Optional metric-learning criteria; each is created only when its
    # option flag is set and is used further below under the same flag.
    if opt.arcface:
        criterion_arcface = losses.ArcFaceLoss(num_classes=opt.nclasses, embedding_size=512)
    if opt.cosface:
        criterion_cosface = losses.CosFaceLoss(num_classes=opt.nclasses, embedding_size=512)
    if opt.circle:
        criterion_circle = CircleLoss(m=0.25, gamma=32)  # gamma = 64 may lead to a better result.
    if opt.triplet:
        miner = miners.MultiSimilarityMiner()
        criterion_triplet = losses.TripletMarginLoss(margin=0.3)
    if opt.lifted:
        criterion_lifted = losses.GeneralizedLiftedStructureLoss(neg_margin=1, pos_margin=0)
    if opt.contrast:
        criterion_contrast = losses.ContrastiveLoss(pos_margin=0, neg_margin=1)
    if opt.sphere:
        criterion_sphere = losses.SphereFaceLoss(num_classes=opt.nclasses, embedding_size=512, margin=4)
    for epoch in range(num_epochs - start_epoch):
        epoch = epoch + start_epoch
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        # NOTE(review): the phase list only contains 'train', so the 'val'
        # branches below are not reached from this loop.
        for phase in ['train']:
            if phase == 'train':
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0.0
            running_corrects2 = 0.0
            running_corrects3 = 0.0
            # Iterate over data.
            # zip() ends with the shortest of the four loaders.
            for data, data2, data3, data4 in zip(dataloaders['satellite'], dataloaders['street'], dataloaders['drone'], dataloaders['google']):
                # get the inputs
                inputs, labels = data
                inputs2, labels2 = data2
                inputs3, labels3 = data3
                inputs4, labels4 = data4
                now_batch_size, c, h, w = inputs.shape
                if now_batch_size < opt.batchsize:  # skip the last batch
                    continue
                if use_gpu:
                    inputs = Variable(inputs.cuda().detach())
                    inputs2 = Variable(inputs2.cuda().detach())
                    inputs3 = Variable(inputs3.cuda().detach())
                    labels = Variable(labels.cuda().detach())
                    labels2 = Variable(labels2.cuda().detach())
                    labels3 = Variable(labels3.cuda().detach())
                    if opt.extra_Google:
                        inputs4 = Variable(inputs4.cuda().detach())
                        labels4 = Variable(labels4.cuda().detach())
                else:
                    # NOTE(review): only the first view is wrapped on the
                    # non-GPU path; the other views are used as loaded.
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                if phase == 'val':
                    with torch.no_grad():
                        outputs, outputs2 = model(inputs, inputs2)
                else:
                    if opt.views == 2:
                        outputs, outputs2 = model(inputs, inputs2)
                    elif opt.views == 3:
                        if opt.extra_Google:
                            outputs, outputs2, outputs3, outputs4 = model(inputs, inputs2, inputs3, inputs4)
                        else:
                            outputs, outputs2, outputs3 = model(inputs, inputs2, inputs3)

                # True when any metric-learning option is enabled; in that
                # case each model output is unpacked as (logits, feature).
                return_feature = opt.arcface or opt.cosface or opt.circle or opt.triplet or opt.contrast or opt.lifted or opt.sphere

                if opt.views == 2:
                    _, preds = torch.max(outputs.data, 1)
                    _, preds2 = torch.max(outputs2.data, 1)
                    loss = criterion(outputs, labels) + criterion(outputs2, labels2)
                elif opt.views == 3:
                    if return_feature:
                        logits, ff = outputs
                        logits2, ff2 = outputs2
                        logits3, ff3 = outputs3
                        # L2-normalise every feature row before the metric losses.
                        fnorm = torch.norm(ff, p=2, dim=1, keepdim=True)
                        fnorm2 = torch.norm(ff2, p=2, dim=1, keepdim=True)
                        fnorm3 = torch.norm(ff3, p=2, dim=1, keepdim=True)
                        ff = ff.div(fnorm.expand_as(ff))  # 8*512,tensor
                        ff2 = ff2.div(fnorm2.expand_as(ff2))
                        ff3 = ff3.div(fnorm3.expand_as(ff3))
                        loss = criterion(logits, labels) + criterion(logits2, labels2) + criterion(logits3, labels3)
                        _, preds = torch.max(logits.data, 1)
                        _, preds2 = torch.max(logits2.data, 1)
                        _, preds3 = torch.max(logits3.data, 1)
                        # Multiple perspectives are combined to calculate losses, please join ''--loss_merge'' in run.sh
                        if opt.loss_merge:
                            ff_all = torch.cat((ff, ff2, ff3), dim=0)
                            labels_all = torch.cat((labels, labels2, labels3), dim=0)
                        if opt.extra_Google:
                            logits4, ff4 = outputs4
                            fnorm4 = torch.norm(ff4, p=2, dim=1, keepdim=True)
                            ff4 = ff4.div(fnorm4.expand_as(ff4))
                            # The classification loss is recomputed to include the fourth view.
                            loss = criterion(logits, labels) + criterion(logits2, labels2) + criterion(logits3, labels3) +criterion(logits4, labels4)
                            if opt.loss_merge:
                                ff_all = torch.cat((ff_all, ff4), dim=0)
                                labels_all = torch.cat((labels_all, labels4), dim=0)
                        # Each enabled metric loss is added either once on the
                        # merged features or once per view.
                        if opt.arcface:
                            if opt.loss_merge:
                                loss += criterion_arcface(ff_all, labels_all)
                            else:
                                loss += criterion_arcface(ff, labels) + criterion_arcface(ff2, labels2) + criterion_arcface(ff3, labels3)  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_arcface(ff4, labels4)  # /now_batch_size
                        if opt.cosface:
                            if opt.loss_merge:
                                loss += criterion_cosface(ff_all, labels_all)
                            else:
                                loss += criterion_cosface(ff, labels) + criterion_cosface(ff2, labels2) + criterion_cosface(ff3, labels3)  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_cosface(ff4, labels4)  # /now_batch_size
                        if opt.circle:
                            if opt.loss_merge:
                                loss += criterion_circle(*convert_label_to_similarity(ff_all, labels_all)) / now_batch_size
                            else:
                                loss += criterion_circle(*convert_label_to_similarity(ff, labels)) / now_batch_size + criterion_circle(*convert_label_to_similarity(ff2, labels2)) / now_batch_size + criterion_circle(*convert_label_to_similarity(ff3, labels3)) / now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_circle(*convert_label_to_similarity(ff4, labels4)) / now_batch_size
                        if opt.triplet:
                            if opt.loss_merge:
                                hard_pairs_all = miner(ff_all, labels_all)
                                loss += criterion_triplet(ff_all, labels_all, hard_pairs_all)
                            else:
                                hard_pairs = miner(ff, labels)
                                hard_pairs2 = miner(ff2, labels2)
                                hard_pairs3 = miner(ff3, labels3)
                                loss += criterion_triplet(ff, labels, hard_pairs) + criterion_triplet(ff2, labels2, hard_pairs2) + criterion_triplet(ff3, labels3, hard_pairs3)# /now_batch_size
                                if opt.extra_Google:
                                    hard_pairs4 = miner(ff4, labels4)
                                    loss += criterion_triplet(ff4, labels4, hard_pairs4)
                        if opt.lifted:
                            if opt.loss_merge:
                                loss += criterion_lifted(ff_all, labels_all)
                            else:
                                loss += criterion_lifted(ff, labels) + criterion_lifted(ff2, labels2) + criterion_lifted(ff3, labels3)  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_lifted(ff4, labels4)
                        if opt.contrast:
                            if opt.loss_merge:
                                loss += criterion_contrast(ff_all, labels_all)
                            else:
                                loss += criterion_contrast(ff, labels) + criterion_contrast(ff2,labels2) + criterion_contrast(ff3, labels3)  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_contrast(ff4, labels4)
                        if opt.sphere:
                            if opt.loss_merge:
                                loss += criterion_sphere(ff_all, labels_all) / now_batch_size
                            else:
                                loss += criterion_sphere(ff, labels) / now_batch_size + criterion_sphere(ff2, labels2) / now_batch_size + criterion_sphere(ff3, labels3) / now_batch_size
                                if opt.extra_Google:
                                    # NOTE(review): unlike the three terms above, this
                                    # one is not divided by now_batch_size - confirm.
                                    loss += criterion_sphere(ff4, labels4)
                    else:
                        # Plain classification: model outputs are used directly as logits.
                        _, preds = torch.max(outputs.data, 1)
                        _, preds2 = torch.max(outputs2.data, 1)
                        _, preds3 = torch.max(outputs3.data, 1)
                        if opt.loss_merge:
                            outputs_all = torch.cat((outputs, outputs2, outputs3), dim=0)
                            labels_all = torch.cat((labels, labels2, labels3), dim=0)
                            if opt.extra_Google:
                                outputs_all = torch.cat((outputs_all, outputs4), dim=0)
                                labels_all = torch.cat((labels_all, labels4), dim=0)
                            # NOTE(review): the factor 4 is applied whether or not
                            # the fourth view is included - confirm intent.
                            loss = 4*criterion(outputs_all, labels_all)
                        else:
                            loss = criterion(outputs, labels) + criterion(outputs2, labels2) + criterion(outputs3, labels3)
                            if opt.extra_Google:
                                loss += criterion(outputs4, labels4)
                # backward + optimize only if in training phase
                # During the warm-up epochs the loss is scaled by a factor
                # that grows from 0.1 towards 1.0, capped at 1.0.
                if epoch < opt.warm_epoch and phase == 'train':
                    warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                    loss *= warm_up

                if phase == 'train':
                    if fp16:  # we use optimier to backward loss
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    optimizer.step()
                    ##########
                    if opt.moving_avg < 1.0:
                        update_average(model_test, model, opt.moving_avg)

                # statistics
                if int(version[0]) > 0 or int(version[2]) > 3:  # for the new version like 0.4.0, 0.5.0 and 1.0.0
                    running_loss += loss.item() * now_batch_size
                else:  # for the old version like 0.3.0 and 0.3.1
                    running_loss += loss.data[0] * now_batch_size
                running_corrects += float(torch.sum(preds == labels.data))
                running_corrects2 += float(torch.sum(preds2 == labels2.data))
                if opt.views == 3:
                    running_corrects3 += float(torch.sum(preds3 == labels3.data))

            # All epoch statistics are normalised by the satellite set size.
            epoch_loss = running_loss / dataset_sizes['satellite']
            epoch_acc = running_corrects / dataset_sizes['satellite']
            epoch_acc2 = running_corrects2 / dataset_sizes['satellite']

            if opt.views == 2:
                print('{} Loss: {:.4f} Satellite_Acc: {:.4f} Street_Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc, epoch_acc2))
            elif opt.views == 3:
                epoch_acc3 = running_corrects3 / dataset_sizes['satellite']
                print('{} Loss: {:.4f} Satellite_Acc: {:.4f} Street_Acc: {:.4f} Drone_Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc, epoch_acc2, epoch_acc3))

            y_loss[phase].append(epoch_loss)
            y_err[phase].append(1.0 - epoch_acc)
            # deep copy the model
            if phase == 'train':
                scheduler.step()
            # NOTE(review): last_model_wts is assigned but never read in this function.
            last_model_wts = model.state_dict()
            # A checkpoint is written on epochs 19, 39, 59, ...
            if epoch % 20 == 19:
                save_network(model, opt.name, epoch)
            # draw_curve(epoch)

        # Elapsed time so far, printed after every epoch.
        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # print('Best val Acc: {:4f}'.format(best_acc))
    # save_network(model_test, opt.name+'adapt', epoch)

    return model
def __init__(self, train_dl, val_dl, unseen_dl, model, optimizer, scheduler, criterion, mining_function, loss, savePath='./models/', device='cuda', BATCH_SIZE=64):
    """Store the training components and build the miner and loss by name.

    Args:
        train_dl, val_dl, unseen_dl: Data loaders, stored as given.
        model: Network; moved to ``device`` before being stored.
        optimizer, scheduler, criterion: Stored as given.
        mining_function: 'triplet' or 'pair' selects the miner; for any other
            value no ``mining_func`` attribute is set.
        loss: 'triplet', 'contrastive', 'panc', 'pnca' or 'normsoftmax'
            selects the loss; for any other value no ``loss_function``
            attribute is set.
        savePath: Prefix of the save path.
        device: Device the model is moved to.
        BATCH_SIZE: Stored as given.
    """
    self.device = device
    self.train_dl, self.val_dl, self.unseen_dl = train_dl, val_dl, unseen_dl
    self.BATCH_SIZE = BATCH_SIZE
    self.model = model.to(device)
    self.optimizer, self.scheduler, self.criterion = optimizer, scheduler, criterion
    self.mining_function = mining_function
    self.loss = loss

    # Building blocks shared by several miners / losses below.
    self.distance = distances.LpDistance(normalize_embeddings=True,
                                         p=2,
                                         power=1)
    self.reducer = reducers.ThresholdReducer(low=0)
    self.regularizer = regularizers.LpRegularizer(p=2)

    # --- miner ---------------------------------------------------------
    if mining_function == 'triplet':
        self.mining_func = miners.TripletMarginMiner(
            margin=0.01,
            distance=self.distance,
            type_of_triplets="semihard")
    elif mining_function == 'pair':
        self.mining_func = miners.PairMarginMiner(pos_margin=0,
                                                  neg_margin=0.2)

    # --- loss ----------------------------------------------------------
    # Keyword arguments common to the three proxy/softmax losses.
    # NOTE(review): 9 and 128 are passed positionally to those losses -
    # presumably the class count and embedding size; confirm.
    shared_kwargs = {
        'reducer': self.reducer,
        'weight_regularizer': self.regularizer,
    }
    if loss == 'triplet':
        self.loss_function = losses.TripletMarginLoss(
            margin=0.01, distance=self.distance, reducer=self.reducer)
    elif loss == 'contrastive':
        self.loss_function = losses.ContrastiveLoss(pos_margin=0,
                                                    neg_margin=1.5)
    elif loss == 'panc':
        self.loss_function = losses.ProxyAnchorLoss(
            9, 128, margin=0.01, alpha=5, **shared_kwargs)
    elif loss == 'pnca':
        self.loss_function = losses.ProxyNCALoss(
            9, 128, softmax_scale=1, **shared_kwargs)
    elif loss == 'normsoftmax':
        self.loss_function = losses.NormalizedSoftmaxLoss(
            9, 128, temperature=0.05, **shared_kwargs)

    # These three losses get their own optimizer and plateau scheduler,
    # built from the loss's own parameters.
    has_trainable_loss = loss in ['normsoftmax', 'panc', 'pnca']
    if has_trainable_loss:
        self.loss_optimizer = optim.SGD(self.loss_function.parameters(),
                                        lr=0.0001,
                                        momentum=0.9)
        self.loss_scheduler = lr_scheduler.ReduceLROnPlateau(
            self.loss_optimizer,
            'min',
            patience=3,
            threshold=0.0001,
            factor=0.1,
            verbose=True)

    # Save path encodes the miner and loss names.
    run_tag = 'efigi{}_{}_128'.format(self.mining_function, self.loss)
    self.savePath = savePath + run_tag
def __init__(self, use_pairwise=True):
    """Configure a ``losses.ContrastiveLoss`` from ``get_pos_neg_vals``.

    Args:
        use_pairwise: Forwarded to ``self.get_pos_neg_vals``, which supplies
            the positive margin, negative margin and distance (in that
            order).
    """
    super(ContrastiveLossPML, self).__init__()

    pos_margin, neg_margin, distance = self.get_pos_neg_vals(use_pairwise)
    self.pos = pos_margin
    self.neg = neg_margin
    self.distance = distance

    self.loss = losses.ContrastiveLoss(
        pos_margin=self.pos,
        neg_margin=self.neg,
        distance=self.distance,
    )
# Model construction, profiling, classification loss and optimizer.

# The network is sized to the number of training classes and moved to the GPU.
model = Model(
    backbone_type,
    gd_config,
    feature_dim,
    num_classes=len(train_data_set.class_to_idx),
).cuda()

# Profile the model on one random (1, 3, 224, 224) input and print the
# formatted parameter / FLOP counts.
flops, params = profile(
    model,
    inputs=(torch.randn(1, 3, 224, 224).cuda(), ),
    verbose=False,
)
flops, params = clever_format([flops, params])
print('# Model Params: {} FLOPs: {}'.format(params, flops))

# opt.class_loss selects the classification criterion. ArcFace is the only
# choice whose own parameters are handed to the optimizer, as a second
# parameter group; every other criterion trains the model parameters only.
if opt.class_loss == 'arcface':
    class_criterion = losses.ArcFaceLoss(
        num_classes=len(train_data_set.class_to_idx),
        embedding_size=512,
    )
    print('Using ArcFace')
    optimizer = Adam(
        [
            {'params': model.parameters()},
            {'params': class_criterion.parameters()},
        ],
        lr=1e-4,
    )
else:
    if opt.class_loss == 'contra':
        distance = distances.CosineSimilarity()
        class_criterion = losses.ContrastiveLoss(distance=distance)
    elif opt.class_loss == 'multi':
        class_criterion = losses.MultiSimilarityLoss()
    else:
        # Fallback for any other value: label-smoothed cross entropy.
        class_criterion = LabelSmoothingCrossEntropyLoss(
            smoothing=smoothing, temperature=temperature)
    optimizer = Adam(model.parameters(), lr=1e-4)