def __init__(self, margin=0.1):
    super(DistWeightContrastiveLoss, self).__init__()
    self.margin = margin
    self.ranking_loss = nn.MarginRankingLoss(margin=self.margin)
best_correct_pairs = 0
best_loss = 1000
train_iters = 0
val_iters = 0

# Optimizer (SGD)
lr = 0.0001
momentum = 0.9
weight_decay = 1e-4
variance = 0
variance_step = 0.001

# Loss
criterion = nn.MarginRankingLoss(margin=margin).cuda(gpu)

# Model
model = model.Model_Multiple_Negatives().cuda(gpu)
model = torch.nn.DataParallel(model, device_ids=gpus)
optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                            momentum=momentum, weight_decay=weight_decay)

# Optionally resume from a checkpoint
if resume:
    print("Loading pretrained model")
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume, map_location={
##############################################


def save_model(model, name, epoch, folder_name):
    print("Saving Model")
    torch.save(model.state_dict(),
               (folder_name + "trained_{}.pth").format(epoch))
    print("Done saving Model")


gat_loss_func = nn.MarginRankingLoss(margin=0.5)


def GAT_Loss(train_indices, valid_invalid_ratio):
    len_pos_triples = train_indices.shape[0] // (int(valid_invalid_ratio) + 1)

    pos_triples = train_indices[:len_pos_triples]
    neg_triples = train_indices[len_pos_triples:]

    pos_triples = pos_triples.repeat(int(valid_invalid_ratio), 1)

    source_embeds = entity_embed[pos_triples[:, 0]]
    relation_embeds = relation_embed[pos_triples[:, 1]]
    tail_embeds = entity_embed[pos_triples[:, 2]]

    x = source_embeds + relation_embeds - tail_embeds
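GAT_Loss above is cut off before it turns x into a score. The usual continuation in TransE-style models is to take a norm of the translation residual for valid and corrupted triples and rank the two with gat_loss_func; the following is a self-contained sketch with toy tensors, not the repository's code:

import torch
import torch.nn as nn

gat_loss_func = nn.MarginRankingLoss(margin=0.5)

# Toy residuals standing in for source_embeds + relation_embeds - tail_embeds.
pos_residual = torch.randn(8, 50)  # valid triples
neg_residual = torch.randn(8, 50)  # corrupted triples

pos_norm = torch.norm(pos_residual, p=1, dim=1)
neg_norm = torch.norm(neg_residual, p=1, dim=1)

# Target -1: the valid triple's distance should be the smaller one.
y = -torch.ones(pos_norm.size(0))
loss = gat_loss_func(pos_norm, neg_norm, y)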
def __init__(self, margin=0, num_instances=None):
    super(TripletLoss, self).__init__()
    self.margin = margin
    self.ranking_loss = nn.MarginRankingLoss(margin=margin)
def __init__(self, margin=0):
    super(CenterLoss, self).__init__()
    self.margin = margin
    self.ranking_loss_center = nn.MarginRankingLoss(margin=self.margin)
    self.centers = nn.Parameter(torch.randn(767, 2048)).cuda()  # for modelent40
def loss_func(self, p_score, n_score):
    criterion = nn.MarginRankingLoss(self.config.margin, False).cuda()
    y = Variable(torch.Tensor([-1])).cuda()
    loss = criterion(p_score, n_score, y)
    return loss
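For reference, nn.MarginRankingLoss computes max(0, -y * (x1 - x2) + margin), so the target of -1 used above pushes the first input (p_score) below the second (n_score) by at least the margin, which is the right direction for distance-style scores where smaller means better. A minimal, self-contained illustration with made-up scores:

import torch
import torch.nn as nn

p_score = torch.tensor([0.2, 0.5])       # distances of positive triples
n_score = torch.tensor([0.9, 0.4])       # distances of corrupted triples
target = torch.full_like(p_score, -1.0)  # -1: p_score should be the smaller input

criterion = nn.MarginRankingLoss(margin=1.0)
loss = criterion(p_score, n_score, target)  # mean of max(0, (p - n) + margin)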
def train_gat(args):

    # Creating the GAT model here.
    ####################################

    print("Defining model")
    print("\nModel type -> GAT layer with {} heads used , Initial Embeddings training"
          .format(args.nheads_GAT[0]))

    model_gat = SpKBGATModified(entity_embeddings, relation_embeddings,
                                args.entity_out_dim, args.entity_out_dim,
                                args.drop_GAT, args.alpha, args.nheads_GAT,
                                args.use_simple_layer)
    wandb.watch(model_gat, log="all")

    if CUDA:
        model_gat.cuda()

    optimizer = torch.optim.Adam(model_gat.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay_gat)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=500,
                                                gamma=0.5, last_epoch=-1)
    torch.nn.utils.clip_grad_norm(model_gat.parameters(), 0.1)

    gat_loss_func = nn.MarginRankingLoss(margin=args.margin)

    current_batch_2hop_indices = torch.tensor([])
    if (args.use_2hop):
        current_batch_2hop_indices = Corpus_.get_batch_nhop_neighbors_all(
            args, Corpus_.unique_entities_train, node_neighbors_2hop)

    if args.use_2hop:
        if CUDA:
            current_batch_2hop_indices = Variable(
                torch.LongTensor(current_batch_2hop_indices)).cuda()
        else:
            current_batch_2hop_indices = Variable(
                torch.LongTensor(current_batch_2hop_indices))
    else:
        current_batch_2hop_indices = None

    epoch_losses = []  # losses of all epochs
    print("Number of epochs {}".format(args.epochs_gat))

    for epoch in range(args.epochs_gat):
        print("\nepoch-> ", epoch)
        random.shuffle(Corpus_.train_triples)
        Corpus_.train_indices = np.array(list(Corpus_.train_triples)).astype(np.int32)

        model_gat.train()  # getting in training mode
        start_time = time.time()
        epoch_loss = []

        if len(Corpus_.train_indices) % args.batch_size_gat == 0:
            num_iters_per_epoch = len(Corpus_.train_indices) // args.batch_size_gat
        else:
            num_iters_per_epoch = (len(Corpus_.train_indices) // args.batch_size_gat) + 1

        for iters in range(num_iters_per_epoch):
            start_time_iter = time.time()
            train_indices, train_values = Corpus_.get_iteration_batch(iters)

            if CUDA:
                train_indices = Variable(torch.LongTensor(train_indices)).cuda()
                train_values = Variable(torch.FloatTensor(train_values)).cuda()
            else:
                train_indices = Variable(torch.LongTensor(train_indices))
                train_values = Variable(torch.FloatTensor(train_values))

            # forward pass
            entity_embed, relation_embed = model_gat(
                Corpus_, Corpus_.train_adj_matrix, train_indices,
                current_batch_2hop_indices)

            optimizer.zero_grad()
            loss = batch_gat_loss(gat_loss_func, train_indices,
                                  entity_embed, relation_embed)
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data.item())
            end_time_iter = time.time()
            print("Iteration-> {0} , Iteration_time-> {1:.4f} , Iteration_loss {2:.4f}"
                  .format(iters, end_time_iter - start_time_iter, loss.data.item()))

        scheduler.step()
        print("Epoch {} , average loss {} , epoch_time {}".format(
            epoch, sum(epoch_loss) / len(epoch_loss), time.time() - start_time))
        epoch_losses.append(sum(epoch_loss) / len(epoch_loss))
        wandb.log({'epoch_loss': epoch_losses[-1]})

        if (epoch + 1) % 200 == 0 or (epoch + 1) == args.epochs_gat:
            save_model(model_gat, args.data, epoch, args.output_folder, args.use_2hop)
        if (epoch + 1) == args.epochs_gat:
            save_final(model_gat, 'encoder', wandb.run.dir, args.use_2hop)
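One caveat in train_gat: torch.nn.utils.clip_grad_norm is the deprecated spelling, and called once before the training loop it clips nothing. If clipping is actually wanted, the current API is clip_grad_norm_ placed between backward() and step(); a small self-contained sketch with a toy model, not the repository's code:

import torch
import torch.nn as nn

model = nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

loss = model(torch.randn(4, 10)).sum()
optimizer.zero_grad()
loss.backward()
# clip_grad_norm_ (trailing underscore) only has an effect after backward(), before step().
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
optimizer.step()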
savefile = '_'.join([
    'Aggregator_model', str(batch_size), str(num_epochs),
    str(num_features), str(num_hidden_units)
])

model = aggregator_model(num_features, num_hidden_units)
model = model.to(device)
model.train()

# Dataset and loader
train_dataset = triplettrainDataset_aggregator(x_train, x_train_names)

criterion = nn.MarginRankingLoss(margin=1.0)
criterion = criterion.to(device)

if (optimi == 'ADAM'):
    optimizer = optim.Adam(model.parameters(), lr=lr)
elif (optimi == 'SGD'):
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

L_train = len(x_train_names)
total_step = int(L_train / batch_size)
training_loss = []

for epoch in range(num_epochs):
    running_loss = 0.0
    epoch_counter = 0
    I_permutation = np.random.permutation(L_train)
def __init__(self, batch_size, margin=0.3):
    super(OriTripletLoss, self).__init__()
    self.margin = margin
    self.ranking_loss = nn.MarginRankingLoss(margin=margin)
def train(self):
    trainData = LoadTrainData(
        self.entity2id, self.id2entity, self.relation2id, self.id2relation,
        self.train_triples, self.valid_triples, self.test_triples,
        self.headRelation2Tail, self.tailRelation2Head,
        self.left_entity, self.right_entity, self.left_num, self.right_num)
    self.entityTotal, self.relationTotal, self.trainTotal, self.validTotal, self.testTotal = trainData.get_total()

    self.model = TransE(self.entityTotal, self.relationTotal, dim=100,
                        batch_size=self.batch_size)
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate)
    self.criterion = nn.MarginRankingLoss(margin=5.0)
    self.margin = torch.Tensor([self.margin])
    self.margin.requires_grad = False

    if self.use_gpu:
        self.model = self.model.cuda()
        self.margin = self.margin.cuda()

    prob = 500
    index_loader = DataLoader(dataset=TrainDataset(self.trainTotal),
                              batch_size=self.batch_size, shuffle=True)
    training_range = tqdm(range(self.train_times))  # progress bar

    for epoch in training_range:  # one epoch takes roughly 51 seconds
        running_loss = 0.0
        for batch in index_loader:
            self.data['h'] = [0] * self.batch_size * (1 + self.neg)
            self.data['r'] = [0] * self.batch_size * (1 + self.neg)
            self.data['t'] = [0] * self.batch_size * (1 + self.neg)
            self.data['y'] = [0] * self.batch_size * (1 + self.neg)

            # collect the data for this batch
            i = 0
            for index in batch:
                # positive sample
                head = self.train_triples[index][0]
                rel = self.train_triples[index][1]
                tail = self.train_triples[index][2]
                self.data['h'][i] = head
                self.data['r'][i] = rel
                self.data['t'][i] = tail
                self.data['y'][i] = 1

                # negative samples: corrupt either the head or the tail
                last = self.batch_size
                for neg in range(self.neg):
                    self.data['h'][last + i] = head
                    self.data['r'][last + i] = rel
                    self.data['t'][last + i] = tail
                    self.data['y'][last + i] = -1

                    if self.bern:
                        prob = 1000 * self.left_num[rel] / (
                            self.left_num[rel] + self.right_num[rel])
                    rmd = random.random() * 1000
                    if rmd < prob:
                        while True:
                            corrupt_head = random.randint(0, self.entityTotal - 1)
                            if corrupt_head not in self.left_entity[rel]:
                                self.data['h'][last + i] = corrupt_head
                                break
                    else:
                        while True:
                            corrupt_tail = random.randint(0, self.entityTotal - 1)
                            if corrupt_tail not in self.right_entity[rel]:
                                self.data['t'][last + i] = corrupt_tail
                                break
                    last += self.batch_size
                i += 1

            # convert to tensors
            for key in self.data:
                self.data[key] = self.to_var(self.data[key])

            p_score, n_score = self.model(self.data)
            loss = (torch.max(p_score - n_score, -self.margin)).mean() + self.margin
            running_loss += loss

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        # set the progress-bar description for the current epoch
        training_range.set_description("Epoch %d | loss: %f" % (epoch, loss))

    cur_time = datetime.now().strftime('%Y-%m-%d')
    self.model.save_checkpoint('.', 'model_params' + cur_time + '.pkl')
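The loop above instantiates self.criterion = nn.MarginRankingLoss(margin=5.0) but then computes the hinge by hand. The two agree, since max(p - n, -m).mean() + m equals max(p - n + m, 0).mean(), which is exactly the margin ranking loss with target -1; a quick self-contained check with toy scores:

import torch
import torch.nn as nn

margin = 5.0
p_score = torch.randn(16)
n_score = torch.randn(16)

manual = torch.max(p_score - n_score, torch.tensor(-margin)).mean() + margin
ranking = nn.MarginRankingLoss(margin=margin)(p_score, n_score, -torch.ones(16))
assert torch.allclose(manual, ranking)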
#-----------------------------------------
# Setting up the model:
model.to(device)
if args.model_type in ['bert', 'albert', 'roberta']:
    model_dim = config.hidden_size
elif args.model_type in ['gpt2']:
    model_dim = config.n_embd
mlp = Context_MLP(in_size=model_dim)
mlp = mlp.to(device)
#-----------------------------------------

#-----------------------------------------
# The loss function:
criterion = nn.MarginRankingLoss(margin=args.loss_margin, reduction='none')
#-----------------------------------------

#-----------------------------------------
# Creating the data loaders:
train_dataloader, test_dataloader = create_loaders(args, Ranking_Dataset, tokenizer)
#-----------------------------------------

#-----------------------------------------
# Tensorboard writer:
tb_writer = SummaryWriter(
    log_dir=f'{logs_path}/{datetime.now().strftime("%d%m%Y-%H_%M_%S")}/')
#-----------------------------------------
def incremental_train_and_eval_MR_LF_TDE(epochs, tg_model, ref_model, tg_optimizer, tg_lr_scheduler,
                                         trainloader, testloader,
                                         iteration, start_iteration,
                                         lamda,
                                         dist, K, lw_mr, causal_embed=None,
                                         fix_bn=False, weight_per_class=None, device=None):
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    mu = 0.9  # causal embed momentum
    if causal_embed is None:
        if iteration > start_iteration:
            causal_embed = torch.FloatTensor(1, ref_model.fc.in_features).zero_().to(device)
        else:
            causal_embed = torch.FloatTensor(1, tg_model.fc.in_features).zero_().to(device)

    if iteration > start_iteration:
        ref_model.eval()
        num_old_classes = ref_model.fc.out_features
        handle_ref_features = ref_model.fc.register_forward_hook(get_ref_features)
        handle_cur_features = tg_model.fc.register_forward_hook(get_cur_features)
        handle_old_scores_bs = tg_model.fc.fc1.register_forward_hook(get_old_scores_before_scale)
        handle_new_scores_bs = tg_model.fc.fc2.register_forward_hook(get_new_scores_before_scale)

    for epoch in range(epochs):
        # train
        tg_model.train()
        if fix_bn:
            for m in tg_model.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
                    # m.weight.requires_grad = False
                    # m.bias.requires_grad = False
        train_loss = 0
        train_loss1 = 0
        train_loss2 = 0
        train_loss3 = 0
        correct = 0
        total = 0
        tg_lr_scheduler.step()
        print('\nEpoch: %d, LR: ' % epoch, end='')
        print(tg_lr_scheduler.get_lr())

        for batch_idx, (inputs, targets) in enumerate(tqdm(trainloader)):
            inputs, targets = inputs.to(device), targets.to(device)
            tg_optimizer.zero_grad()
            outputs = tg_model(inputs)

            if iteration == start_iteration:
                loss = nn.CrossEntropyLoss(weight_per_class)(outputs, targets)
            else:
                ref_outputs = ref_model(inputs)
                loss1 = nn.CosineEmbeddingLoss()(cur_features, ref_features.detach(),
                                                 torch.ones(inputs.shape[0]).to(device)) * lamda

                # update causal_embed
                with torch.no_grad():
                    cur_features_mean = cur_features.detach().mean(0, keepdim=True)
                    causal_embed = mu * causal_embed + cur_features_mean

                loss2 = nn.CrossEntropyLoss(weight_per_class)(outputs, targets)

                #################################################
                # scores before scale, [-1, 1]
                outputs_bs = torch.cat((old_scores, new_scores), dim=1)
                assert (outputs_bs.size() == outputs.size())
                # get ground-truth scores
                gt_index = torch.zeros(outputs_bs.size()).to(device)
                gt_index = gt_index.scatter(1, targets.view(-1, 1), 1).ge(0.5)
                gt_scores = outputs_bs.masked_select(gt_index)
                # get top-K scores on novel classes
                max_novel_scores = outputs_bs[:, num_old_classes:].topk(K, dim=1)[0]
                # the index of hard samples, i.e., samples of old classes
                hard_index = targets.lt(num_old_classes)
                hard_num = torch.nonzero(hard_index).size(0)
                if hard_num > 0:
                    gt_scores = gt_scores[hard_index].view(-1, 1).repeat(1, K)
                    max_novel_scores = max_novel_scores[hard_index]
                    assert (gt_scores.size() == max_novel_scores.size())
                    assert (gt_scores.size(0) == hard_num)
                    loss3 = nn.MarginRankingLoss(margin=dist)(
                        gt_scores.view(-1, 1),
                        max_novel_scores.view(-1, 1),
                        torch.ones(hard_num * K).to(device)) * lw_mr
                else:
                    loss3 = torch.zeros(1).to(device)
                #################################################

                loss = loss1 + loss2 + loss3

            loss.backward()
            tg_optimizer.step()

            train_loss += loss.item()
            if iteration > start_iteration:
                train_loss1 += loss1.item()
                train_loss2 += loss2.item()
                train_loss3 += loss3.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        if iteration == start_iteration:
            print('Train set: {}, Train Loss: {:.4f} Acc: {:.4f}'.format(
                len(trainloader), train_loss / (batch_idx + 1), 100. * correct / total))
        else:
            print('Train set: {}, Train Loss1: {:.4f}, Train Loss2: {:.4f}, Train Loss3: {:.4f}, '
                  'Train Loss: {:.4f} Acc: {:.4f}'.format(
                      len(trainloader),
                      train_loss1 / (batch_idx + 1), train_loss2 / (batch_idx + 1),
                      train_loss3 / (batch_idx + 1), train_loss / (batch_idx + 1),
                      100. * correct / total))

        # eval
        tg_model.eval()
        test_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(testloader):
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = tg_model(inputs)
                loss = nn.CrossEntropyLoss(weight_per_class)(outputs, targets)
                test_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
        print('Test set: {} Test Loss: {:.4f} Acc: {:.4f}'.format(
            len(testloader), test_loss / (batch_idx + 1), 100. * correct / total))

    if iteration > start_iteration:
        print("Removing register_forward_hook")
        handle_ref_features.remove()
        handle_cur_features.remove()
        handle_old_scores_bs.remove()
        handle_new_scores_bs.remove()
    return tg_model, causal_embed
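For clarity, the inter-class separation term (loss3) ranks each old-class sample's ground-truth score against its top-K novel-class scores with target +1. In isolation, and with the shapes flattened so input and target sizes match, the call looks like this (toy tensors; the dist/K/lw_mr values are placeholders, not the values used above):

import torch
import torch.nn as nn

K, dist, lw_mr = 2, 0.5, 1.0
hard_num = 3
gt_scores = torch.rand(hard_num, K)          # ground-truth score, repeated K times per sample
max_novel_scores = torch.rand(hard_num, K)   # top-K scores on novel classes

loss3 = nn.MarginRankingLoss(margin=dist)(
    gt_scores.view(-1),
    max_novel_scores.view(-1),
    torch.ones(hard_num * K)) * lw_mr        # +1: the ground-truth score should win by `dist`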
def loss(self, positive_score, negative_score):
    """graph embedding loss function"""
    target = torch.tensor([-1], dtype=torch.long)
    loss_func = nn.MarginRankingLoss(margin=self.margin, reduction='none')
    return loss_func(positive_score, negative_score, target)
def __init__(self, margin=0, num_instances=0, use_semi=True):
    super(TripletLoss, self).__init__()
    self.margin = margin
    self.ranking_loss = nn.MarginRankingLoss(margin=self.margin)
    self.K = num_instances
    self.use_semi = use_semi
def __init__(self, margin=0, mode='hard'):
    super(MatrixLoss, self).__init__()
    self.margin = margin
    self.ranking_loss = nn.MarginRankingLoss(margin=margin)
    self.mode = mode
def __init__(self, batch_size, margin=0.5):
    super(TripletLoss, self).__init__()
    self.margin = margin
    self.ranking_loss = nn.MarginRankingLoss(margin=margin)
    self.batch_size = batch_size
    self.mask = torch.eye(batch_size)
in_size, out_size = [x.size() for x in in_params], [x.size() for x in out_params]
in_sum, out_sum = sum([np.prod(x) for x in in_size]), sum([np.prod(x) for x in out_size])

print("IN    : {} params".format(in_sum))
# print(print_params(in_names, in_size))
print("OUT   : {} params".format(out_sum))
# print(print_params(out_names, out_size))
print("TOTAL : {} params".format(in_sum + out_sum))

loss_fn = {
    'xent': nn.CrossEntropyLoss(),
    'mse': nn.MSELoss(),
    'mrl': nn.MarginRankingLoss(),
    'mlml': nn.MultiLabelMarginLoss(),
    'mml': nn.MultiMarginLoss()
}

tt = torch
if not args.cpu:
    loss_fn = {k: v.cuda() for (k, v) in loss_fn.items()}
    tt = torch.cuda

optimizer = torch.optim.Adam(in_params, lr=args.lr)

out_data = {'train': {'x': [], 'y': []},
            'valid': {'x': [], 'y': []},
            'bleu': {'x': [], 'y': []},
            'best_valid': {'x': [], 'y': []}}
def __init__(self, margin=0.3):
    super(TripletLoss, self).__init__()
    self.margin = margin
    # https://pytorch.org/docs/1.2.0/nn.html?highlight=marginrankingloss#torch.nn.MarginRankingLoss
    self.ranking_loss = nn.MarginRankingLoss(margin=margin)
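Many of the triplet-loss classes above only show their __init__; their forward passes typically mine per-anchor hardest positive and negative distances and rank dist_an above dist_ap with a target of +1. A hedged, self-contained sketch of that call (dist_ap/dist_an are assumed values, not part of any snippet here):

import torch
import torch.nn as nn

ranking_loss = nn.MarginRankingLoss(margin=0.3)

dist_ap = torch.tensor([0.2, 0.6, 0.4])  # hardest anchor-positive distance per anchor
dist_an = torch.tensor([0.9, 0.5, 0.7])  # hardest anchor-negative distance per anchor
y = torch.ones_like(dist_an)             # +1: first input (dist_an) should be larger

loss = ranking_loss(dist_an, dist_ap, y)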
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-task', required=True)
    parser.add_argument('-model', required=True)
    parser.add_argument('-eval_step', type=int, default=10)
    parser.add_argument('-epoch', type=int, default=400)
    parser.add_argument('-d_word_vec', type=int, default=300)
    parser.add_argument('-batch_size', type=int, default=100)
    parser.add_argument('-save_model', default=None)
    parser.add_argument('-save_mode', type=str, choices=['all', 'best'], default='best')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-lr', type=float, default=0.001)
    parser.add_argument('-n_bins', type=float, default=21)

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda
    opt.mu = kernal_mus(opt.n_bins)
    opt.sigma = kernel_sigmas(opt.n_bins)
    print(opt)

    # ========= Preparing DataLoader =========#
    if opt.task == "wikiqa":
        train_filename = "./data/wikiqa/wiki_train_pair.pkl"
        test_filename = "./data/wikiqa/wiki_test.pkl"
        dev_filename = "./data/wikiqa/wiki_dev.pkl"
        train_data = pickle.load(open(train_filename, 'r'))
        test_data = pickle.load(open(test_filename, 'r'))
        dev_data = pickle.load(open(dev_filename, 'r'))
        weights = np.load("./data/wikiqa/embed.txt")
    elif opt.task == "trecqa-clean":
        train_filename = "./data/trecqa/trec_train_pair.pkl"
        test_filename = "./data/trecqa/trec_test_clean.pkl"
        dev_filename = "./data/trecqa/trec_dev_clean.pkl"
        train_data = pickle.load(open(train_filename, 'r'))
        test_data = pickle.load(open(test_filename, 'r'))
        dev_data = pickle.load(open(dev_filename, 'r'))
        weights = np.load("./data/trecqa/embed.txt")
    elif opt.task == "trecqa-all":
        train_filename = "./data/trecqa/trec_train_pair.pkl"
        test_filename = "./data/trecqa/trec_test_all.pkl"
        dev_filename = "./data/trecqa/trec_dev_all.pkl"
        train_data = pickle.load(open(train_filename, 'r'))
        test_data = pickle.load(open(test_filename, 'r'))
        dev_data = pickle.load(open(dev_filename, 'r'))
        weights = np.load("./data/trecqa/embed.txt")
    else:
        raise NotImplementedError("Task not implemented!")

    train_data = Dataloader(data=train_data, opt=opt, shuffle=True)
    test_data = DataloaderTest(data=test_data, opt=opt)
    dev_data = DataloaderTest(data=dev_data, opt=opt)

    if opt.model == "knrm":
        model = KNRM.knrm(opt, weights)
    elif opt.model == "cknrm":
        model = CKNRM.knrm(opt, weights)
    else:
        raise ValueError("No such model!")

    crit = nn.MarginRankingLoss(margin=1, size_average=True)

    if opt.cuda:
        model = model.cuda()
        crit = crit.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    train(model, opt, crit, optimizer, train_data, dev_data, test_data)
def __init__(self, p1, p2, input_var=None, var=["z"], margin=0):
    super().__init__(p1, p2, input_var)
    self.var = var
    self.loss = nn.MarginRankingLoss(margin=margin, reduce=False)
def train(device, net, dataloader, val_loader, args, logger, experiment):
    def update(engine, data):
        input_left, input_right, label = data['left_image'], data['right_image'], data['winner']
        input_left, input_right, label = input_left.to(device), input_right.to(device), label.to(device)
        rank_label = label.clone()
        inverse_label = label.clone()
        label[label == -1] = 0
        # zero the parameter gradients
        optimizer.zero_grad()
        rank_label = rank_label.float()

        start = timer()
        output_clf, output_rank_left, output_rank_right = net(input_left, input_right)
        end = timer()
        logger.info(f'FORWARD,{end-start:.4f}')

        # compute clf loss
        start = timer()
        loss_clf = clf_crit(output_clf, label)
        # compute ranking loss
        loss_rank = compute_ranking_loss(output_rank_left, output_rank_right, label, rank_crit)
        loss = loss_clf + loss_rank
        end = timer()
        logger.info(f'LOSS,{end-start:.4f}')

        # compute ranking accuracy
        start = timer()
        rank_acc = compute_ranking_accuracy(output_rank_left, output_rank_right, label)
        end = timer()
        logger.info(f'RANK-ACC,{end-start:.4f}')

        # backward step
        start = timer()
        loss.backward()
        optimizer.step()
        end = timer()
        logger.info(f'BACKWARD,{end-start:.4f}')

        # swapped forward
        start = timer()
        inverse_label *= -1  # swap label
        inverse_rank_label = inverse_label.clone()
        inverse_rank_label = inverse_rank_label.float()
        inverse_label[inverse_label == -1] = 0
        end = timer()
        logger.info(f'SWAPPED-SETUP,{end-start:.4f}')

        start = timer()
        outputs, output_rank_left, output_rank_right = net(input_right, input_left)  # pass swapped input
        end = timer()
        logger.info(f'SWAPPED-FORWARD,{end-start:.4f}')

        start = timer()
        inverse_loss_clf = clf_crit(outputs, inverse_label)
        # compute ranking loss
        inverse_loss_rank = compute_ranking_loss(output_rank_left, output_rank_right, label, rank_crit)
        # swapped backward
        inverse_loss = inverse_loss_clf + inverse_loss_rank
        end = timer()
        logger.info(f'SWAPPED-LOSS,{end-start:.4f}')

        start = timer()
        inverse_loss.backward()
        optimizer.step()
        end = timer()
        logger.info(f'SWAPPED-BACKWARD,{end-start:.4f}')

        return {
            'loss': loss.item(),
            'loss_clf': loss_clf.item(),
            'loss_rank': loss_rank.item(),
            'y': label,
            'y_pred': output_clf,
            'rank_acc': rank_acc
        }

    def inference(engine, data):
        with torch.no_grad():
            start = timer()
            input_left, input_right, label = data['left_image'], data['right_image'], data['winner']
            input_left, input_right, label = input_left.to(device), input_right.to(device), label.to(device)
            rank_label = label.clone()
            label[label == -1] = 0
            rank_label = rank_label.float()
            # forward
            output_clf, output_rank_left, output_rank_right = net(input_left, input_right)
            loss_clf = clf_crit(output_clf, label)
            loss_rank = compute_ranking_loss(output_rank_left, output_rank_right, label, rank_crit)
            rank_acc = compute_ranking_accuracy(output_rank_left, output_rank_right, label)
            loss = loss_clf + loss_rank
            end = timer()
            logger.info(f'INFERENCE,{end-start:.4f}')
            return {
                'loss': loss.item(),
                'loss_clf': loss_clf.item(),
                'loss_rank': loss_rank.item(),
                'y': label,
                'y_pred': output_clf,
                'rank_acc': rank_acc
            }

    net = net.to(device)
    clf_crit = nn.NLLLoss()
    rank_crit = nn.MarginRankingLoss(reduction='mean', margin=1)
    optimizer = optim.SGD(net.parameters(), lr=args.lr, weight_decay=args.wd, momentum=0.9)
    lamb = Variable(torch.FloatTensor([1]), requires_grad=False).cuda()[0]

    trainer = Engine(update)
    evaluator = Engine(inference)
    writer = SummaryWriter()

    RunningAverage(output_transform=lambda x: x['loss']).attach(trainer, 'loss')
    RunningAverage(output_transform=lambda x: x['loss_clf']).attach(trainer, 'loss_clf')
    RunningAverage(output_transform=lambda x: x['loss_rank']).attach(trainer, 'loss_rank')
    RunningAverage(output_transform=lambda x: x['rank_acc']).attach(trainer, 'rank_acc')
    RunningAverage(Accuracy(output_transform=lambda x: (x['y_pred'], x['y']))).attach(trainer, 'avg_acc')

    RunningAverage(output_transform=lambda x: x['loss']).attach(evaluator, 'loss')
    RunningAverage(output_transform=lambda x: x['loss_clf']).attach(evaluator, 'loss_clf')
    RunningAverage(output_transform=lambda x: x['loss_rank']).attach(evaluator, 'loss_rank')
    RunningAverage(output_transform=lambda x: x['rank_acc']).attach(evaluator, 'rank_acc')
    RunningAverage(Accuracy(output_transform=lambda x: (x['y_pred'], x['y']))).attach(evaluator, 'avg_acc')

    if args.pbar:
        pbar = ProgressBar(persist=False)
        pbar.attach(trainer, ['loss', 'avg_acc', 'rank_acc'])
        pbar = ProgressBar(persist=False)
        pbar.attach(evaluator, ['loss', 'loss_clf', 'loss_rank', 'avg_acc'])

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        net.eval()
        evaluator.run(val_loader)
        trainer.state.metrics['val_acc'] = evaluator.state.metrics['rank_acc']
        net.train()
        tb_log(
            {
                "accuracy": {
                    'accuracy': trainer.state.metrics['avg_acc'],
                    'rank_accuracy': trainer.state.metrics['rank_acc']
                },
                "loss": {
                    'total': trainer.state.metrics['loss'],
                    'clf': trainer.state.metrics['loss_clf'],
                    'rank': trainer.state.metrics['loss_rank']
                }
            },
            {
                "accuracy": {
                    'accuracy': evaluator.state.metrics['avg_acc'],
                    'rank_accuracy': evaluator.state.metrics['rank_acc']
                },
                "loss": {
                    'total': evaluator.state.metrics['loss'],
                    'clf': evaluator.state.metrics['loss_clf'],
                    'rank': evaluator.state.metrics['loss_rank']
                }
            },
            writer, args.attribute, trainer.state.epoch
        )

    handler = ModelCheckpoint(args.model_dir,
                              '{}_{}_{}'.format(args.model, args.premodel, args.attribute),
                              n_saved=1, create_dir=True, save_as_state_dict=True,
                              require_empty=False,
                              score_function=lambda engine: engine.state.metrics['val_acc'])
    trainer.add_event_handler(Events.EPOCH_COMPLETED, handler, {'model': net})

    if (args.resume):
        def start_epoch(engine):
            engine.state.epoch = args.epoch
        trainer.add_event_handler(Events.STARTED, start_epoch)
        evaluator.add_event_handler(Events.STARTED, start_epoch)

    trainer.run(dataloader, max_epochs=args.max_epochs)
def __init__(self, p1, p2, input_var=None, margin=0.5):
    super().__init__(p1, p2, input_var)
    self.loss = nn.MarginRankingLoss(margin=margin)
def __init__(self, margin=0.3, mutual_flag=False):
    super(TripletLoss, self).__init__()
    self.margin = margin
    self.ranking_loss = nn.MarginRankingLoss(margin=margin)
    self.mutual = mutual_flag
from __future__ import absolute_import
    ['softsign', nn.Softsign()],
    ['softmin', nn.Softmin()],
    ['tanhshrink', nn.Tanhshrink()],
    ['rrelu', nn.RReLU()],
    ['glu', nn.GLU()],
])

loss = nn.ModuleDict([
    ['l1', nn.L1Loss()],
    ['nll', nn.NLLLoss()],
    ['kldiv', nn.KLDivLoss()],
    ['mse', nn.MSELoss()],
    ['bce', nn.BCELoss()],
    ['bce_with_logits', nn.BCEWithLogitsLoss()],
    ['cosine_embedding', nn.CosineEmbeddingLoss()],
    ['ctc', nn.CTCLoss()],
    ['hinge_embedding', nn.HingeEmbeddingLoss()],
    ['margin_ranking', nn.MarginRankingLoss()],
    ['multi_label_margin', nn.MultiLabelMarginLoss()],
    ['multi_label_soft_margin', nn.MultiLabelSoftMarginLoss()],
    ['multi_margin', nn.MultiMarginLoss()],
    ['smooth_l1', nn.SmoothL1Loss()],
    ['soft_margin', nn.SoftMarginLoss()],
    ['cross_entropy', nn.CrossEntropyLoss()],
    ['triplet_margin', nn.TripletMarginLoss()],
    ['poisson_nll', nn.PoissonNLLLoss()]
])

optimizer = dict({
    'adadelta': optim.Adadelta,
    'adagrad': optim.Adagrad,
    'adam': optim.Adam,
    'sparse_adam': optim.SparseAdam,
def __init__(self, margin=0.3, lamb=10.0, same_margin=1.0):
    super(weightedContrastiveLoss, self).__init__()
    self.margin = margin
    # self.margin_pos = same_margin  # Modified by Sun 2019.1.21
    self.lamb = lamb
    self.ranking_loss = nn.MarginRankingLoss(margin=margin)
def __init__(self, margin=None):
    self.margin = margin
    if margin is not None:
        self.ranking_loss = nn.MarginRankingLoss(margin=margin)
    else:
        self.ranking_loss = nn.SoftMarginLoss()
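This variant falls back to nn.SoftMarginLoss when no margin is given; the two branches are then called differently, since SoftMarginLoss takes a single score (the distance gap) rather than a pair. A small sketch with made-up distances (dist_ap/dist_an are assumptions, not defined in the snippet):

import torch
import torch.nn as nn

dist_ap = torch.tensor([0.4, 0.7])   # anchor-positive distances
dist_an = torch.tensor([0.9, 0.6])   # anchor-negative distances
y = torch.ones_like(dist_an)         # "first input should rank higher"

hard_margin = nn.MarginRankingLoss(margin=0.3)(dist_an, dist_ap, y)
soft_margin = nn.SoftMarginLoss()(dist_an - dist_ap, y)  # log(1 + exp(-(dist_an - dist_ap)))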
def forward(self, h_batch, r_batch, t_batch, h_neg_batch, r_neg_batch, t_neg_batch):
    '''
    :param h_batch: variable containing tensor of head entity
    :param r_batch: variable containing tensor of relation
    :param t_batch: variable containing tensor of tail entity
    :return:
    '''
    embed_h_batch = self.embed_entity(h_batch)          # size_batch * dim
    embed_r_batch = self.embed_relation(r_batch)        # size_batch * dim
    embed_t_batch = self.embed_entity(t_batch)          # size_batch * dim
    embed_h_neg_batch = self.embed_entity(h_neg_batch)  # size_batch * dim
    embed_r_neg_batch = self.embed_entity(r_neg_batch)  # size_batch * dim
    embed_t_neg_batch = self.embed_entity(t_neg_batch)  # size_batch * dim

    # get neighbor context
    neighbor_context = torch.LongTensor(
        get_neighbor_context_batch(h_batch)).type(dtype_LongTensor_cuda)
    embed_neighbor_context_r = self.embed_relation(
        Variable(neighbor_context[:, :, 0]))  # size_batch * size_neighbor_context * dim
    embed_neighbor_context_t = self.embed_entity(
        Variable(neighbor_context[:, :, 1]))  # size_batch * size_neighbor_context * dim
    neighbor_tmp = -(embed_neighbor_context_r - embed_neighbor_context_t)  # size_batch * size_neighbor_context * dim

    a = torch.norm(-neighbor_tmp + embed_r_batch[:, np.newaxis, :] - embed_t_batch[:, np.newaxis, :],
                   p=self.args.p, dim=2, keepdim=False)  # size_batch * size_neighbor_context
    alpha = F.softmin(a, dim=1)  # size_batch * size_neighbor_context

    g_n_pos = -torch.norm(torch.sum(alpha[:, :, np.newaxis] * neighbor_tmp, dim=1) - embed_h_batch,
                          p=self.args.p, dim=1, keepdim=False)  # size_batch
    g_n_neg = -torch.norm(torch.sum(alpha[:, :, np.newaxis] * neighbor_tmp, dim=1) - embed_h_neg_batch,
                          p=self.args.p, dim=1, keepdim=False)  # size_batch

    # get path context
    path_context = torch.LongTensor(
        get_path_context_batch(h_batch, t_batch)).type(dtype_LongTensor_cuda)
    rel_sign = torch.sign(path_context.type(dtype_FloatTensor_cuda))  # size_batch * size_path_context * length_path
    embed_path_list = []
    for i in range(len(path_context)):  # because the indices of embedding should be <= 2
        embed_path_list.append(self.embed_relation(Variable(torch.abs(path_context[i]))))
    embed_path_context = torch.cat(
        [torch.unsqueeze(embed, 0) for embed in embed_path_list], 0)  # size_batch * size_path_context * length_path * dim
    embed_path = torch.sum(
        Variable(rel_sign[:, :, :, np.newaxis], requires_grad=True) * embed_path_context,
        dim=2, keepdim=False)  # size_batch * size_path_context * dim

    b = torch.norm(embed_h_batch[:, np.newaxis, :] + embed_path - embed_t_batch[:, np.newaxis, :],
                   p=self.args.p, dim=2, keepdim=False)  # size_batch * size_path_context
    beta = F.softmin(b, dim=1)  # size_batch * size_path_context

    g_p_pos = -torch.norm(torch.sum(beta[:, :, np.newaxis] * embed_path, dim=1) - embed_r_batch,
                          p=self.args.p, dim=1, keepdim=False)
    g_p_neg = -torch.norm(torch.sum(beta[:, :, np.newaxis] * embed_path, dim=1) - embed_r_neg_batch,
                          p=self.args.p, dim=1, keepdim=False)

    # g_t
    g_t_pos = -torch.norm(embed_h_batch + embed_r_batch - embed_t_batch,
                          p=self.args.p, dim=1, keepdim=False)
    g_t_neg = -torch.norm(embed_h_neg_batch + embed_r_neg_batch - embed_t_neg_batch,
                          p=self.args.p, dim=1, keepdim=False)

    # Alternative logsigmoid formulation, kept from the original source but unused:
    # loss_g_n_pos = - torch.sum(F.logsigmoid(g_n_pos))
    # loss_g_n_neg = - torch.sum(F.logsigmoid(- g_n_neg))
    # loss_g_p_pos = - torch.sum(F.logsigmoid(g_p_pos))
    # loss_g_p_neg = - torch.sum(F.logsigmoid(- g_p_neg))
    # loss_g_t_pos = - torch.sum(F.logsigmoid(g_t_pos))
    # loss_g_t_neg = - torch.sum(F.logsigmoid(- g_t_neg))
    # loss = loss_g_n_pos + loss_g_n_neg + loss_g_p_pos + loss_g_p_neg + loss_g_t_pos + loss_g_t_neg

    loss_function = nn.MarginRankingLoss(margin=1, size_average=False)
    target = Variable(torch.FloatTensor([1] * len(h_batch)),
                      requires_grad=False).type(dtype_FloatTensor_cuda)
    # loss = loss_function(F.sigmoid(g_n_pos) + F.sigmoid(g_p_pos) + F.sigmoid(g_t_pos),
    #                      F.sigmoid(g_n_neg) + F.sigmoid(g_p_neg) + F.sigmoid(g_t_neg),
    #                      target.type(dtype_FloatTensor_cuda))
    loss = loss_function(g_n_pos, g_n_neg, target) + loss_function(
        g_p_pos, g_p_neg, target) + loss_function(g_t_pos, g_t_neg, target)
    return loss
def __init__(self, margin=0.3):
    super(TripletLoss_out, self).__init__()
    self.margin = margin
    self.ranking_loss = nn.MarginRankingLoss(margin=self.margin)
    loss_1 = target[idx, idx] * (torch.log(target[idx, idx]) - inputs[idx, idx])
    print("loss of the first element:", loss_1)

# ---------------------------------------------- 10 Margin Ranking Loss --------------------------------------------
flag = 0
# flag = 1
if flag:
    x1 = torch.tensor([[1], [2], [3]], dtype=torch.float)
    x2 = torch.tensor([[2], [2], [2]], dtype=torch.float)
    target = torch.tensor([1, 1, -1], dtype=torch.float)

    loss_f_none = nn.MarginRankingLoss(margin=0, reduction='none')
    loss = loss_f_none(x1, x2, target)
    print(loss)

# ---------------------------------------------- 11 Multi Label Margin Loss -----------------------------------------
flag = 0
# flag = 1
if flag:
    x = torch.tensor([[0.1, 0.2, 0.4, 0.8]])
    y = torch.tensor([[0, 3, -1, -1]], dtype=torch.long)

    loss_f = nn.MultiLabelMarginLoss(reduction='none')
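Note that in the Margin Ranking Loss example, x1 and x2 have shape (3, 1) while target has shape (3,), so with reduction='none' the result broadcasts to a 3x3 matrix of max(0, -y * (x1 - x2)) rather than a length-3 vector. Running that block with flag = 1 should print something like:

tensor([[1., 1., 0.],
        [0., 0., 0.],
        [0., 0., 1.]])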