# Training loop: learn entity-linking embeddings with a cosine-embedding loss
# (pred vs. reference vector, +1/-1 targets in `label`).
loss_fn = nn.CosineEmbeddingLoss(margin=0)
for epoch in range(epoch):  # NOTE(review): `epoch` is both bound and loop var — confirm intended range
    print('round', round, 'epoch', epoch)
    model.train()
    train_loss = 0
    # Re-seed per epoch so shuffling/dropout are reproducible across runs.
    torch.cuda.manual_seed_all(epoch)
    train_dataloader = DataLoader(train_dataset,
                                  collate_fn=collate_fn_link_entity_vector,
                                  shuffle=True,
                                  batch_size=train_batch_size)
    for index, X, label, pos, vector, length in tqdm(train_dataloader):
        # Pad variable-length token sequences to [batch, max_len] and move to device.
        X = nn.utils.rnn.pad_sequence(X, batch_first=True).type(torch.LongTensor)
        X = X.to(device)
        vector = vector.to(device).type(torch.float)
        length = length.to(device)
        mask_X = get_mask(X, length, is_cuda=use_cuda).to(device)
        pos = pos.type(torch.LongTensor).to(device)
        label = label.to(device).type(torch.float)

        pred = model(X, mask_X, pos, vector, length)
        loss = loss_fn(pred, vector, target=label)

        optimizer.zero_grad()
        loss.backward()
        # BUGFIX: clip gradients BETWEEN backward() and step(). The original
        # clipped after step() + zero_grad(), which clipped already-zeroed
        # gradients and let the unclipped gradients drive the parameter update.
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        train_loss += loss.item()
    # NOTE(review): placement inside the epoch loop inferred from context — confirm.
    train_loss = train_loss / len(train_part) * 1e5
# Validation loop: entity-type classification; accumulates predictions,
# gold labels, and the mean validation loss over `dev_X`.
model.to(device)
valid_dataloader = DataLoader(valid_dataset,
                              collate_fn=collate_fn_link,
                              shuffle=False,
                              batch_size=valid_batch_size)
model.eval()
valid_loss = 0
pred_set = []
label_set = []
# `ent_type` renamed from `type`, which shadowed the builtin.
for index, X, ent_type, pos, length in tqdm(valid_dataloader):
    X = nn.utils.rnn.pad_sequence(X, batch_first=True).type(torch.LongTensor)
    X = X.cuda()
    length = length.cuda()
    mask_X = get_mask(X, length, is_cuda=True).cuda()
    pos = pos.type(torch.LongTensor).cuda()
    ent_type = ent_type.cuda()
    # Pooling mask over word-piece positions — presumably BERT-piece aware; see get_mask_bertpiece.
    mask_for_pool = get_mask_bertpiece(X, length, pos, is_cuda=True).type(torch.float)
    with torch.no_grad():
        pred = model(X, mask_X, pos, length, mask_for_pool).to(device)
        loss = loss_fn(pred, ent_type)
    pred_set.append(pred.cpu().numpy())
    label_set.append(ent_type.cpu().numpy())
    valid_loss += loss.item()
valid_loss = valid_loss / len(dev_X)
# Inference over validation sentence pairs: collect model predictions into
# `pred_vector` (concatenated numpy array across calls/rounds).
model.eval()
valid_loss = 0
# NOTE(review): trn_labels is never populated in this fragment — dead, kept for
# interface compatibility with surrounding code.
trn_labels, trn_preds = [], []
for sa, sb, la, lb, numerical_features, label in tqdm(valid_dataloader):
    sa = nn.utils.rnn.pad_sequence(sa, batch_first=True).type(torch.LongTensor)
    sb = nn.utils.rnn.pad_sequence(sb, batch_first=True).type(torch.LongTensor)
    n_feats = numerical_features.type(torch.FloatTensor)
    # FIX: torch.autograd.Variable is a deprecated no-op since PyTorch 0.4 —
    # plain tensors participate in autograd directly; wrappers removed.
    sa = sa.cuda().squeeze(-1)
    sb = sb.cuda().squeeze(-1)
    la = la.cuda()
    lb = lb.cuda()
    n_feats = n_feats.cuda()
    label = label.type(torch.FloatTensor).cuda().view(-1, 1)
    mask_a = get_mask(sa, la, is_cuda=True)
    mask_b = get_mask(sb, lb, is_cuda=True)
    with torch.no_grad():
        pred = model(sa, sb, mask_a, mask_b, la, lb, n_feats)
    trn_preds.append(pred.cpu().numpy())
trn_preds = np.concatenate(trn_preds)
pred_vector.append(trn_preds)
print(len(pred_vector))
pred_vector = np.concatenate(pred_vector, axis=0)
# For each chunk ("part") of the candidate set: select the i-th slice of the
# pre-split query/candidate tensors, pad token sequences, and build float
# attention masks on the target device.
for i in range(parts):
    query = query_sp[i]
    l_query = l_query_sp[i]
    pos = pos_sp[i]
    candidate_abstract = candidate_abstract_sp[i]
    l_abstract = l_abstract_sp[i]
    candidate_labels = candidate_labels_sp[i]
    l_labels = l_labels_sp[i]
    candidate_type = candidate_type_sp[i]
    candidate_numattrs = candidate_numattrs_sp[i]
    candidate_abstract_numwords = candidate_abstract_numwords_sp[i]
    # Pad the query token sequences to a rectangular LongTensor.
    query = nn.utils.rnn.pad_sequence(query, batch_first=True).type(
        torch.LongTensor).to(device)
    l_query = l_query.to(device)
    mask_query = get_mask(
        query, l_query, is_cuda=use_cuda).to(device).type(torch.float)
    # Same padding + masking for the candidate-abstract token sequences.
    candidate_abstract = nn.utils.rnn.pad_sequence(
        candidate_abstract, batch_first=True).type(torch.LongTensor).to(device)
    l_abstract = l_abstract.to(device)
    mask_abstract = get_mask(candidate_abstract, l_abstract,
                             is_cuda=use_cuda).to(device).type(
                                 torch.float)
    # And for the candidate-label token sequences.
    candidate_labels = nn.utils.rnn.pad_sequence(
        candidate_labels, batch_first=True).type(torch.LongTensor).to(device)
    l_labels = l_labels.to(device)
    # NOTE(review): this chunk is truncated here — the get_mask(...) call below
    # is cut off mid-argument-list in this view of the file.
    mask_labels = get_mask(candidate_labels,
collate_fn=collate_fn, shuffle=True, batch_size=batch_size) loss_fn = nn.BCELoss() optimizer = torch.optim.Adam(model.parameters()) clip = 50 for epoch in range(10): model.train() train_loss = 0 for index, X, length, numerical_features, label in tqdm(train_dataloader): X = nn.utils.rnn.pad_sequence(X, batch_first=True).type(torch.LongTensor) X = X.cuda() n_feats = numerical_features.type(torch.float).cuda() label = label.type(torch.float).cuda() mask_X = get_mask(X, length, is_cuda=True).type(torch.float) pred = model(X, mask_X, length, n_feats) loss = loss_fn(pred, label) optimizer.zero_grad() loss.backward() # Clip gradients: gradients are modified in place _ = nn.utils.clip_grad_norm_(model.parameters(), clip) #_ = nn.utils.clip_grad_norm_(model.parameters(), clip) optimizer.step() train_loss += loss.item() #break save_checkpoint('model/senti.pth', model)