def test(net, data_loader):
    net.eval()
    ds = data_loader.dataset
    eval_loss = 0.0
    for seq1, seq2, mask1, mask2 in tqdm(data_loader):
        input_ids1, attention_mask1 = split_seq(seq1.to(device)), split_seq(mask1.to(device))
        input_ids2, attention_mask2 = split_seq(seq2.to(device)), split_seq(mask2.to(device))
        with torch.no_grad():
            for i in range(len(input_ids1)):
                # mask tokens for the MLM objective on each chunk of the first sequence
                inputs, labels = ds.DataCollatorForLanguageModeling(input_ids1[i])
                output = net(input_ids=inputs, attention_mask=attention_mask1[i], labels=labels)
                eval_loss += output.loss.cpu().numpy()
            for i in range(len(input_ids2)):
                inputs, labels = ds.DataCollatorForLanguageModeling(input_ids2[i])
                output = net(input_ids=inputs, attention_mask=attention_mask2[i], labels=labels)
                eval_loss += output.loss.cpu().numpy()
    print("eval loss ", eval_loss)
def finetune_eval(net, data_loader, epoch):
    net.eval()
    with torch.no_grad():
        avg = []   # per-batch MRR
        gt = []    # anchor-to-matching-positive L2 distances
        cons = []  # anchor-to-non-matching L2 distances
        for seq1, seq2, seq3, mask1, mask2, mask3 in tqdm(data_loader):
            input_ids1, attention_mask1 = split_seq(seq1.to(device)), split_seq(mask1.to(device))
            input_ids2, attention_mask2 = split_seq(seq2.to(device)), split_seq(mask2.to(device))
            anchor, pos = 0, 0
            # average the [CLS] embeddings over all chunks of each sequence
            for i in range(len(input_ids1)):
                output = net.bert(input_ids=input_ids1[i], attention_mask=attention_mask1[i])
                embeddings1 = output.last_hidden_state[:, 0:1, :]
                anchor = anchor + embeddings1
            anchor = anchor / len(input_ids1)
            anchor = F.normalize(anchor, dim=-1)  # unit-normalize over the hidden dimension
            for i in range(len(input_ids2)):
                output = net.bert(input_ids=input_ids2[i], attention_mask=attention_mask2[i])
                embeddings2 = output.last_hidden_state[:, 0:1, :]
                pos = pos + embeddings2
            pos = pos / len(input_ids2)
            pos = F.normalize(pos, dim=-1)
            ans = 0
            for i in range(len(anchor)):  # rank every anchor vector against all positives
                vA = anchor[i].cpu().numpy()[0]
                sim = []
                for j in range(len(pos)):
                    vB = pos[j].cpu().numpy()[0]
                    sim.append(np.linalg.norm(vA - vB))  # L2 distance, smaller is closer
                    if j != i:
                        cons.append(sim[-1])
                sim = np.array(sim)
                y = np.argsort(sim)
                posi = 0
                for j in range(len(pos)):
                    if y[j] == i:
                        posi = j + 1  # 1-based rank of the true match
                gt.append(sim[i])
                print(posi, len(anchor))
                ans += 1 / posi
            ans = ans / len(anchor)
            avg.append(ans)
    print("MRR ", np.mean(np.array(avg)))
    print("random (non-matching) distance: ", np.mean(np.array(cons)))
    print("ground-truth distance: ", np.mean(np.array(gt)))
def train(net, data_loader, train_optimizer, epoch):
    net.train()
    ds = data_loader.dataset
    for seq1, seq2, mask1, mask2 in tqdm(data_loader):
        input_ids1, attention_mask1 = split_seq(seq1.to(device)), split_seq(mask1.to(device))
        input_ids2, attention_mask2 = split_seq(seq2.to(device)), split_seq(mask2.to(device))
        for i in range(len(input_ids1)):
            # mask tokens for the MLM objective and take one optimizer step per chunk
            inputs, labels = ds.DataCollatorForLanguageModeling(input_ids1[i])
            output = net(input_ids=inputs, attention_mask=attention_mask1[i], labels=labels)
            train_optimizer.zero_grad()
            output.loss.backward()
            train_optimizer.step()
        for i in range(len(input_ids2)):
            inputs, labels = ds.DataCollatorForLanguageModeling(input_ids2[i])
            output = net(input_ids=inputs, attention_mask=attention_mask2[i], labels=labels)
            train_optimizer.zero_grad()
            output.loss.backward()
            train_optimizer.step()
def finetune_train(net, data_loader, train_optimizer, epoch):
    net.train()
    trainloss = 0
    triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
    for seq1, seq2, seq3, mask1, mask2, mask3 in tqdm(data_loader):
        input_ids1, attention_mask1 = split_seq(seq1.to(device)), split_seq(mask1.to(device))
        input_ids2, attention_mask2 = split_seq(seq2.to(device)), split_seq(mask2.to(device))
        input_ids3, attention_mask3 = split_seq(seq3.to(device)), split_seq(mask3.to(device))
        train_optimizer.zero_grad()
        anchor, pos, neg = 0, 0, 0
        # sum the [CLS] embeddings over all chunks of the anchor / positive / negative sequences
        for i in range(len(input_ids1)):
            output = net.bert(input_ids=input_ids1[i], attention_mask=attention_mask1[i])
            embeddings1 = output.last_hidden_state[:, 0:1, :]
            anchor = anchor + embeddings1
        for i in range(len(input_ids2)):
            output = net.bert(input_ids=input_ids2[i], attention_mask=attention_mask2[i])
            embeddings2 = output.last_hidden_state[:, 0:1, :]
            pos = pos + embeddings2
        for i in range(len(input_ids3)):
            output = net.bert(input_ids=input_ids3[i], attention_mask=attention_mask3[i])
            embeddings3 = output.last_hidden_state[:, 0:1, :]
            neg = neg + embeddings3
        loss = triplet_loss(anchor, pos, neg)
        loss.backward()
        train_optimizer.step()
        trainloss += loss.item()  # accumulate so the epoch total printed below is meaningful
    print(trainloss)
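# A minimal, hypothetical driver (not part of the original code) sketching how the four
# functions above could be combined. The loader arguments, epoch counts, learning rate,
# and the use of AdamW are illustrative assumptions; the loaders are expected to yield
# the tuples the functions above unpack.
def run_training(net, pretrain_loader, pretrain_eval_loader,
                 finetune_loader, finetune_eval_loader,
                 pretrain_epochs=1, finetune_epochs=1, lr=2e-5):
    optimizer = torch.optim.AdamW(net.parameters(), lr=lr)
    for epoch in range(pretrain_epochs):
        train(net, pretrain_loader, optimizer, epoch)    # MLM pre-training pass
        test(net, pretrain_eval_loader)                  # report MLM eval loss
    for epoch in range(finetune_epochs):
        finetune_train(net, finetune_loader, optimizer, epoch)  # triplet-loss fine-tuning
        finetune_eval(net, finetune_eval_loader, epoch)         # MRR / distance diagnostics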