Example #1
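The functions below appear to rely on a few common imports and project helpers that are not shown in the snippet. A minimal setup sketch, assuming a global `device` and a `split_seq` helper that chunks long sequences; the chunk length, and whether the real helper pads the last chunk, are assumptions:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def split_seq(tensor, chunk_len=512):
    # Hypothetical helper: split a (batch, seq_len) tensor along the sequence
    # dimension into a list of (batch, <=chunk_len) chunks so that long inputs
    # fit the model's context window.
    return list(torch.split(tensor, chunk_len, dim=1))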
def test(net, data_loader):
    """Evaluate masked-language-modeling loss on both sides of each pair."""
    net.eval()
    ds = data_loader.dataset
    total_loss = 0.0
    for seq1, seq2, mask1, mask2 in tqdm(data_loader):
        # Split each long sequence (and its attention mask) into model-sized chunks.
        input_ids1, attention_mask1 = split_seq(seq1.to(device)), split_seq(mask1.to(device))
        input_ids2, attention_mask2 = split_seq(seq2.to(device)), split_seq(mask2.to(device))
        with torch.no_grad():
            for i in range(len(input_ids1)):
                # Randomly mask tokens and build MLM labels for this chunk.
                masked_ids, labels = ds.DataCollatorForLanguageModeling(input_ids1[i])
                output = net(input_ids=masked_ids,
                             attention_mask=attention_mask1[i],
                             labels=labels)
                total_loss += output.loss.item()

            for i in range(len(input_ids2)):
                masked_ids, labels = ds.DataCollatorForLanguageModeling(input_ids2[i])
                output = net(input_ids=masked_ids,
                             attention_mask=attention_mask2[i],
                             labels=labels)
                total_loss += output.loss.item()
    print("eval loss ", total_loss)
def finetune_eval(net, data_loader, epoch):
    """Rank each anchor's true positive among all positives and report MRR."""
    net.eval()
    ds = data_loader.dataset

    with torch.no_grad():
        avg = []   # per-batch MRR
        gt = []    # distances between anchors and their true positives
        cons = []  # distances between anchors and non-matching positives
        for seq1, seq2, seq3, mask1, mask2, mask3 in tqdm(data_loader):
            # The negative sample (seq3/mask3) is not needed for evaluation.
            input_ids1, attention_mask1 = split_seq(seq1.to(device)), split_seq(mask1.to(device))
            input_ids2, attention_mask2 = split_seq(seq2.to(device)), split_seq(mask2.to(device))
            anchor, pos = 0, 0
            # Average the [CLS] embedding over all chunks of the anchor sequence.
            for i in range(len(input_ids1)):
                output = net.bert(input_ids=input_ids1[i],
                                  attention_mask=attention_mask1[i])
                embeddings1 = output.last_hidden_state[:, 0:1, :]
                anchor = anchor + embeddings1
            anchor = anchor / len(input_ids1)
            anchor = F.normalize(anchor, dim=-1)  # normalize over the hidden dimension
            # Same for the positive sequence.
            for i in range(len(input_ids2)):
                output = net.bert(input_ids=input_ids2[i],
                                  attention_mask=attention_mask2[i])
                embeddings2 = output.last_hidden_state[:, 0:1, :]
                pos = pos + embeddings2
            pos = pos / len(input_ids2)
            pos = F.normalize(pos, dim=-1)
            ans = 0
            for i in range(len(anchor)):  # rank every anchor against all positives
                vA = anchor[i].cpu().numpy()[0]
                dist = []
                for j in range(len(pos)):
                    vB = pos[j].cpu().numpy()[0]
                    dist.append(np.linalg.norm(vA - vB))
                    if j != i:
                        cons.append(dist[-1])
                dist = np.array(dist)
                y = np.argsort(dist)
                posi = 0
                for j in range(len(pos)):
                    if y[j] == i:
                        posi = j + 1  # 1-based rank of the true positive
                gt.append(dist[i])
                ans += 1 / posi

            ans = ans / len(anchor)
            avg.append(ans)
        print("MRR ", np.mean(np.array(avg)))
        print("random-pair distance: ", np.mean(np.array(cons)))
        print("true-pair distance: ", np.mean(np.array(gt)))
def train(net, data_loader, train_optimizer, epoch):
    """One epoch of masked-language-modeling pretraining over both sides of each pair."""
    net.train()
    ds = data_loader.dataset
    for seq1, seq2, mask1, mask2 in tqdm(data_loader):
        input_ids1, attention_mask1 = split_seq(seq1.to(device)), split_seq(mask1.to(device))
        input_ids2, attention_mask2 = split_seq(seq2.to(device)), split_seq(mask2.to(device))
        for i in range(len(input_ids1)):
            masked_ids, labels = ds.DataCollatorForLanguageModeling(input_ids1[i])
            output = net(input_ids=masked_ids, attention_mask=attention_mask1[i], labels=labels)
            train_optimizer.zero_grad()
            output.loss.backward()
            train_optimizer.step()
        for i in range(len(input_ids2)):
            masked_ids, labels = ds.DataCollatorForLanguageModeling(input_ids2[i])
            output = net(input_ids=masked_ids, attention_mask=attention_mask2[i], labels=labels)
            train_optimizer.zero_grad()
            output.loss.backward()
            train_optimizer.step()
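A minimal driver for the MLM pretraining stage, assuming a BertForMaskedLM-style backbone (which also exposes the .bert encoder used during fine-tuning), an AdamW optimizer, and loaders that yield (seq1, seq2, mask1, mask2) batches; the model name, learning rate, epoch count, and loader names are placeholders:

from transformers import BertForMaskedLM

net = BertForMaskedLM.from_pretrained("bert-base-uncased").to(device)
optimizer = torch.optim.AdamW(net.parameters(), lr=2e-5)
for epoch in range(1, 4):
    train(net, pretrain_loader, optimizer, epoch)   # pretrain_loader: pairs of sequences/masks
    test(net, pretrain_eval_loader)                 # same batch format as pretrain_loader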
Example #4
def finetune_train(net, data_loader, train_optimizer, epoch):
    """One epoch of triplet fine-tuning on (anchor, positive, negative) sequences."""
    net.train()
    ds = data_loader.dataset
    trainloss = 0.0
    triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
    for seq1, seq2, seq3, mask1, mask2, mask3 in tqdm(data_loader):
        input_ids1, attention_mask1 = split_seq(seq1.to(device)), split_seq(mask1.to(device))
        input_ids2, attention_mask2 = split_seq(seq2.to(device)), split_seq(mask2.to(device))
        input_ids3, attention_mask3 = split_seq(seq3.to(device)), split_seq(mask3.to(device))
        train_optimizer.zero_grad()
        anchor, pos, neg = 0, 0, 0
        # Sum the [CLS] embedding over all chunks of each sequence.
        for i in range(len(input_ids1)):
            output = net.bert(input_ids=input_ids1[i], attention_mask=attention_mask1[i])
            anchor = anchor + output.last_hidden_state[:, 0:1, :]

        for i in range(len(input_ids2)):
            output = net.bert(input_ids=input_ids2[i], attention_mask=attention_mask2[i])
            pos = pos + output.last_hidden_state[:, 0:1, :]

        for i in range(len(input_ids3)):
            output = net.bert(input_ids=input_ids3[i], attention_mask=attention_mask3[i])
            neg = neg + output.last_hidden_state[:, 0:1, :]
        loss = triplet_loss(anchor, pos, neg)
        loss.backward()
        train_optimizer.step()
        trainloss += loss.item()  # accumulate so the epoch total is actually reported
    print(trainloss)
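A corresponding driver for the triplet fine-tuning stage, assuming loaders that yield (seq1, seq2, seq3, mask1, mask2, mask3) triples; loader names and hyperparameters are placeholders:

optimizer = torch.optim.AdamW(net.parameters(), lr=1e-5)
for epoch in range(1, 4):
    finetune_train(net, triplet_train_loader, optimizer, epoch)
    finetune_eval(net, triplet_eval_loader, epoch)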