Example 1
def get_bleu(self, out, dec_out, n=None):
    # Truncate candidate and reference at the first padding token, if any.
    if self.padding_idx in out:
        cnd = out[:out.index(self.padding_idx)]
    else:
        cnd = out
    if self.padding_idx in dec_out:
        ref = [dec_out[:dec_out.index(self.padding_idx)]]
    else:
        ref = [dec_out]
    if n is None:
        return BLEU(ref, cnd)
    # Score a single n-gram order by putting all weight on position n.
    w = [0, 0, 0, 0]
    w[n - 1] = 1
    return BLEU(ref, cnd, weights=tuple(w))
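A quick check of the weight trick above: putting all weight on position n makes sentence_bleu score a single n-gram order. A minimal sketch, assuming BLEU is NLTK's sentence_bleu as imported in Example 4; the token ids are made up:

from nltk.translate.bleu_score import sentence_bleu as BLEU

ref = [[1, 2, 3, 4, 5]]   # references: a list of token-id lists
cnd = [1, 2, 3, 4, 6]     # candidate: a single token-id list
print(BLEU(ref, cnd, weights=(0, 1, 0, 0)))  # 2-gram precision only: 0.75
print(BLEU(ref, cnd))                        # default uniform 4-gram BLEU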
Example 2
def get_bleu(out, dec_out, EOS_IDX):
    # Convert tensors to lists, then truncate both at the first EOS.
    out = out.tolist()
    dec_out = dec_out.tolist()
    stop_token = EOS_IDX
    if stop_token in out:
        cnd = out[:out.index(stop_token)]
    else:
        cnd = out

    if stop_token in dec_out:
        ref = [dec_out[:dec_out.index(stop_token)]]
    else:
        ref = [dec_out]

    bleu = BLEU(ref, cnd)

    return bleu
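A usage sketch for the function above, with hypothetical tensors; EOS_IDX = 2 and the token values are assumptions:

import torch
from nltk.translate.bleu_score import sentence_bleu as BLEU  # as in Example 4

out = torch.tensor([5, 9, 4, 7, 8, 2, 0])      # hypothesis: EOS (2), then padding
dec_out = torch.tensor([5, 9, 4, 7, 3, 2, 0])  # reference, same convention
# Scores [5, 9, 4, 7, 8] against [[5, 9, 4, 7, 3]].
print(get_bleu(out, dec_out, EOS_IDX=2))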
Example 3
def evaluateN(encoder, decoder, sentence, ref, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()
        target_tensor = tensorFromSentence(output_lang, ref)
        # Collected for an attention decoder, but unused by the decoder below.
        encoder_outputs = torch.zeros(max_length,
                                      encoder.hidden_size,
                                      device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)

            topv, topi = decoder_output.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append(EOS_token)  # EOS id (1 in this setup)
                break
            else:
                decoded_words.append(topi.item())
            decoder_input = topi.squeeze().detach()
        # 3-gram BLEU between the reference and the greedy decode.
        return BLEU(target_tensor.view(1, -1).int().tolist(),
                    decoded_words,
                    weights=(1 / 3, 1 / 3, 1 / 3))
Example 4
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue May  4 23:42:52 2021

@author: ziyi
"""

from nltk.translate.bleu_score import sentence_bleu as BLEU
seq_i = [1, 2, 3, 4, 5]
pred_i = [1, 2, 3, 4, 6]

for i in range(len(pred_i)):
    G = 0
    print('i = {}'.format(i))
    # Accumulate one-step BLEU gains over successively longer prefixes;
    # j >= 1 for every i here, so the else branch never runs.
    for j in range(len(pred_i) - i, len(pred_i) + 1):
        if j > 0:
            G = G + BLEU([seq_i], pred_i[:j], weights=(1 / 2, 1 / 2)) - BLEU(
                [seq_i], pred_i[:j - 1], weights=(1 / 2, 1 / 2))
        else:
            G = G + BLEU([seq_i], pred_i[:j], weights=(1 / 2, 1 / 2))
        print('j = {}'.format(j))
        print(G)
    print(G)
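The inner sum above telescopes: the one-step gains BLEU(pred_i[:j]) - BLEU(pred_i[:j-1]) cancel pairwise, so G always ends at the BLEU of the longest prefix scored. A standalone check of that identity, with the same data:

from nltk.translate.bleu_score import sentence_bleu as BLEU

seq_i = [1, 2, 3, 4, 5]
pred_i = [1, 2, 3, 4, 6]
# NLTK emits harmless zero-overlap warnings for the shortest prefixes,
# which it scores as (effectively) 0.
deltas = sum(BLEU([seq_i], pred_i[:j], weights=(1 / 2, 1 / 2)) -
             BLEU([seq_i], pred_i[:j - 1], weights=(1 / 2, 1 / 2))
             for j in range(1, len(pred_i) + 1))
full = BLEU([seq_i], pred_i, weights=(1 / 2, 1 / 2))
print(abs(deltas - full) < 1e-9)  # True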
Example 5
def trainRL(input_tensor,
            target_tensor,
            encoder,
            decoder,
            encoder_optimizer,
            decoder_optimizer,
            criterion,
            alpha=0.5,
            max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length,
                                  encoder.hidden_size,
                                  device=device)

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                 encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    output_soft = []  # probability assigned to each sampled token
    decoded_tokens = torch.zeros(max_length)
    softmax = nn.Softmax(dim=1)

    lossSL = 0
    for di in range(max_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        # Sample the next token from the softmax distribution instead of
        # taking the argmax, so the REINFORCE term below gets exploration.
        soft = softmax(decoder_output)
        topi = soft.multinomial(num_samples=1, replacement=True)
        topv = soft[0, topi]  # probability of the sampled token
        output_soft.append(topv)
        decoded_tokens[di] = topi.squeeze().detach()
        decoder_input = topi.squeeze().detach()  # detach from history as input
        try:
            lossSL += criterion(decoder_output, target_tensor[di])
        except IndexError:
            # Decoded past the end of the target: score against EOS (id 1).
            lossSL += criterion(decoder_output, torch.tensor([1],
                                                             device=device))
        if decoder_input.item() == EOS_token:
            decoded_tokens = decoded_tokens[:di + 1]
            break

    # REINFORCE term: G is the return (summed one-step BLEU gains) from each
    # sampled step onward; each step contributes -G * log p(sampled token).
    lossRL = 0
    for i in range(len(output_soft) - 1):

        G = 0
        for j in range(len(output_soft) - i - 1, len(output_soft)):
            if j > 1:
                # BLEU gain from extending the decoded prefix by one token.
                G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                             decoded_tokens[:j].int().tolist(),
                             weights=(1 / 3, 1 / 3, 1 / 3)) - BLEU(
                                 target_tensor.view(1, -1).int().tolist(),
                                 decoded_tokens[:j - 1].int().tolist(),
                                 weights=(1 / 3, 1 / 3, 1 / 3))
            else:
                G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                             decoded_tokens[:j].int().tolist(),
                             weights=(1 / 3, 1 / 3, 1 / 3))

        lossRL += -G * torch.log(output_soft[len(output_soft) - i - 1])

    # Mix the supervised and REINFORCE losses.
    loss = (1 - alpha) * lossRL + alpha * lossSL
    loss.backward(retain_graph=True)

    encoder_optimizer.step()
    decoder_optimizer.step()

    return BLEU(target_tensor.view(1, -1).int().tolist(),
                decoded_tokens.int().tolist(),
                weights=(1 / 3, 1 / 3, 1 / 3))
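The lossRL term above is a REINFORCE (score-function) objective: each sampled token's log-probability is weighted by the return G accumulated from that step to the end, so minimizing it raises the probability of tokens that led to BLEU gains. A stripped-down sketch with made-up per-step numbers:

import torch

# Hypothetical per-step quantities: probability assigned to each sampled
# token, and the return (summed BLEU gains) from that step onward.
sampled_probs = torch.tensor([0.40, 0.25, 0.30], requires_grad=True)
returns = torch.tensor([0.60, 0.45, 0.10])

loss_rl = -(returns * torch.log(sampled_probs)).sum()
loss_rl.backward()
print(sampled_probs.grad)  # -G_t / p_t at each step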
Example 6
def train(input_tensor,
          target_tensor,
          encoder,
          decoder,
          encoder_optimizer,
          decoder_optimizer,
          criterion,
          max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length,
                                  encoder.hidden_size,
                                  device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                 encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    decoded_tokens = torch.zeros(target_length)
    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoded_tokens[di] = topi.squeeze().detach().clone()
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoded_tokens[di] = topi.squeeze().detach().clone()
            decoder_input = topi.squeeze().detach()  # detach from history as input
            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                decoded_tokens = decoded_tokens[:di + 1]
                break
    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()
    # Report 3-gram BLEU of the decode against the target.
    return BLEU(target_tensor.view(1, -1).int().tolist(),
                decoded_tokens.int().tolist(),
                weights=(1 / 3, 1 / 3, 1 / 3))
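The use_teacher_forcing flip in train() happens once per sequence: either every decoder step is conditioned on the gold token, or every step feeds back the model's own argmax. A minimal sketch of the two regimes; decoder, decoder_input, decoder_hidden and teacher_forcing_ratio stand in for the objects above:

import random

def run_decoder(decoder, decoder_input, decoder_hidden, target_tensor,
                teacher_forcing_ratio=0.5):
    # One coin flip per sequence, as in train() above.
    use_teacher_forcing = random.random() < teacher_forcing_ratio
    for di in range(target_tensor.size(0)):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        _, topi = decoder_output.topk(1)
        if use_teacher_forcing:
            decoder_input = target_tensor[di]        # condition on the gold token
        else:
            decoder_input = topi.squeeze().detach()  # condition on own prediction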
Example 7
for i in range(i_init, len(data)):
    print(i)
    instance = data[i]
    ins_ls = instance['instructions']
    for ins in ins_ls:

        # Back-translate each instruction (to a pivot language and back to
        # English); add_backed_ins does the round trip and pool.map
        # parallelizes it across the languages in lang_ls.
        results = pool.map(add_backed_ins, lang_ls)

        # Calculate BLEU and keep paraphrases that fall inside the band.
        for res_i in results:
            bscore = BLEU([ins], res_i)
            if lower_bleu < bscore < upper_bleu:
                data_new[i]['instructions'].append(res_i)

    # sleep in case of being blocked
    if i % 15 == 0:
        time.sleep(600)

    # timer and save snapshots
    if i % 100 == 0:
        time_i = time.time()
        elapsed = time_i - time_st
        # Estimated seconds remaining at the current pace.
        remaining = elapsed * (len(data) - i - 1) / (i + 1)
        print('done %s out of %s , time elapsed: %s, remaining: %s' %
              (i, len(data), elapsed, remaining))

    # save snapshot and delete last one
Example 8
def trainAC(input_tensor,
            target_tensor,
            encoder1,
            decoder1,
            encoder2,
            decoder2,
            encoder1d,
            decoder1d,
            encoder2t,
            decoder2t,
            encoder1_optimizer,
            decoder1_optimizer,
            encoder2_optimizer,
            decoder2_optimizer,
            criterion,
            alpha,
            max_length=MAX_LENGTH):
    encoder_hidden = encoder1.initHidden()

    encoder1_optimizer.zero_grad()
    decoder1_optimizer.zero_grad()
    encoder2_optimizer.zero_grad()
    decoder2_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length,
                                  encoder1d.hidden_size,
                                  device=device)

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder1d(input_tensor[ei],
                                                   encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    output_soft = []  # probability assigned to each sampled token
    decoded_tokens = torch.zeros(max_length, dtype=torch.long, device=device)
    softmax = nn.Softmax(dim=1)

    lossSL = 0
    for di in range(max_length):
        decoder_output, decoder_hidden = decoder1d(decoder_input,
                                                   decoder_hidden)
        # Sample the next token from the softmax distribution.
        soft = softmax(decoder_output)
        topi = soft.multinomial(num_samples=1, replacement=True)
        topv = soft[0, topi]  # probability of the sampled token
        output_soft.append(topv)
        decoded_tokens[di] = topi.squeeze().detach()
        decoder_input = topi.squeeze().detach()  # detach from history as input
        try:
            lossSL += criterion(decoder_output, target_tensor[di])
        except IndexError:
            # Decoded past the end of the target: score against EOS (id 1).
            lossSL += criterion(decoder_output, torch.tensor([1],
                                                             device=device))
        if decoder_input.item() == EOS_token:
            decoded_tokens = decoded_tokens[:di + 1]
            break
    # REINFORCE term for the actor, as in trainRL above.
    lossRL = 0
    for i in range(len(output_soft) - 1):

        G = 0
        for j in range(len(output_soft) - i - 1, len(output_soft)):
            if j > 1:
                G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                             decoded_tokens[:j].int().tolist(),
                             weights=(1 / 3, 1 / 3, 1 / 3)) - BLEU(
                                 target_tensor.view(1, -1).int().tolist(),
                                 decoded_tokens[:j - 1].int().tolist(),
                                 weights=(1 / 3, 1 / 3, 1 / 3))
            else:
                G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                             decoded_tokens[:j].int().tolist(),
                             weights=(1 / 3, 1 / 3, 1 / 3))

        lossRL += -G * torch.log(output_soft[len(output_soft) - i - 1])

    loss = (1 - alpha) * lossRL + alpha * lossSL
    loss.backward(retain_graph=True)

    encoder1_optimizer.step()
    decoder1_optimizer.step()

    # Reward-to-go targets for the critic: returnLs[t] accumulates the
    # per-step BLEU gains from step t to the end of the decode.
    returnLs = torch.zeros(len(decoded_tokens), device=device)
    G = 0
    for j in range(len(decoded_tokens)):
        G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                     decoded_tokens[:len(decoded_tokens) - j].int().tolist(),
                     weights=(1 / 3, 1 / 3, 1 / 3)) - BLEU(
                         target_tensor.view(1, -1).int().tolist(),
                         decoded_tokens[:len(decoded_tokens) - j - 1].int().tolist(),
                         weights=(1 / 3, 1 / 3, 1 / 3))

        returnLs[len(decoded_tokens) - j - 1] = G

    for ei in range(target_length):

        encoder_output, encoder_hidden = encoder2(target_tensor[ei],
                                                  encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden
    loss = 0
    mse = nn.MSELoss()  # critic regression loss
    for di in range(len(decoded_tokens)):
        decoder_output, decoder_hidden = decoder2(decoder_input,
                                                  decoder_hidden)
        # Regress the critic's prediction toward the reward-to-go at this step.
        loss += mse(decoder_output, returnLs[di])
        decoder_input = decoded_tokens[di].squeeze().detach()
    loss.backward()

    encoder2_optimizer.step()
    decoder2_optimizer.step()

    # Refresh the delayed/target copies from the freshly updated networks.
    # Note this rebinds only the local names; callers must pick up the copies.
    encoder1d = deepcopy(encoder1)
    decoder1d = deepcopy(decoder1)
    encoder2t = deepcopy(encoder2)
    decoder2t = deepcopy(decoder2)

    return BLEU(target_tensor.view(1, -1).int().tolist(),
                decoded_tokens.int().tolist(),
                weights=(1 / 3, 1 / 3, 1 / 3))
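Both trainAC above and pretrain2 below fill returnLs backwards: returnLs[t] is the sum of per-step BLEU deltas from step t to the end, i.e. an undiscounted reward-to-go with r_t = BLEU(prefix up to t+1) - BLEU(prefix up to t). A standalone sketch of that computation with generic per-step rewards:

import torch

def reward_to_go(step_rewards):
    # G[t] = r[t] + r[t+1] + ... + r[T-1], computed in one backward pass.
    G = torch.zeros(len(step_rewards))
    running = 0.0
    for t in reversed(range(len(step_rewards))):
        running += step_rewards[t]
        G[t] = running
    return G

print(reward_to_go([0.1, 0.2, 0.3]))  # tensor([0.6000, 0.5000, 0.3000])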
Example 9
def pretrain2(input_tensor,
              target_tensor,
              encoder1,
              decoder1,
              encoder2,
              decoder2,
              encoder_optimizer,
              decoder_optimizer,
              criterion,
              max_length=MAX_LENGTH):
    with torch.no_grad():
        input_length = input_tensor.size()[0]
        target_length = target_tensor.size()[0]
        encoder_hidden = encoder1.initHidden()
        encoder_outputs = torch.zeros(max_length,
                                      encoder1.hidden_size,
                                      device=device)
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder1(input_tensor[ei],
                                                      encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoded_tokens = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder1(decoder_input,
                                                      decoder_hidden)
            topv, topi = decoder_output.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append(EOS_token)  # EOS id (1 in this setup)
                decoded_tokens.append(
                    torch.tensor([[EOS_token]], device=device))
                break
            else:
                decoded_words.append(topi.item())
                decoded_tokens.append(topi)
            decoder_input = topi.squeeze().detach()

    # Reward-to-go targets for the critic: returnLs[t] accumulates the
    # per-step BLEU gains from step t to the end of the decode.
    returnLs = torch.zeros(len(decoded_words), device=device)
    G = 0
    for j in range(len(decoded_words)):
        G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                     decoded_words[:len(decoded_words) - j],
                     weights=(1 / 3, 1 / 3, 1 / 3)) - BLEU(
                         target_tensor.view(1, -1).int().tolist(),
                         decoded_words[:len(decoded_words) - j - 1],
                         weights=(1 / 3, 1 / 3, 1 / 3))

        returnLs[len(decoded_words) - j - 1] = G
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    for ei in range(target_length):

        encoder_output, encoder_hidden = encoder2(target_tensor[ei],
                                                  encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden
    loss = 0
    for di in range(len(decoded_words)):
        decoder_output, decoder_hidden = decoder2(decoder_input,
                                                  decoder_hidden)
        # Regress the critic's prediction toward the reward-to-go at this step.
        loss += criterion(decoder_output, returnLs[di])
        decoder_input = decoded_tokens[di].squeeze().detach()
    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length