def get_bleu(self, out, dec_out, n=None):
    """Sentence-level BLEU of candidate `out` against reference `dec_out`.

    Both token sequences are truncated at the first occurrence of
    ``self.padding_idx`` (if present).  When ``n`` is given, only n-grams
    of exactly that order are scored (a one-hot weight vector); otherwise
    nltk's default cumulative BLEU weights apply.
    """
    pad = self.padding_idx
    # Cut each sequence at the first padding token, if any.
    candidate = out[:out.index(pad)] if pad in out else out
    reference = dec_out[:dec_out.index(pad)] if pad in dec_out else dec_out
    refs = [reference]  # BLEU expects a list of reference sequences
    if n is None:
        return BLEU(refs, candidate)
    # One-hot weights: score only n-grams of order n.
    weights = tuple(1 if k == n - 1 else 0 for k in range(4))
    return BLEU(refs, candidate, weights=weights)
def get_bleu(out, dec_out, EOS_IDX):
    """Sentence-level BLEU between predicted and reference token tensors.

    Both tensors are converted to Python lists and truncated at the first
    occurrence of ``EOS_IDX`` before scoring.
    """
    def _truncate(tokens):
        # Cut the sequence at the first stop token, if present.
        return tokens[:tokens.index(EOS_IDX)] if EOS_IDX in tokens else tokens

    candidate = _truncate(out.tolist())
    reference = _truncate(dec_out.tolist())
    return BLEU([reference], candidate)
def evaluateN(encoder, decoder, sentence, ref, max_length=MAX_LENGTH):
    """Greedy-decode `sentence` and score the output against `ref` with BLEU-3.

    Runs the encoder over the tokenized input, then greedily decodes up to
    `max_length` tokens (stopping at EOS), and returns
    BLEU(reference_ids, decoded_ids) with uniform 3-gram weights.
    No gradients are tracked; this is evaluation only.
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()
        target_tensor = tensorFromSentence(output_lang, ref)
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size,
                                      device=device)
        # Feed the input one token at a time, accumulating encoder outputs.
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden  # hand the final encoder state to the decoder
        decoded_words = []
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            # Greedy decoding: take the single most likely token.
            topv, topi = decoder_output.topk(1)
            if topi.item() == EOS_token:
                # NOTE(review): appends the literal 1 on EOS — presumably 1 is
                # the EOS index in the output vocabulary; confirm.
                decoded_words.append(1)
                break
            else:
                decoded_words.append(topi.item())
            decoder_input = topi.squeeze().detach()
        #print(decoded_words, target_tensor.view(1,1,-1).int().tolist())
        # BLEU-3 of the decoded ids vs. the single reference id sequence.
        return BLEU(target_tensor.view(1, -1).int().tolist(), decoded_words,
                    weights=(1 / 3, 1 / 3, 1 / 3))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue May 4 23:42:52 2021

@author: ziyi
"""

from nltk.translate.bleu_score import sentence_bleu as BLEU

# Toy example: accumulate incremental BLEU-2 gains over growing prefixes
# of a prediction against a fixed reference.
seq_i = [1, 2, 3, 4, 5]   # reference sequence
pred_i = [1, 2, 3, 4, 6]  # predicted sequence (last token wrong)
W2 = (1 / 2, 1 / 2)       # bigram BLEU weights

for i in range(len(pred_i)):
    G = 0
    print(f'i = {i}')
    for j in range(len(pred_i) - i, len(pred_i) + 1):
        if j > 0:
            # Marginal BLEU gain from extending the prefix by one token.
            gain = (BLEU([seq_i], pred_i[:j], weights=W2)
                    - BLEU([seq_i], pred_i[:j - 1], weights=W2))
        else:
            gain = BLEU([seq_i], pred_i[:j], weights=W2)
        G += gain
        print(f'j = {j}')
        print(G)
    print(G)
def trainRL(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
            decoder_optimizer, criterion, alpha=0.5, max_length=MAX_LENGTH):
    """One mixed REINFORCE + supervised training step for the seq2seq pair.

    Samples an output sequence from the decoder's softmax, computes a
    supervised loss (lossSL) against the target tokens and a policy-gradient
    loss (lossRL) whose per-position return G is the incremental BLEU-3 gain
    of growing decoded prefixes, then backpropagates
    loss = (1 - alpha) * lossRL + alpha * lossSL and steps both optimizers.
    Returns the BLEU-3 score of the sampled sequence vs. the target.
    """
    encoder_hidden = encoder.initHidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size,
                                  device=device)
    # Encode the input one token at a time.
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                 encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    output_soft = []  # probability of each sampled token (keeps grad)
    # NOTE(review): decoded_tokens is a float tensor with no device=...;
    # elsewhere similar buffers are created on `device` — confirm intended.
    decoded_tokens = torch.zeros(max_length)
    softmax = nn.Softmax(dim=1)
    lossSL = 0
    for di in range(max_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        soft = softmax(decoder_output)
        # Sample the next token from the output distribution (exploration).
        topi = soft.multinomial(num_samples=1, replacement=True)
        topv = soft[0, topi]  # probability of the sampled token
        output_soft.append(topv)
        decoded_tokens[di] = topi.squeeze().detach()
        decoder_input = topi.squeeze().detach()  # detach from history as input
        # NOTE(review): bare except — presumably guards di >= target_length
        # (sampled sequence longer than the target), substituting token 1 as
        # the label; a bare except also hides real errors. Confirm intent.
        try:
            lossSL += criterion(decoder_output, target_tensor[di])
        except:
            lossSL += criterion(decoder_output,
                                torch.tensor([1], device=device))
        if decoder_input.item() == EOS_token:
            decoded_tokens = decoded_tokens[:di + 1]
            break
    lossRL = 0
    # REINFORCE: for each position (from the end backwards), G is the summed
    # incremental BLEU-3 gain over the remaining prefixes.
    # NOTE(review): i stops at len(output_soft) - 2, so position 0 (the first
    # sampled token) never receives an RL update — confirm intended.
    for i in range(len(output_soft) - 1):
        G = 0
        for j in range(len(output_soft) - i - 1, len(output_soft)):
            if j > 1:
                G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                             decoded_tokens[:j].int().tolist(),
                             weights=(1 / 3, 1 / 3, 1 / 3)) - BLEU(
                    target_tensor.view(1, -1).int().tolist(),
                    decoded_tokens[:j - 1].int().tolist(),
                    weights=(1 / 3, 1 / 3, 1 / 3))
            else:
                # j == 1: no previous prefix to subtract.
                G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                             decoded_tokens[:j].int().tolist(),
                             weights=(1 / 3, 1 / 3, 1 / 3))
        # -G * log(prob of sampled token): policy-gradient surrogate loss.
        lossRL += -G * torch.log(output_soft[len(output_soft) - i - 1])
    # Blend RL and supervised losses.
    loss = (1 - alpha) * lossRL + alpha * lossSL
    loss.backward(retain_graph=True)
    encoder_optimizer.step()
    decoder_optimizer.step()
    return BLEU(target_tensor.view(1, -1).int().tolist(),
                decoded_tokens.int().tolist(),
                weights=(1 / 3, 1 / 3, 1 / 3))
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """One supervised (MLE) training step, with random teacher forcing.

    Encodes the input, decodes `target_length` steps either feeding back the
    target token (teacher forcing) or the model's own greedy prediction,
    accumulates `criterion` over all steps, backpropagates and steps both
    optimizers.  Returns the BLEU-3 score of the greedy decode vs. the target.
    """
    encoder_hidden = encoder.initHidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size,
                                  device=device)
    loss = 0
    # Encode the input one token at a time.
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                 encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    # Randomly choose teacher forcing for this whole example.
    use_teacher_forcing = True if random.random(
    ) < teacher_forcing_ratio else False
    decoded_tokens = torch.zeros(target_length)
    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoded_tokens[di] = topi.squeeze().detach().clone()
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing
    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoded_tokens[di] = topi.squeeze().detach().clone()
            decoder_input = topi.squeeze().detach(
            )  # detach from history as input
            loss += criterion(decoder_output, target_tensor[di])
            # Stop early once the model emits EOS; truncate the record.
            if decoder_input.item() == EOS_token:
                decoded_tokens = decoded_tokens[:di + 1]
                break
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    # Score the (possibly truncated) greedy decode against the reference.
    return BLEU(target_tensor.view(1, -1).int().tolist(),
                decoded_tokens.int().tolist(),
                weights=(1 / 3, 1 / 3, 1 / 3))
# Data augmentation loop: for each instruction, generate back-translated
# paraphrases in parallel and keep those whose BLEU vs. the original falls
# inside (lower_bleu, upper_bleu) — similar enough to be correct, different
# enough to add variety. NOTE(review): this fragment may continue past the
# visible end ("save snapshot" code is referenced but not shown).
for i in range(i_init, len(data)):
    print(i)
    instance = data[i]
    ins_ls = instance['instructions']
    for ins in ins_ls:
# =============================================================================
#         #translate to target lang and then back
#         interval_sentence = translator.translate(ins,lang_tgt=lang)
#         translated_back = translator.translate(interval_sentence, lang_tgt='en')
# =============================================================================
        # Back-translate through every language in lang_ls in parallel.
        # NOTE(review): add_backed_ins presumably picks up `ins` via a
        # global/closure, since only the language is mapped — confirm.
        results = pool.map(add_backed_ins, lang_ls)
        # calculte bleu score and add good samples to the dataset
        for res_i in results:
            bscore = BLEU([ins], res_i)
            if bscore > lower_bleu and bscore < upper_bleu:
                data_new[i]['instructions'].append(res_i)
    # sleep in case of being blocked
    if i % 15 == 0:
        time.sleep(600)
    # timer and save snapshots
    if i % 100 == 0:
        time_i = time.time()
        print('done %s out of %s , time elapsed: %s, remaining: %s' %
              (i, len(data), time_i - time_st,
               (time_i - time_st) * len(data) / (3600 * (i + 1))))
        # save snapshot and delete last one
def trainAC(input_tensor, target_tensor, encoder1, decoder1, encoder2,
            decoder2, encoder1d, decoder1d, encoder2t, decoder2t,
            encoder1_optimizer, decoder1_optimizer, encoder2_optimizer,
            decoder2_optimizer, criterion, alpha, max_length=MAX_LENGTH):
    """One actor-critic style training step.

    Actor phase: samples a sequence with the delayed actor (encoder1d /
    decoder1d), trains the actor optimizers with a blend of policy-gradient
    loss (BLEU-3 incremental returns) and supervised loss.
    Critic phase: computes per-position returns and regresses the critic
    (encoder2 / decoder2) onto them with MSE.
    Returns the BLEU-3 score of the sampled sequence vs. the target.
    """
    encoder_hidden = encoder1.initHidden()
    encoder1_optimizer.zero_grad()
    decoder1_optimizer.zero_grad()
    encoder2_optimizer.zero_grad()
    decoder2_optimizer.zero_grad()
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    encoder_outputs = torch.zeros(max_length, encoder1d.hidden_size,
                                  device=device)
    # Encode with the delayed actor encoder.
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder1d(input_tensor[ei],
                                                   encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    output_soft = []  # probability of each sampled token (keeps grad)
    decoded_tokens = torch.zeros(max_length, dtype=torch.long, device=device)
    softmax = nn.Softmax(dim=1)
    lossSL = 0
    for di in range(max_length):
        decoder_output, decoder_hidden = decoder1d(decoder_input,
                                                   decoder_hidden)
        soft = softmax(decoder_output)
        # Sample the next token from the output distribution.
        topi = soft.multinomial(num_samples=1, replacement=True)
        topv = soft[0, topi]
        output_soft.append(topv)
        decoded_tokens[di] = topi.squeeze().detach()
        decoder_input = topi.squeeze().detach()  # detach from history as input
        # NOTE(review): bare except — presumably guards di >= target_length,
        # substituting token 1 as the label; it also hides real errors.
        try:
            lossSL += criterion(decoder_output, target_tensor[di])
        except:
            lossSL += criterion(decoder_output,
                                torch.tensor([1], device=device))
        if decoder_input.item() == EOS_token:
            decoded_tokens = decoded_tokens[:di + 1]
            break
    lossRL = 0
    # Policy-gradient loss using incremental BLEU-3 gains as returns.
    for i in range(len(output_soft) - 1):
        G = 0
        for j in range(len(output_soft) - i - 1, len(output_soft)):
            if j > 1:
                G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                             decoded_tokens[:j].int().tolist(),
                             weights=(1 / 3, 1 / 3, 1 / 3)) - BLEU(
                    target_tensor.view(1, -1).int().tolist(),
                    decoded_tokens[:j - 1].int().tolist(),
                    weights=(1 / 3, 1 / 3, 1 / 3))
            else:
                G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                             decoded_tokens[:j].int().tolist(),
                             weights=(1 / 3, 1 / 3, 1 / 3))
        lossRL += -G * torch.log(output_soft[len(output_soft) - i - 1])
    loss = (1 - alpha) * lossRL + alpha * lossSL
    loss.backward(retain_graph=True)
    encoder1_optimizer.step()
    decoder1_optimizer.step()
    # Per-position returns for the critic: cumulative incremental BLEU gains,
    # filled back-to-front.
    returnLs = torch.zeros(len(decoded_tokens), device=device)
    G = 0
    for j in range(len(decoded_tokens)):
        # NOTE(review): here the tensor slices are passed to BLEU without
        # .int().tolist(), unlike every other call in this file — confirm
        # nltk accepts tensor elements here, or that this was intended.
        G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                     decoded_tokens[:len(decoded_tokens) - j],
                     weights=(1 / 3, 1 / 3, 1 / 3)) - BLEU(
            target_tensor.view(1, -1).int().tolist(),
            decoded_tokens[:len(decoded_tokens) - j - 1],
            weights=(1 / 3, 1 / 3, 1 / 3))
        returnLs[len(decoded_tokens) - j - 1] = G
    # Critic phase: encode the target, then regress the critic's outputs
    # onto the computed returns along the sampled sequence.
    for ei in range(target_length):
        encoder_output, encoder_hidden = encoder2(target_tensor[ei],
                                                  encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    loss = 0
    mle = nn.MSELoss()
    for di in range(len(decoded_tokens)):
        decoder_output, decoder_hidden = decoder2(decoder_input,
                                                  decoder_hidden)
        loss += mle(decoder_output, returnLs[di])
        decoder_input = decoded_tokens[di].squeeze().detach()
    loss.backward()
    encoder2_optimizer.step()
    decoder2_optimizer.step()
    # NOTE(review): these rebind LOCAL names only — the caller's encoder1d /
    # decoder1d / encoder2t / decoder2t objects are NOT updated by this.
    # Looks like an attempted target-network sync; confirm intent.
    encoder1d = deepcopy(encoder1)
    decoder1d = deepcopy(decoder1)
    encoder2t = deepcopy(encoder2)
    decoder2t = deepcopy(decoder2)
    return BLEU(target_tensor.view(1, -1).int().tolist(),
                decoded_tokens.int().tolist(),
                weights=(1 / 3, 1 / 3, 1 / 3))
def pretrain2(input_tensor, target_tensor, encoder1, decoder1, encoder2,
              decoder2, encoder_optimizer, decoder_optimizer, criterion,
              max_length=MAX_LENGTH):
    """Pretrain the critic (encoder2/decoder2) against BLEU-based returns.

    First greedily decodes a sequence with the frozen actor
    (encoder1/decoder1) under no_grad, computes per-position returns as
    cumulative incremental BLEU-3 gains, then trains the critic to predict
    those returns along the decoded sequence.  Returns the final critic
    loss averaged over target_length.
    """
    with torch.no_grad():
        input_length = input_tensor.size()[0]
        target_length = target_tensor.size()[0]
        encoder_hidden = encoder1.initHidden()
        encoder_outputs = torch.zeros(max_length, encoder1.hidden_size,
                                      device=device)
        # Encode the input with the (frozen) actor encoder.
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder1(input_tensor[ei],
                                                      encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden
        decoded_words = []
        decoded_tokens = []
        # Greedy decode up to max_length, stopping at EOS.
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder1(decoder_input,
                                                      decoder_hidden)
            topv, topi = decoder_output.topk(1)
            if topi.item() == EOS_token:
                # NOTE(review): records the literal 1 on EOS — presumably 1
                # is the EOS index in the output vocabulary; confirm.
                decoded_words.append(1)
                decoded_tokens.append(
                    torch.tensor([[EOS_token]], device=device))
                break
            else:
                decoded_words.append(topi.item())
                decoded_tokens.append(topi)
            decoder_input = topi.squeeze().detach()
    # NOTE(review): the no_grad block must end before the critic forward pass
    # below, or loss.backward() would have no graph — dedent point inferred
    # from that; confirm against the original layout.
    returnLs = torch.zeros(len(decoded_words), device=device)
    # Per-position returns: cumulative incremental BLEU-3 gains, back-to-front.
    G = 0
    for j in range(len(decoded_words)):
        G = G + BLEU(target_tensor.view(1, -1).int().tolist(),
                     decoded_words[:len(decoded_words) - j],
                     weights=(1 / 3, 1 / 3, 1 / 3)) - BLEU(
            target_tensor.view(1, -1).int().tolist(),
            decoded_words[:len(decoded_words) - j - 1],
            weights=(1 / 3, 1 / 3, 1 / 3))
        returnLs[len(decoded_words) - j - 1] = G
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    # Critic phase: encode the target, then predict the return at each step
    # while feeding the actor's decoded tokens as inputs.
    for ei in range(target_length):
        encoder_output, encoder_hidden = encoder2(target_tensor[ei],
                                                  encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    loss = 0
    for di in range(len(decoded_words)):
        decoder_output, decoder_hidden = decoder2(decoder_input,
                                                  decoder_hidden)
        loss += criterion(decoder_output, returnLs[di])
        decoder_input = decoded_tokens[di].squeeze().detach()
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item() / target_length