def greedy(self, sentences, max_ratio=2, train=False):
    """Translate a batch of sentences with greedy (argmax) decoding.

    At every step the highest-scoring word of each sentence is fed back to
    the decoder. A sentence stops being decoded once it produces
    ``data.EOS`` or once its output has reached ``max_ratio`` times the
    number of tokens ``data.tokenize`` finds in its input.

    Args:
        sentences: Batch of source sentences; each one is passed to
            ``data.tokenize`` and the whole batch to ``self.encode``.
        max_ratio: Maximum allowed output/input length ratio, in tokens.
        train: Forwarded to ``self._train`` and ``self.encode``.

    Returns:
        The result of ``self.trg_dictionary.ids2sentences`` applied to the
        per-sentence lists of generated word ids (EOS is never appended).
    """
    self._train(train)
    batch_size = len(sentences)
    input_lengths = [len(data.tokenize(sentence)) for sentence in sentences]
    hidden, context, context_lengths = self.encode(sentences, train)
    context_mask = self.mask(context_lengths)
    translations = [[] for _ in sentences]
    prev_words = batch_size * [data.SOS]
    pending = set(range(batch_size))
    output = self.device(self.decoder.initial_output(batch_size))
    while pending:
        # Decoder input is built from [prev_words], i.e. one row holding
        # one word id per sentence.
        var = self.device(
            Variable(torch.LongTensor([prev_words]), requires_grad=False))
        logprobs, hidden, output = self.decoder(
            var, batch_size * [1], self.decoder_embeddings, hidden, context,
            context_mask, output, self.generator)
        # Drop only the leading step dimension. A bare squeeze() would also
        # drop the batch dimension for a single-sentence batch, making
        # tolist() return an int and breaking the indexing below.
        prev_words = logprobs.max(
            dim=2)[1].squeeze(0).data.cpu().numpy().tolist()
        for i in pending.copy():
            if prev_words[i] == data.EOS:
                pending.discard(i)
            else:
                translations[i].append(prev_words[i])
                # Length cap relative to the source length.
                if len(translations[i]) >= max_ratio * input_lengths[i]:
                    pending.discard(i)
    return self.trg_dictionary.ids2sentences(translations)
def greedy(self, sentences, max_ratio=2, train=False):
    """Greedily decode a batch of sentences (TensorFlow variant).

    Each step feeds the previous argmax word of every sentence to the
    decoder. Decoding of a sentence ends when it emits ``data.EOS`` or when
    its output length reaches ``max_ratio`` times its tokenized input
    length.

    Args:
        sentences: Batch of source sentences; each one is passed to
            ``data.tokenize`` and the whole batch to ``self.encode``.
        max_ratio: Maximum allowed output/input length ratio, in tokens.
        train: Forwarded to ``self._train`` and ``self.encode``.

    Returns:
        The result of ``self.trg_dictionary.ids2sentences`` applied to the
        per-sentence lists of generated word ids (EOS is never appended).
    """
    self._train(train)
    source_lengths = [len(data.tokenize(sentence)) for sentence in sentences]
    hidden, context, context_lengths = self.encode(sentences, train)
    context_mask = self.mask(context_lengths)
    batch_size = len(sentences)
    generated = [[] for _ in sentences]
    prev_words = [data.SOS] * batch_size
    active = set(range(batch_size))
    output = self.decoder.initial_output(batch_size)
    while active:
        step_input = tf.Variable([prev_words], dtype=tf.int64,
                                 trainable=False)
        logprobs, hidden, output = self.decoder(
            step_input, [1] * batch_size, self.decoder_embeddings, hidden,
            context, context_mask, output, self.generator)
        # Argmax over axis 2, then drop the leading axis to get one word id
        # per sentence as a plain Python list.
        best_ids = tf.math.argmax(logprobs, axis=2)
        prev_words = tf.squeeze(best_ids, 0).numpy().tolist()
        for i in tuple(active):
            word = prev_words[i]
            if word == data.EOS:
                active.discard(i)
                continue
            generated[i].append(word)
            if len(generated[i]) >= max_ratio * source_lengths[i]:
                active.discard(i)
    return self.trg_dictionary.ids2sentences(generated)
def greedy(self, sentences, max_ratio=2, train=False, pass_att=False,
           no_noise=False, encodings=None, pass_context=False,
           detach_encoder=False, ncontrol=None):
    """Greedy decoding that also records attention picks and contexts.

    Args:
        sentences: Batch of source sentences. Their ``data.tokenize``
            lengths bound the output length. The name is then rebound to
            the sentences returned by ``self.encode`` (or carried in
            ``encodings``).
        max_ratio: Maximum allowed output/input length ratio, in tokens.
        train: Forwarded to ``self._train`` and ``self.encode``.
        pass_att: Forwarded to ``self.trg_dictionary.ids2sentences``.
        no_noise: Forwarded to ``self.encode``.
        encodings: Optional precomputed
            ``(hidden, context, context_lengths, sentences)`` tuple; when
            given, ``self.encode`` is not called.
        pass_context: When true, the stacked attention contexts are
            returned alongside the decoded sentences.
        detach_encoder: Forwarded to the decoder.
        ncontrol: Forwarded to the decoder.

    Returns:
        The result of ``ids2sentences``; if ``pass_context`` is true, a
        pair ``(ids2sentences result, torch.cat(per-step contexts))``.
    """
    self._train(train)
    input_lengths = [len(data.tokenize(sentence)) for sentence in sentences]
    if encodings is not None:
        hidden, context, context_lengths, sentences = encodings
    else:
        hidden, context, context_lengths, sentences = self.encode(
            sentences, train, no_noise=no_noise)
    context_mask = self.mask(context_lengths)
    batch_size = len(sentences)
    translations = [[] for _ in sentences]
    translations_att = [[] for _ in sentences]
    prev_words = batch_size * [data.SOS]
    pending = set(range(batch_size))
    output = self.device(self.decoder.initial_output(batch_size))
    context_list = []
    while pending:
        var = self.device(
            Variable(torch.LongTensor([prev_words]), requires_grad=False))
        # The decoder is always asked for attention scores and contexts
        # here, because both are unpacked below regardless of the caller's
        # pass_att / pass_context flags.
        logprobs, hidden, output, att_scores, att_contexts = self.decoder(
            var, batch_size * [1], self.decoder_embeddings, hidden, context,
            context_mask, output, self.generator, pass_att=True,
            pass_context=True, detach_encoder=detach_encoder,
            ncontrol=ncontrol)
        # One flag per sentence: 1 for sentences that already finished, so
        # their contexts are zeroed. `var` is built from [prev_words], so
        # its dim 0 is always 1; sizing the mask by that dimension produced
        # a single flag (for sentence 0) that was broadcast to the batch.
        # NOTE(review): assumes att_contexts is laid out as
        # (1, batch, dim), as the unsqueeze(0)/unsqueeze(2) implies -
        # confirm against the decoder.
        finished = [0 if i in pending else 1 for i in range(batch_size)]
        postmask = torch.ByteTensor(finished).unsqueeze(0).unsqueeze(2)
        att_contexts.masked_fill_(
            self.device(Variable(postmask, requires_grad=False)), 0)
        context_list.append(att_contexts)
        # Drop only the leading step dimension so a single-sentence batch
        # still yields a list with one word id.
        prev_words = logprobs.max(
            dim=2)[1].squeeze(0).data.cpu().numpy().tolist()
        # Indices of the two highest attention scores per sentence.
        # NOTE(review): the bare squeeze() also collapses the batch
        # dimension for a single-sentence batch; left unchanged because
        # the layout of att_scores is not visible here.
        prev_words_att = att_scores.topk(
            dim=2, k=2)[1].squeeze().data.cpu().numpy().tolist()
        for i in pending.copy():
            if prev_words[i] == data.EOS:
                pending.discard(i)
            else:
                translations[i].append(prev_words[i])
                translations_att[i].append(prev_words_att[i])
                if len(translations[i]) >= max_ratio * input_lengths[i]:
                    pending.discard(i)
    decoded = self.trg_dictionary.ids2sentences(
        translations, translations_att=translations_att,
        sentences=sentences, pass_att=pass_att)
    if not pass_context:
        return decoded
    return decoded, torch.cat(context_list)
def beam_search(self, sentences, beam_size=12, max_ratio=2, train=False):
    """Translate a batch of sentences with beam search.

    The decoder state is replicated ``beam_size`` times along dim 1, so
    that row ``sentence_index + rank * batch_size`` holds hypothesis
    ``rank`` of sentence ``sentence_index``.

    Args:
        sentences: Batch of source sentences; each one is passed to
            ``data.tokenize`` and the whole batch to ``self.encode``.
        beam_size: Number of hypotheses kept per sentence.
        max_ratio: Maximum allowed output/input length ratio, in tokens.
        train: Forwarded to ``self._train`` and ``self.encode``.

    Returns:
        The result of ``self.trg_dictionary.ids2sentences`` applied to the
        best word-id sequence found for each sentence.
    """
    self._train(train)
    batch_size = len(sentences)
    input_lengths = [len(data.tokenize(sentence)) for sentence in sentences]
    hidden, context, context_lengths = self.encode(sentences, train)
    translations = [[] for _ in sentences]
    pending = set(range(batch_size))

    hidden = hidden.repeat(1, beam_size, 1)
    context = context.repeat(1, beam_size, 1)
    context_lengths *= beam_size
    context_mask = self.mask(context_lengths)
    ones = beam_size * batch_size * [1]
    prev_words = beam_size * batch_size * [data.SOS]
    output = self.device(
        self.decoder.initial_output(beam_size * batch_size))

    translation_scores = batch_size * [-float('inf')]
    # (score, translation) per row. Only the rank-0 row of each sentence
    # starts with a finite score, so the first step expands one hypothesis.
    hypotheses = (batch_size * [(0.0, [])]
                  + (beam_size - 1) * batch_size * [(-float('inf'), [])])

    while pending:
        # Each iteration should update: prev_words, hidden, output
        var = self.device(
            Variable(torch.LongTensor([prev_words]), requires_grad=False))
        logprobs, hidden, output = self.decoder(
            var, ones, self.decoder_embeddings, hidden, context,
            context_mask, output, self.generator)
        # Greedy fallback for rows that are not rewritten below. Squeeze
        # only dim 0 so a single-row batch still yields a list.
        prev_words = logprobs.max(
            dim=2)[1].squeeze(0).data.cpu().numpy().tolist()
        # beam_size + 1 candidates per row: at most one of them can be EOS,
        # which leaves at least beam_size continuations.
        word_scores, words = logprobs.topk(
            k=beam_size + 1, dim=2, sorted=False)
        # (beam_size*batch_size) * (beam_size+1)
        word_scores = word_scores.squeeze(0).data.cpu().numpy().tolist()
        words = words.squeeze(0).data.cpu().numpy().tolist()

        for sentence_index in pending.copy():
            candidates = []  # (score, index, word)
            for rank in range(beam_size):
                index = sentence_index + rank * batch_size
                for i in range(beam_size + 1):
                    word = words[index][i]
                    score = hypotheses[index][0] + word_scores[index][i]
                    if word != data.EOS:
                        candidates.append((score, index, word))
                    elif score > translation_scores[sentence_index]:
                        # A finished hypothesis that beats the best
                        # complete translation found so far.
                        translations[sentence_index] = (
                            hypotheses[index][1] + [word])
                        translation_scores[sentence_index] = score

            best = []  # score, word, translation, hidden, output
            for score, current_index, word in sorted(
                    candidates, reverse=True)[:beam_size]:
                translation = hypotheses[current_index][1] + [word]
                # Clone the state slices. Indexing returns views of
                # hidden/output, and the write-back loop below overwrites
                # rows that later entries of `best` may still read from.
                best.append((score, word, translation,
                             hidden[:, current_index, :].data.clone(),
                             output[current_index].data.clone()))

            for rank, (score, word, translation, h, o) in enumerate(best):
                next_index = sentence_index + rank * batch_size
                hypotheses[next_index] = (score, translation)
                prev_words[next_index] = word
                hidden[:, next_index, :] = h
                output[next_index, :] = o

            # Stop on the length cap, or when the best finished translation
            # outscores the best open hypothesis.
            if (len(hypotheses[sentence_index][1])
                    >= max_ratio * input_lengths[sentence_index]
                    or translation_scores[sentence_index]
                    > hypotheses[sentence_index][0]):
                pending.discard(sentence_index)
                if len(translations[sentence_index]) == 0:
                    # No hypothesis ever produced EOS: fall back to the
                    # best unfinished one.
                    translations[sentence_index] = (
                        hypotheses[sentence_index][1])
                    translation_scores[sentence_index] = (
                        hypotheses[sentence_index][0])
    return self.trg_dictionary.ids2sentences(translations)
def beam_search(self, sentences, beam_size=12, max_ratio=2, train=False,
                rnk=2, noiseratio=0.5, pass_att=False, ncontrol=0):
    """Beam search that also tracks the attention picks of each word.

    The decoder state is replicated ``beam_size`` times along dim 1, so
    that row ``sentence_index + rank * batch_size`` holds hypothesis
    ``rank`` of sentence ``sentence_index``. Each translation is built as
    a list of ``(word, word_att)`` pairs and split into two parallel lists
    before being handed to ``ids2sentences``.

    Args:
        sentences: Batch of source sentences. Their ``data.tokenize``
            lengths bound the output length. The name is then rebound to
            the sentences returned by ``self.encode``.
        beam_size: Number of hypotheses kept per sentence.
        max_ratio: Maximum allowed output/input length ratio, in tokens.
        train: Forwarded to ``self._train`` and ``self.encode``.
        rnk: Not used by this method; kept so existing callers still work.
        noiseratio: Forwarded to ``self.encode``.
        pass_att: Forwarded to ``self.trg_dictionary.ids2sentences``.
        ncontrol: Forwarded to the decoder.

    Returns:
        The result of ``self.trg_dictionary.ids2sentences`` (called with
        ``testing=True``) for the best sequence found for each sentence.
    """
    self._train(train)
    batch_size = len(sentences)
    input_lengths = [len(data.tokenize(sentence)) for sentence in sentences]
    hidden, context, context_lengths, sentences = self.encode(
        sentences, train, noiseratio=noiseratio, testing=True)
    translations = [[] for _ in sentences]
    pending = set(range(batch_size))

    hidden = hidden.repeat(1, beam_size, 1)
    context = context.repeat(1, beam_size, 1)
    context_lengths *= beam_size
    context_mask = self.mask(context_lengths)
    ones = beam_size * batch_size * [1]
    prev_words = beam_size * batch_size * [data.SOS]
    output = self.device(
        self.decoder.initial_output(beam_size * batch_size))

    translation_scores = batch_size * [-float('inf')]
    # (score, translation) per row. Only the rank-0 row of each sentence
    # starts with a finite score, so the first step expands one hypothesis.
    hypotheses = (batch_size * [(0.0, [])]
                  + (beam_size - 1) * batch_size * [(-float('inf'), [])])

    while pending:
        # Each iteration should update: prev_words, hidden, output
        var = self.device(
            Variable(torch.LongTensor([prev_words]), requires_grad=False))
        logprobs, hidden, output, att_scores = self.decoder(
            var, ones, self.decoder_embeddings, hidden, context,
            context_mask, output, self.generator, pass_att=True,
            ncontrol=ncontrol)
        # Greedy fallback for rows that are not rewritten below. Squeeze
        # only dim 0 so a single-row batch still yields a list.
        prev_words = logprobs.max(
            dim=2)[1].squeeze(0).data.cpu().numpy().tolist()
        # Indices of the two highest attention scores per row.
        # NOTE(review): the bare squeeze() collapses every size-1
        # dimension; left unchanged because the layout of att_scores is
        # not visible here.
        prev_words_att = att_scores.topk(
            dim=2, k=2)[1].squeeze().data.cpu().numpy().tolist()
        # beam_size + 1 candidates per row: at most one of them can be EOS,
        # which leaves at least beam_size continuations.
        word_scores, words = logprobs.topk(
            k=beam_size + 1, dim=2, sorted=False)
        # (beam_size*batch_size) * (beam_size+1)
        word_scores = word_scores.squeeze(0).data.cpu().numpy().tolist()
        words = words.squeeze(0).data.cpu().numpy().tolist()

        for sentence_index in pending.copy():
            # Gather every continuation of this sentence's current
            # hypotheses. Unfinished ones compete for the next beam, while
            # finished ones (EOS) compete for the best full translation.
            candidates = []  # (score, index, word, word_att)
            for rank in range(beam_size):
                index = sentence_index + rank * batch_size
                for i in range(beam_size + 1):
                    word = words[index][i]
                    word_att = prev_words_att[index]
                    score = hypotheses[index][0] + word_scores[index][i]
                    if word != data.EOS:
                        candidates.append((score, index, word, word_att))
                    elif score > translation_scores[sentence_index]:
                        translations[sentence_index] = (
                            hypotheses[index][1] + [(word, word_att)])
                        translation_scores[sentence_index] = score

            # Keep the beam_size best unfinished continuations.
            best = []  # score, word, word_att, translation, hidden, output
            for score, current_index, word, word_att in sorted(
                    candidates, reverse=True)[:beam_size]:
                translation = (hypotheses[current_index][1]
                               + [(word, word_att)])
                # Clone the state slices. Indexing returns views of
                # hidden/output, and the write-back loop below overwrites
                # rows that later entries of `best` may still read from.
                best.append((score, word, word_att, translation,
                             hidden[:, current_index, :].data.clone(),
                             output[current_index].data.clone()))

            # Write the surviving hypotheses back into this sentence's rows.
            for rank, (score, word, word_att, translation, h, o) in \
                    enumerate(best):
                next_index = sentence_index + rank * batch_size
                hypotheses[next_index] = (score, translation)
                prev_words[next_index] = word
                hidden[:, next_index, :] = h
                output[next_index, :] = o

            # Stop on the length cap, or when the best finished translation
            # outscores the best open hypothesis.
            if (len(hypotheses[sentence_index][1])
                    >= max_ratio * input_lengths[sentence_index]
                    or translation_scores[sentence_index]
                    > hypotheses[sentence_index][0]):
                pending.discard(sentence_index)
                if len(translations[sentence_index]) == 0:
                    # No hypothesis ever produced EOS: fall back to the
                    # best unfinished one.
                    translations[sentence_index] = (
                        hypotheses[sentence_index][1])
                    translation_scores[sentence_index] = (
                        hypotheses[sentence_index][0])

    # Split the (word, word_att) pairs into two parallel nested lists.
    translations_att = [[word_att for _, word_att in translation]
                        for translation in translations]
    translations = [[word for word, _ in translation]
                    for translation in translations]
    return self.trg_dictionary.ids2sentences(
        translations, translations_att=translations_att,
        sentences=sentences, pass_att=pass_att, testing=True)
def beam_search(self, sentences, beam_size=12, max_ratio=2, train=False):
    """Translate a batch of sentences with beam search (TensorFlow variant).

    The decoder state is tiled ``beam_size`` times along axis 1, so that
    row ``sentence_index + rank * batch_size`` holds hypothesis ``rank`` of
    sentence ``sentence_index``. Beam reordering is done on NumPy copies of
    the decoder state, which are converted back to tensors at the start of
    the next step.

    Args:
        sentences: Batch of source sentences; each one is passed to
            ``data.tokenize`` and the whole batch to ``self.encode``.
        beam_size: Number of hypotheses kept per sentence.
        max_ratio: Maximum allowed output/input length ratio, in tokens.
        train: Forwarded to ``self._train`` and ``self.encode``.

    Returns:
        The result of ``self.trg_dictionary.ids2sentences`` applied to the
        best word-id sequence found for each sentence.
    """
    self._train(train)
    batch_size = len(sentences)
    input_lengths = [len(data.tokenize(sentence)) for sentence in sentences]
    hidden, context, context_lengths = self.encode(sentences, train)
    translations = [[] for _ in sentences]
    pending = set(range(batch_size))

    # Tile whole batches along axis 1 (rows: s0, s1, ..., s0, s1, ...).
    # repeat_elements would interleave instead (s0, s0, ..., s1, s1, ...),
    # which does not match the `sentence_index + rank * batch_size`
    # indexing used below.
    hidden = tf.concat(beam_size * [hidden], axis=1)
    context = tf.concat(beam_size * [context], axis=1)
    context_lengths *= beam_size
    context_mask = self.mask(context_lengths)
    ones = beam_size * batch_size * [1]
    prev_words = beam_size * batch_size * [data.SOS]
    output = self.decoder.initial_output(beam_size * batch_size)

    translation_scores = batch_size * [-float('inf')]
    # (score, translation) per row. Only the rank-0 row of each sentence
    # starts with a finite score, so the first step expands one hypothesis.
    hypotheses = (batch_size * [(0.0, [])]
                  + (beam_size - 1) * batch_size * [(-float('inf'), [])])

    hidden_npy = None
    output_npy = None
    while pending:
        # Each iteration should update: prev_words, hidden, output
        if hidden_npy is not None:
            # Pick up the beam reordering done on the NumPy copies.
            hidden = tf.convert_to_tensor(hidden_npy, dtype=tf.float32)
            output = tf.convert_to_tensor(output_npy, dtype=tf.float32)
        var = tf.Variable([prev_words], dtype=tf.int64, trainable=False)
        logprobs, hidden, output = self.decoder(
            var, ones, self.decoder_embeddings, hidden, context,
            context_mask, output, self.generator)
        # Greedy fallback for rows that are not rewritten below.
        prev_words = tf.squeeze(
            tf.math.argmax(logprobs, axis=2), 0).numpy().tolist()
        # beam_size + 1 candidates per row: at most one of them can be EOS,
        # which leaves at least beam_size continuations.
        word_scores, words = tf.raw_ops.TopKV2(
            input=logprobs, k=beam_size + 1, sorted=False)
        # (beam_size*batch_size) * (beam_size+1)
        word_scores = tf.squeeze(word_scores, [0]).numpy().tolist()
        words = tf.squeeze(words, [0]).numpy().tolist()
        hidden_npy = hidden.numpy()
        output_npy = output.numpy()

        for sentence_index in pending.copy():
            candidates = []  # (score, index, word)
            for rank in range(beam_size):
                index = sentence_index + rank * batch_size
                for i in range(beam_size + 1):
                    word = words[index][i]
                    score = hypotheses[index][0] + word_scores[index][i]
                    if word != data.EOS:
                        candidates.append((score, index, word))
                    elif score > translation_scores[sentence_index]:
                        # A finished hypothesis that beats the best
                        # complete translation found so far.
                        translations[sentence_index] = (
                            hypotheses[index][1] + [word])
                        translation_scores[sentence_index] = score

            best = []  # score, word, translation, hidden, output
            for score, current_index, word in sorted(
                    candidates, reverse=True)[:beam_size]:
                translation = hypotheses[current_index][1] + [word]
                # Copy the state slices. NumPy basic indexing returns
                # views, and the write-back loop below overwrites rows that
                # later entries of `best` may still read from.
                best.append((score, word, translation,
                             hidden_npy[:, current_index, :].copy(),
                             output_npy[current_index].copy()))

            for rank, (score, word, translation, h, o) in enumerate(best):
                next_index = sentence_index + rank * batch_size
                hypotheses[next_index] = (score, translation)
                prev_words[next_index] = word
                hidden_npy[:, next_index, :] = h
                output_npy[next_index, :] = o

            # Stop on the length cap, or when the best finished translation
            # outscores the best open hypothesis.
            if (len(hypotheses[sentence_index][1])
                    >= max_ratio * input_lengths[sentence_index]
                    or translation_scores[sentence_index]
                    > hypotheses[sentence_index][0]):
                pending.discard(sentence_index)
                if len(translations[sentence_index]) == 0:
                    # No hypothesis ever produced EOS: fall back to the
                    # best unfinished one.
                    translations[sentence_index] = (
                        hypotheses[sentence_index][1])
                    translation_scores[sentence_index] = (
                        hypotheses[sentence_index][0])
    return self.trg_dictionary.ids2sentences(translations)