def predict_first_name(self, inp, k=10, max_len=40, len_norm=False):
    '''
    Predict first-name candidates for ``inp`` with beam search.

    Parameters
    ----------
    inp : str
        input text, encoded with ``self.fn_tokenizer``
    k : int
        beam width forwarded to ``beam_search``
    max_len : int
        maximum length forwarded to ``beam_search``
    len_norm : bool
        if True, divide every score by the length of its decoded candidate

    Returns
    -------
    list of tuple
        ``(candidate, score)`` pairs in the order returned by
        ``beam_search``; ``score`` is a Python float
    '''
    seq = self.fn_tokenizer.texts_to_sequences([inp])
    seq = torch.tensor(seq).long()
    end_token = self.fn_tokenizer.word_index['$']

    with torch.no_grad():
        kseq, kscore = beam_search(seq, self.fn_model, end_token, k, max_len)

    # Decode with the same tokenizer that encoded the input and supplied
    # end_token. The previous code decoded with self.ln_tokenizer, whose
    # index-to-character mapping is not guaranteed to match fn_tokenizer.
    ksent = [seq2text(x, self.fn_tokenizer, end_token) for x in kseq]

    # NOTE(review): kscore is presumably a log-probability per beam, which
    # np.exp turns into a probability - confirm against beam_search.
    kscore = np.exp(kscore)
    if len_norm:
        sent_len = [len(x) for x in ksent]
        kscore = kscore / sent_len

    return [(sent, float(score)) for sent, score in zip(ksent, kscore)]
def predict_current_word(self, inp, k=5, max_len=10, len_norm=False):
    '''
    Propose completions for the word currently being typed.

    Parameters
    ----------
    inp : str
        input text, encoded with ``self.tokenizer``
    k : int
        beam width forwarded to ``beam_search``
    max_len : int
        maximum length forwarded to ``beam_search``
    len_norm : bool
        if True, divide every score by the length of its decoded candidate

    Returns
    -------
    dict
        maps ``inp[1:] + candidate + ' '`` to its score as a Python float;
        when two beams produce the same key the later one wins
    '''
    encoded = torch.tensor(self.tokenizer.texts_to_sequences([inp])).long()
    # A space terminates a word, so it doubles as the beam-search stop token.
    stop_id = self.tokenizer.word_index[' ']

    with torch.no_grad():
        beams, beam_scores = beam_search(encoded, self.model, stop_id, k, max_len)

    candidates = [seq2text(beam, self.tokenizer, stop_id) for beam in beams]

    # NOTE(review): beam_scores are presumably log-probabilities - confirm
    # against beam_search.
    weights = np.exp(beam_scores)
    if len_norm:
        weights = weights / [len(candidate) for candidate in candidates]

    completions = {}
    for candidate, weight in zip(candidates, weights):
        # The first character of inp is dropped from every returned key.
        completions[inp[1:] + candidate + ' '] = float(weight)
    return completions
def test_model2(model, test_loader, device, word_dict):
    '''
    Decode every test batch with beam search and save the predictions.

    Parameters
    ----------
    model : object
        model exposing ``eval``, ``get_initial_state`` and ``generate``
    test_loader : iterable
        yields 6-element batches whose first element is the model input;
        ``test_loader.dataset.vocab`` must contain ``'<eos>'``
    device : object
        not used by this function
    word_dict : object
        not used by this function (closest-word mapping is disabled)

    Returns
    -------
    None
        predictions are handed to ``save_test_results``; attention weights
        are written to ``attention_weights_test.pt``
    '''
    with torch.no_grad():
        model.eval()
        started_at = time.time()

        vocab = test_loader.dataset.vocab
        eos_token = vocab.index('<eos>')
        all_predictions = []

        for step, (inputs, _, _, _, _, _seq_order) in enumerate(test_loader,
                                                                start=1):
            hypotheses = beam_search(model.get_initial_state, model.generate,
                                     inputs, eos_token, batch_size=1,
                                     beam_width=8, num_hypotheses=1,
                                     max_length=250)

            # Drop the first and the last value of every decoded sequence.
            decoded = [hyp.to_sequence_of_values()[1:-1]
                       for hyp in hypotheses]
            attention_weights = [hyp.to_sequence_of_extras()
                                 for hyp in hypotheses]

            labels = generate_labels_string(decoded, vocab)

            # Written on every iteration to the same path, so the file ends
            # up holding the weights of the last batch only.
            torch.save(attention_weights, 'attention_weights_test.pt')

            all_predictions.extend(labels)
            print('Test Iteration: %d/%d' % (step, len(test_loader)),
                  end="\r", flush=True)

        # Mapping predicted strings to their closest known words through
        # word_dict is currently disabled.

        # Persist the predictions (csv file).
        save_test_results(all_predictions)

        finished_at = time.time()
        print('\nTotal Test Predictions: %d Time: %d s' %
              (len(all_predictions), finished_at - started_at))
def beam_search_reply(self, persona, message, beam_width):
    '''
    Generate replies to a message with beam search.

    Parameters
    ----------
    persona : str
        persona description
    message : str
        message to reply to
    beam_width : int
        number of beams to keep

    Returns
    -------
    list of str
        beam_width replies from most likely to least likely
    '''

    def process_inputs(persona, msg):
        # Wrap a text in the start/end tokens, encode it to the given
        # length and convert it to a tensor.
        def to_tensor(text, length):
            wrapped = ' '.join(
                (pre.START_SEQ_TOKEN, text, pre.END_SEQ_TOKEN))
            return tf.convert_to_tensor(
                pre.encode_sequences(self.tokenizer, length, [wrapped]))

        persona_ids = to_tensor(persona, self.persona_length)
        msg_ids = to_tensor(msg, self.msg_length)

        # The encoder is started from an all-zero state.
        persona_enc, msg_enc, *rnn_state = self.encoder(
            [persona_ids, msg_ids, tf.zeros((1, LSTM_DIM))])
        return [persona_enc, msg_enc, rnn_state]

    def pred_function(inputs, state, last_word):
        # One decoder step for a single beam.
        if state is None:
            # First call: only the encoder outputs exist, so start from an
            # all-zero context vector.
            persona_enc, msg_enc, rnn_state = inputs
            state = [persona_enc, msg_enc,
                     tf.zeros((1, LSTM_DIM * 4)), rnn_state]

        persona_enc, msg_enc, context, rnn_state = state
        step_input = tf.expand_dims([last_word], 0)

        logits, _, _, context, *rnn_state = self.decoder(
            [step_input, persona_enc, msg_enc, False, context, rnn_state])

        # Output for this step plus the state to feed into the next one.
        return logits[0], [persona_enc, msg_enc, context, rnn_state]

    start_id = self.tokenizer.word_index[pre.START_SEQ_TOKEN]
    end_id = self.tokenizer.word_index[pre.END_SEQ_TOKEN]

    candidates = beam_search(persona, message, process_inputs,
                             pred_function, self.reply_length,
                             start_id, end_id, beam_width)

    # Turn every sequence of word indices back into a sentence.
    return [
        " ".join(pre.index_to_word(idx, self.tokenizer)
                 for idx in candidate)
        for candidate in candidates
    ]
def beam_search_reply(self, persona, message, beam_width):
    '''
    Generate replies to a message with beam search over the transformer.

    Parameters
    ----------
    persona : str
        persona description
    message : str
        message to reply to
    beam_width : int
        number of beams to keep

    Returns
    -------
    list of str
        beam_width replies from most likely to least likely
    '''
    # Special tokens are addressed as offsets past the tokenizer vocabulary.
    sos = self.tokenizer.vocab_size + SOS
    eos = self.tokenizer.vocab_size + EOS

    def process_inputs(persona, msg):
        tokens = encode_sentence(persona, msg, self.tokenizer, None)
        segments = generate_segment_list(
            tokens, len(tokens), self.tokenizer.vocab_size + SEP, False)

        # Add a leading dimension of size 1 to each sequence; the decoder
        # output starts with the start-of-sequence token only.
        return [
            tf.expand_dims(tokens, 0),
            tf.expand_dims(segments, 0),
            tf.expand_dims([sos], 0),
        ]

    def pred_function(inputs, state, last_word):
        # One decoding step for a single beam.
        if state is None:
            # First call: the output sequence already holds the start token.
            input_seq, seg_seq, out_seq = inputs
        else:
            input_seq, seg_seq, out_seq = state
            # Append the previously chosen word to the output sequence.
            appended = tf.expand_dims(
                tf.convert_to_tensor([last_word], dtype=tf.int32), 0)
            out_seq = tf.concat([out_seq, appended], axis=-1)

        encoder_mask, look_ahead_mask, decoder_mask = create_masks(
            input_seq, out_seq)

        pred, _ = self.transformer([
            input_seq, seg_seq, out_seq, False,
            encoder_mask, look_ahead_mask, decoder_mask,
        ])

        # Keep only the prediction for the last position.
        logits = tf.squeeze(pred[:, -1:, :])

        # Output for this step plus the state to feed into the next one.
        return logits, [input_seq, seg_seq, out_seq]

    candidates = beam_search(persona, message, process_inputs,
                             pred_function, self.out_seq_length,
                             sos, eos, beam_width)

    # Ids at or above sos are dropped before decoding.
    return [
        self.tokenizer.decode([token for token in candidate if token < sos])
        for candidate in candidates
    ]
def beam_search_reply(self, persona, message, beam_width):
    '''
    Generate replies to a message with beam search.

    Parameters
    ----------
    persona : str
        persona description
    message : str
        message to reply to
    beam_width : int
        number of beams to keep

    Returns
    -------
    list of str
        beam_width replies from most likely to least likely
    '''

    def process_inputs(persona, msg):
        total_length = self.persona_length + self.msg_length

        # Persona and message share one input, split by the separator token.
        joined = ' '.join((pre.START_SEQ_TOKEN, persona,
                           pre.SEP_SEQ_TOKEN, msg, pre.END_SEQ_TOKEN))

        token_ids = tf.convert_to_tensor(
            pre.encode_sequences(self.tokenizer, total_length, [joined]))

        # Segment array generated from the same joined text.
        segment_ids = tf.convert_to_tensor(
            np.array([pre.generate_segment_array(joined, total_length)]))

        # The encoder is started from an all-zero state.
        encoder_out, *rnn_state = self.encoder(
            [token_ids, segment_ids, tf.zeros((1, LSTM_DIM))])
        return [encoder_out, rnn_state]

    def pred_function(inputs, state, last_word):
        # One decoder step for a single beam. On the first call no state
        # exists yet, so the encoder outputs are used instead.
        encoder_out, rnn_state = inputs if state is None else state
        step_input = tf.expand_dims([last_word], 0)

        logits, _, *rnn_state = self.decoder(
            [step_input, encoder_out, False, rnn_state])

        # Output for this step plus the state to feed into the next one.
        return logits[0], [encoder_out, rnn_state]

    start_id = self.tokenizer.word_index[pre.START_SEQ_TOKEN]
    end_id = self.tokenizer.word_index[pre.END_SEQ_TOKEN]

    candidates = beam_search(persona, message, process_inputs,
                             pred_function, self.reply_length,
                             start_id, end_id, beam_width)

    # Turn every sequence of word indices back into a sentence.
    return [
        " ".join(pre.index_to_word(idx, self.tokenizer)
                 for idx in candidate)
        for candidate in candidates
    ]