def chat(self, question, chat_settings): """Chat with the chatbot model by predicting an answer to a question. 'question' and 'answer' in this context are generic terms for the interactions in a dialog exchange and can be statements, remarks, queries, requests, or any other type of dialog speech. For example: Question: "How are you?" Answer: "Fine." Question: "That's great." Answer: "Yeah." Args: question: The input question for which the model should predict an answer. chat_settings: The ChatSettings instance containing the chat settings and inference hyperparameters Returns: q_with_hist: question with history if chat_settings.show_question_context = True otherwise None. answers: array of answer beams if chat_settings.show_all_beams = True otherwise the single selected answer. """ #Process the question by cleaning it and converting it to an integer encoded vector if chat_settings.enable_auto_punctuation: question = Vocabulary.auto_punctuate(question) question = Vocabulary.clean_text(question, normalize_words = chat_settings.inference_hparams.normalize_words) question = self.input_vocabulary.words2ints(question) #Prepend the currently tracked steps of the conversation history separated by EOS tokens. #This allows for deeper dialog context to influence the answer prediction. question_with_history = [] for i in range(len(self.conversation_history)): question_with_history += self.conversation_history[i] + [self.input_vocabulary.eos_int()] question_with_history += question #Get the answer prediction batch = np.zeros((1, len(question_with_history))) batch[0] = question_with_history max_output_sequence_length = chat_settings.inference_hparams.max_answer_words + 1 # + 1 since the EOS token is counted as a timestep predicted_answer_info = self.predict_batch(inputs = batch, input_sequence_length = np.array([len(question_with_history)]), max_output_sequence_length = max_output_sequence_length, beam_length_penalty_weight = chat_settings.inference_hparams.beam_length_penalty_weight, sampling_temperature = chat_settings.inference_hparams.sampling_temperature, log_summary = chat_settings.inference_hparams.log_summary) #Read the answer prediction answer_beams = [] if self.beam_width > 0: #For beam search decoding: if show_all_beams is enabeled then output all beams (sequences), otherwise take the first beam. # The beams (in the "predictions" matrix) are ordered with the highest ranked beams first. beam_count = 1 if not chat_settings.show_all_beams else len(predicted_answer_info["predictions_seq_lengths"][0]) for i in range(beam_count): predicted_answer_seq_length = predicted_answer_info["predictions_seq_lengths"][0][i] - 1 #-1 to exclude the EOS token predicted_answer = predicted_answer_info["predictions"][0][:predicted_answer_seq_length, i].tolist() answer_beams.append(predicted_answer) else: #For greedy / sampling decoding: only one beam (sequence) is returned, based on the argmax for greedy decoding # or the sampling distribution for sampling decoding. Return this beam. 
beam_count = 1 predicted_answer_seq_length = predicted_answer_info["predictions_seq_lengths"][0] - 1 #-1 to exclude the EOS token predicted_answer = predicted_answer_info["predictions"][0][:predicted_answer_seq_length].tolist() answer_beams.append(predicted_answer) #Add new conversation steps to the end of the history and trim from the beginning if it is longer than conv_history_length #Answers need to be converted from output_vocabulary ints to input_vocabulary ints (since they will be fed back in to the encoder) self.conversation_history.append(question) answer_for_history = self.output_vocabulary.ints2words(answer_beams[0], is_punct_discrete_word = True, capitalize_i = False) answer_for_history = self.input_vocabulary.words2ints(answer_for_history) self.conversation_history.append(answer_for_history) self.trim_conversation_history(chat_settings.inference_hparams.conv_history_length) #Convert the answer(s) to text and return answers = [] for i in range(beam_count): answer = self.output_vocabulary.ints2words(answer_beams[i]) answers.append(answer) q_with_hist = None if not chat_settings.show_question_context else self.input_vocabulary.ints2words(question_with_history) if chat_settings.show_all_beams: return q_with_hist, answers else: return q_with_hist, answers[0]
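# --- Usage sketch (illustrative only) ----------------------------------------
# A minimal sketch of how the history-tracking chat() above might be driven from
# an interactive loop. The `model` and `chat_settings` objects are assumed to
# already exist; their construction is project-specific and not shown here.
def chat_loop(model, chat_settings):
    """Read questions from stdin and print the model's predicted answers."""
    while True:
        question = input("You: ")
        if question.strip().lower() in ("exit", "quit"):
            break
        q_with_hist, answer = model.chat(question, chat_settings)
        #When show_question_context is enabled, q_with_hist is the full encoder input (history + question)
        if q_with_hist is not None:
            print("Context: {0}".format(q_with_hist))
        #When show_all_beams is enabled, `answer` is a list of beams ordered best-first
        if chat_settings.show_all_beams:
            for i, beam in enumerate(answer):
                print("Beam {0}: {1}".format(i, beam))
        else:
            print("Bot: {0}".format(answer))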
def chat(self, question, chat_settings):
    """Chat with the seq2seq model.

    :param question: input question for which the model should predict an answer
    :param chat_settings: chat settings
    :return: answer
    """
    # Process the question by cleaning it and converting it to an integer encoded vector
    if chat_settings.enable_auto_punctuation:
        question = Vocabulary.auto_punctuate(question)
    question = Vocabulary.clean_text(
        question,
        normalize_words=chat_settings.inference_hparams.normalize_words)
    question = self.vocabulary.words2ints(question)

    # Get the answer prediction
    batch = np.expand_dims(question, 0)
    max_output_sequence_length = chat_settings.inference_hparams.max_answer_words + 1
    predicted_answer_info = self.predict_batch(
        inputs=batch,
        input_batch_lengths=1,
        max_output_sequence_length=max_output_sequence_length,
        beam_length_penalty_weight=chat_settings.inference_hparams.beam_length_penalty_weight)

    # Read the answer prediction
    answer_beams = []
    if self.beam_width > 0:
        # For beam search decoding: if show_all_beams is enabled then output all beams (sequences),
        # otherwise take the first beam.
        # The beams (in the "predictions" matrix) are ordered with the highest ranked beams first.
        beam_count = 1 if not chat_settings.show_all_beams else len(
            predicted_answer_info["predictions_seq_lengths"][0])
        for i in range(beam_count):
            predicted_answer_seq_length = predicted_answer_info["predictions_seq_lengths"][0][i] - 1  # -1 to exclude the EOS token
            predicted_answer = predicted_answer_info["predictions"][0][:predicted_answer_seq_length, i].tolist()
            answer_beams.append(predicted_answer)
    else:
        # For greedy / sampling decoding: only one beam (sequence) is returned,
        # based on the argmax for greedy decoding
        # or the sampling distribution for sampling decoding. Return this beam.
        beam_count = 1
        predicted_answer_seq_length = predicted_answer_info["predictions_seq_lengths"][0] - 1  # -1 to exclude the EOS token
        predicted_answer = predicted_answer_info["predictions"][0][:predicted_answer_seq_length].tolist()
        answer_beams.append(predicted_answer)

    # Convert the answer(s) to text and return
    answers = []
    for i in range(beam_count):
        answer = self.vocabulary.ints2words(answer_beams[i])
        answers.append(answer)

    if chat_settings.show_all_beams:
        return answers
    else:
        return answers[0]
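# --- Hypothetical settings container (illustrative only) ---------------------
# The ChatSettings object read by the chat() variants above is defined elsewhere
# in these projects. The stand-in below only mirrors the attribute names those
# variants access, so the sketches in this section are self-contained; the field
# values are placeholders, not recommended defaults.
from types import SimpleNamespace

example_inference_hparams = SimpleNamespace(
    normalize_words=True,             #normalize tokens during text cleaning
    max_answer_words=100,             #decoder length cap (EOS adds one extra timestep)
    beam_length_penalty_weight=1.25,  #length penalty applied during beam search decoding
    sampling_temperature=0.5,         #only used by sampling decoding
    log_summary=False,                #whether to log a prediction summary
    conv_history_length=6)            #number of turns kept in conversation_history

example_chat_settings = SimpleNamespace(
    enable_auto_punctuation=True,
    show_question_context=False,
    show_all_beams=False,
    inference_hparams=example_inference_hparams)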
def chat(self, question, chat_settings):
    #Process the question by cleaning it and converting it to an integer encoded vector
    if chat_settings.enable_auto_punctuation:
        question = Vocabulary.auto_punctuate(question)
    question = Vocabulary.clean_text(
        question,
        normalize_words=chat_settings.inference_hparams.normalize_words)
    question = self.input_vocabulary.words2ints(question)

    #Prepend the currently tracked steps of the conversation history separated by EOS tokens
    question_with_history = []
    for i in range(len(self.conversation_history)):
        question_with_history += self.conversation_history[i] + [self.input_vocabulary.eos_int()]
    question_with_history += question

    #Get the answer prediction
    batch = np.zeros((1, len(question_with_history)))
    batch[0] = question_with_history
    max_output_sequence_length = chat_settings.inference_hparams.max_answer_words + 1  # + 1 since the EOS token is counted as a timestep
    predicted_answer_info = self.predict_batch(
        inputs=batch,
        input_sequence_length=np.array([len(question_with_history)]),
        max_output_sequence_length=max_output_sequence_length,
        beam_length_penalty_weight=chat_settings.inference_hparams.beam_length_penalty_weight,
        sampling_temperature=chat_settings.inference_hparams.sampling_temperature,
        log_summary=chat_settings.inference_hparams.log_summary)

    #Read the answer prediction
    answer_beams = []
    if self.beam_width > 0:
        #For beam search decoding: if show_all_beams is enabled then output all beams (sequences), otherwise take the first beam.
        # The beams (in the "predictions" matrix) are ordered with the highest ranked beams first.
        beam_count = 1 if not chat_settings.show_all_beams else len(
            predicted_answer_info["predictions_seq_lengths"][0])
        for i in range(beam_count):
            predicted_answer_seq_length = predicted_answer_info["predictions_seq_lengths"][0][i] - 1  #-1 to exclude the EOS token
            predicted_answer = predicted_answer_info["predictions"][0][:predicted_answer_seq_length, i].tolist()
            answer_beams.append(predicted_answer)
    else:
        #For greedy / sampling decoding: only one beam (sequence) is returned, based on the argmax for greedy decoding
        # or the sampling distribution for sampling decoding. Return this beam.
        beam_count = 1
        predicted_answer_seq_length = predicted_answer_info["predictions_seq_lengths"][0] - 1  #-1 to exclude the EOS token
        predicted_answer = predicted_answer_info["predictions"][0][:predicted_answer_seq_length].tolist()
        answer_beams.append(predicted_answer)

    #Add new conversation steps to the end of the history and trim from the beginning if it is longer than conv_history_length
    #Answers need to be converted from output_vocabulary ints to input_vocabulary ints (since they will be fed back into the encoder)
    self.conversation_history.append(question)
    answer_for_history = self.output_vocabulary.ints2words(
        answer_beams[0], is_punct_discrete_word=True, capitalize_i=False)
    answer_for_history = self.input_vocabulary.words2ints(answer_for_history)
    self.conversation_history.append(answer_for_history)
    self.trim_conversation_history(
        chat_settings.inference_hparams.conv_history_length)

    #Convert the answer(s) to text and return
    answers = []
    for i in range(beam_count):
        answer = self.output_vocabulary.ints2words(answer_beams[i])
        answers.append(answer)

    q_with_hist = None if not chat_settings.show_question_context else self.input_vocabulary.ints2words(question_with_history)
    if chat_settings.show_all_beams:
        return q_with_hist, answers
    else:
        return q_with_hist, answers[0]
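# --- Sketch of the history-trimming helper (illustrative only) ---------------
# Both history-tracking variants of chat() above call self.trim_conversation_history(),
# which is not shown in this section. Assuming conversation_history is a plain list of
# integer-encoded turns and the argument counts turns, a minimal implementation could
# look like the method below (shown standalone here); the real helper may differ.
def trim_conversation_history(self, max_length):
    """Drop the oldest turns so that at most max_length turns are kept."""
    while len(self.conversation_history) > max_length:
        self.conversation_history.pop(0)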