def is_user_input_trigger_similar(self, user_input, trigger):
    """
    Decide whether a user input is close enough to a trigger.

    Both sentences go through the same clean-up function and are then
    compared with dice_sentence.

    :param user_input: sentence typed by the user
    :param trigger: candidate trigger sentence
    :return: result of ``dice_sentence(...) >= self._threeshold``
    """
    if self._filter:
        # remove non-interrogative sentences, filter tags, remove stop
        # words, stemming
        clean_up = self.filter_sentence
    else:
        clean_up = RegexUtil.normalize_string

    cleaned_input = clean_up(user_input)
    cleaned_trigger = clean_up(trigger)
    score = dice_sentence(cleaned_input, cleaned_trigger)
    return score >= self._threeshold
def are_answer_similar_enough(self, answer1, answer2):
    """
    Decide whether two answers are similar enough, using dice_sentence.

    :param answer1: first answer
    :param answer2: second answer
    :return: result of ``dice_sentence(...) >= self.__threshold``
    """
    if self.__filter:
        # remove stop words, then stem, then normalize the string
        stages = (remove_stop_words, tok_stem, RegexUtil.normalize_string)
    else:
        stages = (RegexUtil.normalize_string,)

    # Each stage is applied to the first answer and then to the second one
    # before moving on to the next stage.
    for stage in stages:
        answer1 = stage(answer1)
        answer2 = stage(answer2)

    return dice_sentence(answer1, answer2) >= self.__threshold
def are_answer_similar_enough(self, answer1, answer2):
    """
    Decide whether two answers are similar enough, using med_sentence.

    :param answer1: first answer
    :param answer2: second answer
    :return: result of ``med_sentence(...) <= self._med_answers_min``
    """
    use_filter = self._filter

    def _prepare(answer):
        # With filtering on: remove stop words, then stem. The string is
        # normalized in every case.
        if use_filter:
            answer = tok_stem(remove_stop_words(answer))
        return RegexUtil.normalize_string(answer)

    prepared1 = _prepare(answer1)
    prepared2 = _prepare(answer2)
    return med_sentence(prepared1, prepared2) <= self._med_answers_min
def are_answer_similar_enough(self, answer1, answer2):
    """
    Decide whether two answers are similar enough, using similar_yes_no.

    :param answer1: first answer
    :param answer2: second answer
    :return: result of
             ``similar_yes_no(..., self.__weight, self.__measure) >= self.__threshold``
    """
    first, second = answer1, answer2
    if self.__filter:
        # remove stop words, then stem
        first, second = remove_stop_words(first), remove_stop_words(second)
        first, second = tok_stem(first), tok_stem(second)

    # The string normalization is the last step with or without filtering.
    first = RegexUtil.normalize_string(first)
    second = RegexUtil.normalize_string(second)

    score = similar_yes_no(first, second, self.__weight, self.__measure)
    return score >= self.__threshold
def filter_sentence(self, sentence):
    """
    Run a sentence through the whole filtering pipeline.

    :param sentence: sentence to filter
    :return: the value returned by RegexUtil.normalize_string for the
             filtered sentence
    """
    # remove non-interrogative sentences
    interrogative_only = filter_non_interrogative_sentence(sentence)

    # tag the sentence, filter it by tag, then build a sentence again
    tagged = self._tagger.tag_sentence(interrogative_only)
    kept = filter_tags(tagged, self._tags_to_filter_triggers)
    rebuilt = self._tagger.construct_sentence(kept)

    # remove stop words, stem and finally normalize the string
    stemmed = tok_stem(remove_stop_words(rebuilt))
    return RegexUtil.normalize_string(stemmed)
def get_answers(self, user_input):
    """
    Given a user_input, returns all possible answers sorted by the most
    frequent to the least frequent

    :param user_input: sentence typed by the user; a byte string is decoded
                       as UTF-8 before the lookup
    :return: the entry stored for the stripped user_input, described as a
             sorted list of tuples (string, int) where the first value is
             the answer and the second the #occurrences; that list is empty
             if there are no answers.
             None is returned when the user_input is not a key of the
             answers dictionary
    """
    # ``bytes`` is the same type as ``str`` on Python 2, so byte strings are
    # still decoded there, while Python 3 text (which has no ``decode``
    # method) is left untouched instead of raising AttributeError.
    if isinstance(user_input, bytes):
        user_input = user_input.decode("utf-8")

    user_input = RegexUtil.custom_strip(user_input)

    # ``get`` yields None for an unknown user_input.
    return self.__user_input_answers_dic.get(user_input)
def get_answer(self, user_input):
    """
    Given a user_input, returns the most probable answer. If there is a
    draw, the first entry of the answers list is the one returned

    :param user_input: sentence typed by the user; a byte string is decoded
                       as UTF-8 before the lookup
    :return: The answer (string), or
             AnswerPickerAnswerResult.INVALID_USER_INPUT when get_answers
             returns None, or
             AnswerPickerAnswerResult.TRIGGER_NOT_FOUND when get_answers
             returns an empty list
    """
    # ``bytes`` is the same type as ``str`` on Python 2, so byte strings are
    # still decoded there, while Python 3 text (which has no ``decode``
    # method) is left untouched instead of raising AttributeError.
    if isinstance(user_input, bytes):
        user_input = user_input.decode("utf-8")

    user_input = RegexUtil.custom_strip(user_input)
    answers = self.get_answers(user_input)

    if answers is None:
        return AnswerPickerAnswerResult.INVALID_USER_INPUT
    if not answers:
        return AnswerPickerAnswerResult.TRIGGER_NOT_FOUND

    return answers[0][0]  # [first answer] [first element tuple]
def _evaluate(self, answer_picker, questions_file_path, max_n_answers):
    """
    Ask the answer picker every question of a file and collect one
    annotation per question.

    :param answer_picker: object whose ``get_answer(question)`` is called
                          for each line of the file
    :param questions_file_path: path of the file holding one question per
                                line
    :param max_n_answers: forwarded unchanged to ``self._get_annotation``
    :return: list with one annotation per line of the file, in file order;
             'n' when the picker reports an invalid input or no trigger,
             otherwise the value returned by ``self._get_annotation``
    """
    annotations = []

    # The ``with`` block closes the file on exit, so no explicit close()
    # call is needed.
    with open(questions_file_path) as question_file:
        for question in question_file:
            # remove - and whitespace
            question = RegexUtil.custom_strip(question)
            answer = answer_picker.get_answer(question)

            # check if input is invalid or if there is no possible answer
            if (answer == AnswerPickerAnswerResult.INVALID_USER_INPUT
                    or AnswerPickerAnswerResult.TRIGGER_NOT_FOUND == answer):
                annotation = 'n'
            else:
                annotation = self._get_annotation(question, answer, max_n_answers)

            # add to the list for future analysis
            annotations.append(annotation)

    return annotations
def is_user_input_trigger_similar(self, user_input, trigger):
    """
    Tell whether a user input and a trigger are equal once both have been
    normalized with RegexUtil.normalize_string.

    :param user_input: sentence typed by the user
    :param trigger: candidate trigger sentence
    :return: result of comparing the two normalized values with ``==``
    """
    normalized_input = RegexUtil.normalize_string(user_input)
    normalized_trigger = RegexUtil.normalize_string(trigger)
    return normalized_input == normalized_trigger
def _normalize_trigger(self, trigger):
    """
    Normalize a trigger by delegating to RegexUtil.normalize_string.

    :param trigger: trigger sentence to normalize
    :return: the value returned by RegexUtil.normalize_string
    """
    # lowercase, no punctuation, diacritics transformation
    normalized_trigger = RegexUtil.normalize_string(trigger)
    return normalized_trigger
def _normalize_user_input(self, user_input):
    """
    Normalize a user input by delegating to RegexUtil.normalize_string.

    :param user_input: sentence typed by the user
    :return: the value returned by RegexUtil.normalize_string
    """
    # lowercase, no punctuation, diacritics transformation
    normalized_input = RegexUtil.normalize_string(user_input)
    return normalized_input
def is_user_input_trigger_identical(self, user_input, trigger):
    """
    Tell whether a user input and a trigger have the same normalized form.

    :param user_input: sentence typed by the user
    :param trigger: candidate trigger sentence
    :return: result of comparing both values with ``==`` after each went
             through RegexUtil.normalize_string
    """
    normalize = RegexUtil.normalize_string
    return normalize(user_input) == normalize(trigger)