def is_user_input_trigger_similar(self, user_input, trigger):
        """
        Decide whether a user input and a trigger are similar enough.

        Both strings go through the same preprocessing: ``self.filter_sentence``
        when ``self._filter`` is truthy, ``RegexUtil.normalize_string`` otherwise.

        :param user_input: first string to compare
        :param trigger: second string to compare
        :return: True when ``dice_sentence`` of the two preprocessed strings is
                 greater than or equal to ``self._threeshold``
        """
        # choose the preprocessing step once, then apply it to both strings
        if self._filter:
            prepare = self.filter_sentence
        else:
            prepare = RegexUtil.normalize_string

        prepared_input = prepare(user_input)
        prepared_trigger = prepare(trigger)
        return dice_sentence(prepared_input, prepared_trigger) >= self._threeshold
 def are_answer_similar_enough(self, answer1, answer2):
     """
     Decide whether two answers are similar enough.

     When ``self.__filter`` is truthy each answer goes through
     ``remove_stop_words``, then ``tok_stem``, then
     ``RegexUtil.normalize_string``; otherwise only the normalization is applied.

     :param answer1: first answer
     :param answer2: second answer
     :return: True when ``dice_sentence`` of the two preprocessed answers is
              greater than or equal to ``self.__threshold``
     """
     if self.__filter:
         stages = (remove_stop_words, tok_stem, RegexUtil.normalize_string)
     else:
         stages = (RegexUtil.normalize_string,)

     left, right = answer1, answer2
     for stage in stages:
         # every stage is run on both answers before moving on to the next one
         left, right = stage(left), stage(right)

     return dice_sentence(left, right) >= self.__threshold
    def are_answer_similar_enough(self, answer1, answer2):
        """
        Decide whether two answers are similar enough.

        When ``self._filter`` is truthy each answer goes through
        ``remove_stop_words`` and ``tok_stem`` before
        ``RegexUtil.normalize_string``; otherwise it is only normalized.

        :param answer1: first answer
        :param answer2: second answer
        :return: True when ``med_sentence`` of the two preprocessed answers is
                 lower than or equal to ``self._med_answers_min``
        """
        use_filter = bool(self._filter)

        prepared = []
        for text in (answer1, answer2):
            if use_filter:
                # stop words are removed first, the result is then stemmed
                text = tok_stem(remove_stop_words(text))
            prepared.append(RegexUtil.normalize_string(text))

        first, second = prepared
        return med_sentence(first, second) <= self._med_answers_min
    def are_answer_similar_enough(self, answer1, answer2):
        """
        Decide whether two answers are similar enough.

        When ``self.__filter`` is truthy both answers go through
        ``remove_stop_words`` and then ``tok_stem``. In every case both answers
        are finally passed to ``RegexUtil.normalize_string``.

        :param answer1: first answer
        :param answer2: second answer
        :return: True when ``similar_yes_no`` (called with ``self.__weight`` and
                 ``self.__measure``) is greater than or equal to ``self.__threshold``
        """
        first, second = answer1, answer2

        if self.__filter:
            first, second = remove_stop_words(first), remove_stop_words(second)
            first, second = tok_stem(first), tok_stem(second)

        # normalization is common to the filtered and unfiltered paths
        first = RegexUtil.normalize_string(first)
        second = RegexUtil.normalize_string(second)

        score = similar_yes_no(first, second, self.__weight, self.__measure)
        return score >= self.__threshold
    def filter_sentence(self, sentence):
        """
        Run a sentence through the full filtering pipeline.

        Steps, in order: ``filter_non_interrogative_sentence``, tagging with
        ``self._tagger``, ``filter_tags`` using ``self._tags_to_filter_triggers``,
        sentence reconstruction by the tagger, ``remove_stop_words``,
        ``tok_stem`` and ``RegexUtil.normalize_string``.

        :param sentence: the sentence to filter
        :return: the value returned by ``RegexUtil.normalize_string`` for the
                 filtered sentence
        """
        # keep only the interrogative part, then tag it
        interrogative = filter_non_interrogative_sentence(sentence)
        tagged = self._tagger.tag_sentence(interrogative)

        # filter by tag and rebuild a sentence from what is left
        remaining = filter_tags(tagged, self._tags_to_filter_triggers)
        rebuilt = self._tagger.construct_sentence(remaining)

        # stop-word removal, stemming and normalization
        without_stop_words = remove_stop_words(rebuilt)
        stemmed = tok_stem(without_stop_words)
        return RegexUtil.normalize_string(stemmed)
    def get_answers(self, user_input):
        """
        Given a user_input, return the answers stored for it.

        :param user_input: text, or UTF-8 encoded bytes (decoded before the lookup)
        :return: the value stored for the stripped user_input in the
                 user-input/answers dictionary; it is expected to be a sorted list
                 of tuples (string, int), the first value being the answer and the
                 second the #occurrences, most frequent first (ordering is not
                 enforced here). None is returned when the stripped user_input is
                 not a key of the dictionary
        """

        # Only byte strings need decoding. On Python 2 `bytes` is an alias of `str`,
        # so this is the same check as before; on Python 3 it avoids calling the
        # non-existent `str.decode`.
        if isinstance(user_input, bytes):
            user_input = user_input.decode("utf-8")
        user_input = RegexUtil.custom_strip(user_input)

        if user_input not in self.__user_input_answers_dic:
            # unknown input: callers tell this apart from an empty answer list
            return None

        return self.__user_input_answers_dic[user_input]
    def get_answer(self, user_input):
        """
        Given a user_input, return the first answer listed by get_answers for it.
        The list is expected to be sorted by frequency, so this should be the
        most probable answer; if there is a draw, one of them is returned.

        :param user_input: text, or UTF-8 encoded bytes (decoded before the lookup)
        :return: The answer (string), or
                 AnswerPickerAnswerResult.INVALID_USER_INPUT when get_answers
                 returns None, or AnswerPickerAnswerResult.TRIGGER_NOT_FOUND when
                 it returns an empty list
        """

        # Only byte strings need decoding. On Python 2 `bytes` is an alias of `str`,
        # so this is the same check as before; on Python 3 it avoids calling the
        # non-existent `str.decode`.
        if isinstance(user_input, bytes):
            user_input = user_input.decode("utf-8")
        user_input = RegexUtil.custom_strip(user_input)

        answers = self.get_answers(user_input)
        if answers is None:
            return AnswerPickerAnswerResult.INVALID_USER_INPUT
        if not answers:
            return AnswerPickerAnswerResult.TRIGGER_NOT_FOUND

        best_answer, _occurrences = answers[0][0], None  # [first answer] [first element tuple]
        return best_answer
    def _evaluate(self, answer_picker, questions_file_path, max_n_answers):
        """
        Annotate the answer given by answer_picker for each line of a questions file.

        :param answer_picker: object exposing get_answer(question)
        :param questions_file_path: path of the file to read, one question per line
        :param max_n_answers: forwarded unchanged to self._get_annotation
        :return: list with one annotation per line of the file, in file order;
                 'n' when the picker reports an invalid input or no trigger,
                 otherwise the value returned by self._get_annotation
        """
        answers_list = []
        # the with-statement closes the file, no explicit close() is needed
        with open(questions_file_path) as question_file:
            for question in question_file:
                # custom_strip presumably removes dashes and surrounding whitespace
                # (per the original comment) - verify against RegexUtil
                question = RegexUtil.custom_strip(question)
                answer = answer_picker.get_answer(question)

                # check if input is invalid or if there is no possible answer
                if answer == AnswerPickerAnswerResult.INVALID_USER_INPUT or AnswerPickerAnswerResult.TRIGGER_NOT_FOUND == answer:
                    annotation = 'n'
                else:
                    annotation = self._get_annotation(question, answer, max_n_answers)

                # add to the list for future analysis
                answers_list.append(annotation)
        return answers_list
 def is_user_input_trigger_similar(self, user_input, trigger):
     """
     Tell whether a user input and a trigger are equal after normalization.

     :param user_input: first string to compare
     :param trigger: second string to compare
     :return: True when ``RegexUtil.normalize_string`` gives equal results for both
     """
     normalized_input = RegexUtil.normalize_string(user_input)
     normalized_trigger = RegexUtil.normalize_string(trigger)
     return normalized_input == normalized_trigger
 def _normalize_trigger(self, trigger):
     """
     Normalize a trigger with ``RegexUtil.normalize_string``.

     :param trigger: the trigger to normalize
     :return: the value returned by ``RegexUtil.normalize_string``
     """
     # per the original comment: lowercase, no punctuation, diacritics transformation
     normalized = RegexUtil.normalize_string(trigger)
     return normalized
 def _normalize_user_input(self, user_input):
     """
     Normalize a user input with ``RegexUtil.normalize_string``.

     :param user_input: the user input to normalize
     :return: the value returned by ``RegexUtil.normalize_string``
     """
     # per the original comment: lowercase, no punctuation, diacritics transformation
     normalized = RegexUtil.normalize_string(user_input)
     return normalized
 def is_user_input_trigger_identical(self, user_input, trigger):
     """
     Tell whether a user input and a trigger are identical once normalized.

     :param user_input: first string to compare
     :param trigger: second string to compare
     :return: True when ``RegexUtil.normalize_string`` gives equal results for both
     """
     normalized_input = RegexUtil.normalize_string(user_input)
     normalized_trigger = RegexUtil.normalize_string(trigger)
     return normalized_input == normalized_trigger