Esempio n. 1
0
    def test_if_importing_yields_correct_legth_data(self):
        """Check that both dataset files load with the expected lengths."""
        loaded_lines = bot.import_dataset('movie_lines.txt')
        loaded_conversations = bot.import_dataset('movie_conversations.txt')

        # Pair each loaded dataset with the length it is expected to have.
        expectations = (
            (loaded_lines, 304714),
            (loaded_conversations, 83098),
        )
        for dataset, expected_length in expectations:
            self.assertEqual(len(dataset), expected_length)
Esempio n. 2
0
    def test_map_lines_return_ke_and_values_only(self):
        """Check that no key/value pair from `map_lines` contains '++$++'."""
        raw_lines = bot.import_dataset('movie_lines.txt')
        id_to_text = bot.map_lines(raw_lines)
        marker = '++$++'
        for line_id, text in id_to_text.items():
            # Concatenating lets one assertion cover both the key and the
            # value of the entry.
            self.assertNotIn(marker, line_id + text)
Esempio n. 3
0
    def test_if_questions_are_separated_from_answers(self):
        """Test that every answer id directly follows its question id.

        Ids are compared numerically after removing the ``L`` characters,
        so each answer number must equal its question number plus one.
        """
        all_conversations = bot.import_dataset('movie_conversations.txt')
        all_conversations = bot.get_conversations(all_conversations)
        questions, answers = bot.separate_questions_from_answers(
            all_conversations)
        for question, answer in zip(questions, answers):
            question_number = int(question.replace('L', ''))
            answer_number = int(answer.replace('L', ''))

            # assertEqual shows both numbers when the check fails, whereas
            # assertTrue on a pre-computed boolean only reports
            # "False is not true".
            self.assertEqual(answer_number, question_number + 1)
Esempio n. 4
0
    def test_if_onle_ids_of_conversations_are_returned(self):
        """Check that the `get_conversations` result does not contain '++$++'."""
        raw_conversations = bot.import_dataset('movie_conversations.txt')
        conversation_ids = bot.get_conversations(raw_conversations)

        # NOTE(review): assertNotIn relies on `in`; if the result is a list,
        # this only compares whole elements against '++$++' and does not look
        # inside them -- confirm the return type makes this check meaningful.
        self.assertNotIn('++$++', conversation_ids)
Esempio n. 5
0
"""Chatbot preprocessor."""
import chatbot_tools as bot

# Load both raw dataset files.
lines = bot.import_dataset('movie_lines.txt')
conversations = bot.import_dataset('movie_conversations.txt')

# Build the id -> text lookup and extract the conversation id data.
id2lines = bot.map_lines(lines)
conversations_ids = bot.get_conversations(conversations)

questions, answers = bot.separate_questions_from_answers(conversations_ids)

# Resolve every question/answer id to its text through the lookup.
mapped_questions = [id2lines[line_id] for line_id in questions]
mapped_answers = [id2lines[line_id] for line_id in answers]

# Clean each question/answer pair together; zip stops at the shorter of the
# two lists, so both cleaned lists always end up with the same length.
cleaned_pairs = [
    (bot.clean_text(q_text), bot.clean_text(a_text))
    for q_text, a_text in zip(mapped_questions, mapped_answers)
]
clean_questions = [cleaned_q for cleaned_q, _ in cleaned_pairs]
clean_answers = [cleaned_a for _, cleaned_a in cleaned_pairs]


word2count = dict()

for question in clean_questions:
    for word in question.split():
        if word not in word2count:
            word2count[word] = 1
        else: