def test_if_importing_yields_correct_legth_data(self):
    """Check that both corpus files load with the expected record counts.

    `bot.import_dataset` is called for the lines file and then for the
    conversations file; the length of each result is compared against a
    fixed expected count.
    """
    dataset_files = ('movie_lines.txt', 'movie_conversations.txt')
    expected_counts = (304714, 83098)

    # Load everything up front so a loading error surfaces before any
    # count is asserted.
    loaded = [bot.import_dataset(filename) for filename in dataset_files]

    for records, expected in zip(loaded, expected_counts):
        self.assertEqual(len(records), expected)
def test_map_lines_return_ke_and_values_only(self):
    """Check that `map_lines` output no longer carries the raw separator.

    Every key/value pair of the mapping returned by `bot.map_lines` is
    concatenated and must not contain the '++$++' field separator.
    """
    separator = '++$++'
    id_to_text = bot.map_lines(bot.import_dataset('movie_lines.txt'))

    for line_id, text in id_to_text.items():
        self.assertNotIn(separator, line_id + text)
def test_if_questions_are_separated_from_answers(self):
    """Check that each answer id directly follows its question id.

    The conversations file is loaded, reduced with
    `bot.get_conversations`, and split with
    `bot.separate_questions_from_answers`. For every (question, answer)
    pair the 'L' characters are removed from both ids, the remainders are
    parsed as integers, and the answer must be exactly one greater than
    the question.
    """
    raw_conversations = bot.import_dataset('movie_conversations.txt')
    conversation_ids = bot.get_conversations(raw_conversations)
    questions, answers = bot.separate_questions_from_answers(
        conversation_ids)

    for question, answer in zip(questions, answers):
        # Keep the original id strings intact so they can be reported.
        question_number = int(question.replace('L', ''))
        answer_number = int(answer.replace('L', ''))
        # assertEqual shows the actual difference on failure, whereas
        # assertTrue(a == b) would only report "False is not true".
        self.assertEqual(
            answer_number - question_number, 1,
            msg='%s does not directly follow %s' % (answer, question))
def test_if_onle_ids_of_conversations_are_returned(self):
    """Check that `get_conversations` output holds no raw field separator.

    The conversations file is loaded and passed to
    `bot.get_conversations`; the '++$++' separator must not appear
    anywhere in the result.
    """
    conversations = bot.import_dataset('movie_conversations.txt')
    payload = bot.get_conversations(conversations)

    # NOTE(review): payload is presumably a collection of id sequences
    # rather than one string -- confirm against get_conversations.
    # `in` on a list or tuple only compares whole elements, so a separator
    # embedded inside an id would be missed. Scanning the rendered text
    # finds it at any nesting depth and is identical to the plain check
    # when payload is already a string.
    self.assertNotIn('++$++', str(payload))
"""Chatbot preprocessosr.""" import chatbot_tools as bot # importing dataset lines = bot.import_dataset('movie_lines.txt') conversations = bot.import_dataset('movie_conversations.txt') id2lines = bot.map_lines(lines) conversations_ids = bot.get_conversations(conversations) questions, answers = bot.separate_questions_from_answers( conversations_ids ) mapped_questions = [id2lines[question] for question in questions] mapped_answers = [id2lines[answer] for answer in answers] clean_questions = list() clean_answers = list() for q_text, a_text in zip(mapped_questions, mapped_answers): clean_questions.append(bot.clean_text(q_text)) clean_answers.append(bot.clean_text(a_text)) word2count = dict() for question in clean_questions: for word in question.split(): if word not in word2count: word2count[word] = 1 else: