class CorpusUtilsTestCase(TestCase):
    """
    Checks for the Corpus helpers that resolve, list, read and load
    corpus data files.
    """

    def setUp(self):
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        A dotted corpus path should be translated into a file system path.
        """
        expected_fragment = os.path.join(
            'chatterbot', 'corpus', 'data', 'english'
        )
        resolved = self.corpus.get_file_path('chatterbot.corpus.english')

        self.assertIn(expected_fragment, resolved)

    def test_read_corpus(self):
        """
        Reading the English conversations file should give back data
        that contains 'conversations'.
        """
        path_parts = (
            self.corpus.data_directory, 'english', 'conversations.json'
        )
        contents = self.corpus.read_corpus(os.path.join(*path_parts))

        self.assertIn('conversations', contents)

    def test_list_english_corpus_files(self):
        """
        The English corpus should list at least three files, and the
        first listed name should contain '.json'.
        """
        listed = self.corpus.list_corpus_files('chatterbot.corpus.english')
        first_file = listed[0] if len(listed) >= 3 else None

        self.assertGreaterEqual(len(listed), 3)
        self.assertIn('.json', first_file)

    def test_load_corpus(self):
        """
        Loading the greetings corpus should produce exactly one corpus
        that includes the ['Hi', 'Hello'] exchange.
        """
        loaded = self.corpus.load_corpus('chatterbot.corpus.english.greetings')

        self.assertEqual(len(loaded), 1)
        greetings = loaded[0]
        self.assertIn(['Hi', 'Hello'], greetings)

    def test_load_corpus_general(self):
        """
        Loading the whole English corpus should produce three corpora,
        the second of which includes the ['Hi', 'Hello'] exchange.
        """
        loaded = self.corpus.load_corpus("chatterbot.corpus.english")

        self.assertEqual(len(loaded), 3)
        second_corpus = loaded[1]
        self.assertIn(['Hi', 'Hello'], second_corpus)
class ChatterBotCorpusTrainer(Trainer):
    """
    Trainer that teaches the chat bot from conversations stored in the
    ChatterBot dialog corpus.
    """

    def __init__(self, chatbot, **kwargs):
        super().__init__(chatbot, **kwargs)

        # The corpus module is imported at construction time, not at
        # module import time.
        from chatterbot.corpus import Corpus

        self.corpus = Corpus()

    def train(self, *corpus_paths):
        """
        Store every text of every conversation found under the given
        corpus paths.

        The paths may be given as separate positional arguments or as
        one single list. Each text is wrapped in a Statement that
        responds to the preprocessed text that preceded it within the
        same conversation (None for the first text), tagged with the
        categories of its corpus, preprocessed, and then created in the
        chat bot's storage with conversation set to 'training'.
        """
        # A lone list argument stands in for the argument tuple itself
        if len(corpus_paths) == 1 and isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

        for dotted_path in corpus_paths:
            loaded_corpora = self.corpus.load_corpus(dotted_path)
            file_names = self.corpus.list_corpus_files(dotted_path)

            for file_index, loaded in enumerate(loaded_corpora):
                for position, exchange in enumerate(loaded, start=1):

                    # Progress is reported once per conversation and is
                    # labelled with the base name of the corpus file.
                    if self.show_training_progress:
                        file_label = str(
                            os.path.basename(file_names[file_index])
                        )
                        utils.print_progress_bar(
                            file_label + ' Training',
                            position,
                            len(loaded)
                        )

                    last_text = None

                    for entry in exchange:
                        draft = Statement(
                            text=entry,
                            in_response_to=last_text,
                            conversation='training'
                        )
                        draft.add_tags(*loaded.categories)

                        processed = self.get_preprocessed_statement(draft)

                        # The next entry responds to the preprocessed text
                        last_text = processed.text

                        self.chatbot.storage.create(
                            text=processed.text,
                            in_response_to=processed.in_response_to,
                            conversation=processed.conversation,
                            tags=processed.tags
                        )
class newCorpusTrainer(trainers.Trainer):
    """
    Trainer that reads corpus data through chatterbot.corpus.Corpus and
    treats every conversation as a sequence of lines, where each line
    is itself an iterable of texts.
    """

    def __init__(self, storage, **kwargs):
        super(newCorpusTrainer, self).__init__(storage, **kwargs)

        # Imported inside the constructor rather than at module level.
        from chatterbot.corpus import Corpus

        self.corpus = Corpus()

    def train(self, *corpus_paths):
        """
        Train from one or more corpus paths.

        The paths may be passed as separate positional arguments or as
        one single list. For every text in a line, a statement is
        fetched or created, tagged with the categories of its corpus,
        given a Response for each text of the previous line, and
        written back through self.storage.update.

        NOTE(review): the nesting of the inner loops was reconstructed
        from a flattened source line. Confirm that storage.update is
        meant to run once per text and that the hand-over of
        statement_line happens once per line.
        """
        # Allow a list of corpora to be passed instead of arguments
        if len(corpus_paths) == 1:
            if isinstance(corpus_paths[0], list):
                corpus_paths = corpus_paths[0]

        # Train the chat bot with each statement and response pair
        for corpus_path in corpus_paths:

            corpora = self.corpus.load_corpus(corpus_path)

            # Only used to label the progress bar; indexed by the
            # position of the corpus within `corpora`.
            corpus_files = self.corpus.list_corpus_files(corpus_path)

            for corpus_count, corpus in enumerate(corpora):
                for conversation_count, conversation in enumerate(corpus):
                    # Progress is printed unconditionally, once per
                    # conversation.
                    print_progress_bar(
                        str(os.path.basename(corpus_files[corpus_count])) + " Training",
                        conversation_count + 1,
                        len(corpus))

                    # Texts of the line handled before the current one,
                    # and texts collected so far for the current line.
                    previous_statement_line = []
                    statement_line = []

                    for line in conversation:
                        for text in line:
                            statement = self.get_or_create(text)
                            # The categories collection is passed as a
                            # single argument, not unpacked.
                            statement.add_tags(corpus.categories)
                            statement_line.append(statement.text)

                            # The first line of a conversation has no
                            # previous line, so no responses are added.
                            if previous_statement_line != []:
                                for previous_statement_text in previous_statement_line:
                                    statement.add_response(
                                        Response(previous_statement_text))

                            self.storage.update(statement)

                        # The finished line becomes the previous line
                        # for the next iteration.
                        previous_statement_line = statement_line
                        statement_line = []
class CorpusUtilsTestCase(TestCase):
    """
    Tests covering how Corpus resolves dotted paths, lists corpus
    files, reads a corpus file and loads corpora.
    """

    def setUp(self):
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        The dotted path of the English corpus should map onto its
        data directory.
        """
        english_directory = os.path.join(
            'chatterbot', 'corpus', 'data', 'english'
        )
        file_path = self.corpus.get_file_path('chatterbot.corpus.english')

        self.assertIn(english_directory, file_path)

    def test_read_english_corpus(self):
        """
        The English conversations corpus file should be readable and
        its data should contain 'conversations'.
        """
        location = os.path.join(
            self.corpus.data_directory,
            'english',
            'conversations.corpus.json'
        )
        parsed = self.corpus.read_corpus(location)

        self.assertIn('conversations', parsed)

    def test_list_english_corpus_files(self):
        """
        Listing the English corpus should return three or more files,
        with '.json' appearing in the first name.
        """
        names = self.corpus.list_corpus_files('chatterbot.corpus.english')
        file_count = len(names)

        self.assertGreaterEqual(file_count, 3)
        self.assertIn('.json', names[0])

    def test_load_corpus(self):
        """
        The greetings corpus should load as a single corpus holding
        the ['Hi', 'Hello'] exchange.
        """
        corpora = self.corpus.load_corpus('chatterbot.corpus.english.greetings')
        corpus_count = len(corpora)

        self.assertEqual(corpus_count, 1)
        self.assertIn(['Hi', 'Hello'], corpora[0])

    def test_load_corpus_english(self):
        """
        The full English corpus should load as three corpora, with the
        ['Hi', 'Hello'] exchange in the second one.
        """
        corpora = self.corpus.load_corpus("chatterbot.corpus.english")
        corpus_count = len(corpora)

        self.assertEqual(corpus_count, 3)
        self.assertIn(['Hi', 'Hello'], corpora[1])