def setUp(self):
    """
    Create the trainer under test, pointed at a dedicated
    test data directory with progress output turned off.
    """
    super().setUp()

    trainer_options = {
        'ubuntu_corpus_data_directory': './.ubuntu_test_data/',
        'show_training_progress': False,
    }
    self.trainer = UbuntuCorpusTrainer(self.chatbot, **trainer_options)
def DefTrain():
    """Train the shared chat bot (``c.chatbot``) on a fixed set of English corpora."""
    corpus_trainer = ChatterBotCorpusTrainer(c.chatbot)

    english_corpora = (
        "chatterbot.corpus.english.ai",
        "chatterbot.corpus.english.conversations",
        "chatterbot.corpus.english.greetings",
        "chatterbot.corpus.english.emotion",
    )
    corpus_trainer.train(*english_corpora)

    # NOTE(review): this trainer is constructed but never trained or kept;
    # confirm whether a train() call was intended here.
    UbuntuCorpusTrainer(c.chatbot)
def train(self, path=None):
    """
    Train ``self.bot``.

    When ``path`` is truthy it is handed to a ChatterBotCorpusTrainer;
    otherwise an UbuntuCorpusTrainer is run with no arguments.
    """
    if path:
        ChatterBotCorpusTrainer(self.bot).train(path)
        return

    UbuntuCorpusTrainer(self.bot).train()
async def initialize(self):
    """
    Finish setting up the cog once the bot reports it is ready.

    Compiles the mention pattern for the bot's own user id, sets the
    internal event, builds the ChatBot backed by the SQL storage adapter,
    creates the three trainers, and starts the ``learn`` loop.
    """
    await self.bot.wait_until_red_ready()
    self.mention_regex = re.compile(rf"<@!?{self.bot.user.id}>")
    self._event.set()

    logic_adapters = [
        "chatterbot.logic.MathematicalEvaluation",
        "chatterbot.logic.TimeLogicAdapter",
        "chatterbot.logic.BestMatch",
    ]
    preprocessors = [
        "chatterbot.preprocessors.clean_whitespace",
        "chatterbot.preprocessors.unescape_html",
        "chatterbot.preprocessors.convert_to_ascii",
    ]
    self.chat_bot = ChatBot(
        self.bot.user.name,
        storage_adapter="chatterbot.storage.SQLStorageAdapter",
        database_uri=f"sqlite:///{self.__database}",
        logic_adapters=logic_adapters,
        filters=[filters.get_recent_repeated_responses],
        preprocessors=preprocessors,
    )

    # Trainers are created one at a time, in the same order as before.
    trainer_slots = (
        ("_list_trainer", ListTrainer),
        ("_corpus_trainer", ChatterBotCorpusTrainer),
        ("_ubuntu_trainer", UbuntuCorpusTrainer),
    )
    for slot_name, trainer_cls in trainer_slots:
        setattr(self, slot_name, trainer_cls(self.chat_bot))

    self.learn.start()
class UbuntuCorpusTrainerTestCase(ChatBotTestCase):
    """
    Test the Ubuntu Corpus trainer class.
    """

    def setUp(self):
        super().setUp()
        self.trainer = UbuntuCorpusTrainer(
            self.chatbot,
            ubuntu_corpus_data_directory='./.ubuntu_test_data/',
            show_training_progress=False
        )

    def tearDown(self):
        super().tearDown()
        self._remove_data()

    def _get_data(self):
        """
        Return two small dialogs as bytes, one row per line.

        The rows are written to ``.tsv`` members, so the columns are
        separated with explicit ``\\t`` escapes rather than literal
        whitespace that an editor could silently rewrite.
        """
        data1 = (
            b'2004-11-04T16:49:00.000Z\ttom\tjane\tHello\n'
            b'2004-11-04T16:49:00.000Z\ttom\tjane\tIs anyone there?\n'
            b'2004-11-04T16:49:00.000Z\tjane\t\tYes\n'
            b'\n'
        )

        data2 = (
            b'2004-11-04T16:49:00.000Z\ttom\tjane\tHello\n'
            b'2004-11-04T16:49:00.000Z\ttom\t\tIs anyone there?\n'
            b'2004-11-04T16:49:00.000Z\tjane\t\tYes\n'
            b'\n'
        )

        return data1, data2

    def _remove_data(self):
        """
        Clean up by removing the corpus data directory.
        """
        import shutil

        if os.path.exists(self.trainer.data_directory):
            shutil.rmtree(self.trainer.data_directory)

    def _create_test_corpus(self, data):
        """
        Create a small tar in a similar format to the
        Ubuntu corpus file for testing.

        ``data`` is a pair of byte strings; each becomes one ``.tsv``
        member under ``dialogs/3/``. Returns the path of the archive.
        """
        file_path = os.path.join(self.trainer.data_directory, 'ubuntu_dialogs.tgz')
        member_names = ('dialogs/3/1.tsv', 'dialogs/3/2.tsv')

        # Context managers make sure the archive and the in-memory
        # buffers are closed even if adding a member fails.
        with tarfile.TarFile(file_path, 'w') as tar:
            for member_name, payload in zip(member_names, data):
                tarinfo = tarfile.TarInfo(member_name)
                tarinfo.size = len(payload)
                with BytesIO(payload) as tsv:
                    tar.addfile(tarinfo, fileobj=tsv)

        return file_path

    def _destroy_test_corpus(self):
        """
        Remove the test corpus file.
        """
        file_path = os.path.join(self.trainer.data_directory, 'ubuntu_dialogs.tgz')

        if os.path.exists(file_path):
            os.remove(file_path)

    def _mock_get_response(self, *args, **kwargs):
        """
        Return a requests.Response object.
        """
        import requests

        response = requests.Response()
        response._content = b'Some response content'
        response.headers['content-length'] = len(response.content)
        return response

    def _patch_requests_get(self):
        """
        Replace ``requests.get`` with a Mock for the current test only.

        The real function is put back by a registered cleanup, so the
        replacement cannot leak into tests that run afterwards.
        Returns the Mock so the caller can make assertions on it.
        """
        import requests

        self.addCleanup(setattr, requests, 'get', requests.get)
        requests.get = Mock(side_effect=self._mock_get_response)
        return requests.get

    def test_download(self):
        """
        Test the download function for the Ubuntu corpus trainer.
        """
        fake_get = self._patch_requests_get()
        download_url = 'https://example.com/download.tgz'
        self.trainer.download(download_url, show_status=False)

        file_name = download_url.split('/')[-1]
        downloaded_file_path = os.path.join(self.trainer.data_directory, file_name)

        fake_get.assert_called_with(download_url, stream=True)
        self.assertTrue(os.path.exists(downloaded_file_path))

        # Remove the dummy downloaded file
        os.remove(downloaded_file_path)

    def test_download_file_exists(self):
        """
        Test the case that the corpus file exists.
        """
        file_path = os.path.join(self.trainer.data_directory, 'download.tgz')
        open(file_path, 'a').close()

        fake_get = self._patch_requests_get()
        download_url = 'https://example.com/download.tgz'
        self.trainer.download(download_url, show_status=False)

        # Remove the dummy file created above
        os.remove(file_path)

        self.assertFalse(fake_get.called)

    def test_download_url_not_found(self):
        """
        Test the case that the url being downloaded does not exist.
        """
        self.skipTest('This test needs to be created.')

    def test_extract(self):
        """
        Test the extraction of text from a decompressed Ubuntu Corpus file.
        """
        file_object_path = self._create_test_corpus(self._get_data())
        self.trainer.extract(file_object_path)

        self._destroy_test_corpus()
        corpus_path = os.path.join(self.trainer.extracted_data_directory, 'dialogs', '3')

        self.assertTrue(os.path.exists(self.trainer.extracted_data_directory))
        self.assertTrue(os.path.exists(os.path.join(corpus_path, '1.tsv')))
        self.assertTrue(os.path.exists(os.path.join(corpus_path, '2.tsv')))

    def test_train(self):
        """
        Test that the chat bot is trained using data from the Ubuntu Corpus.
        """
        self._create_test_corpus(self._get_data())

        self.trainer.train()
        self._destroy_test_corpus()

        response = self.chatbot.get_response('Is anyone there?')
        self.assertEqual(response.text, 'Yes')

    def test_train_sets_search_text(self):
        """
        Test that training stores the search_text of each statement.
        """
        self._create_test_corpus(self._get_data())

        self.trainer.train()
        self._destroy_test_corpus()

        results = list(self.chatbot.storage.filter(text='Is anyone there?'))

        self.assertEqual(len(results), 2)
        self.assertEqual(results[0].search_text, 'VERB:anyone NOUN:there')

    def test_train_sets_search_in_response_to(self):
        """
        Test that training stores the search_in_response_to of each statement.
        """
        self._create_test_corpus(self._get_data())

        self.trainer.train()
        self._destroy_test_corpus()

        results = list(self.chatbot.storage.filter(in_response_to='Is anyone there?'))

        self.assertEqual(len(results), 2)
        self.assertEqual(results[0].search_in_response_to, 'VERB:anyone NOUN:there')

    def test_is_extracted(self):
        """
        Test that a check can be done for if the corpus has already been extracted.
        """
        file_object_path = self._create_test_corpus(self._get_data())
        self.trainer.extract(file_object_path)

        extracted = self.trainer.is_extracted(self.trainer.extracted_data_directory)
        self._destroy_test_corpus()

        self.assertTrue(extracted)

    def test_is_not_extracted(self):
        """
        Test that a check can be done for if the corpus has not been extracted.
        """
        self._remove_data()
        extracted = self.trainer.is_extracted(self.trainer.extracted_data_directory)

        self.assertFalse(extracted)
def get_ubuntu_corpus_trainer(chatbot):
    """Return an UbuntuCorpusTrainer for ``chatbot`` with progress output disabled."""
    quiet_trainer = UbuntuCorpusTrainer(chatbot, show_training_progress=False)
    return quiet_trainer
def train_bot_ubuntu():
    """Run an UbuntuCorpusTrainer over the module-level ``bot``."""
    UbuntuCorpusTrainer(bot).train()
def _train_ubuntu(self):
    """Train ``self.chatbot`` with an UbuntuCorpusTrainer and return True."""
    UbuntuCorpusTrainer(self.chatbot).train()
    return True
""" This example shows how to train a chat bot using the Ubuntu Corpus of conversation dialog. """ import logging from chatterbot import ChatBot from chatterbot.trainers import UbuntuCorpusTrainer # Enable info level logging logging.basicConfig(level=logging.INFO) chatbot = ChatBot('Example Bot') trainer = UbuntuCorpusTrainer(chatbot) # Start by training our bot with the Ubuntu corpus data trainer.train() # Now let's get a response to a greeting response = chatbot.get_response('How are you doing today?') print(response)
def __train_ubuntu_corpus(self):
    """Train ``self.chatbot`` with an UbuntuCorpusTrainer."""
    UbuntuCorpusTrainer(self.chatbot).train()
def setUp(self):
    """Create the trainer under test with progress output turned off."""
    super(UbuntuCorpusTrainerTestCase, self).setUp()

    quiet_trainer = UbuntuCorpusTrainer(self.chatbot, show_training_progress=False)
    self.trainer = quiet_trainer
def _train_ubuntu(self):
    """
    Train ``self.chatbot`` with an UbuntuCorpusTrainer whose data
    directory is the cog's ``ubuntu_data`` folder, then return True.
    """
    target_bot = self.chatbot
    corpus_dir = cog_data_path(self) / "ubuntu_data"

    ubuntu_trainer = UbuntuCorpusTrainer(
        target_bot,
        ubuntu_corpus_data_directory=corpus_dir,
    )
    ubuntu_trainer.train()
    return True
def setUp(self):
    """Prepare a silent UbuntuCorpusTrainer bound to the test chat bot."""
    super(UbuntuCorpusTrainerTestCase, self).setUp()

    trainer_options = {'show_training_progress': False}
    self.trainer = UbuntuCorpusTrainer(self.chatbot, **trainer_options)
class UbuntuCorpusTrainerTestCase(ChatBotTestCase):
    """
    Test the Ubuntu Corpus trainer class.
    """

    def setUp(self):
        super(UbuntuCorpusTrainerTestCase, self).setUp()
        self.trainer = UbuntuCorpusTrainer(
            self.chatbot,
            show_training_progress=False
        )

    def tearDown(self):
        super(UbuntuCorpusTrainerTestCase, self).tearDown()
        self._remove_data()

    def _get_data(self):
        """
        Return two small dialogs as bytes, one row per line.

        The rows are written to ``.tsv`` members, so the columns are
        separated with explicit ``\\t`` escapes rather than literal
        whitespace that an editor could silently rewrite.
        """
        data1 = (
            b'2004-11-04T16:49:00.000Z\ttom\tjane\tHello\n' +
            b'2004-11-04T16:49:00.000Z\ttom\tjane\tIs anyone there?\n' +
            b'2004-11-04T16:49:00.000Z\tjane\t\tYes\n' +
            b'\n'
        )

        data2 = (
            b'2004-11-04T16:49:00.000Z\ttom\tjane\tHello\n' +
            b'2004-11-04T16:49:00.000Z\ttom\t\tIs anyone there?\n' +
            b'2004-11-04T16:49:00.000Z\tjane\t\tYes\n' +
            b'\n'
        )

        return data1, data2

    def _remove_data(self):
        """
        Clean up by removing the corpus data directory.
        """
        import shutil

        if os.path.exists(self.trainer.data_directory):
            shutil.rmtree(self.trainer.data_directory)

    def _create_test_corpus(self, data):
        """
        Create a small tar in a similar format to the
        Ubuntu corpus file for testing.

        ``data`` is a pair of byte strings; each becomes one ``.tsv``
        member under ``dialogs/3/``. Returns the path of the archive.
        """
        file_path = os.path.join(self.trainer.data_directory, 'ubuntu_dialogs.tgz')
        member_names = ('dialogs/3/1.tsv', 'dialogs/3/2.tsv')

        # Context managers make sure the archive and the in-memory
        # buffers are closed even if adding a member fails.
        with tarfile.TarFile(file_path, 'w') as tar:
            for member_name, payload in zip(member_names, data):
                tarinfo = tarfile.TarInfo(member_name)
                tarinfo.size = len(payload)
                with BytesIO(payload) as tsv:
                    tar.addfile(tarinfo, fileobj=tsv)

        return file_path

    def _destroy_test_corpus(self):
        """
        Remove the test corpus file.
        """
        file_path = os.path.join(self.trainer.data_directory, 'ubuntu_dialogs.tgz')

        if os.path.exists(file_path):
            os.remove(file_path)

    def _mock_get_response(self, *args, **kwargs):
        """
        Return a requests.Response object.
        """
        import requests

        response = requests.Response()
        response._content = b'Some response content'
        response.headers['content-length'] = len(response.content)
        return response

    def _patch_requests_get(self):
        """
        Replace ``requests.get`` with a Mock for the current test only.

        The real function is put back by a registered cleanup, so the
        replacement cannot leak into tests that run afterwards.
        Returns the Mock so the caller can make assertions on it.
        """
        import requests

        self.addCleanup(setattr, requests, 'get', requests.get)
        requests.get = Mock(side_effect=self._mock_get_response)
        return requests.get

    def test_download(self):
        """
        Test the download function for the Ubuntu corpus trainer.
        """
        fake_get = self._patch_requests_get()
        download_url = 'https://example.com/download.tgz'
        self.trainer.download(download_url, show_status=False)

        file_name = download_url.split('/')[-1]
        downloaded_file_path = os.path.join(self.trainer.data_directory, file_name)

        fake_get.assert_called_with(download_url, stream=True)
        self.assertTrue(os.path.exists(downloaded_file_path))

        # Remove the dummy downloaded file
        os.remove(downloaded_file_path)

    def test_download_file_exists(self):
        """
        Test the case that the corpus file exists.
        """
        file_path = os.path.join(self.trainer.data_directory, 'download.tgz')
        open(file_path, 'a').close()

        fake_get = self._patch_requests_get()
        download_url = 'https://example.com/download.tgz'
        self.trainer.download(download_url, show_status=False)

        # Remove the dummy file created above
        os.remove(file_path)

        self.assertFalse(fake_get.called)

    def test_download_url_not_found(self):
        """
        Test the case that the url being downloaded does not exist.
        """
        raise unittest.SkipTest('This test needs to be created.')

    def test_extract(self):
        """
        Test the extraction of text from a decompressed Ubuntu Corpus file.
        """
        file_object_path = self._create_test_corpus(self._get_data())
        self.trainer.extract(file_object_path)

        self._destroy_test_corpus()
        corpus_path = os.path.join(self.trainer.extracted_data_directory, 'dialogs', '3')

        self.assertTrue(os.path.exists(self.trainer.extracted_data_directory))
        self.assertTrue(os.path.exists(os.path.join(corpus_path, '1.tsv')))
        self.assertTrue(os.path.exists(os.path.join(corpus_path, '2.tsv')))

    def test_train(self):
        """
        Test that the chat bot is trained using data from the Ubuntu Corpus.
        """
        self._create_test_corpus(self._get_data())

        self.trainer.train()
        self._destroy_test_corpus()

        response = self.chatbot.get_response('Is anyone there?')
        self.assertEqual(response, 'Yes')

    def test_is_extracted(self):
        """
        Test that a check can be done for if the corpus has already been extracted.
        """
        file_object_path = self._create_test_corpus(self._get_data())
        self.trainer.extract(file_object_path)

        extracted = self.trainer.is_extracted(self.trainer.extracted_data_directory)
        self._destroy_test_corpus()

        self.assertTrue(extracted)

    def test_is_not_extracted(self):
        """
        Test that a check can be done for if the corpus has not been extracted.
        """
        self._remove_data()
        extracted = self.trainer.is_extracted(self.trainer.extracted_data_directory)

        self.assertFalse(extracted)
preprocessors=[
    'chatterbot.preprocessors.clean_whitespace',
    "chatterbot.preprocessors.unescape_html"
],
)
# NOTE(review): the lines above close a call that begins before this
# fragment; its opening is not visible here.

# Seed the bot with a single two-line exchange.
trainer = ListTrainer(chatbot)
trainer.train([
    "Hello there?",
    "General Kenobi!",
])

# trainer = ChatterBotCorpusTrainer(chatbot)
# trainer.train(
#     "chatterbot.corpus.english"
# )

# `trainer` is rebound here; the ListTrainer above is no longer referenced.
trainer = UbuntuCorpusTrainer(chatbot)
trainer.train()


@app.route('/')
def home():
    """Render ``index.html`` for the root URL."""
    return render_template('index.html')


@socketio.on('connect')
def on_connect():
    """Increment the module-level ``total_users`` counter and print it."""
    global total_users
    total_users += 1
    print(f'Client connected!| Total users: {total_users}')
class Brainz(commands.Cog):
    """Adds neurons to [botname]."""

    def __init__(self, bot):
        self.bot = bot
        # The chat bot and its trainers stay None until initialize() runs.
        self.chat_bot: ChatBot = None
        self._list_trainer: ListTrainer = None
        self._corpus_trainer: ChatterBotCorpusTrainer = None
        self._ubuntu_trainer: UbuntuCorpusTrainer = None
        self.__database = cog_data_path(self) / "chatter.sqlite3"
        # Messages waiting to be fed to the list trainer by `learn`.
        self._message_cache = []
        # Set by initialize(); on_message_without_command checks it before replying.
        self._event = asyncio.Event()
        self.mention_regex: Optional[re.Pattern] = None
        self.config = Config.get_conf(self, identifier=208903205982044161, force_registration=True)

    @tasks.loop(seconds=15.0)
    async def learn(self):
        """Every 15 seconds, train the list trainer on the cached messages."""
        if self._message_cache and self._list_trainer:
            # Take a snapshot and empty the shared cache before training.
            message_cache = self._message_cache.copy()
            self._message_cache.clear()
            self._list_trainer.train(message_cache)
            del message_cache

    def cog_unload(self):
        """Stop the background `learn` loop."""
        self.learn.cancel()

    async def initialize(self):
        """
        Build the chat bot and its trainers, then start the `learn` loop.

        Runs after `wait_until_red_ready()` returns, because the mention
        pattern and the bot name are read from `self.bot.user`.
        """
        await self.bot.wait_until_red_ready()
        self.mention_regex = re.compile(rf"<@!?{self.bot.user.id}>")
        self._event.set()
        self.chat_bot = ChatBot(
            self.bot.user.name,
            storage_adapter="chatterbot.storage.SQLStorageAdapter",
            database_uri=f"sqlite:///{self.__database}",
            logic_adapters=[
                "chatterbot.logic.MathematicalEvaluation",
                "chatterbot.logic.TimeLogicAdapter",
                "chatterbot.logic.BestMatch",
            ],
            filters=[filters.get_recent_repeated_responses],
            preprocessors=[
                "chatterbot.preprocessors.clean_whitespace",
                "chatterbot.preprocessors.unescape_html",
                "chatterbot.preprocessors.convert_to_ascii",
            ],
        )
        self._list_trainer = ListTrainer(self.chat_bot)
        self._corpus_trainer = ChatterBotCorpusTrainer(self.chat_bot)
        self._ubuntu_trainer = UbuntuCorpusTrainer(self.chat_bot)
        self.learn.start()

    @commands.Cog.listener()
    async def on_message_without_command(self, message: discord.Message):
        """
        Cache every non-bot message and reply when the bot is mentioned.

        The message is cached before any other check, so messages that do
        not mention the bot are still queued for training.
        """
        if message.author.bot:
            return
        self._message_cache.append(message.clean_content)
        if (not self._event.is_set()) or self.mention_regex is None:
            return
        if not self.mention_regex.search(message.content):
            return

        guild = message.guild
        channel = message.channel
        author = message.author
        if guild and (
            not channel.permissions_for(guild.me).send_messages
            or (await self.bot.cog_disabled_in_guild(self, guild))
            or not (await self.bot.ignored_channel_or_guild(message))
        ):
            return
        if not (await self.bot.allowed_by_whitelist_blacklist(author)):
            return

        if guild:
            perms = message.channel.permissions_for(guild.me)
        else:
            perms = message.channel.permissions_for(self.bot.user)
        if perms.send_messages:
            await message.channel.send(
                self.chat_bot.get_response(message.clean_content))

    @commands.group(name="feed")
    @commands.is_owner()
    async def command_feed(self, ctx: commands.Context):
        """Training commands."""

    @command_feed.command(name="ubuntu")
    async def command_feed_ubuntu(self, ctx: commands.Context, *, language: str.lower = None):
        """Train [botname] with the community Ubuntu dataset."""
        # `language` is never read; it now has a default so the command can
        # be invoked without an argument while old invocations keep working.
        # NOTE(review): train() is a synchronous call made from a coroutine.
        async with ctx.typing():
            self._ubuntu_trainer.train()
        await ctx.send(
            f"{ctx.author.mention} I've have learnt a lot about this thing you call 'Internet'."
        )

    @command_feed.command(name="language")
    async def command_feed_language(self, ctx: commands.Context, *, language: str.lower):
        """Train [botname] in the specified languages."""
        supported_language = {
            "bengali",
            "chinese",
            "english",
            "french",
            "german",
            "hebrew",
            "hindi",
            "indonesian",
            "italian",
            "japanese",
            "korean",
            "marathi",
            "oriya",
            "persian",
            "portuguese",
            "russian",
            "spanish",
            "swedish",
            "telugu",
            "thai",
            "traditionalchinese",
            "turkish",
        }
        if language not in supported_language:
            # humanize_list is given a sorted list rather than the set, so
            # it receives an indexable sequence in a stable order.
            return await ctx.send(
                f"`{language}` is not a supported language, please use one of the following\n\n"
                f"{humanize_list(sorted(supported_language), style='or')}")
        await ctx.send(f"I'm being trained on {language.title()}")
        async with ctx.typing():
            self._corpus_trainer.train(f"chatterbot.corpus.{language}")
        await ctx.send(
            f"{ctx.author.mention} I've have learnt a lot about {language.title()}."
        )

    async def _cache_channel_history(self, source_channel):
        """Append the content of every non-bot message in `source_channel` to the cache."""
        messages = (await source_channel.history(
            limit=None
        ).filter(lambda m: not m.author.bot).flatten())
        self._message_cache.extend(
            [m.clean_content for m in messages])

    @command_feed.command(name="local")
    async def command_feed_local(
            self, ctx: commands.Context, *, channel: Optional[discord.TextChannel] = None):
        """Train [botname] in the current server or specified channel.

        THIS MAY TAKE A VERY LONG TIME.
        """
        async with ctx.typing():
            if channel:
                perms = channel.permissions_for(ctx.me)
                if not (perms.read_message_history and perms.read_messages):
                    return await ctx.send(
                        "I need `Read Messages` and `Read Message History` in "
                        f"{channel.mention} to learn from it")
                await self._cache_channel_history(channel)
            elif ctx.guild:
                # A separate loop variable keeps `channel` as None, so the
                # summary below reports the guild and not the last channel.
                for text_channel in ctx.guild.text_channels:
                    perms = text_channel.permissions_for(ctx.me)
                    if perms.read_message_history and perms.read_messages:
                        await self._cache_channel_history(text_channel)
            else:
                # No guild and no channel argument: `channel` is None here,
                # so read the channel the command was sent in.
                await self._cache_channel_history(ctx.channel)

        if channel:
            await ctx.send(
                f"{ctx.author.mention} I've have learnt a lot about {channel.mention}"
            )
        elif ctx.guild:
            await ctx.send(
                f"{ctx.author.mention} I've have learnt a lot about {ctx.guild.name}"
            )
        else:
            await ctx.send(
                f"{ctx.author.mention} I've have learnt a lot about our conversations."
            )