Ejemplo n.º 1
0
 def setUp(self):
     """Run the base fixture, then build the Ubuntu corpus trainer under test."""
     super().setUp()
     # Keep the trainer's files in a dedicated test directory and silence
     # progress output while the tests run.
     trainer_options = {
         'ubuntu_corpus_data_directory': './.ubuntu_test_data/',
         'show_training_progress': False,
     }
     self.trainer = UbuntuCorpusTrainer(self.chatbot, **trainer_options)
Ejemplo n.º 2
0
def DefTrain():
    """Train ``c.chatbot`` on a fixed set of English ChatterBot corpora.

    After the corpus training an ``UbuntuCorpusTrainer`` is constructed for
    the same bot; its ``train`` method is not called here.
    """
    english_corpora = (
        "chatterbot.corpus.english.ai",
        "chatterbot.corpus.english.conversations",
        "chatterbot.corpus.english.greetings",
        "chatterbot.corpus.english.emotion",
    )
    corpus_trainer = ChatterBotCorpusTrainer(c.chatbot)
    corpus_trainer.train(*english_corpora)
    # NOTE(review): the instance below is discarded without calling train();
    # confirm whether Ubuntu corpus training was intended here.
    UbuntuCorpusTrainer(c.chatbot)
Ejemplo n.º 3
0
 def train(self, path=None):
     """Train ``self.bot``.

     When ``path`` is truthy it is handed to a ``ChatterBotCorpusTrainer``;
     otherwise an ``UbuntuCorpusTrainer`` is run with no arguments.
     """
     if path:
         ChatterBotCorpusTrainer(self.bot).train(path)
         return
     UbuntuCorpusTrainer(self.bot).train()
Ejemplo n.º 4
0
 async def initialize(self):
     # Wait for the bot to be ready before reading self.bot.user below.
     await self.bot.wait_until_red_ready()
     self.mention_regex = re.compile(rf"<@!?{self.bot.user.id}>")
     self._event.set()

     # Dotted paths of the ChatterBot components handed to the ChatBot.
     logic_adapter_paths = [
         "chatterbot.logic.MathematicalEvaluation",
         "chatterbot.logic.TimeLogicAdapter",
         "chatterbot.logic.BestMatch",
     ]
     preprocessor_paths = [
         "chatterbot.preprocessors.clean_whitespace",
         "chatterbot.preprocessors.unescape_html",
         "chatterbot.preprocessors.convert_to_ascii",
     ]
     self.chat_bot = ChatBot(
         self.bot.user.name,
         storage_adapter="chatterbot.storage.SQLStorageAdapter",
         database_uri=f"sqlite:///{self.__database}",
         logic_adapters=logic_adapter_paths,
         filters=[filters.get_recent_repeated_responses],
         preprocessors=preprocessor_paths,
     )

     # One trainer of each kind, all bound to the same chat bot.
     self._list_trainer = ListTrainer(self.chat_bot)
     self._corpus_trainer = ChatterBotCorpusTrainer(self.chat_bot)
     self._ubuntu_trainer = UbuntuCorpusTrainer(self.chat_bot)

     # Start the periodic ``learn`` task now that the trainers exist.
     self.learn.start()
Ejemplo n.º 5
0
class UbuntuCorpusTrainerTestCase(ChatBotTestCase):
    """
    Test the Ubuntu Corpus trainer class.
    """

    def setUp(self):
        super().setUp()
        self.trainer = UbuntuCorpusTrainer(
            self.chatbot,
            ubuntu_corpus_data_directory='./.ubuntu_test_data/',
            show_training_progress=False
        )

    def tearDown(self):
        super().tearDown()

        self._remove_data()

    def _get_data(self):
        """
        Return two small tab-separated dialogs, as bytes, used to
        build the test corpus archive.
        """
        data1 = (
            b'2004-11-04T16:49:00.000Z	tom	jane	Hello\n'
            b'2004-11-04T16:49:00.000Z	tom	jane	Is anyone there?\n'
            b'2004-11-04T16:49:00.000Z	jane		Yes\n'
            b'\n'
        )

        data2 = (
            b'2004-11-04T16:49:00.000Z	tom	jane	Hello\n'
            b'2004-11-04T16:49:00.000Z	tom		Is anyone there?\n'
            b'2004-11-04T16:49:00.000Z	jane		Yes\n'
            b'\n'
        )

        return data1, data2

    def _remove_data(self):
        """
        Clean up by removing the corpus data directory.
        """
        import shutil

        if os.path.exists(self.trainer.data_directory):
            shutil.rmtree(self.trainer.data_directory)

    def _create_test_corpus(self, data):
        """
        Create a small tar in a similar format to the
        Ubuntu corpus file for testing and return its path.

        ``data`` is a pair of bytes objects; each one becomes a
        ``.tsv`` member of the archive.
        """
        file_path = os.path.join(self.trainer.data_directory, 'ubuntu_dialogs.tgz')

        # Context managers make sure the archive and the in-memory buffers
        # are closed even if adding a member raises.
        with tarfile.TarFile(file_path, 'w') as tar:
            with BytesIO(data[0]) as tsv1:
                tarinfo = tarfile.TarInfo('dialogs/3/1.tsv')
                tarinfo.size = len(data[0])
                tar.addfile(tarinfo, fileobj=tsv1)

            with BytesIO(data[1]) as tsv2:
                tarinfo = tarfile.TarInfo('dialogs/3/2.tsv')
                tarinfo.size = len(data[1])
                tar.addfile(tarinfo, fileobj=tsv2)

        return file_path

    def _destroy_test_corpus(self):
        """
        Remove the test corpus file.
        """
        file_path = os.path.join(self.trainer.data_directory, 'ubuntu_dialogs.tgz')

        if os.path.exists(file_path):
            os.remove(file_path)

    def _mock_get_response(self, *args, **kwargs):
        """
        Return a requests.Response object.
        """
        import requests
        response = requests.Response()
        response._content = b'Some response content'
        response.headers['content-length'] = len(response.content)
        return response

    def _patch_requests_get(self):
        """
        Replace ``requests.get`` with a Mock for the current test only
        and return the Mock.

        The original function is restored through ``addCleanup`` so the
        mock does not leak into tests that run afterwards.
        """
        import requests

        self.addCleanup(setattr, requests, 'get', requests.get)
        requests.get = Mock(side_effect=self._mock_get_response)
        return requests.get

    def test_download(self):
        """
        Test the download function for the Ubuntu corpus trainer.
        """
        mocked_get = self._patch_requests_get()
        download_url = 'https://example.com/download.tgz'
        self.trainer.download(download_url, show_status=False)

        file_name = download_url.split('/')[-1]
        downloaded_file_path = os.path.join(self.trainer.data_directory, file_name)

        mocked_get.assert_called_with(download_url, stream=True)
        self.assertTrue(os.path.exists(downloaded_file_path))

        # Remove the dummy downloaded file
        os.remove(downloaded_file_path)

    def test_download_file_exists(self):
        """
        Test the case that the corpus file exists.
        """
        file_path = os.path.join(self.trainer.data_directory, 'download.tgz')
        # Create an empty file where the download would be written
        open(file_path, 'a').close()

        mocked_get = self._patch_requests_get()
        download_url = 'https://example.com/download.tgz'
        self.trainer.download(download_url, show_status=False)

        # Remove the dummy file before asserting
        os.remove(file_path)

        self.assertFalse(mocked_get.called)

    def test_download_url_not_found(self):
        """
        Test the case that the url being downloaded does not exist.
        """
        self.skipTest('This test needs to be created.')

    def test_extract(self):
        """
        Test the extraction of text from a decompressed Ubuntu Corpus file.
        """
        file_object_path = self._create_test_corpus(self._get_data())
        self.trainer.extract(file_object_path)

        self._destroy_test_corpus()
        corpus_path = os.path.join(self.trainer.extracted_data_directory, 'dialogs', '3')

        self.assertTrue(os.path.exists(self.trainer.extracted_data_directory))
        self.assertTrue(os.path.exists(os.path.join(corpus_path, '1.tsv')))
        self.assertTrue(os.path.exists(os.path.join(corpus_path, '2.tsv')))

    def test_train(self):
        """
        Test that the chat bot is trained using data from the Ubuntu Corpus.
        """
        self._create_test_corpus(self._get_data())

        self.trainer.train()
        self._destroy_test_corpus()

        response = self.chatbot.get_response('Is anyone there?')
        self.assertEqual(response.text, 'Yes')

    def test_train_sets_search_text(self):
        """
        Test that statements stored by training have search_text set.
        """
        self._create_test_corpus(self._get_data())

        self.trainer.train()
        self._destroy_test_corpus()

        results = list(self.chatbot.storage.filter(text='Is anyone there?'))

        self.assertEqual(len(results), 2)
        self.assertEqual(results[0].search_text, 'VERB:anyone NOUN:there')

    def test_train_sets_search_in_response_to(self):
        """
        Test that statements stored by training have search_in_response_to set.
        """
        self._create_test_corpus(self._get_data())

        self.trainer.train()
        self._destroy_test_corpus()

        results = list(self.chatbot.storage.filter(in_response_to='Is anyone there?'))

        self.assertEqual(len(results), 2)
        self.assertEqual(results[0].search_in_response_to, 'VERB:anyone NOUN:there')

    def test_is_extracted(self):
        """
        Test that is_extracted reports True once the corpus has been extracted.
        """
        file_object_path = self._create_test_corpus(self._get_data())
        self.trainer.extract(file_object_path)

        extracted = self.trainer.is_extracted(self.trainer.extracted_data_directory)
        self._destroy_test_corpus()

        self.assertTrue(extracted)

    def test_is_not_extracted(self):
        """
        Test that is_extracted reports False after the data directory is removed.
        """
        self._remove_data()
        extracted = self.trainer.is_extracted(self.trainer.extracted_data_directory)

        self.assertFalse(extracted)
Ejemplo n.º 6
0
def get_ubuntu_corpus_trainer(chatbot):
    """Return an ``UbuntuCorpusTrainer`` for ``chatbot`` with progress output off."""
    trainer = UbuntuCorpusTrainer(chatbot, show_training_progress=False)
    return trainer
Ejemplo n.º 7
0
def train_bot_ubuntu():
    """Run an ``UbuntuCorpusTrainer`` on the module-level ``bot``."""
    UbuntuCorpusTrainer(bot).train()
Ejemplo n.º 8
0
 def _train_ubuntu(self):
     """Run an ``UbuntuCorpusTrainer`` on ``self.chatbot`` and return ``True``."""
     UbuntuCorpusTrainer(self.chatbot).train()
     return True
Ejemplo n.º 9
0
"""
This example shows how to train a chat bot using the
Ubuntu Corpus of conversation dialog.
"""
import logging
from chatterbot import ChatBot
from chatterbot.trainers import UbuntuCorpusTrainer

# Enable info level logging for the rest of the script
logging.basicConfig(level=logging.INFO)

# Create the chat bot that will be trained
chatbot = ChatBot('Example Bot')

# Bind an Ubuntu corpus trainer to the chat bot
trainer = UbuntuCorpusTrainer(chatbot)

# Start by training our bot with the Ubuntu corpus data
trainer.train()

# Now let's get a response to a greeting and print it
response = chatbot.get_response('How are you doing today?')
print(response)
Ejemplo n.º 10
0
 def __train_ubuntu_corpus(self):
     """Run an ``UbuntuCorpusTrainer`` on ``self.chatbot``."""
     UbuntuCorpusTrainer(self.chatbot).train()
 def setUp(self):
     """Run the base fixture, then build a trainer with progress output off."""
     super(UbuntuCorpusTrainerTestCase, self).setUp()
     trainer = UbuntuCorpusTrainer(
         self.chatbot,
         show_training_progress=False,
     )
     self.trainer = trainer
Ejemplo n.º 12
0
 def _train_ubuntu(self):
     """Train ``self.chatbot`` with an ``UbuntuCorpusTrainer`` and return ``True``.

     The trainer's data directory is ``ubuntu_data`` under this cog's data path.
     """
     corpus_directory = cog_data_path(self) / "ubuntu_data"
     UbuntuCorpusTrainer(
         self.chatbot,
         ubuntu_corpus_data_directory=corpus_directory,
     ).train()
     return True
 def setUp(self):
     """Run the base fixture, then build a trainer with progress output off."""
     super(UbuntuCorpusTrainerTestCase, self).setUp()
     trainer_options = {'show_training_progress': False}
     self.trainer = UbuntuCorpusTrainer(self.chatbot, **trainer_options)
class UbuntuCorpusTrainerTestCase(ChatBotTestCase):
    """
    Test the Ubuntu Corpus trainer class.
    """

    def setUp(self):
        super(UbuntuCorpusTrainerTestCase, self).setUp()
        self.trainer = UbuntuCorpusTrainer(
            self.chatbot,
            show_training_progress=False
        )

    def tearDown(self):
        super(UbuntuCorpusTrainerTestCase, self).tearDown()

        self._remove_data()

    def _get_data(self):
        """
        Return two small tab-separated dialogs, as bytes, used to
        build the test corpus archive.
        """
        data1 = (
            b'2004-11-04T16:49:00.000Z	tom	jane	Hello\n' +
            b'2004-11-04T16:49:00.000Z	tom	jane	Is anyone there?\n' +
            b'2004-11-04T16:49:00.000Z	jane		Yes\n' +
            b'\n'
        )

        data2 = (
            b'2004-11-04T16:49:00.000Z	tom	jane	Hello\n' +
            b'2004-11-04T16:49:00.000Z	tom		Is anyone there?\n' +
            b'2004-11-04T16:49:00.000Z	jane		Yes\n' +
            b'\n'
        )

        return data1, data2

    def _remove_data(self):
        """
        Clean up by removing the corpus data directory.
        """
        import shutil

        if os.path.exists(self.trainer.data_directory):
            shutil.rmtree(self.trainer.data_directory)

    def _create_test_corpus(self, data):
        """
        Create a small tar in a similar format to the
        Ubuntu corpus file for testing and return its path.

        ``data`` is a pair of bytes objects; each one becomes a
        ``.tsv`` member of the archive.
        """
        file_path = os.path.join(self.trainer.data_directory, 'ubuntu_dialogs.tgz')

        # Context managers make sure the archive and the in-memory buffers
        # are closed even if adding a member raises.
        with tarfile.TarFile(file_path, 'w') as tar:
            with BytesIO(data[0]) as tsv1:
                tarinfo = tarfile.TarInfo('dialogs/3/1.tsv')
                tarinfo.size = len(data[0])
                tar.addfile(tarinfo, fileobj=tsv1)

            with BytesIO(data[1]) as tsv2:
                tarinfo = tarfile.TarInfo('dialogs/3/2.tsv')
                tarinfo.size = len(data[1])
                tar.addfile(tarinfo, fileobj=tsv2)

        return file_path

    def _destroy_test_corpus(self):
        """
        Remove the test corpus file.
        """
        file_path = os.path.join(self.trainer.data_directory, 'ubuntu_dialogs.tgz')

        if os.path.exists(file_path):
            os.remove(file_path)

    def _mock_get_response(self, *args, **kwargs):
        """
        Return a requests.Response object.
        """
        import requests
        response = requests.Response()
        response._content = b'Some response content'
        response.headers['content-length'] = len(response.content)
        return response

    def _patch_requests_get(self):
        """
        Replace ``requests.get`` with a Mock for the current test only
        and return the Mock.

        The original function is restored through ``addCleanup`` so the
        mock does not leak into tests that run afterwards.
        """
        import requests

        self.addCleanup(setattr, requests, 'get', requests.get)
        requests.get = Mock(side_effect=self._mock_get_response)
        return requests.get

    def test_download(self):
        """
        Test the download function for the Ubuntu corpus trainer.
        """
        mocked_get = self._patch_requests_get()
        download_url = 'https://example.com/download.tgz'
        self.trainer.download(download_url, show_status=False)

        file_name = download_url.split('/')[-1]
        downloaded_file_path = os.path.join(self.trainer.data_directory, file_name)

        mocked_get.assert_called_with(download_url, stream=True)
        self.assertTrue(os.path.exists(downloaded_file_path))

        # Remove the dummy downloaded file
        os.remove(downloaded_file_path)

    def test_download_file_exists(self):
        """
        Test the case that the corpus file exists.
        """
        file_path = os.path.join(self.trainer.data_directory, 'download.tgz')
        # Create an empty file where the download would be written
        open(file_path, 'a').close()

        mocked_get = self._patch_requests_get()
        download_url = 'https://example.com/download.tgz'
        self.trainer.download(download_url, show_status=False)

        # Remove the dummy file before asserting
        os.remove(file_path)

        self.assertFalse(mocked_get.called)

    def test_download_url_not_found(self):
        """
        Test the case that the url being downloaded does not exist.
        """
        raise unittest.SkipTest('This test needs to be created.')

    def test_extract(self):
        """
        Test the extraction of text from a decompressed Ubuntu Corpus file.
        """
        file_object_path = self._create_test_corpus(self._get_data())
        self.trainer.extract(file_object_path)

        self._destroy_test_corpus()
        corpus_path = os.path.join(self.trainer.extracted_data_directory, 'dialogs', '3')

        self.assertTrue(os.path.exists(self.trainer.extracted_data_directory))
        self.assertTrue(os.path.exists(os.path.join(corpus_path, '1.tsv')))
        self.assertTrue(os.path.exists(os.path.join(corpus_path, '2.tsv')))

    def test_train(self):
        """
        Test that the chat bot is trained using data from the Ubuntu Corpus.
        """
        self._create_test_corpus(self._get_data())

        self.trainer.train()
        self._destroy_test_corpus()

        response = self.chatbot.get_response('Is anyone there?')
        self.assertEqual(response, 'Yes')

    def test_is_extracted(self):
        """
        Test that is_extracted reports True once the corpus has been extracted.
        """
        file_object_path = self._create_test_corpus(self._get_data())
        self.trainer.extract(file_object_path)

        extracted = self.trainer.is_extracted(self.trainer.extracted_data_directory)
        self._destroy_test_corpus()

        self.assertTrue(extracted)

    def test_is_not_extracted(self):
        """
        Test that is_extracted reports False after the data directory is removed.
        """
        self._remove_data()
        extracted = self.trainer.is_extracted(self.trainer.extracted_data_directory)

        self.assertFalse(extracted)
Ejemplo n.º 15
0
    preprocessors=[
        'chatterbot.preprocessors.clean_whitespace',
        "chatterbot.preprocessors.unescape_html"
    ],

)
# Seed the chat bot with a single two-line exchange.
# NOTE(review): this and the Ubuntu training below run at module level,
# i.e. every time the module is loaded.
trainer = ListTrainer(chatbot)
trainer.train([
    "Hello there?",
    "General Kenobi!",
])
# Disabled alternative: train on the English ChatterBot corpus.
# trainer = ChatterBotCorpusTrainer(chatbot)
# trainer.train(
#     "chatterbot.corpus.english"
# )
# Rebind ``trainer`` to an Ubuntu corpus trainer and run it.
trainer = UbuntuCorpusTrainer(chatbot)
trainer.train()


@app.route('/')
def home():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page


@socketio.on('connect')
def on_connect():
    """Increment the global ``total_users`` counter and print the new total."""
    global total_users
    total_users = total_users + 1
    print(f'Client connected!| Total users: {total_users}')

Ejemplo n.º 16
0
class Brainz(commands.Cog):
    """Adds neurons to [botname]."""
    def __init__(self, bot):
        self.bot = bot
        # The chat bot and its trainers are created later, in initialize().
        self.chat_bot: ChatBot = None
        self._list_trainer: ListTrainer = None
        self._corpus_trainer: ChatterBotCorpusTrainer = None
        self._ubuntu_trainer: UbuntuCorpusTrainer = None
        self.__database = cog_data_path(self) / "chatter.sqlite3"
        # Message texts collected by the listener and consumed by learn().
        self._message_cache = []
        # Set by initialize(); the listener does not reply until it is set.
        self._event = asyncio.Event()
        self.mention_regex: Optional[re.Pattern] = None
        self.config = Config.get_conf(self,
                                      identifier=208903205982044161,
                                      force_registration=True)

    @tasks.loop(seconds=15.0)
    async def learn(self):
        # Periodically feed the cached messages to the list trainer.
        # The cache is copied and cleared before training so the training
        # call works on its own snapshot.
        if self._message_cache and self._list_trainer:
            message_cache = self._message_cache.copy()
            self._message_cache.clear()
            self._list_trainer.train(message_cache)
            del message_cache

    def cog_unload(self):
        # Stop the periodic learn task when the cog is unloaded.
        self.learn.cancel()

    async def initialize(self):
        # Wait for the bot to be ready before reading self.bot.user below.
        await self.bot.wait_until_red_ready()
        self.mention_regex = re.compile(rf"<@!?{self.bot.user.id}>")
        self._event.set()
        self.chat_bot = ChatBot(
            self.bot.user.name,
            storage_adapter="chatterbot.storage.SQLStorageAdapter",
            database_uri=f"sqlite:///{self.__database}",
            logic_adapters=[
                "chatterbot.logic.MathematicalEvaluation",
                "chatterbot.logic.TimeLogicAdapter",
                "chatterbot.logic.BestMatch",
            ],
            filters=[filters.get_recent_repeated_responses],
            preprocessors=[
                "chatterbot.preprocessors.clean_whitespace",
                "chatterbot.preprocessors.unescape_html",
                "chatterbot.preprocessors.convert_to_ascii",
            ],
        )
        self._list_trainer = ListTrainer(self.chat_bot)
        self._corpus_trainer = ChatterBotCorpusTrainer(self.chat_bot)
        self._ubuntu_trainer = UbuntuCorpusTrainer(self.chat_bot)
        self.learn.start()

    async def _cache_channel_history(self, channel):
        # Append the clean content of every non-bot message in ``channel``
        # to the message cache that learn() consumes.
        messages = (await channel.history(
            limit=None
        ).filter(lambda m: not m.author.bot).flatten())
        self._message_cache.extend([m.clean_content for m in messages])

    @commands.Cog.listener()
    async def on_message_without_command(self, message: discord.Message):
        if message.author.bot:
            return
        # Every non-bot message is cached for learning, whether or not
        # the bot ends up replying to it.
        self._message_cache.append(message.clean_content)

        # Only reply once initialize() has run and the bot was mentioned.
        if (not self._event.is_set()) or self.mention_regex is None:
            return
        if not self.mention_regex.search(message.content):
            return

        guild = message.guild
        channel = message.channel
        author = message.author

        if guild and (not channel.permissions_for(guild.me).send_messages or
                      (await self.bot.cog_disabled_in_guild(self, guild)) or
                      not (await self.bot.ignored_channel_or_guild(message))):
            return
        if not (await self.bot.allowed_by_whitelist_blacklist(author)):
            return

        if guild:
            perms = message.channel.permissions_for(guild.me)
        else:
            perms = message.channel.permissions_for(self.bot.user)

        if perms.send_messages:
            await message.channel.send(
                self.chat_bot.get_response(message.clean_content))

    @commands.group(name="feed")
    @commands.is_owner()
    async def command_feed(self, ctx: commands.Context):
        """Training commands."""

    @command_feed.command(name="ubuntu")
    async def command_feed_ubuntu(self, ctx: commands.Context, *,
                                  language: str.lower = ""):
        """Train [botname] with the community Ubuntu dataset."""
        # ``language`` is never used by this command. It is kept only for
        # backward compatibility and now has a default, so the command no
        # longer fails when invoked without an argument.
        # NOTE(review): train() is a synchronous call inside a coroutine;
        # confirm whether it should be moved off the event loop.
        async with ctx.typing():
            self._ubuntu_trainer.train()
        await ctx.send(
            f"{ctx.author.mention} I've have learnt a lot about this thing you call 'Internet'."
        )

    @command_feed.command(name="language")
    async def command_feed_language(self, ctx: commands.Context, *,
                                    language: str.lower):
        """Train [botname] in the specified languages."""
        supported_language = {
            "bengali",
            "chinese",
            "english",
            "french",
            "german",
            "hebrew",
            "hindi",
            "indonesian",
            "italian",
            "japanese",
            "korean",
            "marathi",
            "oriya",
            "persian",
            "portuguese",
            "russian",
            "spanish",
            "swedish",
            "telugu",
            "thai",
            "traditionalchinese",
            "turkish",
        }
        if language not in supported_language:
            # Pass a sorted list rather than the set itself: the listing is
            # then in a stable order and the helper receives a real sequence.
            return await ctx.send(
                f"`{language}` is not a supported language, please use one of the following\n\n"
                f"{humanize_list(sorted(supported_language), style='or')}")
        await ctx.send(f"I'm being trained on {language.title()}")
        async with ctx.typing():
            self._corpus_trainer.train(f"chatterbot.corpus.{language}")

        await ctx.send(
            f"{ctx.author.mention} I've have learnt a lot about {language.title()}."
        )

    @command_feed.command(name="local")
    async def command_feed_local(
            self,
            ctx: commands.Context,
            *,
            channel: Optional[discord.TextChannel] = None):
        """Train [botname] in the current server or specified channel.

        THIS MAY TAKE A VERY LONG TIME.
        """
        async with ctx.typing():
            if channel:
                perms = channel.permissions_for(ctx.me)
                if not (perms.read_message_history and perms.read_messages):
                    return await ctx.send(
                        "I need `Read Messages` and `Read Message History` in "
                        f"{channel.mention} to learn from it")
                await self._cache_channel_history(channel)
            elif ctx.guild:
                # A separate loop variable keeps ``channel`` as None, so the
                # confirmation below reports the guild, not the last channel.
                for text_channel in ctx.guild.text_channels:
                    perms = text_channel.permissions_for(ctx.me)
                    if perms.read_message_history and perms.read_messages:
                        await self._cache_channel_history(text_channel)
            else:
                # No channel argument and no guild: read the channel the
                # command was invoked in (``channel`` is None here).
                await self._cache_channel_history(ctx.channel)
        if channel:
            await ctx.send(
                f"{ctx.author.mention} I've have learnt a lot about {channel.mention}"
            )
        elif ctx.guild:
            await ctx.send(
                f"{ctx.author.mention} I've have learnt a lot about {ctx.guild.name}"
            )
        else:
            await ctx.send(
                f"{ctx.author.mention} I've have learnt a lot about our conversations."
            )