Ejemplo n.º 1
0
    def get_statements(self):
        """
        Collect statements built from tweets that are replies.

        A random word is used as the search term. For every tweet found
        that replies to another status, the replied-to status is fetched
        and attached to the tweet's statement as a response. Tweets that
        are not replies, or whose parent status cannot be fetched because
        of a TwitterError, are left out of the returned list.
        """
        from twitter import TwitterError

        # The search term is a random word derived from the seed word
        search_word = self.random_word(self.random_seed_word, self.lang)

        request_message = u'Requesting 50 random tweets containing the word {}'
        self.logger.info(request_message.format(search_word))

        found_tweets = self.api.GetSearch(
            term=search_word, count=50, lang=self.lang)

        collected = []
        for tweet in found_tweets:
            statement = Statement(tweet.text)

            parent_id = tweet.in_reply_to_status_id
            if not parent_id:
                # Not a reply, so there is no response to attach
                continue

            try:
                parent_status = self.api.GetStatus(parent_id)
                statement.add_response(Response(parent_status.text))
                collected.append(statement)
            except TwitterError as error:
                # Best effort: log the failure and move on to the next tweet
                self.logger.warning(str(error))

        summary_message = 'Adding {} tweets with responses'
        self.logger.info(summary_message.format(len(collected)))

        return collected
Ejemplo n.º 2
0
    def train(self, *corpus_paths):
        """
        Train the chat bot from one or more corpus paths.

        Paths may be passed as separate positional arguments or as one
        list argument. Each conversation of every loaded corpus is walked
        in order: every line becomes a statement tagged with the corpus
        categories, gets the previous line's text added as a response
        (when there is one), and is written to storage.
        """
        # A lone list argument is unpacked so both call styles are accepted
        if len(corpus_paths) == 1 and isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

        for path in corpus_paths:
            loaded_corpora = self.corpus.load_corpus(path)
            file_names = self.corpus.list_corpus_files(path)

            for file_index, corpus in enumerate(loaded_corpora):
                for completed, conversation in enumerate(corpus, start=1):

                    if self.show_training_progress:
                        # Label the progress bar with the corpus file name
                        label = str(
                            os.path.basename(file_names[file_index])
                        ) + ' Training'
                        utils.print_progress_bar(
                            label, completed, len(corpus))

                    # Each conversation starts without a preceding line
                    last_text = None

                    for line in conversation:
                        statement = self.get_or_create(line)
                        statement.add_tags(corpus.categories)

                        if last_text:
                            statement.add_response(Response(last_text))

                        last_text = statement.text
                        self.storage.update(statement)
Ejemplo n.º 3
0
    def learn_response(self, statement, previous_statement):
        """
        Record that ``statement`` is a valid reply to ``previous_statement``.

        When ``previous_statement`` is truthy its text is added to
        ``statement`` as a response and the pairing is logged. The statement
        is written to storage in every case.
        """
        from chatter.chatterbot.conversation import Response

        if previous_statement:
            statement.add_response(Response(previous_statement.text))

            log_message = 'Adding "{}" as a response to "{}"'.format(
                statement.text, previous_statement.text)
            self.logger.info(log_message)

        # Persist the statement whether or not a response was attached
        self.storage.update(statement)
Ejemplo n.º 4
0
    def deserialize_responses(self, response_list):
        """
        Takes the list of response items and returns
        the list converted to Response objects.

        Each item is a mapping with a ``'text'`` entry; the remaining
        entries are passed to the Response model as keyword arguments.
        The items in ``response_list`` are not modified, so the same list
        can be deserialized more than once.

        Raises KeyError if an item has no ``'text'`` entry.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')

        # A throwaway statement is used only to collect the responses
        proxy_statement = Statement('')

        for response in response_list:
            # Work on a copy so the caller's data keeps its 'text' key
            response_data = dict(response)
            text = response_data.pop('text')

            proxy_statement.add_response(Response(text, **response_data))

        return proxy_statement.in_response_to
Ejemplo n.º 5
0
    def train(self):
        """
        Train the chat bot from the tab-separated Ubuntu dialog corpus files.

        The corpus is fetched through ``self.download`` and extracted when
        ``self.is_extracted`` reports it is not already extracted. Every
        ``*.tsv`` file two directory levels below the extraction directory
        is then read row by row. The columns are used as: 0 = datetime,
        1 = speaker, 2 = addressed speaker (optional), 3 = statement text.
        Within a file, each statement gets the previous row's text added
        as a response, and every statement is written to storage.

        Empty rows are skipped; rows with fewer than four columns are
        skipped with a warning instead of raising IndexError.
        """
        import glob
        import csv

        # Download and extract the Ubuntu dialog corpus if needed
        corpus_download_path = self.download(self.data_download_url)

        # Extract if the directory does not already exist
        if not self.is_extracted(self.extracted_data_directory):
            self.extract(corpus_download_path)

        extracted_corpus_path = os.path.join(self.extracted_data_directory,
                                             '**', '**', '*.tsv')

        for tsv_path in glob.iglob(extracted_corpus_path):
            self.logger.info('Training from: {}'.format(tsv_path))

            # newline='' lets the csv module handle line endings itself,
            # as the csv documentation requires for file objects
            with open(tsv_path, 'r', encoding='utf-8', newline='') as tsv:
                reader = csv.reader(tsv, delimiter='\t')

                # Response chains do not carry over from one file to the next
                previous_statement_text = None

                for row in reader:
                    if not row:
                        continue

                    if len(row) < 4:
                        # The statement text lives in the fourth column
                        self.logger.warning(
                            'Skipping malformed row with {} column(s) '
                            'in {}'.format(len(row), tsv_path))
                        continue

                    text = row[3]
                    statement = self.get_or_create(text)

                    statement.add_extra_data('datetime', row[0])
                    statement.add_extra_data('speaker', row[1])

                    if row[2].strip():
                        statement.add_extra_data('addressing_speaker',
                                                 row[2])

                    if previous_statement_text:
                        statement.add_response(
                            Response(previous_statement_text))

                    previous_statement_text = statement.text
                    self.storage.update(statement)
Ejemplo n.º 6
0
    def train(self, conversation):
        """
        Train the chat bot on one conversation.

        ``conversation`` is an ordered list of statement texts. Each text
        is turned into a statement, gets the preceding statement's text
        added as a response when there is one, and is written to storage.
        """
        last_text = None

        for position, text in enumerate(conversation, start=1):
            if self.show_training_progress:
                # The length is only needed when the progress bar is shown
                utils.print_progress_bar(
                    'List Trainer', position, len(conversation))

            statement = self.get_or_create(text)

            if last_text:
                statement.add_response(Response(last_text))

            last_text = statement.text
            self.storage.update(statement)