Beispiel #1
0
    def generate(self,
                 message: str,
                 doc: Doc = None,
                 ignore_topics: List[str] = None) -> Optional[str]:
        """Generate a reply to *message* using the Markov model.

        Args:
            message: Raw input text; only used when *doc* is not supplied.
            doc: Optional pre-parsed spaCy Doc of the (already filtered)
                message; parsed here from *message* when None.
            ignore_topics: Token texts to exclude from subject selection.

        Returns:
            The generated, smoothed reply string, or a canned fallback
            response when no subjects or sentences could be produced.
        """
        # Avoid the mutable-default-argument pitfall: a fresh list per call.
        if ignore_topics is None:
            ignore_topics = []

        if doc is None:
            filtered_message = MarkovFilters.filter_input(message)
            doc = self._nlp(filtered_message)

        # Collect every token known to the Markov model as a reply subject.
        subjects = []
        for token in doc:
            if token.text in ignore_topics:
                continue
            markov_word = self._markov_model.select(token.text)
            if markov_word is not None:
                subjects.append(markov_word)
        if len(subjects) == 0:
            UNHEARD_LIST = [
                "Didn’t catch that", "Try again", "Are you even trying",
                "That might be too much for me right now",
                "I’ll learn how eventually",
                "I don't know how to respond to that yet"
            ]
            return random.choice(UNHEARD_LIST)

        def structure_generator():
            # Yields predicted sentence structures; the sentence count is
            # sampled from observed input statistics when any exist.
            sentence_stats_manager = InputTextStatManager()
            while True:
                choices, p_values = sentence_stats_manager.probabilities()
                if len(choices) > 0:
                    num_sentences = np.random.choice(choices, p=p_values)
                else:
                    num_sentences = np.random.randint(1, 5)
                yield self._structure_scheduler.predict(
                    num_sentences=num_sentences)

        generator = MarkovGenerator(structure_generator=structure_generator(),
                                    subjects=subjects)

        reply_words = []
        sentences = generator.generate(db=self._markov_model)
        if sentences is None:
            MISUNDERSTOOD_LIST = [
                'Huh.', 'Huh', 'Huh!', 'Huh?', 'Huh!?', 'HUH?'
            ]
            return random.choice(MISUNDERSTOOD_LIST)
        for sentence in sentences:
            for word in sentence:
                # Compound words keep their stored casing verbatim.
                if not word.compound:
                    text = CapitalizationMode.transform(word.mode, word.text)
                else:
                    text = word.text
                reply_words.append(text)

        reply = " ".join(reply_words)
        filtered_reply = MarkovFilters.smooth_output(reply)

        return filtered_reply
Beispiel #2
0
    def _preprocess_markov_data(self, all_training_data: bool = False):
        """Feed training messages from all sources through the spaCy
        preprocessor.

        Args:
            all_training_data: Process the full corpus when True; only
                not-yet-seen ("new") messages otherwise.

        Returns:
            The SpacyPreprocessor that has consumed every processed doc.
        """
        spacy_preprocessor = SpacyPreprocessor()

        def _preprocess_messages(messages, label: str):
            # Decode each raw (bytes) row, filter, parse, and preprocess it,
            # logging progress every 100 items.
            for message_idx, message in enumerate(messages):
                if message_idx % 100 == 0:
                    self._logger.info(
                        "Training_Preprocessing_Markov(%s): %f%%" %
                        (label, message_idx / len(messages) * 100))
                doc = self._nlp(
                    MarkovFilters.filter_input(message[0].decode()))
                spacy_preprocessor.preprocess(doc)

        self._logger.info("Training_Preprocessing_Markov(Import)")
        if not all_training_data:
            imported_messages = ImportTrainingDataManager().new_training_data()
        else:
            imported_messages = ImportTrainingDataManager().all_training_data()
        _preprocess_messages(imported_messages, "Import")

        if self._twitter_connector is not None:
            self._logger.info("Training_Preprocessing_Markov(Twitter)")
            # Imported lazily: only needed when a Twitter connector exists.
            from storage.twitter import TwitterTrainingDataManager

            if not all_training_data:
                tweets = TwitterTrainingDataManager().new_training_data()
            else:
                tweets = TwitterTrainingDataManager().all_training_data()
            _preprocess_messages(tweets, "Twitter")

        if self._discord_connector is not None:
            self._logger.info("Training_Preprocessing_Markov(Discord)")
            # Imported lazily: only needed when a Discord connector exists.
            from storage.discord import DiscordTrainingDataManager

            if not all_training_data:
                discord_messages = DiscordTrainingDataManager().new_training_data()
            else:
                discord_messages = DiscordTrainingDataManager().all_training_data()
            _preprocess_messages(discord_messages, "Discord")

        return spacy_preprocessor
Beispiel #3
0
    def _preprocess_structure_data(self):
        """Feed recent training messages through the structure preprocessor.

        Each source is capped at STRUCTURE_MODEL_TRAINING_MAX_SIZE rows,
        newest first. Processing stops early (returning immediately) as soon
        as the preprocessor reports it cannot accept more data.

        Returns:
            The StructurePreprocessor, possibly only partially filled.
        """
        structure_preprocessor = StructurePreprocessor()

        def _preprocess_messages(messages, label: str) -> bool:
            # Decode, filter, parse, and preprocess each row, logging
            # progress every 100 items. Returns False as soon as the
            # preprocessor refuses more data (capacity reached).
            for message_idx, message in enumerate(messages):
                if message_idx % 100 == 0:
                    self._logger.info(
                        "Training_Preprocessing_Structure(%s): %f%%" % (
                            label,
                            message_idx / min(STRUCTURE_MODEL_TRAINING_MAX_SIZE,
                                              len(messages)) * 100))
                doc = self._nlp(
                    MarkovFilters.filter_input(message[0].decode()))
                if not structure_preprocessor.preprocess(doc):
                    return False
            return True

        self._logger.info("Training_Preprocessing_Structure(Import)")
        imported_messages = ImportTrainingDataManager().all_training_data(
            limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE,
            order_by='id', order='desc')
        if not _preprocess_messages(imported_messages, "Import"):
            return structure_preprocessor

        if self._twitter_connector is not None:
            self._logger.info("Training_Preprocessing_Structure(Twitter)")
            # Imported lazily: only needed when a Twitter connector exists.
            from storage.twitter import TwitterTrainingDataManager

            tweets = TwitterTrainingDataManager().all_training_data(
                limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE,
                order_by='timestamp', order='desc')
            if not _preprocess_messages(tweets, "Twitter"):
                return structure_preprocessor

        if self._discord_connector is not None:
            self._logger.info("Training_Preprocessing_Structure(Discord)")
            # Imported lazily: only needed when a Discord connector exists.
            from storage.discord import DiscordTrainingDataManager

            discord_messages = DiscordTrainingDataManager().all_training_data(
                limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE,
                order_by='timestamp', order='desc')
            if not _preprocess_messages(discord_messages, "Discord"):
                return structure_preprocessor

        return structure_preprocessor
    def _main(self):
        """Run the service loop: drain each connector's queue, learning
        from and/or replying to messages, until shutdown is requested."""
        self._set_status(AEStatus.RUNNING)

        while True:
            # Wake when a connector signals activity, or after one second.
            if self._connectors_event.wait(timeout=1):
                self._connectors_event.clear()

            for conn in self._connectors:
                while not conn.empty():
                    msg = conn.recv()
                    if msg is None:
                        # Empty receive still gets an acknowledgement.
                        conn.send(None)
                        continue
                    parsed = self._nlp(MarkovFilters.filter_input(msg.text))
                    if msg.learn:
                        MarkovTrainer(self._markov_model).learn(parsed)
                        conn.send(None)
                    if msg.reply:
                        conn.send(conn.generate(msg, doc=parsed))

            if self._status == AEStatus.SHUTTING_DOWN:
                self.shutdown()
                self._set_status(AEStatus.SHUTDOWN)
                sys.exit(0)
Beispiel #5
0
    def _main(self):
        """Run the service loop: drain each connector's queue, learning
        from and/or replying to messages, until shutdown is requested."""
        self._set_status(AEStatus.RUNNING)

        while True:
            # Wake when a connector signals activity, or after one second.
            if self._connectors_event.wait(timeout=1):
                self._connectors_event.clear()

            for connector in self._connectors:
                while not connector.empty():
                    message = connector.recv()
                    if message is not None:
                        doc = self._nlp(MarkovFilters.filter_input(message.text))
                        # A message may request learning, a reply, or both.
                        if message.learn:
                            MarkovTrainer(self._markov_model).learn(doc)
                            connector.send(None)
                        if message.reply:
                            reply = connector.generate(message, doc=doc)
                            connector.send(reply)
                    else:
                        # Empty receive still gets an acknowledgement.
                        connector.send(None)

            if self._status == AEStatus.SHUTTING_DOWN:
                self.shutdown()
                self._set_status(AEStatus.SHUTDOWN)
                sys.exit(0)
    def generate(self, message: str, doc: Doc = None, ignore_topics: List[str] = None) -> Optional[str]:
        """Generate a reply to *message* using the Markov model.

        Args:
            message: Raw input text; only used when *doc* is not supplied.
            doc: Optional pre-parsed spaCy Doc of the (already filtered)
                message; parsed here from *message* when None.
            ignore_topics: Token texts to exclude from subject selection.

        Returns:
            The generated, smoothed reply string, or a fallback response
            when no subjects or sentences could be produced.
        """
        # Avoid the mutable-default-argument pitfall: a fresh list per call.
        if ignore_topics is None:
            ignore_topics = []

        if doc is None:
            filtered_message = MarkovFilters.filter_input(message)
            doc = self._nlp(filtered_message)

        # Collect every token known to the Markov model as a reply subject.
        subjects = []
        for token in doc:
            if token.text in ignore_topics:
                continue
            markov_word = self._markov_model.select(token.text)
            if markov_word is not None:
                subjects.append(markov_word)
        if len(subjects) == 0:
            return "I wasn't trained on that!"

        def structure_generator():
            # Yields predicted sentence structures; the sentence count is
            # sampled from observed input statistics when any exist.
            sentence_stats_manager = InputTextStatManager()
            while True:
                choices, p_values = sentence_stats_manager.probabilities()
                if len(choices) > 0:
                    num_sentences = np.random.choice(choices, p=p_values)
                else:
                    num_sentences = np.random.randint(1, 5)
                yield self._structure_scheduler.predict(num_sentences=num_sentences)

        generator = MarkovGenerator(structure_generator=structure_generator(), subjects=subjects)

        reply_words = []
        sentences = generator.generate(db=self._markov_model)
        if sentences is None:
            return "Huh?"
        for sentence in sentences:
            for word in sentence:
                # Compound words keep their stored casing verbatim.
                if not word.compound:
                    text = CapitalizationMode.transform(word.mode, word.text)
                else:
                    text = word.text
                reply_words.append(text)

        reply = " ".join(reply_words)
        filtered_reply = MarkovFilters.smooth_output(reply)

        return filtered_reply
Beispiel #7
0
def main():
    """Load the Markov DB and structure model, then print 1000 generated
    messages seeded from a fixed subject list."""
    np.random.seed(int(time.time()))

    markov_db = MarkovTrieDb(MARKOV_DB_PATH)

    structure_model = StructureModelScheduler(use_gpu=USE_GPU)
    structure_model.start()
    structure_model.load(STRUCTURE_MODEL_PATH)

    # Resolve the hard-coded subject words against the Markov DB.
    subjects = []
    for word in ['Some', 'Words', 'Here']:
        select_word = markov_db.select(word)
        if select_word is not None:
            subjects.append(select_word)
        else:
            print("Couldn't select %s" % word)

    for _ in range(0, 1000):

        def structure_generator():
            # One-sentence structures, predicted on demand.
            while True:
                yield structure_model.predict(num_sentences=1)

        markov_generator = MarkovGenerator(structure_generator(), subjects)

        words = []
        sentences = markov_generator.generate(markov_db)
        if sentences is None:
            continue

        for sentence in sentences:
            for word in sentence:
                # Compound words keep their stored casing verbatim.
                if not word.compound:
                    text = CapitalizationMode.transform(word.mode, word.text)
                else:
                    text = word.text
                words.append(text)

        message = " ".join(words)
        message = MarkovFilters.smooth_output(message)

        print(message)
Beispiel #8
0
def main():
    """Load the Markov DB and structure model, then print 1000 generated
    messages seeded from a fixed subject list."""
    np.random.seed(int(time.time()))

    markov_db = MarkovTrieDb(MARKOV_DB_PATH)

    structure_model = StructureModelScheduler(use_gpu=USE_GPU)
    structure_model.start()
    structure_model.load(STRUCTURE_MODEL_PATH)

    # Resolve the hard-coded subject words against the Markov DB.
    subjects = []
    for word in ['Some', 'Words', 'Here']:
        select_word = markov_db.select(word)
        if select_word is not None:
            subjects.append(select_word)
        else:
            print("Couldn't select %s" % word)

    for _ in range(0, 1000):

        def structure_generator():
            # One-sentence structures, predicted on demand.
            while True:
                yield structure_model.predict(num_sentences=1)

        markov_generator = MarkovGenerator(structure_generator(), subjects)

        words = []
        sentences = markov_generator.generate(markov_db)
        if sentences is None:
            continue

        for sentence in sentences:
            for word in sentence:
                # Compound words keep their stored casing verbatim.
                if not word.compound:
                    text = CapitalizationMode.transform(word.mode, word.text)
                else:
                    text = word.text
                words.append(text)

        message = " ".join(words)
        message = MarkovFilters.smooth_output(message)

        print(message)
Beispiel #9
0
    def _preprocess_markov_data(self, all_training_data: bool = False):
        """Feed training messages from all sources through the spaCy
        preprocessor.

        Args:
            all_training_data: Process the full corpus when True; only
                not-yet-seen ("new") messages otherwise.

        Returns:
            The SpacyPreprocessor that has consumed every processed doc.
        """
        spacy_preprocessor = SpacyPreprocessor()

        self._logger.info("Training_Preprocessing_Markov(Import)")
        if not all_training_data:
            imported_messages = ImportTrainingDataManager().new_training_data()
        else:
            imported_messages = ImportTrainingDataManager().all_training_data()
        for message_idx, message in enumerate(imported_messages):
            # Print Progress
            if message_idx % 100 == 0:
                self._logger.info(
                    "Training_Preprocessing_Markov(Import): %f%%" %
                    (message_idx / len(imported_messages) * 100))

            # Rows hold raw bytes; decode before filtering/parsing.
            doc = self._nlp(MarkovFilters.filter_input(message[0].decode()))
            spacy_preprocessor.preprocess(doc)

        if self._twitter_connector is not None:
            self._logger.info("Training_Preprocessing_Markov(Twitter)")
            # Imported lazily: only needed when a Twitter connector exists.
            from storage.twitter import TwitterTrainingDataManager

            if not all_training_data:
                tweets = TwitterTrainingDataManager().new_training_data()
            else:
                tweets = TwitterTrainingDataManager().all_training_data()
            for tweet_idx, tweet in enumerate(tweets):
                # Print Progress
                if tweet_idx % 100 == 0:
                    self._logger.info(
                        "Training_Preprocessing_Markov(Twitter): %f%%" %
                        (tweet_idx / len(tweets) * 100))

                doc = self._nlp(MarkovFilters.filter_input(tweet[0].decode()))
                spacy_preprocessor.preprocess(doc)

        if self._discord_connector is not None:
            self._logger.info("Training_Preprocessing_Markov(Discord)")
            # Imported lazily: only needed when a Discord connector exists.
            from storage.discord import DiscordTrainingDataManager

            if not all_training_data:
                discord_messages = DiscordTrainingDataManager(
                ).new_training_data()
            else:
                discord_messages = DiscordTrainingDataManager(
                ).all_training_data()

            for message_idx, message in enumerate(discord_messages):
                # Print Progress
                if message_idx % 100 == 0:
                    self._logger.info(
                        "Training_Preprocessing_Markov(Discord): %f%%" %
                        (message_idx / len(discord_messages) * 100))

                doc = self._nlp(MarkovFilters.filter_input(
                    message[0].decode()))
                spacy_preprocessor.preprocess(doc)

        return spacy_preprocessor
Beispiel #10
0
    def _preprocess_structure_data(self):
        """Feed recent training messages through the structure preprocessor.

        Each source is capped at STRUCTURE_MODEL_TRAINING_MAX_SIZE rows,
        newest first. Processing stops early (returning immediately) as soon
        as the preprocessor reports it cannot accept more data.

        Returns:
            The StructurePreprocessor, possibly only partially filled.
        """
        structure_preprocessor = StructurePreprocessor()

        self._logger.info("Training_Preprocessing_Structure(Import)")
        imported_messages = ImportTrainingDataManager().all_training_data(
            limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE,
            order_by='id',
            order='desc')
        for message_idx, message in enumerate(imported_messages):
            # Print Progress
            if message_idx % 100 == 0:
                self._logger.info(
                    "Training_Preprocessing_Structure(Import): %f%%" %
                    (message_idx / min(STRUCTURE_MODEL_TRAINING_MAX_SIZE,
                                       len(imported_messages)) * 100))

            # Rows hold raw bytes; decode before filtering/parsing.
            doc = self._nlp(MarkovFilters.filter_input(message[0].decode()))
            if not structure_preprocessor.preprocess(doc):
                # Preprocessor refused more data: stop immediately.
                return structure_preprocessor

        if self._twitter_connector is not None:
            self._logger.info("Training_Preprocessing_Structure(Twitter)")
            # Imported lazily: only needed when a Twitter connector exists.
            from storage.twitter import TwitterTrainingDataManager

            tweets = TwitterTrainingDataManager().all_training_data(
                limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE,
                order_by='timestamp',
                order='desc')
            for tweet_idx, tweet in enumerate(tweets):
                # Print Progress
                if tweet_idx % 100 == 0:
                    self._logger.info(
                        "Training_Preprocessing_Structure(Twitter): %f%%" %
                        (tweet_idx / min(STRUCTURE_MODEL_TRAINING_MAX_SIZE,
                                         len(tweets)) * 100))

                doc = self._nlp(MarkovFilters.filter_input(tweet[0].decode()))
                if not structure_preprocessor.preprocess(doc):
                    # Preprocessor refused more data: stop immediately.
                    return structure_preprocessor

        if self._discord_connector is not None:
            self._logger.info("Training_Preprocessing_Structure(Discord)")
            # Imported lazily: only needed when a Discord connector exists.
            from storage.discord import DiscordTrainingDataManager

            discord_messages = DiscordTrainingDataManager().all_training_data(
                limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE,
                order_by='timestamp',
                order='desc')
            for message_idx, message in enumerate(discord_messages):
                # Print Progress
                if message_idx % 100 == 0:
                    self._logger.info(
                        "Training_Preprocessing_Structure(Discord): %f%%" %
                        (message_idx / min(STRUCTURE_MODEL_TRAINING_MAX_SIZE,
                                           len(discord_messages)) * 100))

                doc = self._nlp(MarkovFilters.filter_input(
                    message[0].decode()))
                if not structure_preprocessor.preprocess(doc):
                    # Preprocessor refused more data: stop immediately.
                    return structure_preprocessor

        return structure_preprocessor