def generate(self, message: str, doc: "Optional[Doc]" = None,
             ignore_topics: Optional[List[str]] = None) -> Optional[str]:
    """Build a reply seeded by the words of ``message``.

    Args:
        message: Raw input text. It is only filtered and parsed when ``doc``
            is not supplied.
        doc: Already-parsed document whose tokens expose ``.text``. When
            ``None`` it is produced from ``message`` via
            ``MarkovFilters.filter_input`` and ``self._nlp``.
        ignore_topics: Token texts that must not be used as subjects.
            ``None`` (the default) ignores nothing.

    Returns:
        A randomly chosen "unheard" phrase when no token yields a subject,
        a randomly chosen "Huh" variant when the generator returns ``None``,
        otherwise the smoothed reply text.
    """
    if doc is None:
        doc = self._nlp(MarkovFilters.filter_input(message))

    # A None default avoids sharing one list object between calls.
    ignored = ignore_topics if ignore_topics is not None else ()

    subjects = []
    for token in doc:
        if token.text in ignored:
            continue
        markov_word = self._markov_model.select(token.text)
        if markov_word is not None:
            subjects.append(markov_word)

    if not subjects:
        unheard_list = [
            "Didn’t catch that",
            "Try again",
            "Are you even trying",
            "That might be too much for me right now",
            "I’ll learn how eventually",
            "I don't know how to respond to that yet",
        ]
        return random.choice(unheard_list)

    def structure_generator():
        sentence_stats_manager = InputTextStatManager()
        while True:
            choices, p_values = sentence_stats_manager.probabilities()
            if len(choices) > 0:
                num_sentences = np.random.choice(choices, p=p_values)
            else:
                # No statistics available: fall back to randint(1, 5).
                num_sentences = np.random.randint(1, 5)
            yield self._structure_scheduler.predict(
                num_sentences=num_sentences)

    generator = MarkovGenerator(structure_generator=structure_generator(),
                                subjects=subjects)

    sentences = generator.generate(db=self._markov_model)
    if sentences is None:
        misunderstood_list = ['Huh.', 'Huh', 'Huh!', 'Huh?', 'Huh!?', 'HUH?']
        return random.choice(misunderstood_list)

    reply_words = []
    for sentence in sentences:
        for word in sentence:
            if word.compound:
                # Compound words are emitted without a capitalization pass.
                reply_words.append(word.text)
            else:
                reply_words.append(
                    CapitalizationMode.transform(word.mode, word.text))

    return MarkovFilters.smooth_output(" ".join(reply_words))
def _preprocess_markov_data(self, all_training_data: bool = False):
    """Run stored training text through a fresh ``SpacyPreprocessor``.

    Imported messages are always processed. Tweets and Discord messages are
    processed only when ``self._twitter_connector`` / ``self._discord_connector``
    is not ``None``.

    Args:
        all_training_data: When true each manager's ``all_training_data()``
            is read, otherwise its ``new_training_data()``.

    Returns:
        The ``SpacyPreprocessor`` every parsed document was handed to.
    """
    preprocessor = SpacyPreprocessor()

    def load(manager):
        # The same selection rule applies to every source.
        if all_training_data:
            return manager.all_training_data()
        return manager.new_training_data()

    def consume(progress_format, rows):
        for position, row in enumerate(rows):
            # Report progress once every 100 rows.
            if position % 100 == 0:
                self._logger.info(
                    progress_format % (position / len(rows) * 100))
            # The first element of each row is decoded to text.
            text = row[0].decode()
            preprocessor.preprocess(
                self._nlp(MarkovFilters.filter_input(text)))

    self._logger.info("Training_Preprocessing_Markov(Import)")
    consume("Training_Preprocessing_Markov(Import): %f%%",
            load(ImportTrainingDataManager()))

    if self._twitter_connector is not None:
        self._logger.info("Training_Preprocessing_Markov(Twitter)")
        from storage.twitter import TwitterTrainingDataManager
        consume("Training_Preprocessing_Markov(Twitter): %f%%",
                load(TwitterTrainingDataManager()))

    if self._discord_connector is not None:
        self._logger.info("Training_Preprocessing_Markov(Discord)")
        from storage.discord import DiscordTrainingDataManager
        consume("Training_Preprocessing_Markov(Discord): %f%%",
                load(DiscordTrainingDataManager()))

    return preprocessor
def _preprocess_structure_data(self):
    """Run recent training text through a fresh ``StructurePreprocessor``.

    Each source is queried with ``limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE``
    in descending order (``id`` for imported messages, ``timestamp`` for
    tweets and Discord messages). Tweets and Discord messages are read only
    when the matching connector attribute is not ``None``.

    Returns:
        The ``StructurePreprocessor``. It is returned immediately, skipping
        all remaining rows and sources, the first time its ``preprocess``
        call returns a falsy value.
    """
    preprocessor = StructurePreprocessor()

    def newest(manager, order_by):
        return manager.all_training_data(
            limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE,
            order_by=order_by, order='desc')

    def consume(progress_format, rows):
        # Returns False as soon as the preprocessor rejects a document.
        for position, row in enumerate(rows):
            # Report progress once every 100 rows.
            if position % 100 == 0:
                expected = min(STRUCTURE_MODEL_TRAINING_MAX_SIZE, len(rows))
                self._logger.info(
                    progress_format % (position / expected * 100))
            # The first element of each row is decoded to text.
            text = row[0].decode()
            doc = self._nlp(MarkovFilters.filter_input(text))
            if not preprocessor.preprocess(doc):
                return False
        return True

    self._logger.info("Training_Preprocessing_Structure(Import)")
    if not consume("Training_Preprocessing_Structure(Import): %f%%",
                   newest(ImportTrainingDataManager(), 'id')):
        return preprocessor

    if self._twitter_connector is not None:
        self._logger.info("Training_Preprocessing_Structure(Twitter)")
        from storage.twitter import TwitterTrainingDataManager
        if not consume("Training_Preprocessing_Structure(Twitter): %f%%",
                       newest(TwitterTrainingDataManager(), 'timestamp')):
            return preprocessor

    if self._discord_connector is not None:
        self._logger.info("Training_Preprocessing_Structure(Discord)")
        from storage.discord import DiscordTrainingDataManager
        if not consume("Training_Preprocessing_Structure(Discord): %f%%",
                       newest(DiscordTrainingDataManager(), 'timestamp')):
            return preprocessor

    return preprocessor
def _main(self):
    """Serve connector messages until the status becomes SHUTTING_DOWN.

    Sets the status to ``AEStatus.RUNNING`` and then loops forever: waits on
    ``self._connectors_event`` (``timeout=1``), drains every connector, and
    checks for a shutdown request. On shutdown it calls ``self.shutdown()``,
    sets ``AEStatus.SHUTDOWN`` and exits the process via ``sys.exit(0)``.
    """
    self._set_status(AEStatus.RUNNING)

    while True:
        # Clear the event only when the wait reported it as set.
        signalled = self._connectors_event.wait(timeout=1)
        if signalled:
            self._connectors_event.clear()

        for connector in self._connectors:
            while not connector.empty():
                message = connector.recv()
                if message is None:
                    # Nothing to process; still answer with None.
                    connector.send(None)
                    continue

                doc = self._nlp(MarkovFilters.filter_input(message.text))
                if message.learn:
                    MarkovTrainer(self._markov_model).learn(doc)
                    connector.send(None)
                if message.reply:
                    connector.send(connector.generate(message, doc=doc))

        if self._status == AEStatus.SHUTTING_DOWN:
            self.shutdown()
            self._set_status(AEStatus.SHUTDOWN)
            sys.exit(0)
def _main(self):
    """Process connector traffic in a loop until shutdown is requested.

    The status is set to ``AEStatus.RUNNING`` first. Every pass waits on
    ``self._connectors_event`` with ``timeout=1``, empties each connector's
    queue, and then checks ``self._status``. When it equals
    ``AEStatus.SHUTTING_DOWN`` the method calls ``self.shutdown()``, sets
    ``AEStatus.SHUTDOWN`` and terminates through ``sys.exit(0)``.
    """
    self._set_status(AEStatus.RUNNING)

    while True:
        event_was_set = self._connectors_event.wait(timeout=1)
        if event_was_set:
            self._connectors_event.clear()

        for connector in self._connectors:
            while not connector.empty():
                message = connector.recv()
                if message is None:
                    # A None message is acknowledged with None.
                    connector.send(None)
                    continue

                filtered_text = MarkovFilters.filter_input(message.text)
                doc = self._nlp(filtered_text)
                if message.learn:
                    trainer = MarkovTrainer(self._markov_model)
                    trainer.learn(doc)
                    connector.send(None)
                if message.reply:
                    reply = connector.generate(message, doc=doc)
                    connector.send(reply)

        if self._status == AEStatus.SHUTTING_DOWN:
            self.shutdown()
            self._set_status(AEStatus.SHUTDOWN)
            sys.exit(0)
def generate(self, message: str, doc: "Optional[Doc]" = None,
             ignore_topics: Optional[List[str]] = None) -> Optional[str]:
    """Build a reply seeded by the words of ``message``.

    Args:
        message: Raw input text. It is only filtered and parsed when ``doc``
            is not supplied.
        doc: Already-parsed document whose tokens expose ``.text``. When
            ``None`` it is produced from ``message`` via
            ``MarkovFilters.filter_input`` and ``self._nlp``.
        ignore_topics: Token texts that must not be used as subjects.
            ``None`` (the default) ignores nothing.

    Returns:
        ``"I wasn't trained on that!"`` when no token yields a subject,
        ``"Huh?"`` when the generator returns ``None``, otherwise the
        smoothed reply text.
    """
    if doc is None:
        doc = self._nlp(MarkovFilters.filter_input(message))

    # A None default avoids sharing one list object between calls.
    ignored = ignore_topics if ignore_topics is not None else ()

    subjects = []
    for token in doc:
        if token.text in ignored:
            continue
        markov_word = self._markov_model.select(token.text)
        if markov_word is not None:
            subjects.append(markov_word)

    if not subjects:
        return "I wasn't trained on that!"

    def structure_generator():
        sentence_stats_manager = InputTextStatManager()
        while True:
            choices, p_values = sentence_stats_manager.probabilities()
            if len(choices) > 0:
                num_sentences = np.random.choice(choices, p=p_values)
            else:
                # No statistics available: fall back to randint(1, 5).
                num_sentences = np.random.randint(1, 5)
            yield self._structure_scheduler.predict(
                num_sentences=num_sentences)

    generator = MarkovGenerator(structure_generator=structure_generator(),
                                subjects=subjects)

    sentences = generator.generate(db=self._markov_model)
    if sentences is None:
        return "Huh?"

    reply_words = []
    for sentence in sentences:
        for word in sentence:
            if word.compound:
                # Compound words are emitted without a capitalization pass.
                reply_words.append(word.text)
            else:
                reply_words.append(
                    CapitalizationMode.transform(word.mode, word.text))

    return MarkovFilters.smooth_output(" ".join(reply_words))
def main():
    """Print up to 1000 generated messages for a fixed list of seed words.

    Seeds NumPy's RNG from the current time, opens the Markov database at
    ``MARKOV_DB_PATH``, starts the structure model and loads
    ``STRUCTURE_MODEL_PATH``. Seed words the database cannot select are
    reported on stdout and left out. Iterations for which the generator
    returns ``None`` print nothing.
    """
    np.random.seed(int(time.time()))
    markov_db = MarkovTrieDb(MARKOV_DB_PATH)

    structure_model = StructureModelScheduler(use_gpu=USE_GPU)
    structure_model.start()
    structure_model.load(STRUCTURE_MODEL_PATH)

    subjects = []
    for word in ['Some', 'Words', 'Here']:
        select_word = markov_db.select(word)
        if select_word is not None:
            subjects.append(select_word)
        else:
            print("Couldn't select %s" % word)

    # Defined once; every iteration still receives its own generator object.
    def structure_generator():
        while True:
            yield structure_model.predict(num_sentences=1)

    for _ in range(1000):
        markov_generator = MarkovGenerator(structure_generator(), subjects)

        sentences = markov_generator.generate(markov_db)
        if sentences is None:
            continue

        words = []
        for sentence in sentences:
            for word in sentence:
                if word.compound:
                    # Compound words skip the capitalization pass.
                    words.append(word.text)
                else:
                    words.append(
                        CapitalizationMode.transform(word.mode, word.text))

        print(MarkovFilters.smooth_output(" ".join(words)))
def main():
    """Generate and print up to 1000 messages from three fixed seed words.

    The NumPy RNG is seeded from the current time, the Markov database is
    opened from ``MARKOV_DB_PATH`` and the structure model is started and
    loaded from ``STRUCTURE_MODEL_PATH``. A seed word that the database
    cannot select is reported on stdout and skipped. An iteration whose
    generator result is ``None`` prints nothing.
    """
    np.random.seed(int(time.time()))
    markov_db = MarkovTrieDb(MARKOV_DB_PATH)

    structure_model = StructureModelScheduler(use_gpu=USE_GPU)
    structure_model.start()
    structure_model.load(STRUCTURE_MODEL_PATH)

    subjects = []
    for word in ['Some', 'Words', 'Here']:
        select_word = markov_db.select(word)
        if select_word is not None:
            subjects.append(select_word)
        else:
            print("Couldn't select %s" % word)

    # The factory is loop-invariant; a new generator is created per iteration.
    def structure_generator():
        while True:
            yield structure_model.predict(num_sentences=1)

    for _ in range(1000):
        markov_generator = MarkovGenerator(structure_generator(), subjects)

        sentences = markov_generator.generate(markov_db)
        if sentences is None:
            continue

        words = []
        for sentence in sentences:
            for word in sentence:
                if not word.compound:
                    text = CapitalizationMode.transform(word.mode, word.text)
                else:
                    # Compound words are used verbatim.
                    text = word.text
                words.append(text)

        message = MarkovFilters.smooth_output(" ".join(words))
        print(message)
def _preprocess_markov_data(self, all_training_data: bool = False):
    """Pass stored training text to a newly created ``SpacyPreprocessor``.

    Imported messages are always read. Tweets are read only when
    ``self._twitter_connector`` is not ``None`` and Discord messages only
    when ``self._discord_connector`` is not ``None``.

    Args:
        all_training_data: Selects ``all_training_data()`` when true and
            ``new_training_data()`` when false, on every manager.

    Returns:
        The ``SpacyPreprocessor`` that received every parsed document.
    """
    preprocessor = SpacyPreprocessor()

    def rows_from(manager):
        # One selection rule shared by all three sources.
        if not all_training_data:
            return manager.new_training_data()
        return manager.all_training_data()

    def feed(progress_format, rows):
        for index, row in enumerate(rows):
            # Log a percentage once every 100 rows.
            if index % 100 == 0:
                percent = index / len(rows) * 100
                self._logger.info(progress_format % percent)
            # The first element of each row is decoded to text.
            filtered = MarkovFilters.filter_input(row[0].decode())
            preprocessor.preprocess(self._nlp(filtered))

    self._logger.info("Training_Preprocessing_Markov(Import)")
    feed("Training_Preprocessing_Markov(Import): %f%%",
         rows_from(ImportTrainingDataManager()))

    if self._twitter_connector is not None:
        self._logger.info("Training_Preprocessing_Markov(Twitter)")
        from storage.twitter import TwitterTrainingDataManager
        feed("Training_Preprocessing_Markov(Twitter): %f%%",
             rows_from(TwitterTrainingDataManager()))

    if self._discord_connector is not None:
        self._logger.info("Training_Preprocessing_Markov(Discord)")
        from storage.discord import DiscordTrainingDataManager
        feed("Training_Preprocessing_Markov(Discord): %f%%",
             rows_from(DiscordTrainingDataManager()))

    return preprocessor
def _preprocess_structure_data(self):
    """Pass recent training text to a newly created ``StructurePreprocessor``.

    Every source is queried with ``limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE``
    and ``order='desc'``; imported messages are ordered by ``id``, tweets and
    Discord messages by ``timestamp``. The Twitter and Discord sources are
    read only when the matching connector attribute is not ``None``.

    Returns:
        The ``StructurePreprocessor``. The method returns it at once, without
        touching further rows or sources, when a ``preprocess`` call returns
        a falsy value.
    """
    preprocessor = StructurePreprocessor()

    def recent_rows(manager, order_by):
        return manager.all_training_data(
            limit=STRUCTURE_MODEL_TRAINING_MAX_SIZE,
            order_by=order_by, order='desc')

    def feed(progress_format, rows):
        # True means every row was accepted; False means stop everything.
        for index, row in enumerate(rows):
            # Log a percentage once every 100 rows.
            if index % 100 == 0:
                percent = index / min(STRUCTURE_MODEL_TRAINING_MAX_SIZE,
                                      len(rows)) * 100
                self._logger.info(progress_format % percent)
            # The first element of each row is decoded to text.
            filtered = MarkovFilters.filter_input(row[0].decode())
            accepted = preprocessor.preprocess(self._nlp(filtered))
            if not accepted:
                return False
        return True

    self._logger.info("Training_Preprocessing_Structure(Import)")
    if not feed("Training_Preprocessing_Structure(Import): %f%%",
                recent_rows(ImportTrainingDataManager(), 'id')):
        return preprocessor

    if self._twitter_connector is not None:
        self._logger.info("Training_Preprocessing_Structure(Twitter)")
        from storage.twitter import TwitterTrainingDataManager
        if not feed("Training_Preprocessing_Structure(Twitter): %f%%",
                    recent_rows(TwitterTrainingDataManager(), 'timestamp')):
            return preprocessor

    if self._discord_connector is not None:
        self._logger.info("Training_Preprocessing_Structure(Discord)")
        from storage.discord import DiscordTrainingDataManager
        if not feed("Training_Preprocessing_Structure(Discord): %f%%",
                    recent_rows(DiscordTrainingDataManager(), 'timestamp')):
            return preprocessor

    return preprocessor