def main():
    """Run a MarkovChain text generator as an interactive loop.

    Waits for user input on each iteration. If the input is shorter than
    the chain's ``window`` parameter, random text is generated; otherwise
    the last 3 words of the input seed the generated text. Entering
    ``48598ee283437e810f2f0eb1cf66e217`` breaks the loop.
    """
    chain = MarkovChain()
    # Path is relative to the working directory of the shell that runs
    # this script, not to the script's own location.
    chain.chain = extensions.file.json.read(
        "./src/markov-chain/generated-chains/ru/my-favorites-3-window.json"
    )
    while True:
        start_text = input()
        if start_text == "48598ee283437e810f2f0eb1cf66e217":
            break
        # 3 - how many windows in the chain.
        start_text = handle_input_text(start_text, 3)
        if start_text:
            print(chain.generate(start=start_text))
        else:
            print(chain.generate())
def main(argv=None):
    """Build a Markov chain from parsed samples and print generated text.

    Generates ``args.count`` sequences of length ``args.length`` and prints
    each after post-processing.

    NOTE(review): ``argv`` is accepted but never forwarded to
    ``parse_args()`` — confirm whether ``parse_args`` should receive it.
    """
    args = parse_args()
    markov = MarkovChain([], args.order)
    samples, postprocessor = prepare_samples_and_postprocessor(args)
    markov.add_samples(samples)
    # Loop index is unused; generate args.count independent sequences.
    for _ in range(args.count):
        sequence = markov.generate(args.length)
        print(postprocessor(sequence))
class ModMarkovChain(Mod):
    """Mod that replies with sentences generated by a Markov chain.

    Candidate reply "heads" are chosen by a HeadSelector; for each head the
    chain generates several candidate sentences and the shortest non-empty
    one is kept as the reply.
    """

    def __init__(
        self, markov_chain_db: str, head_db: str, pmi_db: str, logger=None
    ):
        Mod.__init__(self, logger)
        self.markov_chain_db = markov_chain_db
        self.head_db = head_db
        self.gen = MarkovChain(self.markov_chain_db)
        self.hs = HeadSelector(self.head_db, pmi_db)

    def gen_from_sentence(self, sent, num=5):
        """Return up to ``num`` generated replies seeded from ``sent``."""
        heads = self.hs.select(sent, num=num)
        print(heads)  # NOTE(review): leftover debug output — consider using the logger
        replies = []
        for head, _score in heads:
            query = (params.START_SYMBOL, head, )
            query_cands = []
            # search: sample ten candidates and track the shortest.
            min_len = float("inf")
            min_sent = ""
            for _ in range(10):
                # Use a fresh local instead of shadowing the ``sent``
                # parameter (the original reassigned it here).
                cand = self.gen.generate(query)
                if len(cand) < min_len:
                    min_sent = cand
                    min_len = len(min_sent)
                query_cands.append(cand)
            # log every candidate, dropping the start symbol.
            for _cands in query_cands:
                self.logger.info("".join(_cands[1:]))
            if min_sent:
                replies.append(min_sent)
        return ["".join(_[1:]) for _ in replies]

    def can_utter(self, message, master):
        return True

    def utter(self, message, master):
        return [
            (random.uniform(0.7, 1.0), text, "markov_chain", dict())
            for text in self.gen_from_sentence(
                message["text"], num=3
            )
        ]
class ModMarkovChain(Mod):
    """Markov-chain reply generator mod.

    Selects reply heads via a HeadSelector, samples candidate sentences
    from a MarkovChain for each head, and keeps the shortest candidate.
    """

    def __init__(self, markov_chain_db: str, head_db: str, pmi_db: str, logger=None):
        Mod.__init__(self, logger)
        self.markov_chain_db = markov_chain_db
        self.head_db = head_db
        self.gen = MarkovChain(self.markov_chain_db)
        self.hs = HeadSelector(self.head_db, pmi_db)

    def gen_from_sentence(self, sent, num=5):
        """Generate up to ``num`` candidate replies seeded from ``sent``."""
        heads = self.hs.select(sent, num=num)
        print(heads)
        replies = []
        for head, score in heads:
            query = (
                params.START_SYMBOL,
                head,
            )
            # Sample ten candidates, then keep the shortest (first on ties).
            query_cands = [self.gen.generate(query) for _ in range(10)]
            shortest = min(query_cands, key=len)
            # Log every sampled candidate without its start symbol.
            for candidate in query_cands:
                self.logger.info("".join(candidate[1:]))
            if shortest:
                replies.append(shortest)
        return ["".join(reply[1:]) for reply in replies]

    def can_utter(self, message, master):
        return True

    def utter(self, message, master):
        return [
            (random.uniform(0.7, 1.0), text, "markov_chain", dict())
            for text in self.gen_from_sentence(message["text"], num=3)
        ]
class Generator:
    """Dialogue response generator for the storyteller phone-call game.

    Combines grammar-engine templates, a Markov chain trained on a split
    of Dracula, and canned responses keyed on dialogue state.
    """

    questions = []

    def __init__(self, goal):
        self.engine = GrammarEngine('./dialogueSystem/grammar/generator.txt')
        self.p_engine = GrammarEngine(
            './dialogueSystem/grammar/polarity_response.txt')
        # Use with-blocks so each file is closed. Crucially, the training
        # split must be flushed to disk before MarkovChain reads it; the
        # original left the file open, risking an unflushed partial write.
        with open('./dialogueSystem/dracula.txt', encoding="utf8") as f_dracula:
            whole_dracula = f_dracula.read()
        # Train on the first 80% of the corpus.
        train_dracula = whole_dracula[:int(len(whole_dracula) * 0.8)]
        with open('./dialogueSystem/train_dracula.txt', "w+",
                  encoding="utf8") as train_dracula_file:
            train_dracula_file.write(train_dracula)
        self.identity_chain = MarkovChain(
            './dialogueSystem/train_dracula.txt', "word", 3)
        if goal == "Friend":
            questions_path = './dialogueSystem/questionsFriendGoal.txt'
        else:  # User
            questions_path = './dialogueSystem/questionsUserGoal.txt'
        with open(questions_path, encoding="utf8") as f_questions:
            self.questions = f_questions.read().splitlines()
        # print(self.questions)
        self.asked_questions = []
        self.prev_count = -1
        self.model = DialogTag('distilbert-base-uncased')  # dialogue tags

    # Resolve obligation: TODO(Starr) <- generate according to question type
    def resolveObligation(self, gameState, userResponse):
        # if gameState.questionActType == "a type of question", do something
        # engine = GrammarEngine('./dialogueSystem/grammar/response_question.txt')
        # return engine.generate('response')  # return yes/no type of response
        # return gameState.questionActType
        return self.engine.generate('obligation')

    # Strong sentiment
    def addressPositiveSentiment(self, gameState, userResponse):
        return self.p_engine.generate('positive')

    def addressNegativeSentiment(self, gameState, userResponse):
        return self.p_engine.generate('negative')

    # Subjectivity: Rie
    def addressSubj(self, gameState, userResponse):
        """Reply according to how subjective the user's statement sounds."""
        response = TextBlob(userResponse)
        subjectivity = response.sentiment.subjectivity
        # All branches return, so the original trailing `return "ERROR"`
        # was unreachable and has been removed.
        if subjectivity < 0.4:
            return "That sounds like a fact. I believe you for that."
        elif subjectivity >= 0.4 and subjectivity < 0.8:
            return "Actually, are you sure about that? That sounds a bit subjective."
        else:
            return "Oh, but that's just your opinion, right? I don't know if I should trust that as a fact, my friend."

    # Keyphrase: Allison
    '''
    The keyphrases that trigger this:
    - askedWhoTheCallerIs: Markov Chain (Dracula)
    - greetedCaller
    - saidGoodbye
    '''

    def keyphraseTrigger(self, gameState, userResponse, keyphrase):
        """Return the canned/generated reply for a detected keyphrase."""
        # All branches return, so the original trailing `return "ERROR"`
        # was unreachable and has been removed.
        if keyphrase == "askedWhoTheCallerIs":
            # MarkovChain
            response = "Well, I'm glad you asked. I'm a travelling storyteller, and I would love to hear your story. But wait! I'll tell you my story first. I hope you like it.\n\n"
            return response + self.identity_chain.generate(55)
        elif keyphrase == "greetedCaller":
            return "Hello, my friend. Good to hear your voice. I hope I'm not interrupting anything. I just wanted to ask you a few questions if that's okay."
        elif keyphrase == "otherLanguage":
            return self.engine.generate('confused')
        else:  # saidGoodbye
            return "I guess this is the end of our call. Maybe we'll talk again some other day...maybe we won't. Goodbye now."

    # Profanity: Rie
    def addressProf(self, gameState, userResponse):
        """Quote the first censored word and respond via the grammar engine."""
        # if profanity.contains_profanity(userResponse):
        dirtyWords = userResponse.split(" ")
        cleanWords = profanity.censor(userResponse).split(" ")
        for i in range(len(dirtyWords)):
            dirtyWord = ""
            cleanWord = ""
            if dirtyWords[i] != cleanWords[i]:
                # Strip trailing punctuation before quoting the dirty word.
                if dirtyWords[i][-1] in [',', '.', '!', '?']:
                    dirtyWord = dirtyWords[i][:-1]
                    dirtyWord = '"' + dirtyWord + '"'
                else:
                    dirtyWord = '"' + dirtyWords[i] + '"'
                if cleanWords[i][-1] in [',', '.', '!', '?']:
                    cleanWord = cleanWords[i][:-1]
                else:
                    cleanWord = cleanWords[i]
                self.engine.set_variable("dirty", dirtyWord)
                self.engine.set_variable("clean", cleanWord)
                return self.engine.generate('profanity')
        return "Something has gone wrong"

    # Nothing detected: Rie
    def fallback(self, gameState, userResponse):
        return self.engine.generate('fallback')
        # print("What do you mean? Give me more detail, or I will come to you tonight.")

    # Default/ user info getting questions: Allison
    # should be determined by which goal we have for this game gameState.getGoal()
    def askQuestion(self, gameState, userResponse):
        """Pick the next interview question and substitute %placeholders%
        with collected user information (or generic fallbacks)."""
        # print("INFO COUNT: ", gameState.informationCount)
        # response = self.questions[gameState.informationCount]
        if self.prev_count == gameState.informationCount:
            # Same info count as last turn: repeat the previous question.
            response = self.asked_questions[-1]
        elif gameState.informationCount == 0:
            response = self.questions[0]
            self.asked_questions.append(response)
            self.prev_count = gameState.informationCount
        else:
            # Pick a random question not yet asked.
            response = random.choice(self.questions)
            while response in self.asked_questions:
                response = random.choice(self.questions)
            self.asked_questions.append(response)
            self.prev_count = gameState.informationCount
        if '%' in response:
            words = response.split()
            temp_words = words.copy()
            for i in range(0, len(words)):
                some_word = words[i]
                first_mem = ""
                last_mem = ""
                # print("SOME WORD: ", some_word)
                # Peel off a possessive 's and surrounding punctuation so
                # the bare %key% can be matched; remember what was removed
                # to re-attach it around the replacement.
                if some_word[-1] == "s" and some_word[-2] == "'":
                    last_mem += some_word[-2:]
                    some_word = some_word[:-2]
                    # print("NO 's WORD: ", some_word)
                if some_word[-1] in '!#$&()*+, -./:;<=>?@[\]^_`{|}~':
                    last_mem += some_word[-1]
                    some_word = some_word[:-1]
                if some_word[0] in '!#$&()*+, -./:;<=>?@[\]^_`{|}~':
                    first_mem += some_word[0]
                    some_word = some_word[1:]
                # print("PROCESSED WORD: ", some_word)
                if some_word[0] == '%' and some_word[-1] == '%':
                    key = some_word[1:-1]
                    if key not in gameState.information:
                        # self.askQuestion(gameState, userResponse)
                        # Fall back to a generic phrase built from the key.
                        terms = key.split('_')
                        if terms[0] == "grow":
                            replace = "your town"
                        else:
                            replace = "a " + terms[0]
                    else:
                        replace = gameState.information[some_word[1:-1]]
                    temp_words[i] = first_mem + replace + last_mem
            # print(temp_words)
            response = " ".join(temp_words)
        if response[-1] != '?':
            response += "?"
        # NOTE(review): every branch above syncs prev_count to
        # informationCount, so this condition appears to never hold and the
        # transition prefixes look unreachable — confirm intended behavior.
        if gameState.informationCount > 0 and self.prev_count != gameState.informationCount:
            transitions = [
                "I see. ", "Interesting...", "Oh really? ", "Ah, ok. ",
                "Hm...", "Yeah? ", "Alright. ", "Aha. "
            ]
            response = random.choice(transitions) + response
        return response

    # Allison
    # If the user gave information, let's respond to it!
    # Use the gameState to see what info we asked for, and then say something about it
    def addressAvoidance(self, gameState, userResponse):
        options = [
            "I'm not sure what you mean.",
            "You're avoiding my questions.",
            "I won't be able to write a good story if you don't answer my questions.",
            "I don't think I get it.",
            "Could you say that again?"
        ]
        return random.choice(options)

    def elizaTransformation(self, gameState, userResponse):
        transformed_response = eliza_transform.transform(userResponse)
        return transformed_response

    def addressQuestion(self, gameState, userResponse):
        options = [
            "Let's focus on your experiences for now.",
            "Let's not talk about me.",
            "Are you sure you should be asking me questions?",
            "We don't have time to both ask questions!",
            "You can ask me questions later"
        ]
        return random.choice(options)
class MasterpieceWriter(object):
    """Trains a second-order Markov chain over tokenized text files and
    generates new "masterpieces" token by token."""

    def __init__(self, sentence_tokenizer, word_tokenizer):
        self.sentence_tokenizer = sentence_tokenizer
        self.word_tokenizer = word_tokenizer
        self.markov_chain = MarkovChain()
        # Accumulates context values per context key seen during training.
        self.word_contexts = defaultdict(list)
        self.word_counts = Counter()
        self.word_pair_counts = Counter()

    def _paragraphs_from_file(self, file_name):
        """Yield each non-empty, stripped line of ``file_name`` as a paragraph.

        NOTE(review): opens with the platform default encoding — confirm
        whether training files should be read as UTF-8 explicitly.
        """
        with open(file_name) as f:
            for line in f:
                line = line.strip()
                if line != "":
                    yield line

    def _get_words_and_contexts(self, input_files):
        """Yield (token, contexts) pairs over all files, interleaving
        paragraph/sentence boundary markers with the word stream."""
        for file_name in input_files:
            for paragr in self._paragraphs_from_file(file_name):
                sentences = self.sentence_tokenizer.tokenize(paragr)
                if len(sentences) == 0:
                    continue
                yield PARA_BEGIN, None
                for sentence in sentences:
                    words, contexts = self.word_tokenizer.tokenize(sentence)
                    if len(words) == 0:
                        continue
                    yield SENT_BEGIN, None
                    for word in words:
                        yield (word, None)
                    yield SENT_END, None
                    # Contexts are emitted separately, with no token.
                    if contexts is not None:
                        yield None, contexts
                yield PARA_END, None

    def train(self, training_files):
        """Consume the token stream, updating the chain and word statistics."""
        prev_prev_word, prev_word = None, None
        for word, contexts in self._get_words_and_contexts(training_files):
            if contexts is not None:
                for ctx_key in contexts:
                    self.word_contexts[ctx_key].extend(contexts[ctx_key])
            if word is not None:
                # Train markov chain (need at least 3 tokens)
                if prev_prev_word is not None:
                    self.markov_chain.add((prev_prev_word, prev_word),
                                          (prev_word, word))
                # Collect stats, skipping boundary-marker tokens.
                if word not in ALL_SPECIAL:
                    self.word_counts[word] += 1
                    if prev_word not in ALL_SPECIAL:
                        self.word_pair_counts[(prev_word, word)] += 1
                # Update prev_prev_word and prev_word
                prev_prev_word, prev_word = prev_word, word

    def stats(self, top=10):
        """Return the ``top`` most common words and word pairs."""
        return dict(most_common_words=self.word_counts.most_common(top),
                    most_common_word_pairs=self.word_pair_counts.most_common(top))

    def generate_masterpiece(self, prng=None):
        """Yield the tokens of a freshly generated text."""
        yield PARA_BEGIN
        yield SENT_BEGIN
        # Each chain state is a (w1, w2) pair; only the new token w2 is
        # emitted. (Renamed from `next`, which shadowed the builtin.)
        for state in self.markov_chain.generate((PARA_BEGIN, SENT_BEGIN), prng):
            _, w2 = state
            yield w2