async def be(self, user: discord.Member, other: discord.Member = None):
    """Generate a markov chain based on the logs of `user`.

    If `other` is given, both users' models are combined into a
    "fusion" impersonation whose embed colour is the average of the
    two members' colours.
    """
    log_path = log_dir + user.id + '.txt'
    if not os.path.exists(log_path):
        await self.bot.say(
            "i don't have any messages logged from that user yet.")
        return
    with codecs.open(log_path, "r", encoding='utf-8', errors='ignore') as f:
        # Drop blank lines so markovify sees one message per line.
        text = filter(None, (line.rstrip() for line in f))
        text_model = markovify.NewlineText(text)
    name = user.display_name
    colour = user.colour
    if other:
        # fusion impersonations — read the second log with the same
        # encoding/error policy as the first (original used bare open()).
        with codecs.open(log_dir + other.id + '.txt', "r",
                         encoding='utf-8', errors='ignore') as s:
            other_text = s.read()
        other_model = markovify.NewlineText(other_text)
        text_model = markovify.combine([text_model, other_model], [1, 1])
        name += " + " + other.display_name
        # discord.Colour requires an int; `/` would produce a float on
        # Python 3, so use floor division.
        colour = discord.Colour(
            (user.colour.value + other.colour.value) // 2)
    sentence = text_model.make_sentence(tries=100)
    embed = discord.Embed(title='', colour=colour, description=sentence)
    embed.set_author(name=name, icon_url=user.avatar_url)
    await self.bot.say(embed=embed)
def generate_chat(self, participant1, participant2, chat_length):
    """Print a simulated chat of `chat_length` exchanges between two
    participants, each modelled from their own newline-delimited corpus.

    I/O errors while loading a corpus propagate to the caller (the
    original's `except Exception as e: raise e` was a no-op).
    """
    def _load_model(path):
        with open(path, "r") as f:
            return markovify.NewlineText(f.read())

    text_model_p1 = _load_model(
        self.text_corpus_dir + "/" + participant1 + ".txt")
    # NOTE(review): the second corpus used a hard-coded "chat_corpus/"
    # prefix in the original; kept as-is so the same file is read.
    text_model_p2 = _load_model("chat_corpus/" + participant2 + ".txt")

    p1_name = ' '.join(x.capitalize() for x in participant1.split('_'))
    p2_name = ' '.join(x.capitalize() for x in participant2.split('_'))

    def _make_sentence(model, max_tries=10):
        # make_sentence() may return None when it cannot build a
        # sufficiently novel sentence; retry a bounded number of times.
        for _ in range(1 + max_tries):
            sentence = model.make_sentence()
            if sentence is not None:
                return sentence
        # Fall back to an empty line instead of crashing with a
        # TypeError on string concatenation (original defect).
        return ""

    for _ in range(int(chat_length)):
        print(p1_name + ': ' + _make_sentence(text_model_p1))
        print(p2_name + ': ' + _make_sentence(text_model_p2))
async def on_message(message):
    """Handle $gather / $gather user=... / $imitate commands."""
    # Never train on (or respond to) the bot's own messages.
    if message.author == client.user:
        return
    if message.content == '$gather':
        # Scrape channel history and build a per-guild markov model from
        # everyone except the bot.
        client.WORKING_HISTORIES[message.guild] = await message.channel.history(
            limit=client.HISTORY_LIMIT).flatten()
        fulltext = ''
        for m in client.WORKING_HISTORIES[message.guild]:
            if re.match(client.RE_STRING, m.content) and not m.author == client.user:
                fulltext += m.content + '\n'
        client.TEXT_MODELS[message.guild] = markovify.NewlineText(
            fulltext, state_size=2)
        print("Done!")
    # Raw strings avoid invalid-escape warnings; [0-9] (not [1-9]) because
    # Discord discriminators can contain the digit 0 — the original
    # silently rejected such users.
    if re.match(r'^\$gather user=.+#[0-9]{4}$', message.content):
        target_username = re.match(
            r'^\$gather user=(.+)#[0-9]{4}$', message.content)[1]
        print("Conducting message scrape on user " + target_username + "...")
        client.WORKING_HISTORIES[message.guild] = await message.channel.history(
            limit=client.HISTORY_LIMIT).flatten()
        fulltext = ''
        tick = 0
        for m in client.WORKING_HISTORIES[message.guild]:
            if re.match(client.RE_STRING, m.content) and m.author.name == target_username:
                fulltext += m.content + '\n'
                tick += 1
                print(tick)
        client.TEXT_MODELS[message.guild] = markovify.NewlineText(
            fulltext, state_size=2)
        print("Done!")
    if re.match(r'^\$imitate.*', message.content):
        imitation = client.TEXT_MODELS[message.guild].make_sentence(tries=500)
        print(imitation)
        if imitation:
            await message.channel.send(imitation)
def sync_speak(self, ctx, record, userId, repeats=None):
    """Build a markov model from `record` rows and return a speech
    string attributed to guild member `userId`.

    repeats: number of sentences to generate (capped at 20); None
    means a single sentence. On failure the exception object is
    *returned*, matching the original contract.
    """
    # Only train on reasonably long messages.
    text = '\n'.join(row[0] for row in record if len(row[0]) > 100)
    try:
        # The original built this model twice on the repeats-None path;
        # once is enough.
        text_model = markovify.NewlineText(text, state_size=2)
    except Exception as e:
        return e
    speech = "**{}:**\n".format(ctx.guild.get_member(userId).nick)
    count = 1 if repeats is None else min(repeats, 20)
    for _ in range(count):
        try:
            sentence = text_model.make_short_sentence(
                randrange(60, 130), tries=50)
            speech += "{}\n\n".format(sentence)
        except Exception as e:
            if repeats is None:
                # Single-shot path surfaced the error in the original.
                return e
            continue
    return speech
def build_model(db_passwd, model_weights=None):
    """Rebuild the markov model using quotes, inspire, and rants
    databases as seeds.

    model_weights: combine weights for [quotes, rants, inspire];
    defaults to equal weighting [1, 1, 1]. (Sentinel default avoids
    the shared-mutable-default-argument pitfall.)
    """
    if model_weights is None:
        model_weights = [1, 1, 1]
    with db.cursor(password=db_passwd) as c:
        # Fetch quote data
        c.execute('SELECT quote FROM quotes WHERE is_deleted = 0')
        quotes = c.fetchall()
        # Fetch inspire data
        c.execute('SELECT text FROM inspire')
        inspirations = c.fetchall()
        # Fetch iconic FOSS rants
        c.execute('SELECT text FROM markov_rants')
        rants = c.fetchall()
    # Normalize the quote data... Get rid of IRC junk
    clean_quotes = [normalize_quote(d['quote']) for d in quotes]
    # Normalize the inspire data... Just lightly prune authors
    clean_inspirations = [
        normalize_inspiration(d['text']) for d in inspirations
    ]
    # Normalize the rant data... just remove ending punctuation
    clean_rants = [normalize_rant(d['text']) for d in rants]
    # Create the three models, and combine them.
    # More heavily weight our quotes and rants
    rants_model = markovify.NewlineText('\n'.join(clean_rants))
    quotes_model = markovify.NewlineText('\n'.join(clean_quotes))
    inspire_model = markovify.NewlineText('\n'.join(clean_inspirations))
    return markovify.combine([quotes_model, rants_model, inspire_model],
                             model_weights)
def create_model():
    """Train a VOX sentence model, blend in a weak random-word model
    for variety, and return the combined model."""
    # get the list of (canonical) sentences spoken by vox
    # (use `with` so the file handles are actually closed — the
    # original leaked both).
    with open("sentences.txt", "r") as sentences_file:
        sentences = sentences_file.read()
    # train main model
    mainModel = markovify.NewlineText(sentences, 2)
    # get list of all potential words (some of these don't appear in the
    # game at all but I wanna use them cuz they're neat)
    with open("words.txt", "r") as words_file:
        words = words_file.read()
    wordsList = words.split("\n")
    # create a series of 'sentences' consisting of random word combinations
    # the goal here is to create (very weak) links between some words
    # in order to introduce an element of chaos to the model
    combos = ""
    for i in range(500):
        nextSentence = ""
        for j in range(6):
            nextSentence = nextSentence + wordsList[random.randrange(
                len(wordsList))] + " "
        combos = combos + nextSentence + "\n"
    print("Size of primary model: " + str(len(sentences)))
    print("Size of secondary model: " + str(len(combos)))
    # train secondary model
    secondaryModel = markovify.NewlineText(combos)
    # merge models, make sure to give appropriate weighting
    completeModel = markovify.combine([mainModel, secondaryModel], [1.0, 0.2])
    return completeModel
def sync_speak(self, ctx, firstRecord, secondRecord, userId, UserTwoId, repeats):
    """Combine two members' message models and return a joint speech
    string; returns -1 if either model cannot be built.

    repeats is capped at 20 sentences.
    """
    firstText = '\n'.join(x[0] for x in firstRecord if len(x[0]) > 100)
    secondText = '\n'.join(x[0] for x in secondRecord if len(x[0]) > 100)
    try:
        text_model_one = markovify.NewlineText(firstText, state_size=2)
        text_model_two = markovify.NewlineText(secondText, state_size=2)
        combined_models = markovify.combine(
            [text_model_one, text_model_two])
    except Exception as e:
        print(e)
        return -1
    speech = "**{} & {}:**\n".format(
        ctx.guild.get_member(userId).nick,
        ctx.guild.get_member(UserTwoId).nick)
    for _ in range(min(repeats, 20)):
        try:
            sentence = combined_models.make_short_sentence(
                randrange(60, 130), tries=50)
            speech += "{}\n\n".format(sentence)
        except Exception:
            # Narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit.
            continue
    return speech
def make_model():
    """Train one NewlineText model per corpus file and return both."""
    def _train(path):
        with open(path, 'r', encoding='utf-8') as src:
            return markovify.NewlineText(src.read())

    model_1_3 = _train('first.txt')
    model_2_4 = _train('second.txt')
    return model_1_3, model_2_4
def test_custom_regex(self):
    # A custom reject pattern should make matching input raise.
    with self.assertRaises(Exception):
        markovify.NewlineText(
            'This sentence contains a custom bad character: #.',
            reject_reg=r'#')
    # Unbalanced parentheses are rejected by the default well-formed
    # check...
    with self.assertRaises(Exception):
        markovify.NewlineText('This sentence (would normall fail')
    # ...but accepted once that check is disabled.
    model = markovify.NewlineText(
        'This sentence (would normall fail', well_formed=False)
def __init__(self):
    """Load and pre-compile the insult and pickup-line models."""
    with open("data/insults.txt", "r", encoding="utf-8") as f:
        self.insult_model = markovify.NewlineText(f.read())
    # compile() returns a NEW compiled model unless inplace=True; the
    # original discarded the return value, so compilation never took
    # effect.
    self.insult_model.compile(inplace=True)
    with open("data/pickuplines.txt", "r", encoding="utf-8") as f:
        self.pickup_model = markovify.NewlineText(f.read())
    self.pickup_model.compile(inplace=True)
def build_model(messages, state_size=2):
    """Fit a newline-delimited markov model over `messages`.

    Corpora with fewer than 50 messages fall back to state_size 1 so
    the sparse chain can still generate output.
    """
    logging.info("[build_model]")
    corpus = "\n".join(messages)
    effective_state = 1 if len(messages) < 50 else state_size
    return markovify.NewlineText(corpus, state_size=effective_state)
def test_custom_regex(self):
    # Custom reject pattern: matching input must raise.
    bad_char_text = "This sentence contains a custom bad character: #."
    with self.assertRaises(Exception):
        markovify.NewlineText(bad_char_text, reject_reg=r"#")
    # Default well-formed check rejects unbalanced parentheses.
    unbalanced = "This sentence (would normall fail"
    with self.assertRaises(Exception):
        markovify.NewlineText(unbalanced)
    # Disabling the check accepts the same input.
    markovify.NewlineText(unbalanced, well_formed=False)
def generation(id_question):
    """Build an answer for `id_question` by combining a wiki-corpus
    model with a model trained on the question's answer .conll files.

    Returns a generated sentence, or a French apology when neither
    model can produce one. Also writes the collected answer text to
    answer.txt (kept: other parts of the pipeline may read it).
    """
    # The first 8 characters identify the corpus directory (the
    # original used replace(id_question[8:], ""), a fragile slice).
    id_ = id_question[:8]
    dossier = glob.glob(sys.argv[2] + "/*/*/" + id_ + "/*.conll")
    with open("corpus_wiki.txt", "r") as f:
        model_a = markovify.NewlineText(f.read())

    # Collect the surface form (first column) of every 3-column CoNLL
    # line from each answer file.
    mot_pos = ""
    for file in dossier:
        if "a" in file:
            with open(file, "r") as answer:
                for line in answer.read().split("\n"):
                    cols = line.split("\t")
                    if len(cols) == 3:
                        mot_pos += cols[0] + " "
            mot_pos += "."
    mot_pos = mot_pos.replace(" .", ".")
    mot_pos = mot_pos.replace("' ", "'")

    with open("answer.txt", "w") as answer_text:
        answer_text.write(mot_pos)

    # The original wrote answer.txt then immediately re-read it; operate
    # on the in-memory string instead (identical content).
    text_b = mot_pos.replace("\"", "")
    model_b = markovify.NewlineText(text_b)

    answer1 = model_b.make_sentence(tries=140)
    if answer1 is not None:
        return answer1
    # Only build/query the combined model when actually needed.
    model_combo = markovify.combine([model_a, model_b], [1, 3])
    answer2 = model_combo.make_sentence()
    if answer2 is not None:
        return answer2
    return "Désolée, je n'ai pas de réponse"
def baciniFratturati():
    """Print QUANTI_BACI short metal-flavoured love sentences from a
    weighted blend of the BACI and METAL corpora."""
    with open(BACI) as corpus:
        baciModel = markovify.NewlineText(corpus.read())
    with open(METAL) as corpus:
        metalModel = markovify.NewlineText(corpus.read())
    # Slightly favour the metal corpus.
    bmModel = markovify.combine([baciModel, metalModel], [1, 1.3])
    fallback = 'il mio cuore è ruggine sanguinante!'
    for _ in range(QUANTI_BACI):
        sentence = bmModel.make_short_sentence(
            max_chars=140, max_overlap_ratio=.35, tries=200)
        print('%s\n' % (sentence or fallback))
def make_lyric(history=None):
    """Generate a tweet-length lyric not already present in `history`.

    history: collection of previously produced lyrics to avoid;
    defaults to an empty set (None sentinel avoids the shared
    mutable-default-argument pitfall of the original `history=set()`).
    """
    if history is None:
        history = set()
    all_lyrics = ''
    for track in get_top_tracks():
        all_lyrics += track.lyrics + '\n'
    all_lyrics = no_duplicate_lines(all_lyrics)
    # Two chain depths for variety; one is picked at random per attempt.
    models = [
        markovify.NewlineText(all_lyrics, state_size=2),
        markovify.NewlineText(all_lyrics, state_size=3),
    ]
    tweet = None
    while not tweet or tweet in history:
        tweet = random.choice(models).make_short_sentence(280)
    return tweet
def Google(self, history):
    """Generate up to 20 headline/body ad pairs from `history`.

    Returns a list of {'headline', 'body'} dicts, or None when the
    models are empty or produce nothing.
    """
    headlines = markovify.NewlineText(history.title, state_size=2)
    body_copy = markovify.NewlineText(history.body, state_size=2)
    if not (headlines and body_copy):
        return None
    ads = []
    attempts = 0
    # Bound total attempts: the original only advanced its counter when
    # BOTH sentences succeeded, so a sparse corpus (make_sentence()
    # returning None forever) spun in an infinite loop.
    while len(ads) < 20 and attempts < 200:
        attempts += 1
        ad_headline = headlines.make_sentence()
        ad_body = body_copy.make_sentence()
        if ad_headline and ad_body:
            ads.append({'headline': ad_headline, 'body': ad_body})
    return ads if ads else None
def get_model(state_size, dataset):
    """Return the combined units/abstract-units model for `dataset`,
    weighted 1.5:1 toward the concrete units."""
    def _train(path):
        with codecs.open(path, 'r', 'utf-8') as src:
            return markovify.NewlineText(src.read(), state_size=state_size)

    unit_model = _train('data/{0}/units.txt'.format(dataset))
    abstract_model = _train('data/{0}/abstract_units.txt'.format(dataset))
    return markovify.combine([unit_model, abstract_model], [1.5, 1])
def shitPost():
    """Pull 4chan text rows, markov-generate a tweet, post it, and log
    the generated sentence back to the database.

    Best-effort: any failure is printed and swallowed, matching the
    original contract.
    """
    try:
        con = pymysql.connect(host=hostname, user=user, password=password,
                              port=port, database=database)
        cursor = con.cursor()
        cursor.execute('SELECT `Text` FROM TwitterBot.4chanData')
        rows = cursor.fetchall()
        # Strip the tuple-repr artifacts ("('...',)") from each row.
        shitPosting = []
        for row in rows:
            cleaned = str(row).replace('(\'', '').replace('\',)', '')
            shitPosting.append(cleaned)
        # NewlineText expects a single string, not a list of lines.
        text_model = markovify.NewlineText('\n'.join(shitPosting))
        shitPostSupreme = text_model.make_short_sentence(280)
        # Undo HTML-escaped apostrophes and stray '>' quote markers
        # before tweeting.
        newTweet = str(shitPostSupreme).replace('&#39;', '\'')
        newestTweet = str(newTweet).replace('>', '')
        tweetBot.update_status(newestTweet)
        cursor = con.cursor()
        # Parameterized query: the original interpolated the generated
        # text straight into the SQL string (injection / syntax-error
        # risk on any quote character).
        cursor.execute('INSERT into VarianJoestar (Text) VALUES (%s)',
                       (shitPostSupreme,))
        con.commit()
        cursor.close()
        print(shitPostSupreme)
    except Exception as exception:
        print(exception)
def makeMarkov(twitter_handle):
    # Build a markov model from `text` and print 5 full sentences plus
    # 3 tweet-length (<=140 char) sentences.
    # NOTE(review): `text` is not defined in this function and
    # `twitter_handle` is never used — presumably `text` is a module-level
    # variable holding the handle's fetched tweets; confirm, otherwise
    # this raises NameError at the first line.
    print(text)
    text_model = markovify.NewlineText(text)
    for i in range(5):
        print(text_model.make_sentence())
    for i in range(3):
        print(text_model.make_short_sentence(140))
def main():
    """Tokenize lp.txt into lp_tokenize.txt, train a word-level model,
    count bigram frequencies, and print an assembled song."""
    # `with` guarantees the tokenized output is flushed and closed before
    # it is re-read below — the original never closed either handle, so
    # buffered writes could be missing from the read.
    with open('lp.txt', 'r') as src, open('lp_tokenize.txt', 'w') as dst:
        for line in src:
            dst.write(' '.join(word_tokenize(line)))

    # Read the tokenized file (the original also re-read lp.txt into an
    # unused `text21` variable; dropped).
    with open('lp_tokenize.txt', 'r') as f:
        text = f.read().lower().split(' ')

    model = markovify.NewlineText(text, state_size=1)

    # Bigram frequency table: wordfreq[w1][w2] = count.
    # NOTE(review): kept for parity with the original, but nothing below
    # reads it.
    wordfreq = {}
    for word1, word2 in zip(text, text[1:]):
        wordfreq.setdefault(word1, {})
        wordfreq[word1][word2] = wordfreq[word1].get(word2, 0) + 1

    v1 = make_verse(model, text)
    c = make_chorus(model, text)
    v2 = make_verse(model, text)
    v3 = make_verse(model, text)
    song = v1 + "\n\n" + c + v2 + "\n\n" + c + "\n\n" + v3 + "\n\n" + c
    print(song)
async def seedmarkov(self, ctx, *, seed: str):
    """ Generate a usermarkov based on a starting word or phrase. """
    MemeCommand.check_rate_limit(ctx, 60, cooldown_group="usermarkov")
    # TODO: implement separate thing and clean this up
    cache_path = r'message_cache/users/{0.id}.txt'.format(ctx.message.author)
    try:
        with open(cache_path, 'r', encoding='utf-8') as cache:
            corpus = cache.read()
    except FileNotFoundError:
        await ctx.send("I don't have any logs from you.")
        return
    async with ctx.typing():
        model = markovify.NewlineText(corpus, state_size=2)
        try:
            output = model.make_sentence_with_start(seed, tries=500)
            if output is None:
                await ctx.send(
                    "Could not create markov with seed `{}`.".format(seed))
                return
            # Sanitize any mentions the chain may have reproduced.
            if utils.check_input(output):
                output = utils.extract_mentions(output, ctx.message)
        except (markovify.text.ParamError, KeyError):
            await ctx.send("Could not make a markov with given seed.")
            return
    await ctx.send(output)
async def mimic2(self, ctx):
    """Imitate the invoking user from their recent channel messages."""
    megastring = ""
    count = 0
    async for message in ctx.history(limit=5000, oldest_first=False):
        # Collect the caller's own non-empty, non-bot, non-command lines.
        if (str(message.author) == str(ctx.message.author)
                and not message.content == ""
                and not message.author.bot
                and "$" not in message.content):
            megastring += "\r\n" + message.content
            count += 1
    if len(megastring) < 10:
        await ctx.send("No can do buckaroo Count: " + str(count) + megastring)
        # The original fell through here, so markovify then failed on the
        # tiny corpus and a second failure message was sent; stop instead.
        return
    try:
        chain = markovify.NewlineText(megastring)
        sentence = chain.make_short_sentence(max_chars=100,
                                             max_overlap_ratio=.95)
        if sentence is None:
            raise Exception('F**k')
        await ctx.send(sentence)
    except Exception:
        # Narrowed from a bare `except:`.
        await ctx.send("No can do buckaroo")
def make_chain(input_file):
    """Return a NewlineText markov model trained on `input_file`."""
    with open(input_file) as corpus:
        return markovify.NewlineText(corpus.read())
async def mimic(self, ctx, user: discord.Member = None):
    """Imitate `user` (default: the invoker) from recent channel
    messages, editing the placeholder message with the result."""
    mimicMessage = await ctx.send("Working on it...")
    megastring = ""
    count = 0
    mimicUser = str(ctx.message.author) if user is None else str(user)
    async for message in ctx.history(limit=5000, oldest_first=False):
        # Collect the target's non-empty, non-bot, non-command lines.
        if (str(message.author) == mimicUser
                and not message.content == ""
                and not message.author.bot
                and "$" not in message.content):
            megastring += "\r\n" + message.content
            count += 1
    if len(megastring) < 10:
        await ctx.send("No can do buckaroo Count: " + str(count) + megastring)
        # The original fell through, so markovify then failed on the tiny
        # corpus and the angry message was also sent; stop instead.
        return
    try:
        chain = markovify.NewlineText(megastring)
        sentence = chain.make_short_sentence(max_chars=100,
                                             max_overlap_ratio=.95)
        if sentence is None:
            raise Exception('F**k')
        await mimicMessage.edit(content=sentence)
    except Exception:
        # Narrowed from a bare `except:`.
        await ctx.send("You don't talk enough in here :angry:")
async def markov(self, ctx):
    """Generates sentences up to 140 characters.

    The text for titles/comments/text-posts are generated using "markov chains", a random process that's "trained" from looking at real data. If you've ever used a keyboard on your phone that tries to predict which word you'll type next, those are often built using something similar.

    Basically, you feed in a bunch of sentences, and even though it has no understanding of the meaning of the text, it picks up on patterns like "word A is often followed by word B". Then when you want to generate a new sentence, it "walks" its way through from the start of a sentence to the end of one, picking sequences of words that it knows are valid based on that initial analysis. So generally short sequences of words in the generated sentences will make sense, but often not the whole thing.

    It's taught by messages that are being sent in the same server. All messages are stored anonymously.

    A more detailed explanation: http://www.reddit.com/r/Python/comments/2ife6d/pykov_a_tiny_python_module_on_finite_regular/cl3bybj"""
    async with ctx.channel.typing():
        # Read-only access suffices: the original opened with 'r+',
        # read the file, truncated it and wrote the identical text back
        # — a redundant (and risky) round-trip.
        with open('markov/' + str(ctx.message.guild.id) + '.txt',
                  'r', encoding="utf8") as f:
            text = f.read()
        text_model = markovify.NewlineText(text)
        text = text_model.make_short_sentence(140)
        if text:
            await ctx.send(text)
        else:
            await ctx.send(
                "I failed to generate a sentence, might need more data to study on."
            )
def markov_mat_stuff(message, stuff):
    """Send a generated dish name; with `stuff`, keep generating until
    one containing `stuff` appears (bounded attempts)."""
    # Lazily train once, caching the model as an attribute.
    # NOTE(review): the cache lives on the sibling `markov_mat` function
    # object, not on this function — preserved as-is.
    if not hasattr(markov_mat, "text_model"):
        with open('/home/simon/bertil/mat.txt') as file_:
            markov_mat.text_model = markovify.NewlineText(file_.read(),
                                                          state_size=1)
    model = markov_mat.text_model
    if stuff:
        wanted = stuff.lower()
        for _ in range(512):
            candidate = model.make_sentence(tries=64)
            if candidate and wanted in candidate.lower():
                message.send(candidate)
                return
        message.send("Jag kunde inte hitta på en maträtt med {} :rip:".format(stuff))
    else:
        sentence = model.make_sentence(tries=64)
        if sentence:
            message.send(sentence)
        else:
            message.send("Jag lyckades inte generera en mening :rip:")
def markov(message, stuff):
    """Send a sentence generated from every user's logged messages;
    with `stuff`, try to start the sentence at that word."""
    # Train once, caching the model on the function object.
    if not hasattr(markov, "text_model"):
        with open('/home/simon/bertil/user_messages.json') as file_:
            user_messages = json.load(file_)
        corpus = ''
        for user in user_messages:
            corpus += '\n'.join(user_messages[user])
        markov.text_model = markovify.NewlineText(corpus, state_size=3)
    if stuff:
        try:
            message.send(markov.text_model.make_sentence_with_start(
                stuff, False, tries=64))
            return
        except Exception:
            message.send("Jag kommer inte på något att säga med {} :rip:".format(stuff))
    else:
        sentence = markov.text_model.make_sentence(tries=64)
        if sentence:
            message.send(sentence)
        else:
            message.send("Jag lyckades inte generera en mening :rip:")
def __init__(self):
    """Load word lists and train the ingredient, poetry, instruction,
    and combined text models."""
    with open("adjectives.txt") as f:
        self.adjs = f.read().strip().split("\n")
    with open("adverbs.txt") as f:
        self.advs = f.read().strip().split("\n")
    # Ingredient model (newline-delimited corpus, shallow chain).
    self.ingr_model = markovify.NewlineText(self.load_ingredient_corpus(),
                                            state_size=1)
    # Poetry model.
    with open("poetry_corpus.txt") as f:
        self.poem_model = markovify.Text(f.read(), state_size=2)
    # Instruction model.
    with open("instr_corpus.txt") as f:
        self.raw_text_model = markovify.Text(f.read())
    # Combined recipe/poetry model, weighted toward the instructions.
    self.text_model = markovify.combine(
        [self.poem_model, self.raw_text_model], (1, 1.5))
def build_indexes(index, hord):
    """Create a Whoosh index at path `index`, fill it from `hord`, and
    build a markov model over all quotes.

    Returns (index_object, model); model is None for an empty hord.
    """
    if not os.path.exists(index):
        os.mkdir(index)
    # Separate local so the path parameter is not shadowed by the index
    # object it becomes.
    ix = create_in(
        index,
        Schema(
            quote=TEXT(stored=True),
            id=ID(stored=True),
            submitter=STORED,
            submitted=STORED,
        ),
    )
    corpus = []
    with ix.writer() as writer:
        LOGGER.info("Building Whoosh index and markov model from hord.")
        for row in hord.get_rows():
            corpus.append(row.quote)
            submitted = (row.submitted.strftime("%b %d %Y %H:%M:%S")
                         if row.submitted else None)
            writer.update_document(
                quote=row.quote,
                id=str(row.id),
                submitter=(row.submitter),
                submitted=(submitted),
            )
    LOGGER.info(f"Index built. {ix.doc_count()} documents indexed.")
    model = markovify.NewlineText("\n".join(corpus)) if corpus else None
    LOGGER.info(f"Markov model built.")
    return ix, model
def load_model(self):
    """Train self.model from self.corpus, choosing NewlineText when
    self.new_line is set and plain Text otherwise."""
    with open(self.corpus) as source:
        raw = source.read()
    model_cls = markovify.NewlineText if self.new_line else markovify.Text
    self.model = model_cls(raw)