def processTextOld(text):
    """Fold `text` into the global markov model (legacy path).

    Stores a fresh model on the first call; afterwards combines the new
    text with the existing model at equal weight.
    """
    # BUG FIX: identity comparison with None (`is None`), not `== None`.
    if builtins.markov is None:
        print("Setting to text")
        builtins.markov = markovify.Text(text)
    else:
        print("Combining")
        builtins.markov = markovify.combine(
            [builtins.markov, markovify.Text(text)], [0.5, 0.5]
        )
def add_message(self, message):
    """Train a model on `message` and merge it into any saved model."""
    incoming = markovify.Text(message)
    # If we have a chain saved, then make the model from it
    try:
        # EAFP: self.model may not exist yet on the very first message.
        self.model = markovify.combine([self.model, incoming])
    except AttributeError:
        self.model = incoming
def do_response(self, e, prompt):
    """Reply on IRC with a sentence blending the base model and `prompt`."""
    # Private messages go back to the sender; everything else to the channel.
    target = e.source.nick if e.type == "privmsg" else self.channel
    blended = markovify.combine(
        [self.text_model, markovify.Text(prompt)], [0.5, 0.5]
    )
    reply = blended.make_sentence()
    if reply:
        self.connection.privmsg(target, reply)
def test_from_mult_files_without_retaining(self):
    """Combining models built without retained originals still generates."""
    texts_dir = os.path.join(os.path.dirname(__file__), "texts")
    models = []
    for dirpath, _, filenames in os.walk(texts_dir):
        for name in filenames:
            with open(os.path.join(dirpath, name)) as handle:
                models.append(markovify.Text(handle, retain_original=False))
    sentence = markovify.combine(models).make_sentence()
    assert sentence is not None
    assert len(sentence) != 0
def combine(infiles=[DATAFILE_DOI, DATAFILE_ONION], n=20): ts = [] mdls = [] for infile in infiles: mdl, text = get_model(infile) ts.append(text) mdls.append(mdl) M = markovify.combine(mdls, weights=[1.0, 0.4]) for i in xrange(n): print get_msg(M, text='\n'.join(ts))
async def beid(self, ctx, user, other=None):
    """generate a markov chain based on the logs of `user`."""
    server = ctx.message.server
    log_path = log_dir + user + '.txt'
    if os.path.exists(log_path):
        with codecs.open(log_path, "r", encoding='utf-8', errors='ignore') as f:
            # BUG FIX: NewlineText expects a string; the original passed a
            # lazy filter object, which breaks under Python 3. Join the
            # non-empty, stripped lines back into newline-separated text.
            text = "\n".join(filter(None, (line.rstrip() for line in f)))
        text_model = markovify.NewlineText(text)
        name = user
        if other:
            # fusion impersonations
            with open(log_dir + other + '.txt') as s:
                other_text = s.read()
            other_model = markovify.NewlineText(other_text)
            text_model = markovify.combine([text_model, other_model], [1, 1])
            name += " + " + other
        sentence = text_model.make_sentence(tries=100)
        embed = discord.Embed(title='', description=sentence)
        embed.set_author(name=name)
        await self.bot.say(embed=embed)
    else:
        await self.bot.say("i don't have any messages logged from that user yet.")
# Play the audio using the platform's default player
if sys.platform == "win32":
    os.startfile(output)
else:
    # NOTE(review): afplay is macOS-specific -- confirm target platforms.
    subprocess.call(["afplay", output])

for _ in itertools.repeat(None, 50):
    # Choose word-level or character-level modelling based on `level`.
    model_cls = markovify.Text if level == "word" else SentencesByChar
    gen_a = model_cls(text_a, state_size=order)
    gen_b = model_cls(text_b, state_size=order)
    gen_c = model_cls(text_c, state_size=order)
    gen_combo = markovify.combine([gen_a, gen_b, gen_c], weights)
    counter = 0
    for i in range(output_n):
        out = gen_combo.make_short_sentence(length_limit, test_output=False)
        # WORDS = ("Ivy", "Joanna", "Kendra", "Kimberly", "Salli", "Raveena", "Nicole", "Amy", "Emma", "Joey", "Justin", "Matthew", "Brian", "Geraint")
        # word = random.choice(WORDS)
        try:
            # Request speech synthesis
            response = polly.synthesize_speech(
                Text=say_a + (out.lower()) + say_b,
                OutputFormat="mp3",
                TextType="ssml",
                VoiceId="Brian",
            )
        except (BotoCoreError, ClientError) as error:
            # The service returned an error, exit gracefully
            print(error)
            sys.exit(-1)
with open("../readings/satyricon.txt") as s:
    satyricon = s.read()

# build and combine the models (lovecraft text is read above this chunk)
lovecraft_model = markovify.Text(lovecraft)
satyricon_model = markovify.Text(satyricon)
model_synthesis = markovify.combine(
    [lovecraft_model, satyricon_model], [1.5, 1]
)

# generate a sentence from the markov model
markov_text = model_synthesis.make_sentence()

# get raw text as string with the write (w) or append (a) option
# write a new line in markdown
m.write("\n")
def combine_models(models=None, weights=None):
    """Combine `models` with matching per-model `weights` via markovify.

    BUG FIX: the defaults were shared mutable lists ([]); None sentinels
    avoid cross-call aliasing.

    Raises:
        AssertionError: if the two lists differ in length.
    """
    models = [] if models is None else models
    weights = [] if weights is None else weights
    assert len(models) == len(weights)
    return markovify.combine(models, weights)
import markovify

with open("beowulf.txt", errors="ignore") as f:
    text_a = f.read()
with open("metamorphosis.txt", errors="ignore") as g:
    text_b = g.read()

# Build one model per book, then blend them equally.
model_a = markovify.Text(text_a)
model_b = markovify.Text(text_b)
model_combo = markovify.combine([model_a, model_b], [.5, .5])

for i in range(5):
    print(model_combo.make_sentence())

print("\n")

for i in range(3):
    print(model_combo.make_short_sentence(280))
# -*- coding: utf-8 -*- """ Created on Tue Oct 16 19:33:35 2018 @author: Windows 10 Pro """ # import libraries import markovify """ Combine text from Kaiya Smith's Letter to Hansel and F*ckboys """ # get raw text as string with open("../poems/HashtagHansel.txt") as f: Hansel = f.read() with open("../poems/Fboi.txt") as f: boys = f.read() # build and combine the models Hansel_model = markovify.Text(Hansel) boys_model = markovify.Text(boys) model_synthesis = markovify.combine([Hansel_model, boys_model], [1.5, 1]) # print five randomly-generated sentences for i in range(5): print model_synthesis.make_sentence()
def initializeModel(self):
    """Build self.model by combining one sub-model per tweet text."""
    print('TRAINER BUILDING MODEL')
    tweet_models = [
        markovify.Text(str(entry), self.stateSize)
        for entry in self.data['Text']
    ]
    self.model = markovify.combine(models=tweet_models)
def generate_text(tweet):
    """Generate a short sentence blending eight book models with `tweet`.

    Models are loaded from JSON via read_JSON when available; otherwise they
    are built from the source texts and persisted with write_JSON.
    """
    num_texts = 0
    with open("./the_picture_of_dorian_gray.txt", encoding="utf8") as f:
        the_picture_of_dorian_gray = f.read()
        num_texts += 1
    with open("./declaration_of_independence.txt", encoding="utf8") as f2:
        declaration_of_independence = f2.read()
        num_texts += 1
    with open("./jane_eyre.txt", encoding="utf8") as f3:
        # BUG FIX: this read previously overwrote declaration_of_independence
        # with Jane Eyre's text.
        jane_eyre = f3.read()
        num_texts += 1
    with open("./war_and_peace.txt", encoding="utf8") as f4:
        war_and_peace = f4.read()
        num_texts += 1
    with open("./jane_eyre.txt", encoding="utf8") as f5:
        # Redundant re-read kept so num_texts (passed to read_JSON) stays 9.
        jane_eyre = f5.read()
        num_texts += 1
    with open("./jokes.txt", encoding="utf8") as f6:
        jokes = f6.read()
        num_texts += 1
    with open("./moby_dick.txt", encoding="utf8") as f7:
        moby_dick = f7.read()
        num_texts += 1
    with open("./pride_and_prejudice.txt", encoding="utf8") as f8:
        pride_and_prejudice = f8.read()
        num_texts += 1
    with open("./art_of_love.txt", encoding="utf8") as f9:
        art_of_love = f9.read()
        num_texts += 1
    state_size = 4
    # NOTE(review): markovify's max_overlap_ratio is normally a 0-1 ratio;
    # 50 effectively disables the overlap check -- confirm intent.
    max_overlap_ratio = 50
    num_tries = 100
    models = read_JSON(num_texts)
    model_tweet = POSifiedText(tweet.full_text, state_size=state_size)
    if len(models) == 0:
        print("Creating language model")
        # creates custom models instead of using naive ones
        models = [
            POSifiedText(the_picture_of_dorian_gray, state_size=state_size),
            POSifiedText(declaration_of_independence, state_size=state_size),
            POSifiedText(jane_eyre, state_size=state_size),
            POSifiedText(war_and_peace, state_size=state_size),
            POSifiedText(jokes, state_size=state_size),
            POSifiedText(pride_and_prejudice, state_size=state_size),
            POSifiedText(moby_dick, state_size=state_size),
            POSifiedText(art_of_love, state_size=state_size),
        ]
        write_JSON(models)
    else:
        print("Sucessfully created models from existing JSON files")
    model_combo = markovify.combine(
        [
            models[0], models[1], models[2], models[3],
            models[4], models[5], models[6], models[7],
            model_tweet,
        ],
        [1.4, 1.25, 1.25, 1.4, 1.25, 1.0, 1.25, 2.0, 1.0],
    )
    return model_combo.make_short_sentence(
        280, max_overlap_ratio=max_overlap_ratio, tries=num_tries
    )
def test_combine_chains(self):
    """Two copies of the same raw chain can be combined."""
    sherlock_chain = sherlock_model.chain
    combo = markovify.combine([sherlock_chain, sherlock_chain])
def test_combine_no_retain(self):
    """Combining two no-retain models yields a no-retain model."""
    model = sherlock_model_no_retain
    combo = markovify.combine([model, model])
    assert not combo.retain_original
def test_combine_no_retain_on_retain(self):
    """A retaining model combined with a non-retaining one keeps its corpus."""
    no_retain = sherlock_model_no_retain
    retaining = sherlock_model
    combo = markovify.combine([retaining, no_retain])
    assert combo.retain_original
    assert combo.parsed_sentences == retaining.parsed_sentences
def test_mismatched_model_types(self):
    """Mixing Text and NewlineText models must raise."""
    with self.assertRaises(Exception) as context:
        word_model = sherlock_model
        newline_model = markovify.NewlineText(sherlock)
        markovify.combine([word_model, newline_model])
def test_mismatched_state_sizes(self):
    """Models with different state sizes cannot be combined."""
    with self.assertRaises(Exception) as context:
        size_two = markovify.Text(sherlock, state_size=2)
        size_three = markovify.Text(sherlock, state_size=3)
        markovify.combine([size_two, size_three])
def test_bad_weights(self):
    """A weights list shorter than the model list must raise."""
    with self.assertRaises(Exception) as context:
        model = sherlock_model
        markovify.combine([model, model], [0.5])
def test_double_weighted(self):
    """Combining a model with itself at default weights changes the counts."""
    model = markovify.Text(self.sherlock)
    combo = markovify.combine([model, model])
    assert combo.chain.model != model.chain.model
def test_combine_dicts(self):
    """Raw chain dicts are accepted by combine."""
    model_dict = sherlock_model.chain.model
    combo = markovify.combine([model_dict, model_dict])
def test_bad_types(self):
    """Combining plain strings must raise."""
    with self.assertRaises(Exception) as context:
        markovify.combine(["testing", "testing"])
async def run(self, ctx):
    """Generate a sentence from messages in a channel or from a user.

    To scan a user's or a channel's messages, mention them. Incremental
    history is merged into a markov model cached on disk per channel/user.
    (Moved here from mid-function, where the original string was dead code.)
    """
    if isinstance(ctx.channel, discord.DMChannel):
        ctx.channel.send("cannot use commands in dm channels")
        return None
    msg = await ctx.channel.send("Processing")
    try:
        # get channel wanted and member wanted
        member = ctx.message.mentions
        channel = ctx.message.channel_mentions or [ctx.message.channel]
        print(member)
        print(channel)
        # format file location (Windows-style separators, as before)
        storage_loc = os.getcwd() + "\\data\\{ch}".format(ch=channel[0].id)
        print(storage_loc)
        # check if directory exists, if not make one
        if not os.path.exists(Path(storage_loc)):
            os.makedirs(storage_loc)
            print('dirmade')
        # add final file name
        if member:
            storage_loc += "\\{}.json".format(member[0].id)
            print('eyy embmer')
        else:
            storage_loc += "\\main.json"
        print(storage_loc)
        storage_loc = Path(storage_loc)
        # if a cache exists, load it and only fetch newer messages;
        # otherwise scan the full history
        newdata = None
        newappend = ""
        jsonmodel = None
        filemodel = None
        date = None
        if os.path.exists(storage_loc):
            print('path exists')
            with open(storage_loc, "r") as f:
                data = json.load(f)
            date = data["time"]
            jsonmodel = markovify.NewlineText.from_json(data["model"])
            newdata = channel[0].history(
                after=datetime.datetime.fromtimestamp(date))
            print(newdata)
        else:
            print('path doesnt eyyy')
            newdata = channel[0].history(limit=15000)
        async for i in newdata:
            if member:
                if i.author == member[0]:
                    newappend += i.content + "\n"
            else:
                newappend += i.content + "\n"
        if newappend:
            filemodel = markovify.NewlineText(newappend)
        if jsonmodel and newappend:
            filemodel = markovify.combine([jsonmodel, filemodel], [1, 1])
        elif jsonmodel:
            filemodel = jsonmodel
        temp = None
        # BUG FIX: filemodel can be None (no cache and no new messages);
        # the old code crashed on filemodel.make_sentence().
        if filemodel is not None:
            for i in range(100):
                temp = filemodel.make_sentence()
                if temp:
                    break
        # BUG FIX: temp can still be None after 100 tries; the old code
        # crashed on temp.replace and fell into the blanket error handler.
        cleaned = temp.replace("@", "") if temp else ""
        await ctx.channel.send(cleaned or "somthign wrong")
        del temp
        if filemodel is not None:
            tempdata = {"time": time.time(), "model": filemodel.to_json()}
            with open(storage_loc, "w+") as f:
                f.write(json.dumps(tempdata, separators=(',', ':')))
            print("writine")
            del filemodel
        await ctx.message.delete()
        await msg.delete()
    except Exception:
        # narrowed from a bare except; best-effort cleanup, as before
        await ctx.message.delete()
        await msg.delete()
        await ctx.channel.send("somthings wrong")
text = '\n'.join([r for r in rows if r != '']) return text if __name__ == "__main__": progress = 0 #out of the max value on progress bar bar = progressbar.ProgressBar().start(max_value=4) texteroni = markovify.Text(cleanBook('./theDunwichHorror.txt')) book2 = markovify.Text(cleanBook('./theShunnedHouse.txt')) progress += 1 bar.update(progress) # COMBINE THE BOOKS allText = markovify.combine([texteroni, book2]) progress += 1 bar.update(progress) print "Here are 3 random sentences:\n\n" for i in range(3): string = allText.make_short_sentence(70) print string '''TODO for word in string.split(' '): print "Syllables in " + str( word ) + ": " + str( countSyllables(word)) ''' print "\n" progress += 1 bar.update(progress)
def combine(models):
    """Combine `models`, weighting each inversely to its text length.

    Weights are inversely proportional to size, so a large text doesn't
    drown out a small one.
    """
    if len(models) == 1:
        # EDGE CASE FIX: the formula below would assign a lone model weight
        # 0, producing an empty combined chain. Return it unchanged instead.
        return models[0]
    total_length = sum(m.length for m in models)
    weights = [(total_length - m.length) / total_length for m in models]
    return markovify.combine(models, weights)
import markovify

# Get raw text as string.
# FIX: the files were opened with open(...) and never closed; context
# managers close them deterministically.
with open('parsedBible.txt', 'r+') as f:
    text = f.read()
with open('IStestfile.txt', 'r+') as fi:
    text2 = fi.read()

# Build the model.
modelHP = markovify.Text(text)
modelIS = markovify.Text(text2)
modelX = markovify.combine([modelHP, modelIS], [1, 1])

# Print five randomly-generated sentences
for i in range(5):
    print(modelX.make_short_sentence(200))

# Print three randomly-generated sentences of no more than 140 characters
# for i in range(3):
#     print(text_model.make_short_sentence(140))
import sys
import markovify

# Rehydrate both models from their serialized JSON form.
with open('ico.json') as f:
    ico_model = markovify.Text.from_json(f.read())
with open('erowid.json') as f:
    erowid_model = markovify.Text.from_json(f.read())

# Combine models, favouring the ICO corpus slightly.
combo = markovify.combine([ico_model, erowid_model], [1.25, 1])

# Sentence count comes from the command line.
for i in range(int(sys.argv[1])):
    print(combo.make_sentence())
def addToModel(self, text):
    """Train a model on `text` and fold it into self.model."""
    print('TRAINER ADDING TO MODEL')
    addition = markovify.Text(text, self.stateSize)
    self.model = markovify.combine(models=[self.model, addition])
def combine(combined_model, model):
    """Merge `model` into `combined_model`; seed with `model` when empty."""
    if combined_model is None:
        return model
    return markovify.combine(models=[combined_model, model])
def main():
    """Entry point: build the clickbait model and tweet headlines forever."""
    if len(sys.argv) != 2:
        print("Path to config.ini must be passed as argument.")
        exit_on_error()
    # read config file
    config = configparser.ConfigParser()
    # BUG FIX: ConfigParser.read does not raise IOError for a missing or
    # unreadable file -- it returns the list of files it parsed -- so the
    # old try/except IOError never fired. Check the return value instead.
    if not config.read(sys.argv[1]):
        print("Failed to read config file: " + sys.argv[1])
        exit_on_error()
    # set up apis
    twitter_api = twitter_setup(config)
    google_api = google_setup(config)
    cse_id = config.get('GOOGLE', 'CSE_ID')
    # create markov chain model
    model_clickbait = markov_chain_setup("clickbait_data_filtered.txt")
    model_non_clickbait = markov_chain_setup("non_clickbait_data_filtered.txt")
    model = markovify.combine([model_clickbait, model_non_clickbait], [1.60, 1])
    # set nltk path
    if './nltk_data' not in nltk.data.path:
        nltk.data.path.append('./nltk_data')
    while True:
        # create headline
        tweet = create_headline(model, randint(40, 180))
        if tweet is None:
            print(str(datetime.datetime.now()) +
                  " Markov chain model failed to generate a tweet.")
        else:
            print(str(datetime.datetime.now()) + " Headline: " + tweet)
            # tokenized headline
            tokens = nltk.word_tokenize(str(tweet))
            # add question mark punctuation for interrogative headlines
            interrogative = [
                'Does', 'Do', 'Can', 'Should', 'Would', 'Could', 'How',
                'Which', "Is", "Are", "Was"
            ]
            if tokens[0] in interrogative:
                tweet += "?"
            # identify key words in tweet to use for hashtags using
            # parts-of-speech tags; nouns only, tagger isn't accurate so
            # remove helping verbs
            verbs = [
                "am", "are", "is", "was", "were", "be", "being", "been",
                "have", "has", "had", "shall", "will", "do", "does", "did",
                "may", "must", "might", "can", "could", "would", "should",
                "who", "what", "why", "your", "you", "their", "or"
            ]
            tags = nltk.pos_tag(tokens)
            keywords_list = [
                word for word, pos in tags
                if (pos.startswith('N') and word.lower() not in verbs)
            ]
            query_words = [
                word for word, pos in tags
                if (pos.startswith('N') or pos.startswith('J'))
            ]
            print(str(datetime.datetime.now()) +
                  " Identified key words: " + str(keywords_list))
            print(str(datetime.datetime.now()) +
                  " Identified query words: " + str(query_words))
            # add key words as hash tags, removing any punctuations
            for kw in keywords_list:
                tweet += " #" + kw.translate(
                    str.maketrans('', '', string.punctuation))
            # full tweet
            print(str(datetime.datetime.now()) + " Tweet: " + tweet)
            # search for image to post with tweet message
            image = search_image(google_api, cse_id, ' '.join(query_words))
            # post to twitter
            tweet_fake_buzz(twitter_api, tweet, image)
            print(str(datetime.datetime.now()) + " Done.")
        # NOTE(review): nesting of the headline-processing body under `else`
        # is inferred from the collapsed source (tokens[0] would crash on a
        # None tweet otherwise) -- confirm against the original file.
        sleep(300)  # tweet every 5 minutes
def scrape_data():
    """
    Pull the tweets from each congressman and save the markov chain
    to a file corresponding to their party.
    """
    # connect to the twitter api
    twitter_keys = config['twitter']
    auth = tweepy.OAuthHandler(twitter_keys['consumer_key'],
                               twitter_keys['consumer_secret'])
    auth.set_access_token(twitter_keys['access_token'],
                          twitter_keys['access_token_secret'])
    api = tweepy.API(auth, wait_on_rate_limit=True)
    for party in ['Republican', 'Democratic']:
        # get all the twitter urls into a list
        with congress_file.open(encoding='utf-8') as file:
            profiles = json.load(file)
        usernames = []
        for profile in profiles:
            if (profile['party'] == party or
                    # libertarians go with republicans because
                    # there's not enough of them for a whole dataset
                    profile['party'] in third_parties['Republican'] and
                    party == 'Republican' or
                    # independents go with democratics
                    # for the same reason as above
                    profile['party'] in third_parties['Democratic'] and
                    party == 'Democratic'):
                for username in profile['twitter']:
                    usernames.append(username)
        # set date from a week ago so we can separate tweets
        week_ago_timestamp = datetime.now().timestamp() - 604800
        tweets_within_week = []
        tweets_outside_week = []
        retweets = []
        likes = []
        for username in usernames:
            try:
                page = api.user_timeline(screen_name=username,
                                         tweet_mode='extended')
            except tweepy.TweepyException as exc:
                print(f"Skipping @{username} - {exc}")
                logging.info(f"@{username} - {exc}", )
                continue
            print(f"Grabbing {len(page)} tweets from @{username}")
            for tweet in page:
                if tweet.created_at.timestamp() > week_ago_timestamp:
                    tweets_within_week.append(tweet.full_text)
                else:
                    tweets_outside_week.append(tweet.full_text)
                # get likes and retweets from retweeted status
                # or likes will be 0
                if hasattr(tweet, 'retweeted_status'):
                    retweets.append(tweet.retweeted_status.retweet_count)
                    likes.append(tweet.retweeted_status.favorite_count)
                else:
                    retweets.append(tweet.retweet_count)
                    likes.append(tweet.favorite_count)
            # have it wait so we don't rate limit the api
            time.sleep(2)
        data = {'retweets': retweets, 'likes': likes}
        tweets_within_model = markovify.Text(' '.join(tweets_within_week),
                                             retain_original=False)
        tweets_outside_model = markovify.Text(' '.join(tweets_outside_week),
                                              retain_original=False)
        # place 75% more weight on tweets made within the week
        combined_model = markovify.combine(
            [tweets_within_model, tweets_outside_model], [1.75, 1])
        combined_model.compile(inplace=True)
        tweets_file = data_file_path(party, 'tweets')
        tweets_file.write_text(combined_model.to_json())
        data_file = data_file_path(party, 'data')
        data_file.write_text(json.dumps(data))
        print(
            f"{len(tweets_within_week) + len(tweets_outside_week)} tweets scraped from {len(usernames)} {party} accounts"
        )
import markov_novel
import re
import spacy

nlp = spacy.load("en")


class POSifiedText(markovify.Text):
    """markovify.Text whose tokens carry a spaCy POS tag (word::POS)."""

    def word_split(self, sentence):
        return ["::".join((word.orth_, word.pos_)) for word in nlp(sentence)]

    def word_join(self, words):
        sentence = " ".join(word.split("::")[0] for word in words)
        return sentence


# Build the model by folding every file under texts/ into one combined model.
combined_model = None
for (dirpath, _, filenames) in os.walk("texts"):
    for filename in filenames:
        with open(os.path.join(dirpath, filename)) as ind_file:
            ind_text = ind_file.read()
        model = markovify.Text(ind_text)
        if combined_model:
            combined_model = markovify.combine(
                models=[combined_model, model])
        else:
            combined_model = model

novel = markov_novel.Novel(combined_model, chapter_count=1)
novel.write(novel_title='my-novel', filetype='md')
# Markov Chain Model df_text2 = df_text[:] mcm_model1 = markovify.NewlineText(df_text2['headline_text'], state_size = 2) for i in range(10): print(mcm_model1.make_sentence()) # Ensembling 3 Markov models mcm_model1 = markovify.Text(df_text2['headline_text'], state_size = 2) mcm_model2 = markovify.Text(df_text2['headline_text'], state_size = 2) mcm_model3 = markovify.Text(df_text2['headline_text'], state_size = 2) model_combo = markovify.combine([ mcm_model1, mcm_model2, mcm_model3 ], [ 1.5, 1.5, 1 ]) for i in range(5): print(model_combo.make_sentence()) # Part-Of-Speech tagging import re !pip install spacy import spacy !python -m spacy download en_core_web_lg nlp = spacy.load('en_core_web_lg') class POSifiedText(markovify.Text): def word_split(self, sentence):
subreddit = reddit.subreddit('me_irl')

submissions = []
for submission in subreddit.hot(limit=100):
    submissions.append(submission)

comments = []
for x in submissions:
    for y in x.comments.list():
        comments.append(y)

toMark = ""
for comm in comments:
    try:
        toMark += '. ' + comm.body
    except:
        # best-effort: skip entries without a usable body
        # (presumably MoreComments placeholders -- verify)
        pass

redditModel = markovify.Text(toMark)
aynModel = markovify.Text(arand)
marxModel = markovify.Text(marx)
model = markovify.combine([redditModel, aynModel, marxModel])


def output(char=140, mod=model):
    print(mod.make_short_sentence(char))


def tweet():
    out = model.make_short_sentence(140)
    status = api.PostUpdate(out)
    print(out)
] printable = set(string.printable) filter_printable = lambda x: filter(lambda y: y in printable, x) if os.path.exists(dump_file): with open(dump_file, 'rb') as handle: model = markovify.Text.from_dict(pickle.load(handle)) else: print("Generating combined model (once-off)...") model = None for source_file in sources: with open(source_file, 'r') as handle: print("Loading", source_file) text = filter_printable(handle.read()) new_model = markovify.Text(text, retain_original=False) model = markovify.combine( [model, new_model], [ 1.0, 1.0 ] ) if model else new_model with open(dump_file, 'wb') as handle: pickle.dump(model.to_dict(), handle) handle.flush() if __name__ == "__main__": for i in range(0, 100): x = model.make_sentence() #x = model.make_sentence_with_start('AIBU') #x = model.make_short_sentence(50) if x: print x
def test_simple(self):
    """Equal 0.5 weights reproduce the original transition counts."""
    base = markovify.Text(self.sherlock)
    combo = markovify.combine([base, base], [0.5, 0.5])
    assert combo.chain.model == base.chain.model
def gen_model(state_size=2):
    """Combine all chat-source models with hand-tuned weights."""
    sources = [
        gen_facebook_model(state_size),
        gen_whatsapp_model(state_size),
        gen_reddit_model(state_size),
        gen_slack_model(state_size),
        gen_thl_model(state_size),
    ]
    # facebook, whatsapp, reddit, slack, thl
    source_weights = [0.5, 0.5, 1.5, 3.0, 0.2]
    return markovify.combine(sources, source_weights)
def update(self, corpus, contribution=1):
    """Train a chain on `corpus` and merge it into the existing model.

    `contribution` weights the new material relative to the old model's
    weight of 1. Returns self so calls can be chained.
    """
    fresh = Chain(corpus, self.state_size)
    self.model = combine([self.model, fresh], [1, contribution])
    return self
def test_combine_lists(self):
    """Chain models given as item lists are accepted by combine."""
    model_items = list(sherlock_model.chain.model.items())
    combo = markovify.combine([model_items, model_items])
def gen_proper_model(state_size=2):
    """Combine the reddit and slack models, favouring slack 3:1."""
    sources = [gen_reddit_model(state_size), gen_slack_model(state_size)]
    return markovify.combine(sources, [1.0, 3.0])
outputs = []
max_len = max([len(record['single_sentence_models']) for record in data])
# weight each book by its sentence count relative to the largest book
weights = [len(record['single_sentence_models']) / max_len for record in data]

for i in range(sentence_target):
    progress = i / sentence_target
    end_window_norm = (i + 50) / sentence_target
    book_models = []
    for record in data:
        sentence_count = len(record['single_sentence_models'])
        start = int(progress * sentence_count)
        end = int(end_window_norm * sentence_count)
        # keep the window non-empty even when start == end
        end = end if end > start else start + 1
        # combine a sliding window of per-sentence models for this book
        combined_model = markovify.combine(
            record['single_sentence_models'][start:end])
        book_models.append(combined_model)
    multi_model = markovify.combine(book_models, weights)
    new_sent = multi_model.make_sentence(tries=1000)
    if new_sent:
        outputs.append(new_sent)

output_text = ' '.join(outputs)
timestamp = str(int(datetime.datetime.now().timestamp()))
filename = "novel_" + timestamp + ".txt"
with open(filename, "w") as text_file:
    text_file.write(output_text)
print('words', len(output_text.split(' ')))
if "[" in string: return False return True # Apply the cleanups from above cookie_text_split[:] = [x for x in cookie_text_split if excluded(x)] const_text_split[:] = [x for x in const_text_split if exwifted(x)] # Merge the text back into one big blob like markovify expects. (There's probably a better way to do this, but again, fun project. Efficiency's not that important... cookie_text_model = POSifiedText("\n".join(cookie_text_split)) const_text_model = POSifiedText("\n".join(const_text_split)) tweet_text_model = POSifiedText("\n".join(tweet_text_split)) # Combine them into a terrifying structure const_and_cookie_model = markovify.combine( [cookie_text_model, const_text_model]) tweet_and_cookie_model = markovify.combine( [cookie_text_model, tweet_text_model], [4, 1]) everything_model = markovify.combine( [cookie_text_model, const_text_model, tweet_text_model], [4, 1, 1]) # Print a couple lines to the terminal to show that everything's working... print("Examples:") for i in range(5): print(const_and_cookie_model.make_short_sentence(240, tries=25)) # Now, open a temporary file and write some javascript surrounding our story. with open(datadir + "/cookie.js.new", "w+") as file: # NOTE: I don't escape anything here... with bad seed text it'd be quite possible to inject weird js, etc.
def generate_combined_tweet(self, tweet_sets, reply_to):
    """Build an equally-weighted model from `tweet_sets` and reply with it."""
    models = [self.generate_text_model(tweets) for tweets in tweet_sets]
    weights = [1.0 for tweets in tweet_sets]
    combined = markovify.combine(models, weights)
    sentence = combined.make_short_sentence(self.available_length(reply_to))
    return self.generate_reply(sentence, reply_to)