class BeerMarkov():

    def __init__(self, reviews_file, markov_dir):
        self._reviews_file = reviews_file
        self._markov_dir = markov_dir
        self._markov = MarkovChain(markov_dir + '/beer_desc')
        self._name_markov = MarkovChain(markov_dir + '/beer_name')
        self.refresh_database()

    def refresh_database(self):
        with open(self._reviews_file, 'r') as review_data:
            reviews = json.load(review_data)
        reviews_string = [r['desc'] for r in reviews]
        names_string = [r['name'] for r in reviews]
        new_markov = MarkovChain(self._markov_dir + '/beer_desc')
        new_markov.generateDatabase(' '.join(reviews_string))
        new_name_markov = MarkovChain(self._markov_dir + '/beer_name')
        new_name_markov.generateDatabase('.'.join(names_string))
        self._markov = new_markov
        self._name_markov = new_name_markov

    def get_review(self):
        return self._markov.generateString() + '. ' + \
            self._markov.generateString()
def markov(messages):
    # Create an instance of the markov chain. By default, it uses MarkovChain.py's
    # location to store and load its database file. You probably want to give it
    # another location, like so:
    mc = MarkovChain("./markov")
    # To generate the markov chain's language model, in case it's not present:
    # mc.generateDatabase("\n".join(messages))
    # To let the markov chain generate some text, execute:
    for i in xrange(100):
        print mc.generateString()
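# A minimal end-to-end sketch of the PyMarkovChain workflow hinted at above.
# The corpus filename and database path here are made-up placeholders:
from pymarkovchain import MarkovChain

mc = MarkovChain("./markov")           # database file is created/loaded at this path
with open("corpus.txt") as f:          # hypothetical training corpus
    mc.generateDatabase(f.read())      # build the word-transition model
mc.dumpdb()                            # persist it so later runs can skip training
print(mc.generateString())             # emit one generated sentence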
def main():
    auth = tweepy.OAuthHandler(key, secret)
    auth.set_access_token(token, token_secret)
    client = tweepy.API(auth)
    api = tweepy.API(auth)
    tweets = []
    superString = ""
    mc = MarkovChain(markDirectory)
    superString = createSuperString('trump.txt')
    mc.generateDatabase(superString)
    while (True):
        phrase = mc.generateString()
        try:
            print(phrase)
        except UnicodeEncodeError:
            continue
        try:
            answer = input()
            if (answer == 'y'):
                client.update_status(phrase)
        except tweepy.TweepError:
            continue
class EuroMarkov:

    def __init__(self):
        self.mc = MarkovChain("./markovdata")

    def generateCountryList(self):
        countryList = []
        for filename in os.listdir("json_lyrics/2015"):
            countryList.append(os.path.splitext(filename)[0])
        return countryList

    def loadFiles(self, startYear, endYear, countryList):
        model = ""
        for year in range(startYear, endYear + 1):
            for country in countryList:
                fname = "json_lyrics/" + str(year) + "/" + country + ".json"
                if os.path.isfile(fname):
                    with open(fname, "r") as myfile:
                        data = json.load(myfile)
                        model += (data['lyrics']) + '\n'
        return model

    def runMarkov(self, model):
        self.mc.generateDatabase(model)

    def generateString(self):
        return self.mc.generateString()
class Michiov(object):
    def __init__(self, autogen=True,
                 markovdb=os.path.expanduser("~/markov"),
                 twcreds=os.path.expanduser("~/.michiov_twitter_credentials"),
                 twappcreds=os.path.expanduser("~/.michiov_twitter_appdata")):
        self.mc = MarkovChain(markovdb)
        self.reload()
        if not os.path.exists(twappcreds):
            print("Lack of app creds")
            sys.exit(1)
        twcons = json.loads(open(twappcreds).read())
        conskey = twcons['key']
        conssec = twcons['secret']
        while not os.path.exists(twcreds):
            twitter.oauth_dance("MPRZ Tech Labs", conskey, conssec, twcreds)
        oauth_token, oauth_secret = twitter.read_token_file(twcreds)
        self.t = twitter.Twitter(auth=twitter.OAuth(oauth_token, oauth_secret, conskey, conssec))

    def should(self):
        ret = input("Should I send it? (y/N) ")
        return ("y" in ret or "Y" in ret)

    def qas(self):
        idea = self.mc.generateString()
        print("Generated: %s" % idea)
        if self.should():
            self.t.statuses.update(status=idea)

    def loop(self):
        try:
            while True:
                self.qas()
                #self.reload()
        except KeyboardInterrupt:
            pass

    def reload(self):
        with open("markovpredb.txt") as file:
            self.mc.generateDatabase(file.read())
def fetch_lyrics(artist, lines):
    API_KEY = os.environ.get('API_KEY')
    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()

    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain('./markov')
    mc.generateDatabase(lyrics)

    # Add lines of lyrics
    result = []
    for line in range(0, lines):
        line_string = mc.generateString()
        result.append(line_string)

    return result
def lyrics():
    artist = request.form['artist']
    lines = int(request.form['lines'])

    if not artist:
        return redirect(url_for('index'))

    # Get a response of sample lyrics from the artist
    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()

    # Parse results into a long string of lyrics
    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain()
    mc.generateDatabase(lyrics)

    # Add lines of lyrics
    result = []
    for line in range(0, lines):
        result.append(mc.generateString())

    return render_template('lyrics.html', result=result, artist=artist)
def markov(msg, botName, channel, db):
    if msg.rawMatchRe('!markov (?P<source>#?[a-zA-Z]\S*)\s*$') or \
            msg.rawMatchRe('what (would|does) (the )?(?P<source>#?[a-zA-Z]\S+) say\??'):
        m = msg.getRegExpResult()
        source = m.group('source')
        if source[0] == '#':
            logsList = db.getLogs(chan=source, lines=2000)
        else:
            logsList = db.getLogs(nick=source, lines=2000)
        if len(logsList) < 100:
            hexchat.command("msg %s Not enough data for %s" % (channel, source))
        else:
            mc = MarkovChain("./markov_db")
            ircText = ''
            for line in logsList:
                # disqualify lines that are too short or are certain bot functions that start with '!'
                if len(line.split(' ')) >= 5 and line[0] != '!':
                    ircText += line.replace('.', '') + '. '
            mc.generateDatabase(ircText)
            markovOutput = mc.generateString().capitalize()
            hexchat.command('msg %s "%s" --%s' % (channel, markovOutput, source))
        return True
    return False
class TextGenerator:
    def __init__(self, generatorName, trainString, prefixLength):
        self.generatorName = generatorName
        self.chain = MarkovChain()
        self.chain.generateDatabase(trainString, n=prefixLength)
        self.currState = []
        self.hyphenator = Hyphenator('en_US')
        self.syllableQ = Queue()
        self.stripPattern = re.compile('[\W_]+')
        while (len(self.currState) < prefixLength):
            self.currState = self.chain.generateString().split()[-(prefixLength + 1):-1]

    def load_next_word(self):
        nextword = ""
        try:
            while nextword == "":
                nextword = self.stripPattern.sub('', self.chain._nextWord(self.currState))
            self.currState = self.currState[1:]
            self.currState.append(nextword)
            if len(nextword) < 4:
                # because the hyphenator doesn't work for words shorter than 4 letters
                self.syllableQ.put(nextword)
            else:
                for syllable in self.hyphenator.syllables(nextword):
                    self.syllableQ.put(syllable)
        except UnicodeEncodeError:
            print("unicode error")

    def get_next_syllable(self):
        if (self.syllableQ.empty()):
            self.load_next_word()
        return self.syllableQ.get()
def lyrics():
    artist = request.form['artist']
    lines = int(request.form['lines'])

    if not artist:
        return redirect(url_for('index'))

    # Get a response of sample lyrics from the provided artist
    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()

    # Parse results into a long string of lyrics
    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain()
    mc.generateDatabase(lyrics)

    result = []
    for line in range(0, lines):
        result.append(mc.generateString())

    return render_template('lyrics.html', result=result, artist=artist)
class Haley(object): def __init__(self, backend): self.backend = backend self.mc = MarkovChain("markov.db") def loop(self): self.backend.connect() while True: for event in self.backend.update(): try: if event["type"] == "text": times = re.search(r"(?P<nm>\d+) times", event["content"].lower()) if times: if int(times.group("nm")) > 0: times = min(5,int(times.group("nm"))) else: self.backend.say("Okay, I won't say anything... Baka.") continue else: times = 1 for i in range(times): if "hi" in detox(event["content"].lower()).split() or "hello" in detox(event["content"].lower()).split(): self.backend.say(random.choice(["%s! Tutturuuu!","Hello, %s, so it was you making the noise up there!"]) % event["by"]) continue if "nano" in event["content"].lower() or "hakase" in event["content"].lower(): self.backend.say("%s%s"%("HAKASE"*len(re.findall("nano", event["content"].lower())),"NANO"*len(re.findall("hakase", event["content"].lower())))) continue if event["mentioned"]: if "roll" in detox(event["content"].lower()).split(): numb = re.search(r"(d|k)(?P<nm>\d+)", event["content"].lower()) if numb and int(numb.group("nm")) > 0: self.backend.say("Aaaand... %d!" % (random.randrange(1,int(numb.group("nm"))+1))) continue else: self.backend.say("Who do you think you are, rolling impossible dice... Baka.") continue if "say" in detox(event["content"].lower()).split(): if "something" in detox(event["content"].lower()).split(): tosay = self.mc.generateString() elif "name" in detox(event["content"].lower()).split(): tosay = self.backend.get_name(event["by"]) self.backend.say(tosay) continue if "xkcd" in detox(event["content"].lower()).split(): if "random" in detox(event["content"].lower()).split(): x = xkcd.getRandomComic() else: numb = re.search(r"(?P<nm>\d+)", event["content"]) if numb: x = xkcd.Comic(int(numb.group("nm"))) else: x = xkcd.getLatestComic() self.backend.say("*%s* - %s - _%s_" % (x.getTitle(), x.getImageLink(), x.getAltText())) continue self.backend.say("Hmm?") continue except: self.backend.say(str(sys.exc_info()[0]))
def markov():
    """A simple markov function"""
    mc = MarkovChain("./tempchain")
    with open(CORPUS, 'r') as f:
        data = f.read()
    mc.generateDatabase(data)
    return mc.generateString()
async def markov(self, ctx):
    """Get a response using a markov chain generated from the channel's recent text"""
    results = ''
    async for message in self.bot.logs_from(ctx.message.channel, limit=10):
        line = message.content
        results += line + "\n"
    # MarkovChain() takes a database *path*, so the collected text goes to
    # generateDatabase() as a plain string rather than to the constructor.
    mc = MarkovChain()
    mc.generateDatabase(results)
    msg = mc.generateString()
    await self.bot.say(msg)
def main():
    args = parser.parse_args()
    dirname = os.path.split(__file__)[0]
    filename = os.path.join(dirname, "phil.txt")
    title_filename = os.path.join(dirname, "phil_titles.txt")
    dbname1 = "database.pkl"
    dbname2 = "database_title.pkl"
    new_db = not os.path.exists(dbname1)
    body_maker = MarkovChain(dbname1)
    title_maker = MarkovChain(dbname2)
    if new_db:
        title_maker.generateDatabase(open(title_filename).read())
        title_maker.dumpdb()
        body_maker.generateDatabase(open(filename).read())
        body_maker.dumpdb()
    name = title_maker.generateString()
    body = ' '.join([body_maker.generateString() + '.' for i in xrange(3)])
    if args.repo:
        if args.token:
            token = args.token
        else:
            token_filename = os.path.join(dirname, "token.txt")
            if not os.path.exists(token_filename):
                sys.stderr.write("Please either specify --token=XXX on the command line or put a github API token in token.txt\n")
                sys.stderr.write("You can generate a token here: https://github.com/settings/tokens\n")
                sys.exit(1)
            token = open(token_filename).read().strip()
        import github
        gh = github.Github(token)
        user = gh.get_user()
        repo = user.get_repo(args.repo)
        issue = repo.create_issue(title=name, body=body)
        print issue.html_url
    else:
        print
        print name
        print "-" * len(name)
        print body
def main(args):
    markov_filename = "./" + args.subreddit + ".mcd"
    # this must come before the creation of the Markov Chain
    new_chain = os.path.isfile(markov_filename) == False
    mc = MarkovChain(markov_filename)
    if args.new or new_chain:
        titles = getTitles(getSubmissions(100, args.subreddit))
        training_data = str.join('.', titles)
        mc.generateDatabase(training_data)
    N = args.num_submissions
    while N > 0:
        print(mc.generateString())
        N -= 1
def joke():
    """
    Produces a joke based on the existing database (creates database if one
    doesn't already exist).
    """
    if not os.path.isfile('markovdb'):
        generate_database()
    chain = MarkovChain()
    generated_joke = ''
    while len(generated_joke) < MIN_LENGTH:
        generated_joke = chain.generateString()
    return generated_joke
def main():
    df = fill_song_pd()
    lyrics = ""  # going to be one huge string
    db_name = './markov/' + genre
    mc = MarkovChain(db_name)
    # creating new markov dataset if it doesn't exist
    if not os.path.isfile(db_name):
        print("creating new data set based on the " + str(genre) + " genre...")
        for index, row in df.iterrows():
            if row['genre'] == genre_dict[genre]:
                lyrics += row["lyrics"] + " "
        mc.generateDatabase(lyrics)
        mc.dumpdb()
    for i in range(int(lines) + 1):
        print(mc.generateString())
class markovbuild(object):
    '''Builds a markov chain DB and outputs data'''

    def __init__(self, target, data, lines=5):
        self.database = '/tmp/markov_%s.db' % target
        self.lines = lines
        self.data = '\n'.join(data)
        self.mchain = MarkovChain(self.database)

    def build(self):
        '''Builds a markov chain'''
        self.mchain.generateDatabase(self.data)

    def output(self):
        '''Outputs markov chain data'''
        self.build()
        return [self.mchain.generateString() for x in xrange(0, self.lines)]
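# A hedged usage sketch for the markovbuild helper above; the target name and
# input lines are made-up placeholders:
log_lines = ["first example sentence.", "second example sentence."]
builder = markovbuild("example_channel", log_lines, lines=3)
for sentence in builder.output():      # output() trains the chain, then samples it
    print(sentence)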
def generate_database():
    """
    Generates the database that the Markov Chain will use to make its word-by-
    word predictions. It will attempt to create this file in the same directory
    as where the script is currently located.
    """
    currpath = os.path.dirname(__file__)
    path_to_data = os.path.join(currpath, 'in.txt')
    chain = MarkovChain()
    with open(path_to_data) as f:
        chain.generateDatabase(f.read())
    chain.dumpdb()
    print(chain.generateString())
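# Sketch of a follow-up run that reuses the dumped database instead of retraining,
# assuming the default constructor resolves to the same 'markovdb' file that
# dumpdb() wrote (which is what the joke() snippet above relies on):
from pymarkovchain import MarkovChain

chain = MarkovChain()                  # loads the previously dumped database if present
print(chain.generateString())          # no generateDatabase() call needed this time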
def analyze(self):  # GenerateModel
    """ Generate a Markov chain based on retrieved strings. """
    mc = MarkovChain()
    mc.generateDatabase(self.text)
    result = r''
    print "Generating:"
    for i in range(0, 10):
        print "Sentence %d" % i
        # Create 10 sentences
        sentence = mc.generateString()
        result += sentence.capitalize() + '. '
    return result
def poem():
    story = str(request.form['story'].encode('ascii', 'ignore'))
    lines = int(request.form['lines'])

    if not story:
        return redirect(url_for('index'))

    mc = MarkovChain()
    mc.generateDatabase(story)

    result = []
    for line in range(0, lines):
        new_line = mc.generateString()
        if new_line not in result:
            result.append(new_line)

    return render_template('poem.html', result=result, story=story)
class MarkovBot(BotPlugin):
    def __init__(self):
        self.markov = MarkovChain()

    @botcmd
    def talk(self, mess, args):
        """ Generate a sentence based on database """
        return self.markov.generateString()

    @botcmd
    def complete(self, mess, args):
        """ Try to complete a sentence """
        return self.markov.generateStringWithSeed(args)

    @botcmd
    def gendbfromfile(self, mess, args):
        """ Generate markov chain word database """
        try:
            with open(args) as txtFile:
                txt = txtFile.read()
        except IOError as e:
            return 'Error: could not open text file'
        # At this point, we've got the file contents
        if self.markov.generateDatabase(txt):
            return 'Done.'
        else:
            return 'Error: Could not generate database'

    @botcmd
    def gendbfromstring(self, mess, args):
        if self.markov.generateDatabase(args):
            return 'Done.'
        else:
            return 'Error: Could not generate database from String'

    @botcmd
    def gendbfromurl(self, mess, args):
        req = requests.get(args)
        if req.ok and self.markov.generateDatabase(req.content):
            return 'Done.'
        else:
            return 'Error: Could not generate database from URL'
async def snakeme(self, ctx: Context):
    """
    How would I talk if I were a snake?
    :param ctx: context
    :return: you, snakified based on your Discord message history
    """
    mentions = list(filter(lambda m: m.id != self.bot.user.id, ctx.message.mentions))
    author = ctx.message.author if (len(mentions) == 0) else ctx.message.mentions[0]
    channel: discord.TextChannel = ctx.channel

    channels = [
        channel for channel in ctx.message.guild.channels
        if isinstance(channel, discord.TextChannel)
    ]
    channels_messages = [
        await channel.history(limit=10000).flatten() for channel in channels
    ]
    msgs = [
        msg for channel_messages in channels_messages for msg in channel_messages
    ][:MSG_MAX]

    my_msgs = list(filter(lambda msg: msg.author.id == author.id, msgs))
    my_msgs_content = "\n".join(list(map(lambda x: x.content, my_msgs)))

    mc = MarkovChain()
    mc.generateDatabase(my_msgs_content)
    sentence = mc.generateString()

    snakeme = discord.Embed()
    snakeme.set_author(
        name="{0}#{1}".format(author.name, author.discriminator),
        icon_url="https://cdn.discordapp.com/avatars/{0}/{1}".format(author.id, author.avatar)
        if author.avatar is not None
        else "https://img00.deviantart.net/eee3/i/2017/168/3/4/"
             "discord__app__avatar_rev1_by_nodeviantarthere-dbd2tp9.png")
    snakeme.description = "*{0}*".format(
        snakify(sentence) if sentence is not None else ":question: Not enough messages")
    await channel.send(embed=snakeme)
def main():
    with open("test.txt", "r") as myfile:
        data = myfile.read().replace('\n', '')
    mc = MarkovChain("./markovdb")

    # Start a session so we can have persistant cookies
    session = requests.Session()

    # This is the form data that the page sends when logging in
    login_data = {
        'user_email': EMAIL,
        'user_password': PASSWORD,
        'login': '******',
    }

    # Authenticate
    r = session.post(URL, data=login_data)

    mc.generateDatabase(data)
    for x in range(0, 5):
        r = os.urandom(16).encode('hex')
        title = "Report#" + str(x) + " " + str(r)
        description = mc.generateString()
        # europe only
        y, x = uniform(-17, 43), uniform(28, 55)
        print(title)
        # Create new report based on random content
        report_data = {
            'title': title,
            'category': "2",
            'description': description,
            'latitude': x,
            'longitude': y,
            'newreport': "1",
        }
        r = session.post(newRep, data=report_data)
def generate_horoscope(chain: MarkovChain):
    return chain.generateString()
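# Hedged sketch of how a caller might build the chain that generate_horoscope()
# expects; the corpus filename and database path are assumptions:
from pymarkovchain import MarkovChain

def build_horoscope_chain(corpus_path="horoscopes.txt", db_path="./horoscope_db"):
    chain = MarkovChain(db_path)
    with open(corpus_path) as f:
        chain.generateDatabase(f.read())
    return chain

print(generate_horoscope(build_horoscope_chain()))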
# coding: utf-8

# In[1]:

from pymarkovchain import MarkovChain

mc = MarkovChain(dbFilePath='./database.pkl')
tweet_text = mc.generateString()

import json
with open('credentials.json') as data_file:
    credentials = json.load(data_file)

import tweepy
auth = tweepy.OAuthHandler(consumer_key=credentials['tw_key'],
                           consumer_secret=credentials['tw_secret'])
auth.set_access_token(key=credentials['tw_access_token'],
                      secret=credentials['tw_access_token_secret'])
tw = tweepy.API(auth)
tw.update_status(status=tweet_text)

# In[ ]:
# read in data and clean
data = json.load(open("listicles.json"))
text = "\n".join([d['title'] for d in data if d is not ""]).lower()
regex = re.compile('[%s]' % re.escape(string.punctuation))
text = regex.sub(" b", text)

# generate MC data
mc = MarkovChain("./markov")
mc.generateDatabase(text)

f = open("potential_tweets.txt", "a")

# generate and evaluate tweets
while 1:
    try:
        seed = sys.argv[1]
    except:
        seed = None
    if seed is not None:
        tweet = mc.generateStringWithSeed(seed).title()
    else:
        tweet = mc.generateString().title()
    print tweet
    answer = raw_input("Tweet this text? (yes|no|edit) ")
    if answer == "yes":
        f.write(tweet)
        break
    elif answer == "edit":
        tweet = raw_input("Enter in the edited text: ")
        f.write(tweet)
        break
print(file)
with open(file) as f:
    text = f.read()
verse_lyrics = parse_file(text)
verse_lyrics = re.sub("[\[\]\(\)\"]", " ", verse_lyrics)
verse_lyrics = re.sub(" +", " ", verse_lyrics)
all_lyrics += verse_lyrics

mc = MarkovChain("test")
mc.generateDatabase(all_lyrics)

output_directory = "generated_lyrics/"
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

number_of_phrases = 8
num_files = 1000

for i in range(num_files):
    # Printing a string
    with open(output_directory + "{}.txt".format(i), "w") as f:
        for i in range(0, int(number_of_phrases)):
            while True:
                line = mc.generateString()
                if len(line) > 1:
                    break
            print(line)
            f.write(line + "\n")
    print("")
def main(username): r = praw.Reddit(user_agent='trollolol v0.1') r.config.decode_html_entities = True m = MarkovChain('markov-data/%s.chain' % username) last_comment = None try: last_comment = Node.objects( username=username).order_by('-created').first() if last_comment: print("Checking for new messages.") comments = r.get_redditor(username).get_comments( limit=500, params={'after': last_comment.node_id}) else: raise except: print("No messages fetched yet, doing inital import") comments = r.get_redditor(username).get_comments(limit=500) for comment in comments: try: node = Node.objects.get(node_id=comment.name) except: node = Node(node_id=comment.name, parent_id=comment.parent_id, body=comment.body, created=comment.created, username=username) node.save() first_comment = Node.objects( username=username).order_by('+created').first() if first_comment: print("Checking for messages before %s." % first_comment.node_id) comments = r.get_redditor(username).get_comments( limit=500, params={'before': first_comment.node_id}) for comment in comments: try: node = Node.objects.get(node_id=comment.name) except: node = Node(node_id=comment.name, parent_id=comment.parent_id, body=comment.body, created=comment.created, username=username) node.save() comments = Node.objects(username=username).all() corpus = [] for comment in comments: corpus.append(comment.body) shuffle(corpus) if len(corpus) > 0: print( "We have %i messages to work with. Building new markov corpus now." % len(corpus)) m.generateDatabase(" ".join(corpus)) print("Looking for acceptable output for first round of transforms.") output = [] tries = 0 while len(output) < 10: tries = tries + 1 result = m.generateString() if tries < 100: if len(result.split(" ")) >= 10: sys.stdout.write("x") output.append(result) else: sys.stdout.write(".") print("") response = "" for result in output: response = response + " " + result print response else: print("No comments found.")
run_regex = r'[^a-zA-Z. #@]+'
max_id = None
text_statuses = []
statuses = [1]

while len(text_statuses) < get_count and len(statuses) != 0:
    if max_id:
        statuses = api.GetUserTimeline(screen_name=username, count=10000,
                                       max_id=max_id, include_rts=False)
    else:
        statuses = api.GetUserTimeline(screen_name=username, count=10000,
                                       include_rts=False)
    if len(statuses) > 0:
        max_id = min([status.id for status in statuses]) - 1
    text_statuses = text_statuses + [status.text for status in statuses]
    print("got {} of {} statuses".format(len(text_statuses), get_count))

train_text = ".".join(text_statuses)
if run_regex:
    train_text = re.sub(run_regex, ' ', train_text).replace('\n', '')

print('generating db on ' + username)
mc = MarkovChain("./markov")
mc.generateDatabase(train_text)
print('done generating db')

while True:
    output = mc.generateString()
    print(output)
    time.sleep(1)
class Trollette: def __init__(self): self.presenter = "" self.title = "" self.slide_count = 0 self.slide_min = 15 self.slide_max = 25 self.console = None self.output_dir = "" with open("terms.json", "r") as f: self.terms = json.load(f) with open(os.path.join("GIFs", "hashes.json"), "r") as f: self.gifs = json.load(f) with open(os.path.join("Images", "hashes.json"), "r") as f: self.images = json.load(f) # Load up the proverb data with open(os.path.join("Proverbs", "facts"), "r") as f: self.proverb_lines = f.readlines() self.proverbs = map(string.strip, self.proverb_lines) self.proverb_markov = MarkovChain("markov.db") self.proverb_markov.generateDatabase("".join(self.proverb_lines), n=1) # Make the text data # self.my_face = comptroller.face(self.title) # self.slide_titles = self.my_face.get_titles(50) # self.slide_bullets = self.my_face.get_bullets(100) self.my_face = Face("") self.slide_titles = ["shit", "balls", "butts"] self.slide_bullets = ["butts", "do", "stuff", "f***s", "more f***s"] self.ppt = Presentation() self.slide_weights = SlideWeights() def generate_slide_deck(self): # Create a place to put data and resources self.output_dir = os.path.join( "Output", "%s_%s_%s" % (self.title, self.presenter, datetime.datetime.strftime(datetime.datetime.now(), '%Y_%m_%d_%H_%M_%S'))) self.resources_dir = os.path.join(self.output_dir, "Resources") # Start with a fresh PowerPoint self.ppt = Presentation() # Make sure the directories exist try: os.makedirs(self.output_dir) os.makedirs(self.resources_dir) except: self.log("Directory %s already exists, overwriting..." % self.output_dir) self.slide_count = random.randint(self.slide_min, self.slide_max) self.log("Generating a slide deck of %d slides about %s" % (self.slide_count, self.title)) try: self.log("Getting slide content...") self.my_face.set_topic(self.title) self.log("Generating slide titles...") self.slide_titles = self.my_face.get_titles(self.slide_count) self.log("Generating slide bullets...") self.slide_bullets = self.my_face.get_bullets(self.slide_count * 3) except: self.log( "Problem generating content for a talk on %s, exiting..." 
% self.title) return #self.farm_gif_term(self.title) #sp = self.title.split(" ") #if len(sp) > 1: # for i in range(len(sp)): # if len(sp[i]) > 5: # self.farm_gif_term(sp[i]) #self.farm_image_term(self.title) self.log_slide_weights() self.create_title_slide() self.create_slides() slide_path = os.path.join(self.output_dir, "%s.pptx" % self.title) self.ppt.save(slide_path) self.log("Successfully generated PPT on %s to %s" % (self.title, slide_path)) def create_title_slide(self): title_slide_layout = self.ppt.slide_layouts[0] slide = self.ppt.slides.add_slide(title_slide_layout) title = slide.shapes.title subtitle = slide.placeholders[1] title.text = self.title subtitle.text = self.presenter def create_slides(self): for i in range(self.slide_count): choice = self.slide_weights.choose_weighted() self.log(" Generating slide #%d: %s" % (i + 1, choice)) new_slide_layout = None if choice == "Single GIF": ns = self.create_gif_slide(random.choice(self.slide_titles), self.get_giphy_search_term(), i) elif choice == "Full Slide GIF": ns = self.create_full_gif_slide(self.get_giphy_search_term(), i) elif choice == "Single Image": ns = self.create_image_slide(random.choice(self.slide_titles), self.get_image_search_term(), i) elif choice == "Full Slide Image": ns = self.create_full_image_slide(self.get_image_search_term(), i) elif choice == "Information": ns = self.create_info_slide(i) elif choice == "Quotation": ns = self.create_quote_slide() def create_single_full_image_slide(self, image_path): blank_slide_layout = self.ppt.slide_layouts[6] new_slide = self.ppt.slides.add_slide(blank_slide_layout) left = Inches(0) top = Inches(0) height = Inches(8) width = Inches(10) pic = new_slide.shapes.add_picture(image_path, left, top, height=height, width=width) return new_slide def create_single_image_slide(self, slide_title, image_path): blank_slide_layout = self.ppt.slide_layouts[1] new_slide = self.ppt.slides.add_slide(blank_slide_layout) for shape in new_slide.shapes: if shape.is_placeholder: phf = shape.placeholder_format if phf.type == 1: shape.text = slide_title left = Inches(1) top = Inches(1) height = Inches(6) width = Inches(8) pic = new_slide.shapes.add_picture(image_path, left, top, height=height, width=width) return new_slide def download_gif(self, term, slide_num): # If we have at least 3 local gifs, use one of those if (term in self.gifs) and (len(self.gifs[term]) > 3): return os.path.join("GIFs", "%s.gif" % random.choice(self.gifs[term])) try: # Download the gif img = translate(term) image_path = os.path.join(self.resources_dir, "%d.gif" % slide_num) wget.download(img.fixed_height.url, image_path) file_hasher = hashlib.md5() with open(image_path, "rb") as f: file_hasher.update(f.read()) file_md5 = file_hasher.hexdigest() if not (term in self.gifs): self.gifs[term] = [] if not (file_md5 in self.gifs[term]): self.gifs[term].append(file_md5) shutil.copy(image_path, os.path.join("GIFs", "%s.gif" % file_md5)) with open(os.path.join("GIFs", "hashes.json"), "w") as f: json.dump(self.gifs, f, indent=2) return image_path except: return None def download_image(self, term, slide_num): # If we have at least 3 local images, use one of those if (term in self.images) and (len(self.images[term]) > 3): return os.path.join("Images", "%s.img" % random.choice(self.images[term])) try: search_term = term if (random.randint(0, 100) % 2) == 0: search_term = self.title download_attempts = 0 image_bytes = "" image_path = "" while download_attempts < 10: fetcher = urllib2.build_opener() start_index = random.randint(0, 50) 
search_url = "http://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=%s&start=%s" % ( search_term, str(start_index)) f = fetcher.open(search_url) deserialized_output = simplejson.load(f) image_url = deserialized_output['responseData']['results'][ random.randint( 0, len(deserialized_output['responseData']['results']) - 1)]['unescapedUrl'] image_path = os.path.join(self.resources_dir, "%d.img" % slide_num) wget.download(image_url, image_path) with open(image_path, "rb") as f: image_bytes = f.read() if (not image_bytes.startswith("<!DOCTYPE html>")) and ( not image_bytes.startswith("<html>")): break download_attempts += 1 self.log( " Attempting to download image about %s failed try #%d" % (search_term, download_attempts)) if image_bytes.startswith( "<!DOCTYPE html") or image_bytes.startswith("<html>"): return None file_hasher = hashlib.md5() file_hasher.update(image_bytes) file_md5 = file_hasher.hexdigest() if not (term in self.images): self.images[term] = [] if not (file_md5 in self.images[term]): self.images[term].append(file_md5) shutil.copy(image_path, os.path.join("Images", "%s.img" % file_md5)) with open(os.path.join("Images", "hashes.json"), "w") as f: json.dump(self.images, f, indent=2) return image_path except: return None def create_gif_slide(self, slide_title, term, slide_num): image_path = self.download_gif(term, slide_num) if image_path: return self.create_single_image_slide(slide_title, image_path) def create_full_gif_slide(self, term, slide_num): image_path = self.download_gif(term, slide_num) if image_path: return self.create_single_full_image_slide(image_path) def create_image_slide(self, slide_title, term, slide_num): while True: try: image_path = self.download_image(term, slide_num) if image_path: return self.create_single_image_slide( slide_title, image_path) except: pass def create_full_image_slide(self, term, slide_num): image_path = self.download_image(term, slide_num) if image_path: return self.create_single_full_image_slide(image_path) def create_info_slide(self, slide_num): slide_title_info = random.choice(self.slide_titles) slide_title = slide_title_info if (random.randint(0, 100) % 3) == 0: slide_title = self.get_markov_proverb() sb = random.sample(self.slide_bullets, random.randint(1, 4)) if (random.randint(0, 100) % 4) == 0: sb.append(self.get_markov_proverb()) bullet_slide_layout = self.ppt.slide_layouts[1] new_slide = self.ppt.slides.add_slide(bullet_slide_layout) shapes = new_slide.shapes title_shape = shapes.title body_shape = shapes.placeholders[1] body_shape.width = Inches(4) body_shape.left = Inches(1) body_shape.top = Inches(2) title_shape.text = slide_title tf = body_shape.text_frame for b in sb: p = tf.add_paragraph() #p.text = b p.alignment = PP_PARAGRAPH_ALIGNMENT.LEFT run1 = p.add_run() run1.text = b font1 = run1.font font1.name = 'Sans Serif' font1.size = Pt(20) font1.italic = True font1.bold = True image_path = None attempts = 0 while attempts < 10: try: tries = 0 while (not image_path) and (tries < 10): if (random.randint(0, 100) % 2) == 0: search_term = self.get_giphy_search_term() image_path = self.download_gif(search_term, slide_num) else: search_term = self.get_image_search_term() image_path = self.download_image( search_term, slide_num) tries += 1 if tries < 10: left = Inches(5.5) top = Inches(3) #height = Inches(3) width = Inches(3) pic = new_slide.shapes.add_picture(image_path, left, top, width=width) break attempts += 1 except: attempts += 1 return new_slide def create_quote_slide(self): # Pick a random quote category and quote cat 
= random.choice(self.terms["quote_categories"]) with open(os.path.join("Quotes", "quotes_%s.json" % cat)) as f: q1 = random.choice(json.load(f)) cat = random.choice(self.terms["quote_categories"]) with open(os.path.join("Quotes", "quotes_%s.json" % cat)) as f: q2 = random.choice(json.load(f)) quote_text = "\"%s\"" % q1["quote"] if (random.randint(0, 100) % 5) == 0: quote_text = random.choice(self.proverbs) quote_author = "- %s" % q2["name"] blank_slide_layout = self.ppt.slide_layouts[2] new_slide = self.ppt.slides.add_slide(blank_slide_layout) for shape in new_slide.shapes: if shape.is_placeholder: phf = shape.placeholder_format if phf.type == 1: # Put in the quote title shape.text = random.choice(self.terms["quote_titles"]) elif phf.type == 2: text_frame = shape.text_frame # Create the quote text paragraph p1 = text_frame.paragraphs[0] p1.alignment = PP_PARAGRAPH_ALIGNMENT.LEFT run1 = p1.add_run() run1.text = quote_text font1 = run1.font font1.name = 'Sans Serif' font1.size = Pt(30) font1.italic = True font1.bold = True # Create the Author text paragraph p2 = text_frame.add_paragraph() p2.alignment = PP_PARAGRAPH_ALIGNMENT.RIGHT run2 = p2.add_run() run2.text = quote_author font2 = run2.font font2.name = 'Calibri' font2.size = Pt(24) return new_slide def get_giphy_search_term(self): st = random.choice(self.terms["giphy_searches"]) if (random.randint(0, 100) % 5) == 0: st = self.title return st def get_image_search_term(self): st = random.choice(self.terms["image_searches"]) if (random.randint(0, 100) % 2) == 0: st = self.title return st def get_proverb(self): return random.choice(self.proverb_lines) def get_markov_proverb(self, min=5, max=10): b = "" while True: b = self.proverb_markov.generateString() s = b.split(" ") if min <= len(s) <= max: break return b def add_term(self, term_type, term): if term in self.terms[term_type]: return "Term \"%s\" is already in %s!" % (term, term_type) else: self.terms[term_type].append(term) with open("terms.json", "w") as f: json.dump(self.terms, f, indent=4) return "Term \"%s\" added to %s." % (term, term_type) def delete_term(self, term_type, term): if not (term in self.terms[term_type]): return "Term \"%s\" isn't in %s, can't delete!" % (term, term_type) else: self.terms[term_type].remove(term) with open("terms.json", "w") as f: json.dump(self.terms, f, indent=4) return "Term \"%s\" removed from %s." % (term, term_type) def show_term_counts(self, term_type, term_json): log_str = "%s Terms:\n" % term_type for term in self.terms[term_type]: if term in term_json: log_str += " %s: %d\n" % (term, len(term_json[term])) else: log_str += " %s: 0\n" % term self.log(log_str) def get_file_md5(self, file_path): with open(file_path, "rb") as f: image_bytes = f.read() file_hasher = hashlib.md5() file_hasher.update(image_bytes) return file_hasher.hexdigest() def farm_image_term(self, term, amount=25, threshold=10): self.log("Farming images for %s..." 
% term) if not (term in self.images): self.images[term] = [] attempt_count = 0 while (attempt_count < threshold) and (len(self.images[term]) < amount): myopener = MyOpener() page = myopener.open( 'https://www.google.pt/search?q=%s&source=lnms&tbm=isch&sa=X&tbs=isz:l&tbm=isch' % term.replace(" ", "+")) html = page.read() for match in re.finditer( r'<a href="/imgres\?imgurl=(.*?)&imgrefurl', html, re.IGNORECASE | re.DOTALL | re.MULTILINE): if len(self.images[term]) >= amount: break try: os.remove("test.img") except: pass try: path = urlparse.urlsplit(match.group(1)).path self.log(" Downloading %s" % match.group(1)) myopener.retrieve(match.group(1), "test.img") image_md5 = self.get_file_md5("test.img") if not (image_md5 in self.images[term]): self.images[term].append(image_md5) shutil.copy( "test.img", os.path.join("Images", "%s.img" % image_md5)) os.remove("test.img") self.log(" Image saved to archive. %d/%d images." % (len(self.images[term]), amount)) attempt_count = 0 else: self.log(" Already had image!") attempt_count += 1 except: self.log(" Downloading failed") attempt_count += 1 self.log("Farming of %s images complete, now holding %d images" % (term, len(self.images[term]))) with open(os.path.join("Images", "hashes.json"), "w") as f: json.dump(self.images, f, indent=2) def farm_images(self, amount=25, threshold=10): self.show_term_counts("image_searches", self.images) all_farm = self.terms["image_searches"] all_farm.extend(self.terms["talk_titles"]) for term in all_farm: self.farm_image_term(term, amount, threshold) def farm_gif_term(self, term, amount=25, threshold=10): self.log("Farming GIFs for %s..." % term) if not (term in self.gifs): self.gifs[term] = [] attempt_count = 0 while (attempt_count < threshold) and (len(self.gifs[term]) < amount): image_path = "test.gif" try: os.remove(image_path) except: pass try: img = translate(term) wget.download(img.fixed_height.url, image_path) image_md5 = self.get_file_md5("test.gif") if not (image_md5 in self.gifs[term]): self.gifs[term].append(image_md5) shutil.copy(image_path, os.path.join("GIFs", "%s.gif" % image_md5)) self.log(" GIF saved to archive. %d/%d GIFs." % (len(self.gifs[term]), amount)) attempt_count = 0 else: self.log(" Already had GIF!") attempt_count += 1 except: self.log(" Downloading failed") attempt_count += 1 self.log("Farming of %s GIFs complete, now holding %d GIFs" % (term, len(self.gifs[term]))) with open(os.path.join("GIFs", "hashes.json"), "w") as f: json.dump(self.gifs, f, indent=2) def farm_gifs(self, amount=25, threshold=10): self.show_term_counts("giphy_searches", self.gifs) all_farm = self.terms["giphy_searches"] all_farm.extend(self.terms["talk_titles"]) for term in all_farm: self.log("Farming GIFs for %s..." % term) if not (term in self.gifs): self.gifs[term] = [] self.farm_gif_term(term, amount, threshold) def farm_content(self, all_content): for talk_title in self.terms["talk_titles"]: talk_path = os.path.join("Content", "%s.txt" % talk_title) # Either we're replacing all content or we're only replacing files that don't exist if all_content or (not os.path.exists(talk_path)): self.log("Farming data on %s..." 
% talk_title) with open(talk_path, "w") as f: content = self.my_face.fully_research_topic( talk_title, self.log) if type(content) is str: clean_content = content else: clean_content = unicodedata.normalize( 'NFKD', content).encode('ascii', 'ignore') f.write(clean_content) def log_slide_weights(self): self.log(self.slide_weights.get_weights_string()) def log(self, message): if self.console: self.console.config(state=tk.NORMAL) self.console.insert(tk.END, "%s\n" % message) self.console.see(tk.END) self.console.config(state=tk.DISABLED) self.console.update() else: print(message)
# https://github.com/TehMillhouse/PyMarkovChain
# pip install PyMarkovChain
from pymarkovchain import MarkovChain

mc = MarkovChain("./am_m")
f = open('cap_short.txt', 'r')
mc.generateDatabase(f.read())

for x in range(0, 20):
    mc.generateString()
from pymarkovchain import MarkovChain

mc = MarkovChain("./markov")

texts = [
    "text/confessions.txt",
    "text/discourses-and-social-contract.txt",
    "text/emile.txt"
]

entire_string = ""
for text_url in texts:
    f = open(text_url, 'r')
    entire_string += f.read()
    entire_string += "\n"
    f.close()

test = open("test.txt", 'w')
test.write(entire_string)
test.close()

mc.generateDatabase(entire_string, '\n')
print(mc.generateString())

for i in range(10000):
    f = open("output/{0}.txt".format(i), 'w')
    f.write(mc.generateString().strip())
    f.close()
# from Marky import marky
from pymarkovchain import MarkovChain
import json
import re

data = json.load(open("results-initial.json"))
text = "\n".join([d['title'] for d in data if d is not ""]).lower()

mc = MarkovChain("./markov")
mc.generateDatabase(text)
print mc.generateString()
next_val = cur_ps - first_ps
# Limit the size of jumps?
if next_val < 8.0 and next_val > -8.0:
    to_add = str(next_val)
    # Add the duration
    to_add = to_add + "!@#" + dur
    db = db + (' ' + to_add)
db = db + ('\n')

keepGoing = 1
sen = ""
while keepGoing == 1:
    mc.generateDatabase(db, '\n')
    sen = mc.generateString()
    sen = sen.split(' ')
    length = 0.0
    counter = 0
    for word in sen:
        counter = counter + 1
        val_dur = word.split('!@#')
        dur = val_dur[1]
        cur_dur = assoc[dur]
        length += cur_dur.quarterLength
    if length == 4.0 and counter > 10:
        keepGoing = 0

s1 = stream.Stream()
name = cur_note.fullName
'''if "Chord" in name:
    n = note.Note(cur_note.pitches[0])
    n.duration = cur_note.duration
    print cur_note
    print n
    cur_note = n
    name = n.fullName'''
name = name.replace(' ', '#')
assoc[name] = cur_note
db = db + (' ' + name)
db = db + ('\n')

mc.generateDatabase(db, '\n')
sen = mc.generateString()

s1 = stream.Stream()
keepGoing = 1
sen = ""
while keepGoing == 1:
    mc.generateDatabase(db, '\n')
    sen = mc.generateString()
    sen = sen.split(' ')
    length = 0.0
    counter = 0
    for word in sen:
        counter = counter + 1
        val_dur = word.split('!@#')
        dur = val_dur[1]
                    help="The number of strings to generate.")
PARSER.add_argument('--minlen', metavar="LENGTH", type=int,
                    help="Throw out strings shorter than this.", default=3)
PARSER.add_argument('--notags', action="store_true",
                    help="Don't generate tags (legacy database compat behaviour)")
ARGS = PARSER.parse_args()
FILENAME = ARGS.filename
NUMBER = ARGS.number

BOT = MarkovChain(FILENAME)

VALID_SENTENCES = 0
while VALID_SENTENCES < NUMBER:
    SENTENCE = BOT.generateString()
    if len(SENTENCE.split()) < ARGS.minlen:
        continue
    VALID_SENTENCES += 1
    print(SENTENCE)

if not ARGS.notags:
    try:
        TAGS = BOT.generateStringWithSeed("#")
        print(TAGS)
        print(" --- ")
    except pymarkovchain.StringContinuationImpossibleError as e:
        print("[FATAL] Your database does not have tag data.")
        print("You can still generate posts without tags using --notags")
        import sys
        sys.exit(1)
#!/usr/bin/env python

# import PyMarkovChain
from pymarkovchain import MarkovChain

mc = MarkovChain("./markov")

for i in range(1, 26):
    markovStr = mc.generateString()
    while len(markovStr) < 75:  # or len(markovStr) > 115:
        markovStr = mc.generateString()
    # print "[" + str(i) + "] " + markovStr
    print markovStr
def getLines():
    mc = MarkovChain("")
    mc.generateDatabase(r.content)
    tweet = mc.generateString()
    tweet = tweet[:140].rsplit(r'\n', 1)[0]
    return tweet
class MarkovBot(BotPlugin):
    def __init__(self):
        super(MarkovBot, self).__init__()
        self.sentenceSep = None
        self.markov = MarkovChain(dbFilePath="./markovdb")

    @botcmd
    def talk(self, mess, args):
        """ Generate a sentence based on database """
        return self.markov.generateString()

    @botcmd
    def complete(self, mess, args):
        """ Try to complete a sentence """
        return self.markov.generateStringWithSeed(args)

    @botcmd
    def gendbfromfile(self, mess, args):
        """ Generate markov chain word database based on local file """
        try:
            with open(args) as txtFile:
                txt = txtFile.read()
        except IOError as e:
            return "Error: could not open text file"
        # At this point, we've got the file contents
        if self.sentenceSep:
            result = self.markov.generateDatabase(txt, self.sentenceSep)
        else:
            result = self.markov.generateDatabase(txt)
        if result:
            return "Done."
        else:
            return "Error: Could not generate database"

    @botcmd
    def setsentencesep(self, mess, args):
        """ Specify how to detect sentence borders """
        self.sentenceSep = args

    @botcmd
    def gendbfromstring(self, mess, args):
        """ Generate markov chain word database based on given string """
        if self.sentenceSep:
            result = self.markov.generateDatabase(args, self.sentenceSep)
        else:
            result = self.markov.generateDatabase(args)
        if result:
            return "Done."
        else:
            return "Error: Could not generate database from String"

    @botcmd
    def gendbfromurl(self, mess, args):
        """ Generate markov chain word database based on contents of url """
        response, content = httplib2.Http().request(args, "GET")
        if response["status"] == "200":
            if self.sentenceSep:
                result = self.markov.generateDatabase(content.decode("utf-8"), self.sentenceSep)
            else:
                result = self.markov.generateDatabase(content.decode("utf-8"))
            if result:
                return "Done."
            else:
                return "Error: Could not generate database from URL"
if len(corpus) == 0:
    corpus = text
else:
    corpus = corpus + ' ' + text

print("Setting up Markov chain database...")
chain = MarkovChain("./markov")
print("Generating Markov chain database...")
chain.generateDatabase(corpus)

# 10 * 15 minutes = API update every 150 minutes
# that will change based on new timing between tweets, *shrugs*
print("Beginning tweet loop.")
for x in xrange(9):
    print("Tweet " + str(x) + " of tweet loop. (max=9)")
    status = chain.generateString()
    print('Tweet created: "' + status + '"')
    status = status.replace("%2E", ".")
    status = status.replace("&amp;", "&")
    status = status.replace("&lt;", "<")
    status = status.replace("&gt;", ">")
    status = status.replace("@", "")  # cutting out all @'s entirely
    status = status.replace("twitter.com", "abc.xyz")  # why did I do this?
    print('Tweet modified to "' + status + '".')
    if len(status) > 140:
        status = status[0:136] + '...'
        print('Tweet shortened to: "' + status + '"')
    print("Sending tweet.")
artist_name.lower().encode('utf-8')).hexdigest()
mc = MarkovChain(db_name_hashed)

# Checking if the database already exists; if so, use the cache instead of another API call
if not os.path.isfile(db_name_hashed):
    print("No data cached. Please be patient while we search the lyrics of %s."
          % artist_name)

    # Adding lyrics to a single giant string
    lyrics = ''

    # Parsing each lyric from this artist.
    # [http://api.wikia.com/wiki/LyricWiki_API]
    artist = requests.get(API_URI, params=params).json()
    for album in artist['albums']:
        for song in album['songs']:
            params = {'artist': artist_name, 'song': song}
            print("Parsing \"{}\" from Wikia.".format(song))
            response = requests.get(API_URI, params=params).json()["lyrics"]
            lyrics += response.replace('[...]', '') + ' '

    # Generating the database
    mc.generateDatabase(lyrics)
    mc.dumpdb()

# Printing a string
for i in range(0, int(number_of_phrases)):
    print(mc.generateString())
final += ts
sd = True
seed = ts.split()[-1]
seed = seed.translate(string.maketrans("", ""), string.punctuation)
c += 1
print ("\n" + final + "\n")
raw_input("press enter to continue...")

if raw_input("press enter to begin.") == "beta":
    pgraph()

sd = False
s = ""
while True:
    if not sd:
        ts = mc.generateString()
    else:
        ts = mc.generateStringWithSeed(s)
    if countString(ts) >= msl:
        os.system("clear")
        print ("\n" + ts + "\n")
        sd = False
        s = raw_input("\npress enter to generate string. : ")
        if s == '!pg':
            pgraph()
        if (len(s) > 0):
            if f.find(s) != -1:
                sd = True
            else:
                raw_input('could not find "' + s + '" in database\npress enter to continue')
                sd = False
class NaNoGenMo: def __init__(self, avg_wordlen, min_dataset_size, target_wordcount, num_chaps, min_graf_len, max_graf_len, search_term, related): self.WORDLEN = avg_wordlen self.DATASET = min_dataset_size self.WORDCNT = target_wordcount self.NUM_CHAPS = num_chaps self.GRAF_MIN = min_graf_len self.GRAF_MAX = max_graf_len self.SEARCH_TERM = search_term self.RELATED = related self.rand = Random() def set_dict(self, dictfile): self.dictfile = dictfile def build_source(self): source = "" # grab random Wikipedia pages until we have enough bytes to (probably) have at least DATASET words. iterations = 0 page = None while len(source) < self.WORDLEN * self.DATASET: title = wikipedia.random() if self.RELATED == True: sys.stderr.write("using related mode\n") if iterations == 0: sys.stderr.write("first page\n") if self.SEARCH_TERM is not None: sys.stderr.write("using search term \"%s\" instead of random\n" % self.SEARCH_TERM) title = self.SEARCH_TERM else: sys.stderr.write("using random page title \"%s\"\n" % title) else: if len(page.links) > 0: ix = self.rand.randint(0, len(page.links) - 1) title = page.links[ix] sys.stderr.write("using related page title \"%s\"\n" % title) else: sys.stderr.write("using all random page titles. this one is \"%s\"\n" % title) iterations += 1 # this is in a try/except because wikipedia.page() will throw an exception if it only gets a # disambiguation page. we don't care about that, so we just try another random title. try: page = wikipedia.page(title) # remove Wikipedia's section markers. there's probably an easier way to do that. If we leave them in # pymarkovchain treats them as "words", so the output text is full of "===" and whatnot. content = page.content.replace("====", "").replace("===", "").replace("==", "") source += "\n" + content # TODO: instead of completely random pages, this could start with a random wiki page, then expand its # dataset by following links from that page. that might produce somewhat more apparent thematic coherence # but on the other hand it might not. except: pass return source def prepare_dict(self): if self.dictfile is None: print "error: no dictfile" return # now build the markov database. just using pymarkovchain's default settings for now. will fail if it doesn't # have write access to $PWD. chain = MarkovChain("./markov") source = self.build_source() chain.generateDatabase(source) # seem to need to do this to reload the database after generating it self.chain = MarkovChain("./markov") def generate(self): novel = "" chap = "" chapnum = 1 # now generate the actual novel, sentence by sentence, until it's at least WORDCNT words. while wordcount(novel) < self.WORDCNT: # chapter headings and paragraph breaks make it more readable. chap = "\n\n===CHAPTER %d===\n\n" % chapnum # for now we're just making roughly equal-sized chapters. while wordcount(chap) < (self.WORDCNT / self.NUM_CHAPS): graf = "" s = 0 # how many sentences for this paragraph? gl = self.rand.randint(self.GRAF_MIN, self.GRAF_MAX) while s < gl: # if this isn't the first sentence in the paragraph, append a space after the last one. if len(graf) > 0: graf += " " # if this isn't the first paragraph in the chapter, start it with a tab. elif len(chap) > 0: graf += "\t" # generate the actual string graf += self.chain.generateString() # simplistic weighted random selection of sentence-ending punctuation. 70% chance of a period, # 20% chance of a question mark, 10% chance of an exclamation point. 
those are guessed values, I # haven't made any effort to assess whether it feels "right" in the resulting text. i = self.rand.randint(0, 10) if i <= 7: graf += "." elif i <= 9: graf += "?" else: graf += "!" s += 1 # blank lines between paragraphs chap += graf chap += "\n\n" chapnum += 1 novel += chap return novel
# generate markov-chain-based text from the input
if args.generate and args.generate > 0:
    # disable error message about on-the-fly database
    logging.disable(logging.WARNING)
    mc = MarkovChain("./markov-chain-database")
    mc.generateDatabase(inputText)
    # reinstate logging
    logging.disable(logging.NOTSET)
    generatedText = ""
    while len(generatedText) < args.generate:
        if generatedText != "":
            generatedText = generatedText + " "
        generatedText = generatedText + mc.generateString()
    inputText = generatedText

if args.filter_punctuation:
    inputText = text.removePunctuation(inputText)
if args.filter_numbers:
    inputText = text.removeNumbers(inputText)

force = []
if args.input_force:
    force = args.input_force

inputText = inputText.split()
inputNumWords = len(inputText)
% term) if not (term in self.images): self.images[term] = [] attempt_count = 0 while (attempt_count < threshold) and (len(self.images[term]) < amount): myopener = MyOpener() page = myopener.open('https://www.google.pt/search?q=%s&source=lnms&tbm=isch&sa=X&tbs=isz:l&tbm=isch' % term.replace(" ", "+")) html = page.read() for match in re.finditer(r'<a href="/imgres\?imgurl=(.*?)&imgrefurl', html, re.IGNORECASE | re.DOTALL | re.MULTILINE): if len(self.images[term]) >= amount: break try: os.remove("test.img") except: pass try: path = urlparse.urlsplit(match.group(1)).path self.log(" Downloading %s" % match.group(1)) myopener.retrieve(match.group(1), "test.img") image_md5 = self.get_file_md5("test.img") if not (image_md5 in self.images[term]): self.images[term].append(image_md5) shutil.copy("test.img", os.path.join("Images", "%s.img" % image_md5)) os.remove("test.img") self.log(" Image saved to archive. %d/%d images." % (len(self.images[term]), amount)) attempt_count = 0 else: self.log(" Already had image!") attempt_count += 1 except: self.log(" Downloading failed") attempt_count += 1 self.log("Farming of %s images complete, now holding %d images" % (term, len(self.images[term]))) with open(os.path.join("Images", "hashes.json"), "w") as f: json.dump(self.images, f, indent=2) def farm_images(self, amount=25, threshold=10): self.show_term_counts("image_searches", self.images) all_farm = self.terms["image_searches"] all_farm.extend(self.terms["talk_titles"]) for term in all_farm: self.farm_image_term(term, amount, threshold) def farm_gif_term(self, term, amount=25, threshold=10): self.log("Farming GIFs for %s..." % term) if not (term in self.gifs): self.gifs[term] = [] attempt_count = 0 while (attempt_count < threshold) and (len(self.gifs[term]) < amount): image_path = "test.gif" try: os.remove(image_path) except: pass try: img = translate(term) wget.download(img.fixed_height.url, image_path) image_md5 = self.get_file_md5("test.gif") if not (image_md5 in self.gifs[term]): self.gifs[term].append(image_md5) shutil.copy(image_path, os.path.join("GIFs", "%s.gif" % image_md5)) self.log(" GIF saved to archive. %d/%d GIFs." % (len(self.gifs[term]), amount)) attempt_count = 0 else: self.log(" Already had GIF!") attempt_count += 1 except: self.log(" Downloading failed") attempt_count += 1 self.log("Farming of %s GIFs complete, now holding %d GIFs" % (term, len(self.gifs[term]))) with open(os.path.join("GIFs", "hashes.json"), "w") as f: json.dump(self.gifs, f, indent=2) def farm_gifs(self, amount=25, threshold=10): self.show_term_counts("giphy_searches", self.gifs) all_farm = self.terms["giphy_searches"] all_farm.extend(self.terms["talk_titles"]) for term in all_farm: self.log("Farming GIFs for %s..." % term) if not (term in self.gifs): self.gifs[term] = [] self.farm_gif_term(term, amount, threshold) def farm_content(self, all_content): for talk_title in self.terms["talk_titles"]: talk_path = os.path.join("Content", "%s.txt" % talk_title) # Either we're replacing all content or we're only replacing files that don't exist if all_content or (not os.path.exists(talk_path)): self.log("Farming data on %s..." 
% talk_title) with open(talk_path, "w") as f: content = self.my_face.fully_research_topic(talk_title, self.log) if type(content) is str: clean_content = content else: clean_content = unicodedata.normalize('NFKD', content).encode('ascii', 'ignore') f.write(clean_content) def log_slide_weights(self): self.log(self.slide_weights.get_weights_string()) def log(self, message): if self.console: self.console.config(state=tk.NORMAL) self.console.insert(tk.END, "%s\n" % message) self.console.see(tk.END) self.console.config(state=tk.DISABLED) self.console.update() else: print(message)
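The Markov-specific part of Trollette is small: the proverb chain trained in __init__ and the rejection-sampling loop in get_markov_proverb. Below is a minimal standalone sketch of that pattern; the proverbs.txt corpus path, the database path, and the helper names are placeholders rather than anything from the original project.

from pymarkovchain import MarkovChain

def build_proverb_chain(corpus_path="proverbs.txt", db_path="./proverb_markov_db"):
    # Train an order-1 chain on a file of one-line proverbs, as Trollette does.
    mc = MarkovChain(db_path)
    with open(corpus_path) as f:
        mc.generateDatabase(f.read(), n=1)
    return mc

def markov_proverb(mc, min_words=5, max_words=10, max_tries=100):
    # Rejection-sample: discard generated strings whose word count falls
    # outside the desired window, with a bounded number of retries.
    sentence = mc.generateString()
    for _ in range(max_tries):
        if min_words <= len(sentence.split()) <= max_words:
            break
        sentence = mc.generateString()
    return sentence

Unlike get_markov_proverb above, this sketch caps the number of retries so it cannot loop forever on a corpus that never produces a string of the requested length.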
from pymarkovchain import MarkovChain

seed_file = open('./fixtures/wikipedia_india_content.txt')
mc = MarkovChain("../markov_db")
seed_text = seed_file.read()
mc.generateDatabase(seed_text)
print(mc.generateString())
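The snippet above rebuilds the model from the Wikipedia dump on every run. If the installed pymarkovchain exposes dumpdb() (recent releases do), the trained database can be written to the path given in the constructor and reused later; treat the following as a sketch rather than a guaranteed API.

from pymarkovchain import MarkovChain

mc = MarkovChain("../markov_db")
with open('./fixtures/wikipedia_india_content.txt') as seed_file:
    mc.generateDatabase(seed_file.read())
mc.dumpdb()  # persist the trained model to ../markov_db

# A later run can construct MarkovChain("../markov_db") and call
# generateString() immediately, without re-reading the seed text.
print(mc.generateString())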
class ResponseGenerator:
    def __init__(self):
        self.eightball = EightBall()
        self.excuses = Excuses()
        self.commands = Commands()
        self.straws = Straws("/", "=", "/")
        self.chain = MarkovChain("./markovdb")
        self.chain.db = _db_factory()
        with open("markovsource", "r") as markov_file:
            self.chain.generateDatabase(markov_file.readline())

    def generate_response(self, body):
        # Tokenize body
        body_tokens = body.lower().split(" ")

        # Important commands can only be run if line is started with the word
        command = body_tokens[0]
        if command == '!create':
            new_command = body_tokens[1]
            response_index = body.find(new_command) + len(new_command) + 1
            response = body[response_index:]
            self.commands.set(new_command, response)
            return "Command !{0} created.".format(new_command)
        elif command == "!list":
            string = "!create !delete !reload !excuse !8ball !straws !image "
            for command_ in self.commands.list():
                string += "!{0} ".format(command_)
            return string
        elif command == "!delete":
            cleaned_command = body_tokens[1].lower()
            success = self.commands.delete(cleaned_command)
            if success:
                return "Command !{0} deleted.".format(cleaned_command)
            else:
                return "Command !{0} does not exist.".format(cleaned_command)
        elif command == "!reload":
            with open("markovsource", "r") as markov_file:
                self.chain.generateDatabase(markov_file.readline())
            return "Successfully reloaded my word database"
        # Not a system command, continue attempting to parse
        else:
            for token in body_tokens:
                if token == "!fortune":
                    # TODO
                    pass
                elif token == "!excuse":
                    return self.excuses.get()
                elif token == "!8ball":
                    return self.eightball.get()
                elif token == "!straws":
                    return self.straws.get()
                elif token == "!image":
                    return "/get " + self.chain.generateString()
                elif token == "tase":
                    return self.chain.generateString()
                elif len(token) > 0 and token[0] == "!":
                    return self.commands.get(token[1:])
                # we have a sentence to listen to, arbitrary length requirement
                elif len(body) > 10:
                    string_to_write = body + "."
                    if body[len(body) - 1] == ".":
                        string_to_write = body
                    with open("markovsource", "a") as markov_file:
                        markov_file.write(string_to_write)
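The learn-and-reload cycle in ResponseGenerator (append long messages to "markovsource", rebuild the chain on !reload) reduces to two helpers. This is a condensed sketch, not a drop-in replacement; it reads the whole corpus file rather than a single line, and the file name and length threshold simply mirror the snippet above.

from pymarkovchain import MarkovChain

CORPUS_FILE = "markovsource"

def remember(message, min_length=10):
    # Mirror the length check above and make sure each entry ends with a period.
    if len(message) > min_length:
        entry = message if message.endswith(".") else message + "."
        with open(CORPUS_FILE, "a") as f:
            f.write(entry)

def rebuild_chain(db_path="./markovdb"):
    # Re-train the chain from everything remembered so far.
    chain = MarkovChain(db_path)
    with open(CORPUS_FILE, "r") as f:
        chain.generateDatabase(f.read())
    return chain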
#!/usr/bin/env python
from pymarkovchain import MarkovChain

# Create an instance of the markov chain. By default, it uses MarkovChain.py's location to
# store and load its database files to. You probably want to give it another location, like so:
mc = MarkovChain("C:/Users/Andrew/OneDrive/Documents/Northwestern/Courses/495-Data-Science/Final Project")

# To generate the markov chain's language model, in case it's not present
mc.generateDatabase("It is nice to meet you. I would like to meet your friend.")

# To let the markov chain generate some text, execute
for i in range(10):
    print(mc.generateString())
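If the installed version of pymarkovchain also provides generateStringWithSeed (most releases do), the same kind of model can be asked to start its sentence from a chosen word. A small sketch, with a placeholder database path:

from pymarkovchain import MarkovChain

mc = MarkovChain("./markov_seed_demo")  # placeholder database path
mc.generateDatabase("It is nice to meet you. I would like to meet your friend.")
# Ask for a sentence that begins with the word "I".
print(mc.generateStringWithSeed("I"))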
        # (This excerpt picks up partway through a TwythonHelper method; the
        # lines that open the keys file and read it into `lines` are not shown.)
        f.close()
        consumerkey = lines[0].split("#")[0]
        consumersecret = lines[1].split("#")[0]
        accesstoken = lines[2].split("#")[0]
        accesssec = lines[3].split("#")[0]
        self.api = Twython(consumerkey, consumersecret, accesstoken, accesssec)

if __name__ == '__main__':
    api = (TwythonHelper("dynacoinc.keys")).api
    mc = MarkovChain("./markov")
    f = codecs.open("corpus.txt")
    text = " ".join(f.readlines())
    f.close()
    mc.generateDatabase(text)
    status = mc.generateString()
    if len(status) > 110:
        status = status[:110]
        lr = status.rfind(" ")
        status = status[:lr] + "."
    else:
        status = status + "."
    r = random.Random()
    lf = os.listdir(".")
    ll = [l for l in lf if l.find("jpg") != -1]
    photo = open(r.choice(ll), "rb")
    api.update_status_with_media(media=photo, status=status)
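The status-trimming logic above (cut to 110 characters, back up to the last space, append a period) can be pulled out into a standalone helper; the version below also guards against the case where no space is found in the first 110 characters. The 110-character budget is taken from the snippet, not from any Twitter constant.

def fit_status(text, limit=110):
    # Return the text trimmed at a word boundary so it fits the character budget,
    # always ending with a period.
    if len(text) <= limit:
        return text + "."
    clipped = text[:limit]
    cut = clipped.rfind(" ")
    if cut != -1:
        clipped = clipped[:cut]
    return clipped + "."

# Example: status = fit_status(mc.generateString()) before calling update_status_with_media.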