Example #1
class BeerMarkov:

    def __init__(self, reviews_file, markov_dir):
        self._reviews_file = reviews_file
        self._markov_dir = markov_dir
        self._markov = MarkovChain(markov_dir + '/beer_desc')
        self._name_markov = MarkovChain(markov_dir + '/beer_name')
        self.refresh_database()

    def refresh_database(self):
        with open(self._reviews_file, 'r') as review_data:
            reviews = json.load(review_data)

        reviews_string = [r['desc'] for r in reviews]
        names_string = [r['name'] for r in reviews]

        new_markov = MarkovChain(self._markov_dir + '/beer_desc')
        new_markov.generateDatabase(' '.join(reviews_string))

        new_name_markov = MarkovChain(self._markov_dir + '/beer_name')
        new_name_markov.generateDatabase('.'.join(names_string))

        self._markov = new_markov
        self._name_markov = new_name_markov

    def get_review(self):
        return self._markov.generateString() + '. ' + \
            self._markov.generateString()
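
A minimal usage sketch for the class above; the paths are hypothetical, and reviews.json is assumed to hold a list of objects with name and desc keys as the code expects:

bot = BeerMarkov('reviews.json', './markov_data')  # hypothetical paths
print(bot.get_review())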
Example #2
def markov(messages):
    # Create an instance of the markov chain. By default, it stores and loads
    # its database files next to MarkovChain.py. You probably want to give it another location, like so:
    mc = MarkovChain("./markov")

    # To generate the markov chain's language model, in case it's not present
    # mc.generateDatabase("\n".join(messages))

    # To let the markov chain generate some text, execute
    for i in xrange(100):
        print mc.generateString()
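
The commented-out generateDatabase call above only needs to run once per corpus; pymarkovchain can persist the model with dumpdb so later runs skip regeneration. A minimal sketch of that build-then-persist cycle (the path and messages are illustrative):

from pymarkovchain import MarkovChain

mc = MarkovChain("./markov")  # where the database file is stored
mc.generateDatabase("\n".join(["first message", "second message"]))
mc.dumpdb()  # persist the model so the next run can load it from disk
print(mc.generateString())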
Example #3
def main():
    auth = tweepy.OAuthHandler(key, secret)
    auth.set_access_token(token, token_secret)
    client = tweepy.API(auth)

    mc = MarkovChain(markDirectory)
    superString = createSuperString('trump.txt')
    mc.generateDatabase(superString)

    while (True):
        phrase = mc.generateString()
        try:
            print(phrase)
        except UnicodeEncodeError:
            continue
        try:
            answer = input()
            if (answer == 'y'):
                client.update_status(phrase)
        except tweepy.TweepError:
            continue
Example #4
class EuroMarkov:
    def __init__(self):
        self.mc = MarkovChain("./markovdata")

    def generateCountryList(self):
        countryList = []
        for filename in os.listdir("json_lyrics/2015"):
            countryList.append(os.path.splitext(filename)[0])
        return countryList

    def loadFiles(self,startYear,endYear,countryList):
        model = ""
        for year in range(startYear,endYear+1):
            for country in countryList:
                fname = "json_lyrics/"+str(year)+"/"+country+".json"
                if os.path.isfile(fname):
                    with open(fname, "r") as myfile:
                        data = json.load(myfile)
                        model += data['lyrics'] + '\n'
        return model

    def runMarkov(self,model):
        self.mc.generateDatabase(model)

    def generateString(self):
        return self.mc.generateString()
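
A usage sketch for EuroMarkov, assuming the json_lyrics/<year>/<country>.json layout the class reads from; the year range is illustrative:

em = EuroMarkov()
countries = em.generateCountryList()
em.runMarkov(em.loadFiles(2010, 2015, countries))  # illustrative year range
print(em.generateString())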
Example #5
class Michiov(object):
  def __init__(self, autogen=True, markovdb=os.path.expanduser("~/markov"), twcreds=os.path.expanduser("~/.michiov_twitter_credentials"),twappcreds=os.path.expanduser("~/.michiov_twitter_appdata")):
    self.mc = MarkovChain(markovdb)
    self.reload()
    if not os.path.exists(twappcreds):
      print("Lack of app creds")
      sys.exit(1)
    twcons = json.loads(open(twappcreds).read())
    conskey = twcons['key']
    conssec = twcons['secret']
    while not os.path.exists(twcreds):
      twitter.oauth_dance("MPRZ Tech Labs", conskey, conssec, twcreds)
    oauth_token, oauth_secret = twitter.read_token_file(twcreds)
    self.t = twitter.Twitter(auth=twitter.OAuth(oauth_token, oauth_secret, conskey, conssec))
  def should(self):
    ret = input("Should I send it? (y/N) ")
    return ("y" in ret or "Y" in ret)
  def qas(self):
    idea = self.mc.generateString()
    print("Generated: %s" % idea)
    if self.should():
      self.t.statuses.update(status=idea)
  def loop(self):
    try:
      while True:
        self.qas()
        #self.reload()
    except KeyboardInterrupt:
      pass
  def reload(self):
    with open("markovpredb.txt") as file:
      self.mc.generateDatabase(file.read())
Example #6
def fetch_lyrics(artist, lines):
    API_KEY = os.environ.get('API_KEY')

    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()

    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain('./markov')
    mc.generateDatabase(lyrics)

    # Add lines of lyrics
    result = []
    for line in range(0, lines):
        line_string = mc.generateString()
        result.append(line_string)
    return result
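
A hypothetical call to the function above; it assumes the API_KEY environment variable is set for the lyricsnmusic API, and the artist name is only an example:

for line in fetch_lyrics('Adele', 4):  # illustrative artist and line count
    print(line)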
Example #7
def lyrics():
    artist = request.form['artist']
    lines = int(request.form['lines'])

    if not artist:
        return redirect(url_for('index'))

    # Get a response of sample lyrics from the artist
    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()

    # Parse results into a long string of lyrics
    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain()
    mc.generateDatabase(lyrics)

    # Add lines of lyrics
    result = []
    for line in range(0, lines):
        result.append(mc.generateString())

    return render_template('lyrics.html', result=result, artist=artist)
Example #8
def markov(msg, botName, channel, db):
  if msg.rawMatchRe('!markov (?P<source>#?[a-zA-Z]\S*)\s*$') or msg.rawMatchRe('what (would|does) (the )?(?P<source>#?[a-zA-Z]\S+) say\??'):
    m = msg.getRegExpResult()
    source = m.group('source')

    if source[0] == '#':
      logsList = db.getLogs(chan=source, lines=2000)
    else:
      logsList = db.getLogs(nick=source, lines=2000)
    
    if len(logsList) < 100:
      hexchat.command("msg %s Not enough data for %s" % (channel, source))
      
    else:
      mc = MarkovChain("./markov_db")
      ircText = ''
      
      for line in logsList:
        # disqualify lines that are too short or are certain bot functions that start with '!'
        if len(line.split(' ')) >= 5 and line[0] != '!':
          ircText += line.replace('.','') + '. '
          
      mc.generateDatabase(ircText)
      markovOutput = mc.generateString().capitalize()
      hexchat.command('msg %s "%s"  --%s' % (channel, markovOutput, source))
      
    return True
  return False
Example #9
class TextGenerator:
	def __init__(self, generatorName, trainString, prefixLength):
		self.generatorName = generatorName
		self.chain = MarkovChain()
		self.chain.generateDatabase(trainString, n=prefixLength)
		self.currState = []
		self.hyphenator = Hyphenator('en_US')
		self.syllableQ = Queue()
		self.stripPattern = re.compile('[\W_]+')
		while (len(self.currState) < prefixLength):
			self.currState = self.chain.generateString().split()[-(prefixLength+1):-1]
	
	def load_next_word(self):
		nextword = ""
		try:
			while nextword == "":
				nextword = self.stripPattern.sub('', self.chain._nextWord(self.currState))
				self.currState = self.currState[1:]
				self.currState.append(nextword)
			if len(nextword) < 4: # because hyphenator doesn't work for words shorter than 4 letters
				self.syllableQ.put(nextword)
			else: 
				for syllable in self.hyphenator.syllables(nextword):
					self.syllableQ.put(syllable)
		except UnicodeEncodeError:
			print("unicode error")
		
	def get_next_syllable(self):
		if (self.syllableQ.empty()):
			self.load_next_word()
		return self.syllableQ.get()
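
A usage sketch for TextGenerator, assuming PyHyphen's en_US dictionary is installed and corpus.txt is a hypothetical training file:

with open("corpus.txt") as f:  # hypothetical corpus
    gen = TextGenerator("demo", f.read(), prefixLength=2)
for _ in range(8):
    print(gen.get_next_syllable())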
Example #11
class Haley(object):
    def __init__(self, backend):
        self.backend = backend
        self.mc = MarkovChain("markov.db")
    def loop(self):
        self.backend.connect()
        while True:
            for event in self.backend.update():
                try:
                    if event["type"] == "text":
                        times = re.search(r"(?P<nm>\d+) times", event["content"].lower())
                        if times:
                            if int(times.group("nm")) > 0:
                                times = min(5,int(times.group("nm")))
                            else:
                                self.backend.say("Okay, I won't say anything... Baka.")
                                continue
                        else:
                            times = 1
                        for i in range(times):
                            if "hi" in detox(event["content"].lower()).split() or "hello" in detox(event["content"].lower()).split():
                                self.backend.say(random.choice(["%s! Tutturuuu!","Hello, %s, so it was you making the noise up there!"]) % event["by"])
                                continue
                            if "nano" in event["content"].lower() or "hakase" in event["content"].lower():
                                self.backend.say("%s%s"%("HAKASE"*len(re.findall("nano", event["content"].lower())),"NANO"*len(re.findall("hakase", event["content"].lower()))))
                                continue
                            if event["mentioned"]:
                                if "roll" in detox(event["content"].lower()).split():
                                    numb = re.search(r"(d|k)(?P<nm>\d+)", event["content"].lower())
                                    if numb and int(numb.group("nm")) > 0:
                                        self.backend.say("Aaaand... %d!" % (random.randrange(1,int(numb.group("nm"))+1)))
                                        continue
                                    else:
                                        self.backend.say("Who do you think you are, rolling impossible dice... Baka.")
                                        continue
                                if "say" in detox(event["content"].lower()).split():
                                    if "something" in detox(event["content"].lower()).split():
                                        tosay = self.mc.generateString()
                                    elif "name" in detox(event["content"].lower()).split():
                                        tosay = self.backend.get_name(event["by"])
                                    self.backend.say(tosay)
                                    continue
                                if "xkcd" in detox(event["content"].lower()).split():
                                    if "random" in detox(event["content"].lower()).split():
                                        x = xkcd.getRandomComic()
                                    else:
                                        numb = re.search(r"(?P<nm>\d+)", event["content"])
                                        if numb:
                                            x = xkcd.Comic(int(numb.group("nm")))
                                        else:
                                            x = xkcd.getLatestComic()
                                    self.backend.say("*%s* - %s - _%s_" % (x.getTitle(), x.getImageLink(), x.getAltText()))
                                    continue
                                self.backend.say("Hmm?")
                                continue
                except:
                    self.backend.say(str(sys.exc_info()[0]))
Example #12
def markov():
    """A simple markov function"""
    mc = MarkovChain("./tempchain")

    with open(CORPUS, 'r') as f:
        data = f.read()

    mc.generateDatabase(data)

    return mc.generateString()
Example #13
    async def markov(self, ctx):
        """Get a response from input text using a markov chain generated from the channel's text"""
        results = ''
        async for message in self.bot.logs_from(ctx.message.channel, limit=10):
            results += message.content + "\n"
        mc = MarkovChain()
        mc.generateDatabase(results)
        msg = mc.generateString()
        await self.bot.say(msg)
Example #14
def main():
    args = parser.parse_args()
    dirname=os.path.split(__file__)[0]
    filename=os.path.join(dirname,"phil.txt")
    title_filename=os.path.join(dirname,"phil_titles.txt")
    dbname1 = "database.pkl"
    dbname2 = "database_title.pkl"
    new_db = not os.path.exists(dbname1)
    body_maker = MarkovChain(dbname1)
    title_maker = MarkovChain(dbname2)
    if new_db:
        title_maker.generateDatabase(open(title_filename).read())
        title_maker.dumpdb()
        body_maker.generateDatabase(open(filename).read())
        body_maker.dumpdb()

    name = title_maker.generateString()
    body = '  '.join([body_maker.generateString()+'.' for i in xrange(3)])

    if args.repo:
        if args.token:
            token = args.token
        else:
            token_filename = os.path.join(dirname, "token.txt")
            if not os.path.exists(token_filename):
                sys.stderr.write("Please either specify --token=XXX on the command line or put a github API token in token.txt\n")
                sys.stderr.write("You can generate a token here: https://github.com/settings/tokens\n")
                sys.exit(1)
            token = open(token_filename).read().strip()

        import github
        gh=github.Github(token)
        user=gh.get_user()
        repo=user.get_repo(args.repo)
        issue = repo.create_issue(title=name, body=body)
        print issue.html_url
    else:
        print 
        print name
        print "-"*len(name)
        print body
Example #15
def main(args):
	markov_filename = "./" + args.subreddit + ".mcd"
	new_chain = not os.path.isfile(markov_filename)  # this must come before the creation of the Markov Chain
	mc = MarkovChain(markov_filename)

	if args.new or new_chain:
		titles = getTitles(getSubmissions(100, args.subreddit))
		training_data = str.join('.', titles)
		mc.generateDatabase(training_data)

	N = args.num_submissions
	while N > 0:
		print(mc.generateString())
		N -= 1
Example #16
def joke():
    """
    Produces a joke based on the existing database (creates database if 
    one doesn't already exist).
    """
    if not os.path.isfile('markovdb'):
        generate_database()

    chain = MarkovChain()
    generated_joke = ''

    while len(generated_joke) < MIN_LENGTH:
        generated_joke = chain.generateString()

    return generated_joke
Example #17
def main():
    df = fill_song_pd()
    lyrics = ""  #going to be one huge string
    db_name = './markov/' + genre
    mc = MarkovChain(db_name)
    # create the Markov dataset if it doesn't already exist
    if not os.path.isfile(db_name):
        print("creating new data set based on the " + str(genre) + " genre...")
        for index, row in df.iterrows():
            if row['genre'] == genre_dict[genre]:
                lyrics += row["lyrics"] + " "
        mc.generateDatabase(lyrics)
        mc.dumpdb()

    for i in range(int(lines) + 1):
        print(mc.generateString())
Example #18
class markovbuild(object):
    '''Builds a markov chain DB and outputs data'''
    def __init__(self, target, data, lines=5):
        self.database = '/tmp/markov_%s.db' % target
        self.lines = lines
        self.data = '\n'.join(data)
        self.mchain = MarkovChain(self.database)

    def build(self):
        '''Builds a markov chain'''
        self.mchain.generateDatabase(self.data)

    def output(self):
        '''Outputs markov chain data'''
        self.build()
        return [ self.mchain.generateString() for x in xrange(0, self.lines) ]
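
A usage sketch for markovbuild; the target name and log lines are illustrative (note the class writes its database under /tmp and uses Python 2's xrange):

builder = markovbuild('example_channel', ['first log line', 'second log line'], lines=2)
for sentence in builder.output():
    print(sentence)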
Example #19
def generate_database():
    """
    Generates the database that the Markov Chain will use to make its word-by-
    word predictions. It will attempt to create this file in the same directory
    as where the script is currently located.
    """
    currpath = os.path.dirname(__file__)
    path_to_data = os.path.join(currpath, 'in.txt')

    chain = MarkovChain()

    with open(path_to_data) as f:
        chain.generateDatabase(f.read())
        chain.dumpdb()

    print(chain.generateString())
Example #20
    def analyze(self):
        # GenerateModel
        """ Generate a Markov chain based on retrieved strings. """

        mc = MarkovChain()
        mc.generateDatabase(self.text)
        result = r''

        print "Generating:"

        for i in range(0, 10):
            print "Sentence %d" % i
            # Create 10 sentences
            sentence = mc.generateString()
            result += sentence.capitalize() + '. '

        return result
Example #21
def poem():
    story = str(request.form['story'].encode('ascii', 'ignore'))
    lines = int(request.form['lines'])

    if not story:
        return redirect(url_for('index'))

    mc = MarkovChain()
    mc.generateDatabase(story)

    result = []
    for line in range(0, lines):
        new_line = mc.generateString()
        if new_line not in result:
            result.append(new_line)

    return render_template('poem.html', result=result, story=story)
Example #22
class MarkovBot(BotPlugin):

    def __init__(self):
        self.markov = MarkovChain()

    @botcmd
    def talk(self, mess, args):
        """ Generate a sentence based on database """
        return self.markov.generateString()

    @botcmd
    def complete(self, mess, args):
        """ Try to complete a sentence """
        return self.markov.generateStringWithSeed(args)

    @botcmd
    def gendbfromfile(self, mess, args):
        """ Generate markov chain word database """
        try:
            with open(args) as txtFile:
                txt = txtFile.read()
        except IOError as e:
            return 'Error: could not open text file'
        # At this point, we've got the file contents
        if self.markov.generateDatabase(txt):
            return 'Done.'
        else:
            return 'Error: Could not generate database'

    @botcmd
    def gendbfromstring(self, mess, args):
        if self.markov.generateDatabase(args):
            return 'Done.'
        else:
            return 'Error: Could not generate database from String'

    @botcmd
    def gendbfromurl(self, mess, args):
        req = requests.get(args)
        if req.ok and self.markov.generateDatabase(req.content):
            return 'Done.'
        else:
            return 'Error: Could not generate database from URL'
Example #23
    async def snakeme(self, ctx: Context):
        """
        How would I talk if I were a snake?
        :param ctx: context
        :return: you, snakified based on your Discord message history
        """
        mentions = list(
            filter(lambda m: m.id != self.bot.user.id, ctx.message.mentions))
        author = ctx.message.author if (len(mentions)
                                        == 0) else ctx.message.mentions[0]
        channel: discord.TextChannel = ctx.channel

        channels = [
            channel for channel in ctx.message.guild.channels
            if isinstance(channel, discord.TextChannel)
        ]
        channels_messages = [
            await channel.history(limit=10000).flatten()
            for channel in channels
        ]
        msgs = [
            msg for channel_messages in channels_messages
            for msg in channel_messages
        ][:MSG_MAX]

        my_msgs = list(filter(lambda msg: msg.author.id == author.id, msgs))
        my_msgs_content = "\n".join(list(map(lambda x: x.content, my_msgs)))

        mc = MarkovChain()
        mc.generateDatabase(my_msgs_content)
        sentence = mc.generateString()

        snakeme = discord.Embed()
        snakeme.set_author(
            name="{0}#{1}".format(author.name, author.discriminator),
            icon_url="https://cdn.discordapp.com/avatars/{0}/{1}".format(
                author.id, author.avatar) if author.avatar is not None else
            "https://img00.deviantart.net/eee3/i/2017/168/3/4/"
            "discord__app__avatar_rev1_by_nodeviantarthere-dbd2tp9.png")
        snakeme.description = "*{0}*".format(
            snakify(sentence)
            if sentence is not None else ":question: Not enough messages")
        await channel.send(embed=snakeme)
Example #25
def main():
    with open("test.txt", "r") as myfile:
        data = myfile.read().replace('\n', '')
    mc = MarkovChain("./markovdb")

    # Start a session so we can have persistant cookies
    session = requests.Session()

    # This is the form data that the page sends when logging in
    login_data = {
        'user_email': EMAIL,
        'user_password': PASSWORD,
        'login': '******',
    }

    # Authenticate
    r = session.post(URL, data=login_data)

    mc.generateDatabase(data)

    for x in range(0, 5):
        r = os.urandom(16).encode('hex')
        title = "Report#" + str(x) + " " + str(r)
        description = mc.generateString()

        # random coordinates within Europe
        y, x = uniform(-17, 43), uniform(28, 55)

        print (title)

        # Create new report based on random content
        report_data = {
            'title': title,
            'category': "2",
            'description': description,
            'latitude': x,
            'longitude': y,
            'newreport': "1",
        }

        r = session.post(newRep, data=report_data)
Example #26
def generate_horoscope(chain: MarkovChain):
    return chain.generateString()
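
generate_horoscope expects a chain whose database has already been generated; a minimal sketch, with an illustrative corpus path:

from pymarkovchain import MarkovChain

chain = MarkovChain('./horoscope_db')  # hypothetical database location
with open('horoscopes.txt') as f:      # hypothetical corpus
    chain.generateDatabase(f.read())
print(generate_horoscope(chain))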
Example #27
# coding: utf-8

from pymarkovchain import MarkovChain

mc = MarkovChain(dbFilePath='./database.pkl')

tweet_text = mc.generateString()

import json

with open('credentials.json') as data_file:
    credentials = json.load(data_file)

import tweepy

auth = tweepy.OAuthHandler(consumer_key=credentials['tw_key'],
                           consumer_secret=credentials['tw_secret'])

auth.set_access_token(key=credentials['tw_access_token'],
                      secret=credentials['tw_access_token_secret'])

tw = tweepy.API(auth)

tw.update_status(status=tweet_text)

Example #28
# read in data and clean
data = json.load(open("listicles.json"))
text = "\n".join([d['title'] for d in data if d]).lower()
regex = re.compile('[%s]' % re.escape(string.punctuation))
text = regex.sub(" ", text)

# generate MC data
mc = MarkovChain("./markov")
mc.generateDatabase(text)
f = open("potential_tweets.txt", "a")

# generate and evaluate tweets
while 1:
    try:
        seed = sys.argv[1]
    except:
        seed = None
    if seed is not None:
        tweet = mc.generateStringWithSeed(seed).title()
    else:
        tweet = mc.generateString().title()
    print tweet
    answer = raw_input("Tweet this text? (yes|no|edit) ")
    if answer == "yes":
        f.write(tweet)
        break
    elif answer == "edit":
        tweet = raw_input("Enter in the edited text: ")
        f.write(tweet)
        break
Example #29
    print(file)
    with open(file) as f:
        text = f.read()
        verse_lyrics = parse_file(text)
        verse_lyrics = re.sub(r"[\[\]\(\)\"]", " ", verse_lyrics)
        verse_lyrics = re.sub(r" +", " ", verse_lyrics)
        all_lyrics += verse_lyrics

mc = MarkovChain("test")
mc.generateDatabase(all_lyrics)

output_directory = "generated_lyrics/"
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

number_of_phrases = 8
num_files = 1000
for i in range(num_files):
    # Printing a string
    with open(output_directory + "{}.txt".format(i), "w") as f:
        for _ in range(int(number_of_phrases)):

            while True:
                line = mc.generateString()
                if len(line) > 1:
                    break

            print(line)
            f.write(line + "\n")
    print("")
Example #30
def main(username):
    r = praw.Reddit(user_agent='trollolol v0.1')
    r.config.decode_html_entities = True

    m = MarkovChain('markov-data/%s.chain' % username)

    last_comment = None
    try:
        last_comment = Node.objects(
            username=username).order_by('-created').first()
        if last_comment:
            print("Checking for new messages.")
            comments = r.get_redditor(username).get_comments(
                limit=500, params={'after': last_comment.node_id})
        else:
            raise
    except:
        print("No messages fetched yet, doing inital import")
        comments = r.get_redditor(username).get_comments(limit=500)

    for comment in comments:
        try:
            node = Node.objects.get(node_id=comment.name)
        except:
            node = Node(node_id=comment.name,
                        parent_id=comment.parent_id,
                        body=comment.body,
                        created=comment.created,
                        username=username)
            node.save()

    first_comment = Node.objects(
        username=username).order_by('+created').first()
    if first_comment:
        print("Checking for messages before %s." % first_comment.node_id)
        comments = r.get_redditor(username).get_comments(
            limit=500, params={'before': first_comment.node_id})

        for comment in comments:
            try:
                node = Node.objects.get(node_id=comment.name)
            except:
                node = Node(node_id=comment.name,
                            parent_id=comment.parent_id,
                            body=comment.body,
                            created=comment.created,
                            username=username)
                node.save()

    comments = Node.objects(username=username).all()

    corpus = []
    for comment in comments:
        corpus.append(comment.body)

    shuffle(corpus)
    if len(corpus) > 0:
        print(
            "We have %i messages to work with. Building new markov corpus now."
            % len(corpus))
        m.generateDatabase(" ".join(corpus))

        print("Looking for acceptable output for first round of transforms.")
        output = []
        tries = 0
        while len(output) < 10 and tries < 100:
            tries = tries + 1
            result = m.generateString()
            if len(result.split(" ")) >= 10:
                sys.stdout.write("x")
                output.append(result)
            else:
                sys.stdout.write(".")

        print("")

        response = ""
        for result in output:
            response = response + " " + result

        print(response)
    else:
        print("No comments found.")
Example #31
run_regex = r'[^a-zA-Z. #@]+'

max_id = None
text_statuses = []
statuses = [1]

while len(text_statuses) < get_count and len(statuses) != 0:
    if max_id:
        statuses = api.GetUserTimeline(screen_name=username, count=10000, max_id=max_id, include_rts=False)
    else:
        statuses = api.GetUserTimeline(screen_name=username, count=10000, include_rts=False)
    if len(statuses) > 0:
        max_id = min([status.id for status in statuses]) - 1
        text_statuses = text_statuses + [status.text for status in statuses]
    print("got {} of {} statuses".format(len(text_statuses), get_count))


train_text = ".".join(text_statuses)
if run_regex:
    train_text = re.sub(run_regex, ' ', train_text).replace('\n', '')

print('generating db on ' + username)
mc = MarkovChain("./markov")
mc.generateDatabase(train_text)
print('done generating db')

while True:
    output = mc.generateString()
    print(output)
    time.sleep(1)
Example #33
class Trollette:
    def __init__(self):
        self.presenter = ""
        self.title = ""

        self.slide_count = 0
        self.slide_min = 15
        self.slide_max = 25

        self.console = None
        self.output_dir = ""

        with open("terms.json", "r") as f:
            self.terms = json.load(f)

        with open(os.path.join("GIFs", "hashes.json"), "r") as f:
            self.gifs = json.load(f)

        with open(os.path.join("Images", "hashes.json"), "r") as f:
            self.images = json.load(f)

        # Load up the proverb data
        with open(os.path.join("Proverbs", "facts"), "r") as f:
            self.proverb_lines = f.readlines()
        self.proverbs = map(string.strip, self.proverb_lines)
        self.proverb_markov = MarkovChain("markov.db")
        self.proverb_markov.generateDatabase("".join(self.proverb_lines), n=1)

        # Make the text data
        # self.my_face = comptroller.face(self.title)
        # self.slide_titles = self.my_face.get_titles(50)
        # self.slide_bullets = self.my_face.get_bullets(100)

        self.my_face = Face("")

        self.slide_titles = ["shit", "balls", "butts"]
        self.slide_bullets = ["butts", "do", "stuff", "f***s", "more f***s"]

        self.ppt = Presentation()
        self.slide_weights = SlideWeights()

    def generate_slide_deck(self):
        # Create a place to put data and resources
        self.output_dir = os.path.join(
            "Output",
            "%s_%s_%s" % (self.title, self.presenter,
                          datetime.datetime.strftime(datetime.datetime.now(),
                                                     '%Y_%m_%d_%H_%M_%S')))

        self.resources_dir = os.path.join(self.output_dir, "Resources")

        # Start with a fresh PowerPoint
        self.ppt = Presentation()

        # Make sure the directories exist
        try:
            os.makedirs(self.output_dir)
            os.makedirs(self.resources_dir)
        except:
            self.log("Directory %s already exists, overwriting..." %
                     self.output_dir)

        self.slide_count = random.randint(self.slide_min, self.slide_max)
        self.log("Generating a slide deck of %d slides about %s" %
                 (self.slide_count, self.title))

        try:
            self.log("Getting slide content...")
            self.my_face.set_topic(self.title)

            self.log("Generating slide titles...")
            self.slide_titles = self.my_face.get_titles(self.slide_count)

            self.log("Generating slide bullets...")
            self.slide_bullets = self.my_face.get_bullets(self.slide_count * 3)
        except:
            self.log(
                "Problem generating content for a talk on %s, exiting..." %
                self.title)
            return

        #self.farm_gif_term(self.title)
        #sp = self.title.split(" ")
        #if len(sp) > 1:
        #    for i in range(len(sp)):
        #        if len(sp[i]) > 5:
        #            self.farm_gif_term(sp[i])
        #self.farm_image_term(self.title)

        self.log_slide_weights()

        self.create_title_slide()
        self.create_slides()

        slide_path = os.path.join(self.output_dir, "%s.pptx" % self.title)
        self.ppt.save(slide_path)

        self.log("Successfully generated PPT on %s to %s" %
                 (self.title, slide_path))

    def create_title_slide(self):
        title_slide_layout = self.ppt.slide_layouts[0]
        slide = self.ppt.slides.add_slide(title_slide_layout)
        title = slide.shapes.title
        subtitle = slide.placeholders[1]

        title.text = self.title
        subtitle.text = self.presenter

    def create_slides(self):
        for i in range(self.slide_count):
            choice = self.slide_weights.choose_weighted()

            self.log("  Generating slide #%d: %s" % (i + 1, choice))

            new_slide_layout = None
            if choice == "Single GIF":
                ns = self.create_gif_slide(random.choice(self.slide_titles),
                                           self.get_giphy_search_term(), i)
            elif choice == "Full Slide GIF":
                ns = self.create_full_gif_slide(self.get_giphy_search_term(),
                                                i)
            elif choice == "Single Image":
                ns = self.create_image_slide(random.choice(self.slide_titles),
                                             self.get_image_search_term(), i)
            elif choice == "Full Slide Image":
                ns = self.create_full_image_slide(self.get_image_search_term(),
                                                  i)
            elif choice == "Information":
                ns = self.create_info_slide(i)
            elif choice == "Quotation":
                ns = self.create_quote_slide()

    def create_single_full_image_slide(self, image_path):
        blank_slide_layout = self.ppt.slide_layouts[6]
        new_slide = self.ppt.slides.add_slide(blank_slide_layout)

        left = Inches(0)
        top = Inches(0)
        height = Inches(8)
        width = Inches(10)
        pic = new_slide.shapes.add_picture(image_path,
                                           left,
                                           top,
                                           height=height,
                                           width=width)
        return new_slide

    def create_single_image_slide(self, slide_title, image_path):

        blank_slide_layout = self.ppt.slide_layouts[1]
        new_slide = self.ppt.slides.add_slide(blank_slide_layout)

        for shape in new_slide.shapes:
            if shape.is_placeholder:
                phf = shape.placeholder_format

                if phf.type == 1:
                    shape.text = slide_title

        left = Inches(1)
        top = Inches(1)
        height = Inches(6)
        width = Inches(8)
        pic = new_slide.shapes.add_picture(image_path,
                                           left,
                                           top,
                                           height=height,
                                           width=width)

        return new_slide

    def download_gif(self, term, slide_num):
        # If we have at least 3 local gifs, use one of those
        if (term in self.gifs) and (len(self.gifs[term]) > 3):
            return os.path.join("GIFs",
                                "%s.gif" % random.choice(self.gifs[term]))

        try:
            # Download the gif
            img = translate(term)
            image_path = os.path.join(self.resources_dir, "%d.gif" % slide_num)
            wget.download(img.fixed_height.url, image_path)

            file_hasher = hashlib.md5()
            with open(image_path, "rb") as f:
                file_hasher.update(f.read())
            file_md5 = file_hasher.hexdigest()

            if not (term in self.gifs):
                self.gifs[term] = []

            if not (file_md5 in self.gifs[term]):
                self.gifs[term].append(file_md5)
                shutil.copy(image_path,
                            os.path.join("GIFs", "%s.gif" % file_md5))
                with open(os.path.join("GIFs", "hashes.json"), "w") as f:
                    json.dump(self.gifs, f, indent=2)

            return image_path
        except:
            return None

    def download_image(self, term, slide_num):
        # If we have at least 3 local images, use one of those
        if (term in self.images) and (len(self.images[term]) > 3):
            return os.path.join("Images",
                                "%s.img" % random.choice(self.images[term]))

        try:
            search_term = term
            if (random.randint(0, 100) % 2) == 0:
                search_term = self.title

            download_attempts = 0
            image_bytes = ""
            image_path = ""
            while download_attempts < 10:

                fetcher = urllib2.build_opener()
                start_index = random.randint(0, 50)
                search_url = "http://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=%s&start=%s" % (
                    search_term, str(start_index))
                f = fetcher.open(search_url)
                deserialized_output = simplejson.load(f)

                image_url = deserialized_output['responseData']['results'][
                    random.randint(
                        0,
                        len(deserialized_output['responseData']['results']) -
                        1)]['unescapedUrl']
                image_path = os.path.join(self.resources_dir,
                                          "%d.img" % slide_num)
                wget.download(image_url, image_path)

                with open(image_path, "rb") as f:
                    image_bytes = f.read()

                if (not image_bytes.startswith("<!DOCTYPE html>")) and (
                        not image_bytes.startswith("<html>")):
                    break

                download_attempts += 1
                self.log(
                    "    Attempting to download image about %s failed try #%d"
                    % (search_term, download_attempts))

            if image_bytes.startswith(
                    "<!DOCTYPE html") or image_bytes.startswith("<html>"):
                return None

            file_hasher = hashlib.md5()
            file_hasher.update(image_bytes)
            file_md5 = file_hasher.hexdigest()

            if not (term in self.images):
                self.images[term] = []

            if not (file_md5 in self.images[term]):
                self.images[term].append(file_md5)
                shutil.copy(image_path,
                            os.path.join("Images", "%s.img" % file_md5))
                with open(os.path.join("Images", "hashes.json"), "w") as f:
                    json.dump(self.images, f, indent=2)

            return image_path
        except:
            return None

    def create_gif_slide(self, slide_title, term, slide_num):
        image_path = self.download_gif(term, slide_num)
        if image_path:
            return self.create_single_image_slide(slide_title, image_path)

    def create_full_gif_slide(self, term, slide_num):
        image_path = self.download_gif(term, slide_num)
        if image_path:
            return self.create_single_full_image_slide(image_path)

    def create_image_slide(self, slide_title, term, slide_num):
        while True:
            try:
                image_path = self.download_image(term, slide_num)
                if image_path:
                    return self.create_single_image_slide(
                        slide_title, image_path)
            except:
                pass

    def create_full_image_slide(self, term, slide_num):
        image_path = self.download_image(term, slide_num)
        if image_path:
            return self.create_single_full_image_slide(image_path)

    def create_info_slide(self, slide_num):
        slide_title_info = random.choice(self.slide_titles)
        slide_title = slide_title_info
        if (random.randint(0, 100) % 3) == 0:
            slide_title = self.get_markov_proverb()

        sb = random.sample(self.slide_bullets, random.randint(1, 4))
        if (random.randint(0, 100) % 4) == 0:
            sb.append(self.get_markov_proverb())

        bullet_slide_layout = self.ppt.slide_layouts[1]
        new_slide = self.ppt.slides.add_slide(bullet_slide_layout)
        shapes = new_slide.shapes

        title_shape = shapes.title
        body_shape = shapes.placeholders[1]
        body_shape.width = Inches(4)
        body_shape.left = Inches(1)
        body_shape.top = Inches(2)

        title_shape.text = slide_title

        tf = body_shape.text_frame
        for b in sb:
            p = tf.add_paragraph()
            #p.text = b

            p.alignment = PP_PARAGRAPH_ALIGNMENT.LEFT
            run1 = p.add_run()
            run1.text = b
            font1 = run1.font
            font1.name = 'Sans Serif'
            font1.size = Pt(20)
            font1.italic = True
            font1.bold = True

        image_path = None
        attempts = 0
        while attempts < 10:
            try:
                tries = 0
                while (not image_path) and (tries < 10):
                    if (random.randint(0, 100) % 2) == 0:
                        search_term = self.get_giphy_search_term()
                        image_path = self.download_gif(search_term, slide_num)
                    else:
                        search_term = self.get_image_search_term()
                        image_path = self.download_image(
                            search_term, slide_num)

                    tries += 1

                if tries < 10:
                    left = Inches(5.5)
                    top = Inches(3)
                    #height = Inches(3)
                    width = Inches(3)
                    pic = new_slide.shapes.add_picture(image_path,
                                                       left,
                                                       top,
                                                       width=width)
                    break
                attempts += 1

            except:
                attempts += 1

        return new_slide

    def create_quote_slide(self):
        # Pick a random quote category and quote
        cat = random.choice(self.terms["quote_categories"])
        with open(os.path.join("Quotes", "quotes_%s.json" % cat)) as f:
            q1 = random.choice(json.load(f))

        cat = random.choice(self.terms["quote_categories"])
        with open(os.path.join("Quotes", "quotes_%s.json" % cat)) as f:
            q2 = random.choice(json.load(f))

        quote_text = "\"%s\"" % q1["quote"]
        if (random.randint(0, 100) % 5) == 0:
            quote_text = random.choice(self.proverbs)

        quote_author = "- %s" % q2["name"]

        blank_slide_layout = self.ppt.slide_layouts[2]
        new_slide = self.ppt.slides.add_slide(blank_slide_layout)

        for shape in new_slide.shapes:
            if shape.is_placeholder:
                phf = shape.placeholder_format
                if phf.type == 1:
                    # Put in the quote title
                    shape.text = random.choice(self.terms["quote_titles"])

                elif phf.type == 2:
                    text_frame = shape.text_frame

                    # Create the quote text paragraph
                    p1 = text_frame.paragraphs[0]
                    p1.alignment = PP_PARAGRAPH_ALIGNMENT.LEFT
                    run1 = p1.add_run()
                    run1.text = quote_text
                    font1 = run1.font
                    font1.name = 'Sans Serif'
                    font1.size = Pt(30)
                    font1.italic = True
                    font1.bold = True

                    # Create the Author text paragraph
                    p2 = text_frame.add_paragraph()
                    p2.alignment = PP_PARAGRAPH_ALIGNMENT.RIGHT
                    run2 = p2.add_run()
                    run2.text = quote_author
                    font2 = run2.font
                    font2.name = 'Calibri'
                    font2.size = Pt(24)

        return new_slide

    def get_giphy_search_term(self):
        st = random.choice(self.terms["giphy_searches"])
        if (random.randint(0, 100) % 5) == 0:
            st = self.title
        return st

    def get_image_search_term(self):
        st = random.choice(self.terms["image_searches"])
        if (random.randint(0, 100) % 2) == 0:
            st = self.title
        return st

    def get_proverb(self):
        return random.choice(self.proverb_lines)

    def get_markov_proverb(self, min=5, max=10):
        b = ""

        while True:
            b = self.proverb_markov.generateString()
            s = b.split(" ")
            if min <= len(s) <= max:
                break

        return b

    def add_term(self, term_type, term):
        if term in self.terms[term_type]:
            return "Term \"%s\" is already in %s!" % (term, term_type)
        else:
            self.terms[term_type].append(term)
            with open("terms.json", "w") as f:
                json.dump(self.terms, f, indent=4)
            return "Term \"%s\" added to %s." % (term, term_type)

    def delete_term(self, term_type, term):
        if not (term in self.terms[term_type]):
            return "Term \"%s\" isn't in %s, can't delete!" % (term, term_type)
        else:
            self.terms[term_type].remove(term)
            with open("terms.json", "w") as f:
                json.dump(self.terms, f, indent=4)
            return "Term \"%s\" removed from %s." % (term, term_type)

    def show_term_counts(self, term_type, term_json):
        log_str = "%s Terms:\n" % term_type
        for term in self.terms[term_type]:
            if term in term_json:
                log_str += "  %s: %d\n" % (term, len(term_json[term]))
            else:
                log_str += "  %s: 0\n" % term
        self.log(log_str)

    def get_file_md5(self, file_path):
        with open(file_path, "rb") as f:
            image_bytes = f.read()

        file_hasher = hashlib.md5()
        file_hasher.update(image_bytes)
        return file_hasher.hexdigest()

    def farm_image_term(self, term, amount=25, threshold=10):
        self.log("Farming images for %s..." % term)

        if not (term in self.images):
            self.images[term] = []

        attempt_count = 0
        while (attempt_count < threshold) and (len(self.images[term]) <
                                               amount):
            myopener = MyOpener()
            page = myopener.open(
                'https://www.google.pt/search?q=%s&source=lnms&tbm=isch&sa=X&tbs=isz:l&tbm=isch'
                % term.replace(" ", "+"))
            html = page.read()

            for match in re.finditer(
                    r'<a href="/imgres\?imgurl=(.*?)&amp;imgrefurl', html,
                    re.IGNORECASE | re.DOTALL | re.MULTILINE):
                if len(self.images[term]) >= amount:
                    break

                try:
                    os.remove("test.img")
                except:
                    pass

                try:
                    path = urlparse.urlsplit(match.group(1)).path
                    self.log("  Downloading %s" % match.group(1))
                    myopener.retrieve(match.group(1), "test.img")

                    image_md5 = self.get_file_md5("test.img")

                    if not (image_md5 in self.images[term]):
                        self.images[term].append(image_md5)
                        shutil.copy(
                            "test.img",
                            os.path.join("Images", "%s.img" % image_md5))
                        os.remove("test.img")
                        self.log("    Image saved to archive. %d/%d images." %
                                 (len(self.images[term]), amount))
                        attempt_count = 0
                    else:
                        self.log("    Already had image!")
                        attempt_count += 1
                except:
                    self.log("    Downloading failed")
                    attempt_count += 1

        self.log("Farming of %s images complete, now holding %d images" %
                 (term, len(self.images[term])))

        with open(os.path.join("Images", "hashes.json"), "w") as f:
            json.dump(self.images, f, indent=2)

    def farm_images(self, amount=25, threshold=10):
        self.show_term_counts("image_searches", self.images)

        all_farm = self.terms["image_searches"]
        all_farm.extend(self.terms["talk_titles"])

        for term in all_farm:
            self.farm_image_term(term, amount, threshold)

    def farm_gif_term(self, term, amount=25, threshold=10):
        self.log("Farming GIFs for %s..." % term)

        if not (term in self.gifs):
            self.gifs[term] = []

        attempt_count = 0
        while (attempt_count < threshold) and (len(self.gifs[term]) < amount):

            image_path = "test.gif"
            try:
                os.remove(image_path)
            except:
                pass

            try:
                img = translate(term)
                wget.download(img.fixed_height.url, image_path)

                image_md5 = self.get_file_md5("test.gif")

                if not (image_md5 in self.gifs[term]):
                    self.gifs[term].append(image_md5)
                    shutil.copy(image_path,
                                os.path.join("GIFs", "%s.gif" % image_md5))
                    self.log("    GIF saved to archive. %d/%d GIFs." %
                             (len(self.gifs[term]), amount))
                    attempt_count = 0
                else:
                    self.log("    Already had GIF!")
                    attempt_count += 1
            except:
                self.log("    Downloading failed")
                attempt_count += 1

        self.log("Farming of %s GIFs complete, now holding %d GIFs" %
                 (term, len(self.gifs[term])))

        with open(os.path.join("GIFs", "hashes.json"), "w") as f:
            json.dump(self.gifs, f, indent=2)

    def farm_gifs(self, amount=25, threshold=10):
        self.show_term_counts("giphy_searches", self.gifs)

        all_farm = self.terms["giphy_searches"]
        all_farm.extend(self.terms["talk_titles"])

        for term in all_farm:

            self.log("Farming GIFs for %s..." % term)

            if not (term in self.gifs):
                self.gifs[term] = []

            self.farm_gif_term(term, amount, threshold)

    def farm_content(self, all_content):
        for talk_title in self.terms["talk_titles"]:
            talk_path = os.path.join("Content", "%s.txt" % talk_title)
            # Either we're replacing all content or we're only replacing files that don't exist
            if all_content or (not os.path.exists(talk_path)):
                self.log("Farming data on %s..." % talk_title)
                with open(talk_path, "w") as f:
                    content = self.my_face.fully_research_topic(
                        talk_title, self.log)
                    if type(content) is str:
                        clean_content = content
                    else:
                        clean_content = unicodedata.normalize(
                            'NFKD', content).encode('ascii', 'ignore')
                    f.write(clean_content)

    def log_slide_weights(self):
        self.log(self.slide_weights.get_weights_string())

    def log(self, message):
        if self.console:
            self.console.config(state=tk.NORMAL)
            self.console.insert(tk.END, "%s\n" % message)
            self.console.see(tk.END)
            self.console.config(state=tk.DISABLED)
            self.console.update()
        else:
            print(message)
Example #34
# https://github.com/TehMillhouse/PyMarkovChain
# pip install PyMarkovChain
from pymarkovchain import MarkovChain

mc = MarkovChain("./am_m")
with open('cap_short.txt', 'r') as f:
    mc.generateDatabase(f.read())
for x in range(0, 20):
    print(mc.generateString())
Example #35
from pymarkovchain import MarkovChain

mc = MarkovChain("./markov")

texts = [
    "text/confessions.txt",
    "text/discourses-and-social-contract.txt",
    "text/emile.txt"
]

entire_string = ""

for text_url in texts:
    f = open(text_url, 'r')
    entire_string += f.read()
    entire_string += "\n"
    f.close()

test = open("test.txt", 'w')
test.write(entire_string)
test.close()

mc.generateDatabase(entire_string, '\n')

print(mc.generateString())

for i in range(10000):
    f = open("output/{0}.txt".format(i), 'w')
    f.write(mc.generateString().strip())
    f.close()
Example #36
# from Marky import marky
from pymarkovchain import MarkovChain
import json
import re

data = json.load(open("results-initial.json"))
text = "\n".join([d['title'] for d in data if d is not ""]).lower()

mc = MarkovChain("./markov")
mc.generateDatabase(text)
print mc.generateString()
Example #37
        next_val = cur_ps - first_ps
        # Limit the size of jumps?
        if next_val < 8.0 and next_val > -8.0:
          to_add = str(next_val)

    # Add the duration
    to_add = to_add + "!@#" + dur
  db = db + (' ' + to_add)
  db = db + ('\n')

keepGoing = 1
sen = ""

while keepGoing == 1:
  mc.generateDatabase(db, '\n')
  sen = mc.generateString()
  sen = sen.split(' ')
  length = 0.0
  counter = 0
  for word in sen:
    counter = counter + 1
    val_dur = word.split('!@#')
    dur = val_dur[1]
    cur_dur = assoc[dur]
    length += cur_dur.quarterLength
  if length == 4.0 and counter > 10:
    keepGoing = 0


s1 = stream.Stream()
Example #38
        name = cur_note.fullName
        '''if "Chord" in name:
      n = note.Note(cur_note.pitches[0])
      n.duration = cur_note.duration
      print cur_note
      print n
      cur_note = n
      name = n.fullName'''
        name = name.replace(' ', '#')
        assoc[name] = cur_note
        db = db + (' ' + name)
    db = db + ('\n')

mc.generateDatabase(db, '\n')
sen = mc.generateString()
s1 = stream.Stream()

keepGoing = 1
sen = ""

while keepGoing == 1:
    mc.generateDatabase(db, '\n')
    sen = mc.generateString()
    sen = sen.split(' ')
    length = 0.0
    counter = 0
    for word in sen:
        counter = counter + 1
        val_dur = word.split('!@#')
        dur = val_dur[1]
Example #39
        help="The number of strings to generate.")
PARSER.add_argument('--minlen', metavar="LENGTH", type=int,
        help="Throw out strings shorter than this.", default=3)
PARSER.add_argument('--notags', action="store_true",
        help="Don't generate tags (legacy database compat behaviour)")

ARGS = PARSER.parse_args()

FILENAME = ARGS.filename
NUMBER = ARGS.number

BOT = MarkovChain(FILENAME)

VALID_SENTENCES = 0
while VALID_SENTENCES < NUMBER:
    SENTENCE = BOT.generateString()
    if len(SENTENCE.split()) < ARGS.minlen:
        continue
    VALID_SENTENCES += 1
    print(SENTENCE)

    if not ARGS.notags:
        try:
            TAGS=BOT.generateStringWithSeed("#")
            print(TAGS)    
            print(" --- ")
        except pymarkovchain.StringContinuationImpossibleError as e:
            print("[FATAL] Your database does not have tag data.")
            print("You can still generate posts without tags using --notags")
            import sys
            sys.exit(1)
Beispiel #40
0
#!/usr/bin/env python

# import PyMarkovChain
from pymarkovchain import MarkovChain

mc = MarkovChain("./markov")

for i in range(1, 26):
    markovStr = mc.generateString()
    while len(markovStr) < 75: # or len(markovStr) > 115:
        markovStr = mc.generateString()
    # print "[" + str(i) + "] " + markovStr
    print markovStr
Beispiel #41
0
def getLines():
    mc = MarkovChain("")
    mc.generateDatabase(r.content)
    tweet = mc.generateString()
    tweet = tweet[:140].rsplit('\n', 1)[0]  # r'\n' matched a literal backslash-n, not a newline
    return tweet
Beispiel #42
0
class MarkovBot(BotPlugin):
    def __init__(self):
        super(MarkovBot, self).__init__()
        self.sentenceSep = None
        self.markov = MarkovChain(dbFilePath="./markovdb")

    @botcmd
    def talk(self, mess, args):
        """ Generate a sentence based on database """
        return self.markov.generateString()

    @botcmd
    def complete(self, mess, args):
        """ Try to complete a sentence """
        return self.markov.generateStringWithSeed(args)

    @botcmd
    def gendbfromfile(self, mess, args):
        """ Generate markov chain word database based on local file """
        try:
            with open(args) as txtFile:
                txt = txtFile.read()
        except IOError as e:
            return "Error: could not open text file"
        # At this point, we've got the file contents
        if self.sentenceSep:
            result = self.markov.generateDatabase(txt, self.sentenceSep)
        else:
            result = self.markov.generateDatabase(txt)
        if result:
            return "Done."
        else:
            return "Error: Could not generate database"

    @botcmd
    def setsentencesep(self, mess, args):
        """ Specify how to detect sentence borders """
        self.sentenceSep = args

    @botcmd
    def gendbfromstring(self, mess, args):
        """ Generate markov chain word database based on given string """
        if self.sentenceSep:
            result = self.markov.generateDatabase(args, self.sentenceSep)
        else:
            result = self.markov.generateDatabase(args)
        if result:
            return "Done."
        else:
            return "Error: Could not generate database from String"

    @botcmd
    def gendbfromurl(self, mess, args):
        """ Generate markov chain word database based on contents of url """
        response, content = httplib2.Http().request(args, "GET")
        if response["status"] == "200":
            if self.sentenceSep:
                result = self.markov.generateDatabase(content.decode("utf-8"), self.sentenceSep)
            else:
                result = self.markov.generateDatabase(content.decode("utf-8"))
            if result:
                return "Done."
            else:
                return "Error: Could not generate database from URL"
Beispiel #43
0
        if len(corpus) == 0:
          corpus = text
        else:
          corpus = corpus + ' ' + text

    print("Setting up Markov chain database...")
    chain = MarkovChain("./markov")
    print("Generating Markov chain database...")
    chain.generateDatabase(corpus)

    # 9 tweets * 15 minutes = API update every ~135 minutes
    #  that will change based on new timing between tweets, *shrugs*
    print("Beginning tweet loop.")
    for x in xrange(9):
      print("Tweet " + str(x) + " of tweet loop. (max=9)")
      status = chain.generateString()
      print('Tweet created: "' + status + '"')

      status = status.replace("%2E", ".")
      status = status.replace("&amp;", "&")
      status = status.replace("&lt;", "<")
      status = status.replace("&gt;", ">")
      status = status.replace("@", "")                    # cutting out all @'s entirely
      status = status.replace("twitter.com", "abc.xyz")   # why did I do this?
      print('Tweet modified to "' + status + '".')

      if len(status) > 140:
        status = status[0:136] + '...'
        print('Tweet shortened to: "' + status + '"')

      print("Sending tweet.")
    db_name_hashed = hashlib.md5(
        artist_name.lower().encode('utf-8')).hexdigest()
    mc = MarkovChain(db_name_hashed)

    # Check whether the database already exists; if so, use the cache instead of another API call
    if not os.path.isfile(db_name_hashed):
        print(
            "No data cached. Please be patient while we search the lyrics of %s."
            % artist_name)

        # Adding lyrics to a single gigant string
        lyrics = ''

        # Parsing each lyric from this artist.
        # [http://api.wikia.com/wiki/LyricWiki_API]
        artist = requests.get(API_URI, params=params).json()
        for album in artist['albums']:
            for song in album['songs']:
                params = {'artist': artist_name, 'song': song}
                print("Parsing \"{}\" from Wikia.".format(song))
                response = requests.get(API_URI,
                                        params=params).json()["lyrics"]
                lyrics += response.replace('[...]', '') + ' '

        # Generating the database
        mc.generateDatabase(lyrics)
        mc.dumpdb()

    # Printing a string
    for i in range(0, int(number_of_phrases)):
        print(mc.generateString())
Beispiel #45
0
			final += ts
			sd = True
			seed = ts.split()[-1]
			seed = seed.translate(string.maketrans("",""), string.punctuation)
			c += 1
		print ("\n" + final + "\n")
		raw_input("press enter to continue...")
if raw_input("press enter to begin.") == "beta":
	pgraph()

sd = False
s = ""
while True:
	if not sd:
		ts = mc.generateString()
	else:
		ts = mc.generateStringWithSeed(s)
	if countString(ts) >= msl:
		os.system("clear")
		print ("\n" + ts + "\n")
		sd = False
		s = raw_input("\npress enter to generate string. : ")
		if s == '!pg':
			pgraph()
		if (len(s) > 0):
			if f.find(s) != -1:
				sd = True
			else:
				raw_input('could not find "' + s + '" in database\npress enter to continue')
				sd = False
Beispiel #47
0
class NaNoGenMo:
    def __init__(self, avg_wordlen, min_dataset_size, target_wordcount, num_chaps, min_graf_len, max_graf_len, search_term, related):
        self.WORDLEN = avg_wordlen
        self.DATASET = min_dataset_size
        self.WORDCNT = target_wordcount
        self.NUM_CHAPS = num_chaps
        self.GRAF_MIN = min_graf_len
        self.GRAF_MAX = max_graf_len
        self.SEARCH_TERM = search_term
        self.RELATED = related
        self.rand = Random()
        self.dictfile = None  # set via set_dict(); prepare_dict() refuses to run without it

    def set_dict(self, dictfile):
        self.dictfile = dictfile

    def build_source(self):
        source = ""
        # grab random Wikipedia pages until we have enough bytes to (probably) have at least DATASET words.
        iterations = 0
        page = None
        while len(source) < self.WORDLEN * self.DATASET:
            title = wikipedia.random()
            if self.RELATED:
                sys.stderr.write("using related mode\n")
                if iterations == 0:
                    sys.stderr.write("first page\n")
                    if self.SEARCH_TERM is not None:
                        sys.stderr.write("using search term \"%s\" instead of random\n" % self.SEARCH_TERM)
                        title = self.SEARCH_TERM
                    else:
                        sys.stderr.write("using random page title \"%s\"\n" % title)
                else:
                    if len(page.links) > 0:
                        ix = self.rand.randint(0, len(page.links) - 1)
                        title = page.links[ix]
                        sys.stderr.write("using related page title \"%s\"\n" % title)
            else:
                sys.stderr.write("using all random page titles. this one is \"%s\"\n" % title)
            iterations += 1

            # this is in a try/except because wikipedia.page() will throw an exception if it only gets a
            # disambiguation page. we don't care about that, so we just try another random title.
            try:
                page = wikipedia.page(title)
                # remove Wikipedia's section markers. there's probably an easier way to do that. If we leave them in
                # pymarkovchain treats them as "words", so the output text is full of "===" and whatnot.
                content = page.content.replace("====", "").replace("===", "").replace("==", "")
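                # (a regex such as re.sub(r"={2,}", "", page.content) would strip
                # markers of any heading depth in one pass; sketch only, not used here)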
                source += "\n" + content
                # TODO: instead of completely random pages, this could start with a random wiki page, then expand its
                # dataset by following links from that page. that might produce somewhat more apparent thematic coherence
                # but on the other hand it might not.
            except:
                pass
        return source

    def prepare_dict(self):
        if self.dictfile is None:
            print "error: no dictfile"
            return
        # now build the markov database. just using pymarkovchain's default settings for now. will fail if it doesn't
        # have write access to $PWD.
        chain = MarkovChain("./markov")

        source = self.build_source()
        chain.generateDatabase(source)

        # seem to need to do this to reload the database after generating it
        self.chain = MarkovChain("./markov")

    def generate(self):
        novel = ""
        chap = ""
        chapnum = 1

        # now generate the actual novel, sentence by sentence, until it's at least WORDCNT words.
        while wordcount(novel) < self.WORDCNT:
            # chapter headings and paragraph breaks make it more readable.
            chap = "\n\n===CHAPTER %d===\n\n" % chapnum
            # for now we're just making roughly equal-sized chapters.
            while wordcount(chap) < (self.WORDCNT / self.NUM_CHAPS):
                graf = ""
                s = 0
                # how many sentences for this paragraph?
                gl = self.rand.randint(self.GRAF_MIN, self.GRAF_MAX)
                while s < gl:
                    # if this isn't the first sentence in the paragraph, append a space after the last one.
                    if len(graf) > 0:
                        graf += " "
                    # if this isn't the first paragraph in the chapter, start it with a tab.
                    elif len(chap) > 0:
                        graf += "\t"
                    # generate the actual string
                    graf += self.chain.generateString()
                    # simplistic weighted random selection of sentence-ending punctuation. 70% chance of a period,
                    # 20% chance of a question mark, 10% chance of an exclamation point. those are guessed values, I
                    # haven't made any effort to assess whether it feels "right" in the resulting text.
                    i = self.rand.randint(1, 10)  # 1..10 makes the 70/20/10 split above exact
                    if i <= 7:
                        graf += "."
                    elif i <= 9:
                        graf += "?"
                    else:
                        graf += "!"
                    s += 1
                # blank lines between paragraphs
                chap += graf
                chap += "\n\n"
            chapnum += 1
            novel += chap
        return novel
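
# A hypothetical driver for the NaNoGenMo class above: argument names follow the
# constructor signature, and wordcount() is the helper the class calls but which
# is not shown in this fragment (a sketch, not part of the original example).
def wordcount(text):
    return len(text.split())

gen = NaNoGenMo(avg_wordlen=6, min_dataset_size=20000, target_wordcount=50000,
                num_chaps=10, min_graf_len=3, max_graf_len=8,
                search_term=None, related=False)
gen.set_dict("./markov")  # any non-None value; prepare_dict() hardcodes its own path
gen.prepare_dict()
print gen.generate()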
Beispiel #48
0
    
    # generate a markov chain based text from the input
    if args.generate and args.generate > 0:
        # disable error message about on-the-fly database
        logging.disable(logging.WARNING)
        mc = MarkovChain("./markov-chain-database")
        mc.generateDatabase(inputText)

        # reinstate logging
        logging.disable(logging.NOTSET)

        generatedText = ""
        while len(generatedText) < args.generate:
            if generatedText is not "":
                generatedText = generatedText + " "
            generatedText = generatedText + mc.generateString()
        inputText = generatedText

    if args.filter_punctuation:
        inputText = text.removePunctuation(inputText)

    if args.filter_numbers:
        inputText = text.removeNumbers(inputText)

    force = []
    if args.input_force:
        force = args.input_force
        

    inputText = inputText.split()
    inputNumWords = len(inputText)
Beispiel #49
0
class Trollette:
    def __init__(self):
        self.presenter = ""
        self.title = ""

        self.slide_count = 0
        self.slide_min = 15
        self.slide_max = 25

        self.console = None
        self.output_dir = ""

        with open("terms.json", "r") as f:
            self.terms = json.load(f)

        with open(os.path.join("GIFs", "hashes.json"), "r") as f:
            self.gifs = json.load(f)

        with open(os.path.join("Images", "hashes.json"), "r") as f:
            self.images = json.load(f)

        # Load up the proverb data
        with open(os.path.join("Proverbs", "facts"), "r") as f:
            self.proverb_lines = f.readlines()
        self.proverbs = [line.strip() for line in self.proverb_lines]
        self.proverb_markov = MarkovChain("markov.db")
        self.proverb_markov.generateDatabase("".join(self.proverb_lines), n=1)

        # Make the text data
        # self.my_face = comptroller.face(self.title)
        # self.slide_titles = self.my_face.get_titles(50)
        # self.slide_bullets = self.my_face.get_bullets(100)

        self.my_face = Face("")

        self.slide_titles = ["shit", "balls", "butts"]
        self.slide_bullets = ["butts", "do", "stuff", "f***s", "more f***s"]

        self.ppt = Presentation()
        self.slide_weights = SlideWeights()

    def generate_slide_deck(self):
        # Create a place to put data and resources
        self.output_dir = os.path.join("Output", "%s_%s_%s" % (self.title,
                                                               self.presenter,
                                                               datetime.datetime.strftime(datetime.datetime.now(), '%Y_%m_%d_%H_%M_%S')))

        self.resources_dir = os.path.join(self.output_dir, "Resources")

        # Start with a fresh PowerPoint
        self.ppt = Presentation()

        # Make sure the directories exist
        try:
            os.makedirs(self.output_dir)
            os.makedirs(self.resources_dir)
        except:
            self.log("Directory %s already exists, overwriting..." % self.output_dir)

        self.slide_count = random.randint(self.slide_min, self.slide_max)
        self.log("Generating a slide deck of %d slides about %s" % (self.slide_count, self.title))

        try:
            self.log("Getting slide content...")
            self.my_face.set_topic(self.title)

            self.log("Generating slide titles...")
            self.slide_titles = self.my_face.get_titles(self.slide_count)

            self.log("Generating slide bullets...")
            self.slide_bullets = self.my_face.get_bullets(self.slide_count*3)
        except:
            self.log("Problem generating content for a talk on %s, exiting..." % self.title)
            return

        #self.farm_gif_term(self.title)
        #sp = self.title.split(" ")
        #if len(sp) > 1:
        #    for i in range(len(sp)):
        #        if len(sp[i]) > 5:
        #            self.farm_gif_term(sp[i])
        #self.farm_image_term(self.title)

        self.log_slide_weights()

        self.create_title_slide()
        self.create_slides()

        slide_path = os.path.join(self.output_dir, "%s.pptx" % self.title)
        self.ppt.save(slide_path)

        self.log("Successfully generated PPT on %s to %s" % (self.title, slide_path))

    def create_title_slide(self):
        title_slide_layout = self.ppt.slide_layouts[0]
        slide = self.ppt.slides.add_slide(title_slide_layout)
        title = slide.shapes.title
        subtitle = slide.placeholders[1]

        title.text = self.title
        subtitle.text = self.presenter

    def create_slides(self):
        for i in range(self.slide_count):
            choice = self.slide_weights.choose_weighted()

            self.log("  Generating slide #%d: %s" % (i+1, choice))

            if choice == "Single GIF":
                ns = self.create_gif_slide(random.choice(self.slide_titles), self.get_giphy_search_term(), i)
            elif choice == "Full Slide GIF":
                ns = self.create_full_gif_slide(self.get_giphy_search_term(), i)
            elif choice == "Single Image":
                ns = self.create_image_slide(random.choice(self.slide_titles), self.get_image_search_term(), i)
            elif choice == "Full Slide Image":
                ns = self.create_full_image_slide(self.get_image_search_term(), i)
            elif choice == "Information":
                ns = self.create_info_slide(i)
            elif choice == "Quotation":
                ns = self.create_quote_slide()

    def create_single_full_image_slide(self, image_path):
        blank_slide_layout = self.ppt.slide_layouts[6]
        new_slide = self.ppt.slides.add_slide(blank_slide_layout)

        left = Inches(0)
        top = Inches(0)
        height = Inches(8)
        width = Inches(10)
        pic = new_slide.shapes.add_picture(image_path, left, top, height=height, width=width)
        return new_slide

    def create_single_image_slide(self, slide_title, image_path):

        blank_slide_layout = self.ppt.slide_layouts[1]
        new_slide = self.ppt.slides.add_slide(blank_slide_layout)

        for shape in new_slide.shapes:
            if shape.is_placeholder:
                phf = shape.placeholder_format

                if phf.type == 1:
                    shape.text = slide_title

        left = Inches(1)
        top = Inches(1)
        height = Inches(6)
        width = Inches(8)
        pic = new_slide.shapes.add_picture(image_path, left, top, height=height, width=width)

        return new_slide

    def download_gif(self, term, slide_num):
        # If we have more than 3 local gifs, use one of those
        if (term in self.gifs) and (len(self.gifs[term]) > 3):
            return os.path.join("GIFs", "%s.gif" % random.choice(self.gifs[term]))

        try:
            # Download the gif
            img = translate(term)
            image_path = os.path.join(self.resources_dir, "%d.gif" % slide_num)
            wget.download(img.fixed_height.url, image_path)

            file_hasher = hashlib.md5()
            with open(image_path, "rb") as f:
                file_hasher.update(f.read())
            file_md5 = file_hasher.hexdigest()

            if not (term in self.gifs):
                self.gifs[term] = []

            if not (file_md5 in self.gifs[term]):
                self.gifs[term].append(file_md5)
                shutil.copy(image_path, os.path.join("GIFs", "%s.gif" % file_md5))
                with open(os.path.join("GIFs", "hashes.json"), "w") as f:
                    json.dump(self.gifs, f, indent=2)

            return image_path
        except:
            return None

    def download_image(self, term, slide_num):
        # If we have more than 3 local images, use one of those
        if (term in self.images) and (len(self.images[term]) > 3):
            return os.path.join("Images", "%s.img" % random.choice(self.images[term]))

        try:
            search_term = term
            if (random.randint(0, 100) % 2) == 0:
                search_term = self.title

            download_attempts = 0
            image_bytes = ""
            image_path = ""
            while download_attempts < 10:

                fetcher = urllib2.build_opener()
                start_index = random.randint(0, 50)
                search_url = "http://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=%s&start=%s" % (search_term, str(start_index))
                f = fetcher.open(search_url)
                deserialized_output = simplejson.load(f)

                image_url = deserialized_output['responseData']['results'][random.randint(0, len(deserialized_output['responseData']['results'])-1)]['unescapedUrl']
                image_path = os.path.join(self.resources_dir, "%d.img" % slide_num)
                wget.download(image_url, image_path)

                with open(image_path, "rb") as f:
                    image_bytes = f.read()

                if (not image_bytes.startswith("<!DOCTYPE html>")) and (not image_bytes.startswith("<html>")):
                    break

                download_attempts += 1
                self.log("    Attempting to download image about %s failed try #%d" % (search_term, download_attempts))

            if image_bytes.startswith("<!DOCTYPE html") or image_bytes.startswith("<html>"):
                return None

            file_hasher = hashlib.md5()
            file_hasher.update(image_bytes)
            file_md5 = file_hasher.hexdigest()

            if not (term in self.images):
                self.images[term] = []

            if not (file_md5 in self.images[term]):
                self.images[term].append(file_md5)
                shutil.copy(image_path, os.path.join("Images", "%s.img" % file_md5))
                with open(os.path.join("Images", "hashes.json"), "w") as f:
                    json.dump(self.images, f, indent=2)

            return image_path
        except:
            return None

    def create_gif_slide(self, slide_title, term, slide_num):
        image_path = self.download_gif(term, slide_num)
        if image_path:
            return self.create_single_image_slide(slide_title, image_path)

    def create_full_gif_slide(self, term, slide_num):
        image_path = self.download_gif(term, slide_num)
        if image_path:
            return self.create_single_full_image_slide(image_path)

    def create_image_slide(self, slide_title, term, slide_num):
        # bounded retries; an unconditional while True here could loop forever
        # if the download keeps failing
        for _ in range(10):
            try:
                image_path = self.download_image(term, slide_num)
                if image_path:
                    return self.create_single_image_slide(slide_title, image_path)
            except:
                pass

    def create_full_image_slide(self, term, slide_num):
        image_path = self.download_image(term, slide_num)
        if image_path:
            return self.create_single_full_image_slide(image_path)

    def create_info_slide(self, slide_num):
        slide_title_info = random.choice(self.slide_titles)
        slide_title = slide_title_info
        if (random.randint(0, 100) % 3) == 0:
            slide_title = self.get_markov_proverb()

        sb = random.sample(self.slide_bullets, random.randint(1, 4))
        if (random.randint(0, 100) % 4) == 0:
            sb.append(self.get_markov_proverb())

        bullet_slide_layout = self.ppt.slide_layouts[1]
        new_slide = self.ppt.slides.add_slide(bullet_slide_layout)
        shapes = new_slide.shapes

        title_shape = shapes.title
        body_shape = shapes.placeholders[1]
        body_shape.width = Inches(4)
        body_shape.left = Inches(1)
        body_shape.top = Inches(2)

        title_shape.text = slide_title

        tf = body_shape.text_frame
        for b in sb:
            p = tf.add_paragraph()
            #p.text = b

            p.alignment = PP_PARAGRAPH_ALIGNMENT.LEFT
            run1 = p.add_run()
            run1.text = b
            font1 = run1.font
            font1.name = 'Sans Serif'
            font1.size = Pt(20)
            font1.italic = True
            font1.bold = True

        image_path = None
        attempts = 0
        while attempts < 10:
            try:
                tries = 0
                while (not image_path) and (tries < 10):
                    if (random.randint(0, 100) % 2) == 0:
                        search_term = self.get_giphy_search_term()
                        image_path = self.download_gif(search_term, slide_num)
                    else:
                        search_term = self.get_image_search_term()
                        image_path = self.download_image(search_term, slide_num)

                    tries += 1

                if tries < 10:
                    left = Inches(5.5)
                    top = Inches(3)
                    #height = Inches(3)
                    width = Inches(3)
                    pic = new_slide.shapes.add_picture(image_path, left, top, width=width)
                    break
                attempts += 1

            except:
                attempts += 1

        return new_slide

    def create_quote_slide(self):
        # Pick a random quote category and quote
        cat = random.choice(self.terms["quote_categories"])
        with open(os.path.join("Quotes", "quotes_%s.json" % cat)) as f:
            q1 = random.choice(json.load(f))

        cat = random.choice(self.terms["quote_categories"])
        with open(os.path.join("Quotes", "quotes_%s.json" % cat)) as f:
            q2 = random.choice(json.load(f))

        quote_text = "\"%s\"" % q1["quote"]
        if (random.randint(0,100) % 5) == 0:
            quote_text = random.choice(self.proverbs)

        quote_author = "- %s" % q2["name"]

        blank_slide_layout = self.ppt.slide_layouts[2]
        new_slide = self.ppt.slides.add_slide(blank_slide_layout)

        for shape in new_slide.shapes:
            if shape.is_placeholder:
                phf = shape.placeholder_format
                if phf.type == 1:
                    # Put in the quote title
                    shape.text = random.choice(self.terms["quote_titles"])

                elif phf.type == 2:
                    text_frame = shape.text_frame

                    # Create the quote text paragraph
                    p1 = text_frame.paragraphs[0]
                    p1.alignment = PP_PARAGRAPH_ALIGNMENT.LEFT
                    run1 = p1.add_run()
                    run1.text = quote_text
                    font1 = run1.font
                    font1.name = 'Sans Serif'
                    font1.size = Pt(30)
                    font1.italic = True
                    font1.bold = True

                    # Create the Author text paragraph
                    p2 = text_frame.add_paragraph()
                    p2.alignment = PP_PARAGRAPH_ALIGNMENT.RIGHT
                    run2 = p2.add_run()
                    run2.text = quote_author
                    font2 = run2.font
                    font2.name = 'Calibri'
                    font2.size = Pt(24)

        return new_slide

    def get_giphy_search_term(self):
        st = random.choice(self.terms["giphy_searches"])
        if (random.randint(0, 100) % 5) == 0:
            st = self.title
        return st

    def get_image_search_term(self):
        st = random.choice(self.terms["image_searches"])
        if (random.randint(0, 100) % 2) == 0:
            st = self.title
        return st

    def get_proverb(self):
        return random.choice(self.proverb_lines)

    def get_markov_proverb(self, min=5, max=10):
        b = ""

        while True:
            b = self.proverb_markov.generateString()
            s = b.split(" ")
            if min <= len(s) <= max:
                break

        return b

    def add_term(self, term_type, term):
        if term in self.terms[term_type]:
            return "Term \"%s\" is already in %s!" % (term, term_type)
        else:
            self.terms[term_type].append(term)
            with open("terms.json", "w") as f:
                json.dump(self.terms, f, indent=4)
            return "Term \"%s\" added to %s." % (term, term_type)

    def delete_term(self, term_type, term):
        if not (term in self.terms[term_type]):
            return "Term \"%s\" isn't in %s, can't delete!" % (term, term_type)
        else:
            self.terms[term_type].remove(term)
            with open("terms.json", "w") as f:
                json.dump(self.terms, f, indent=4)
            return "Term \"%s\" removed from %s." % (term, term_type)

    def show_term_counts(self, term_type, term_json):
        log_str = "%s Terms:\n" % term_type
        for term in self.terms[term_type]:
            if term in term_json:
                log_str += "  %s: %d\n" % (term, len(term_json[term]))
            else:
                log_str += "  %s: 0\n" % term
        self.log(log_str)

    def get_file_md5(self, file_path):
        with open(file_path, "rb") as f:
            image_bytes = f.read()

        file_hasher = hashlib.md5()
        file_hasher.update(image_bytes)
        return file_hasher.hexdigest()

    def farm_image_term(self, term, amount=25, threshold=10):
        self.log("Farming images for %s..." % term)

        if not (term in self.images):
            self.images[term] = []

        attempt_count = 0
        while (attempt_count < threshold) and (len(self.images[term]) < amount):
            myopener = MyOpener()
            page = myopener.open('https://www.google.pt/search?q=%s&source=lnms&tbm=isch&sa=X&tbs=isz:l&tbm=isch' % term.replace(" ", "+"))
            html = page.read()

            for match in re.finditer(r'<a href="/imgres\?imgurl=(.*?)&amp;imgrefurl', html, re.IGNORECASE | re.DOTALL | re.MULTILINE):
                if len(self.images[term]) >= amount:
                    break

                try:
                    os.remove("test.img")
                except:
                    pass

                try:
                    path = urlparse.urlsplit(match.group(1)).path
                    self.log("  Downloading %s" % match.group(1))
                    myopener.retrieve(match.group(1), "test.img")

                    image_md5 = self.get_file_md5("test.img")

                    if not (image_md5 in self.images[term]):
                        self.images[term].append(image_md5)
                        shutil.copy("test.img", os.path.join("Images", "%s.img" % image_md5))
                        os.remove("test.img")
                        self.log("    Image saved to archive. %d/%d images." % (len(self.images[term]), amount))
                        attempt_count = 0
                    else:
                        self.log("    Already had image!")
                        attempt_count += 1
                except:
                    self.log("    Downloading failed")
                    attempt_count += 1

        self.log("Farming of %s images complete, now holding %d images" % (term, len(self.images[term])))

        with open(os.path.join("Images", "hashes.json"), "w") as f:
            json.dump(self.images, f, indent=2)

    def farm_images(self, amount=25, threshold=10):
        self.show_term_counts("image_searches", self.images)

        all_farm = self.terms["image_searches"]
        all_farm.extend(self.terms["talk_titles"])

        for term in all_farm:
            self.farm_image_term(term, amount, threshold)

    def farm_gif_term(self, term, amount=25, threshold=10):
        self.log("Farming GIFs for %s..." % term)

        if not (term in self.gifs):
            self.gifs[term] = []

        attempt_count = 0
        while (attempt_count < threshold) and (len(self.gifs[term]) < amount):

            image_path = "test.gif"
            try:
                os.remove(image_path)
            except:
                pass

            try:
                img = translate(term)
                wget.download(img.fixed_height.url, image_path)

                image_md5 = self.get_file_md5("test.gif")

                if not (image_md5 in self.gifs[term]):
                    self.gifs[term].append(image_md5)
                    shutil.copy(image_path, os.path.join("GIFs", "%s.gif" % image_md5))
                    self.log("    GIF saved to archive. %d/%d GIFs." % (len(self.gifs[term]), amount))
                    attempt_count = 0
                else:
                    self.log("    Already had GIF!")
                    attempt_count += 1
            except:
                self.log("    Downloading failed")
                attempt_count += 1

        self.log("Farming of %s GIFs complete, now holding %d GIFs" % (term, len(self.gifs[term])))

        with open(os.path.join("GIFs", "hashes.json"), "w") as f:
            json.dump(self.gifs, f, indent=2)

    def farm_gifs(self, amount=25, threshold=10):
        self.show_term_counts("giphy_searches", self.gifs)

        all_farm = self.terms["giphy_searches"]
        all_farm.extend(self.terms["talk_titles"])

        for term in all_farm:
            # farm_gif_term() already logs the term and initializes its entry
            self.farm_gif_term(term, amount, threshold)

    def farm_content(self, all_content):
        for talk_title in self.terms["talk_titles"]:
            talk_path = os.path.join("Content", "%s.txt" % talk_title)
            # Either we're replacing all content or we're only replacing files that don't exist
            if all_content or (not os.path.exists(talk_path)):
                self.log("Farming data on %s..." % talk_title)
                with open(talk_path, "w") as f:
                    content = self.my_face.fully_research_topic(talk_title, self.log)
                    if type(content) is str:
                        clean_content = content
                    else:
                        clean_content = unicodedata.normalize('NFKD', content).encode('ascii', 'ignore')
                    f.write(clean_content)

    def log_slide_weights(self):
        self.log(self.slide_weights.get_weights_string())

    def log(self, message):
        if self.console:
            self.console.config(state=tk.NORMAL)
            self.console.insert(tk.END, "%s\n" % message)
            self.console.see(tk.END)
            self.console.config(state=tk.DISABLED)
            self.console.update()
        else:
            print(message)
Beispiel #50
0
from pymarkovchain import MarkovChain

with open('./fixtures/wikipedia_india_content.txt') as seed_file:
    seed_text = seed_file.read()
mc = MarkovChain("../markov_db")
mc.generateDatabase(seed_text)
print(mc.generateString())
class ResponseGenerator:
    def __init__(self):
        self.eightball = EightBall()
        self.excuses = Excuses()
        self.commands = Commands()
        self.straws = Straws("/", "=", "/")
        self.chain = MarkovChain("./markovdb")
        self.chain.db = _db_factory()
        with open("markovsource", "r") as markov_file:
            self.chain.generateDatabase(markov_file.readline())

    def generate_response(self, body):
        # Tokenize body
        body_tokens = body.lower().split(" ")
        # Important commands can only be run if line is started with the word
        command = body_tokens[0]

        if command == '!create':
            new_command = body_tokens[1]
            response_index = body.find(new_command) + len(new_command) + 1
            response = body[response_index:]
            self.commands.set(new_command, response)

            return "Command !{0} created.".format(new_command)

        elif command == "!list":
            string = "!create !delete !reload !excuse !8ball !straws !image "
            for command_ in self.commands.list():
                string += "!{0} ".format(command_)

            return string

        elif command == "!delete":
            cleaned_command = body_tokens[1].lower()
            success = self.commands.delete(cleaned_command)

            if success:
                return "Command !{0} deleted.".format(cleaned_command)
            else:
                return "Command !{0} does not exist.".format(cleaned_command)

        elif command == "!reload":
            with open("markovsource", "r") as markov_file:
                self.chain.generateDatabase(markov_file.readline())

            return "Successfully reloaded my word database"

        # Not a system command, continue attempting to parse
        else:
            for token in body_tokens:
                if token == "!fortune":
                    # TODO
                    pass
                elif token == "!excuse":
                    return self.excuses.get()

                elif token == "!8ball":
                    return self.eightball.get()

                elif token == "!straws":
                    return self.straws.get()

                elif token == "!image":
                    return "/get " + self.chain.generateString()

                elif token == "tase":
                    return self.chain.generateString()

                elif len(token) > 0 and token[0] == "!":
                    return self.commands.get(token[1:])

                # we have a sentence to listen to, arbitrary length requirement
                elif len(body) > 10:
                    string_to_write = body + "."
                    if body[len(body) - 1] == ".":
                        string_to_write = body

                    with open("markovsource", "a") as markov_file:
                        markov_file.write(string_to_write)
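
# A hypothetical smoke test for ResponseGenerator: EightBall, Excuses, Commands,
# Straws and _db_factory are project helpers not shown in this fragment, and a
# "markovsource" corpus file is assumed to exist (a sketch, not original code).
rg = ResponseGenerator()
print(rg.generate_response("!8ball will this ship on time?"))
print(rg.generate_response("tase"))
print(rg.generate_response("!list"))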
Beispiel #52
0
#!/usr/bin/env python

from pymarkovchain import MarkovChain
# Create an instance of the markov chain. By default, it uses MarkovChain.py's location to
# store and load its database files to. You probably want to give it another location, like so:
mc = MarkovChain("C:/Users/Andrew/OneDrive/Documents/Northwestern/Courses/495-Data-Science/Final Project")
# To generate the markov chain's language model, in case it's not present
mc.generateDatabase("It is nice to meet you.  I would like to meet your friend.")
# To let the markov chain generate some text, execute
for i in range(10):
    print(mc.generateString())
Beispiel #53
0
        f.close()
        consumerkey = lines[0].split("#")[0]
        consumersecret = lines[1].split("#")[0]
        accesstoken = lines[2].split("#")[0]
        accesssec = lines[3].split("#")[0]

        self.api = Twython(consumerkey, consumersecret, accesstoken, accesssec)

if __name__ == '__main__':
    api = (TwythonHelper("dynacoinc.keys")).api
    mc = MarkovChain("./markov")
    f = codecs.open("corpus.txt")
    text = " ".join(f.readlines())
    f.close()
    mc.generateDatabase(text)
    status = mc.generateString()

    if len(status) > 110:
        status = status[:110]
        lr = status.rfind(" ")
        status = status[:lr] + "."
    else:
        status = status + "."

    r = random.Random()
    lf = os.listdir(".")
    ll = [l for l in lf if l.find("jpg") != -1]

    photo = open(r.choice(ll), "rb")

    api.update_status_with_media(media=photo, status=status)