Exemple #1
0
def processTextOld(text):
    """Fold ``text`` into the global Markov model stored on ``builtins``.

    On the first call the model is created from ``text``; afterwards the
    new text is merged in with equal (0.5/0.5) weight.
    """
    # Identity comparison for None (PEP 8): ``== None`` can be fooled by
    # objects overriding __eq__.
    if builtins.markov is None:
        print("Setting to text")
        builtins.markov = markovify.Text(text)
    else:
        print("Combining")
        builtins.markov = markovify.combine([builtins.markov, markovify.Text(text)], [0.5, 0.5])
    def add_message(self, message):
        """Fold ``message`` into this object's model, creating it if absent."""
        incoming = markovify.Text(message)

        # EAFP: merge into the existing model; if none exists yet the
        # attribute access raises and the new model becomes the first one.
        try:
            self.model = markovify.combine([self.model, incoming])
        except AttributeError:
            self.model = incoming
Exemple #3
0
    def do_response(self, e, prompt):
        """Send one generated sentence, biased toward *prompt*, as a reply to *e*."""
        # Private messages go back to the sender; everything else to the channel.
        target = e.source.nick if e.type == "privmsg" else self.channel
        blended = markovify.combine(
            [self.text_model, markovify.Text(prompt)], [0.5, 0.5]
        )
        reply = blended.make_sentence()
        if reply:
            self.connection.privmsg(target, reply)
 def test_from_mult_files_without_retaining(self):
     # Build one model per file under <this dir>/texts with
     # retain_original=False, combine them all, and check that a sentence
     # can still be generated even though no original corpus was kept.
     models = []
     for (dirpath, _, filenames) in os.walk(os.path.join(os.path.dirname(__file__), "texts")):
         for filename in filenames:
             with open(os.path.join(dirpath, filename)) as f:
                 models.append(markovify.Text(f, retain_original=False))
     combined_model = markovify.combine(models)
     sent = combined_model.make_sentence()
     assert sent is not None
     assert len(sent) != 0
Exemple #5
0
def combine(infiles=(DATAFILE_DOI, DATAFILE_ONION), n=20):
    """Build a model per file in *infiles*, combine them, print *n* messages.

    The default is a tuple rather than a list: mutable default arguments
    are shared between calls and are a classic Python pitfall.
    """
    texts = []
    models = []
    for infile in infiles:
        mdl, text = get_model(infile)
        texts.append(text)
        models.append(mdl)

    # NOTE(review): the hard-coded weights assume exactly two input files —
    # confirm before passing a different number of infiles.
    combined = markovify.combine(models, weights=[1.0, 0.4])
    # Python-3 forms: xrange and the print statement fail on Python 3.
    for _ in range(n):
        print(get_msg(combined, text='\n'.join(texts)))
Exemple #6
0
 async def beid(self, ctx, user, other=None):
     """Generate a Markov-chain sentence from the logs of `user`.

     If `other` is given, that user's log is merged in with equal weight so
     the generated sentence impersonates both users at once.
     """
     server = ctx.message.server
     log_path = log_dir + user + '.txt'
     if os.path.exists(log_path):
         with codecs.open(log_path, "r",encoding='utf-8', errors='ignore') as f:
             # Drop blank lines; NewlineText treats each line as a sentence.
             text = filter(None, (line.rstrip() for line in f))
             text_model = markovify.NewlineText(text)
             name = user
             if other: # fusion impersonations
                 # NOTE(review): this open() has no explicit encoding,
                 # unlike the codecs.open above — confirm intentional.
                 with open(log_dir + other + '.txt') as s:
                     other_text = s.read()
                     other_model = markovify.NewlineText(other_text)
                     text_model = markovify.combine([text_model, other_model], [1, 1])
                     name += " + " + other
             sentence = text_model.make_sentence(tries=100)
         embed = discord.Embed(title='', description=sentence)
         embed.set_author(name=name)
         await self.bot.say(embed=embed)
     else:
         await self.bot.say("i don't have any messages logged from that user yet.")
Exemple #7
0
# Play the audio using the platform's default player
if sys.platform == "win32":
    os.startfile(output)
else:
    # NOTE(review): afplay exists only on macOS; Linux would need a
    # different player (aplay, mpg123, ...) — confirm target platforms.
    subprocess.call(["afplay", output])




for _ in itertools.repeat(None, 50):
    # Word-level markovify.Text or the character-level variant, per `level`.
    model_cls = markovify.Text if level == "word" else SentencesByChar
    gen_a = model_cls(text_a, state_size=order)
    gen_b = model_cls(text_b, state_size=order)
    gen_c = model_cls(text_c, state_size=order)
    gen_combo = markovify.combine([gen_a, gen_b, gen_c], weights)
    counter=0
    for i in range(output_n):
        out = gen_combo.make_short_sentence(length_limit, test_output=False)

        # WORDS = ("Ivy", "Joanna", "Kendra", "Kimberly", "Salli", "Raveena", "Nicole", "Amy", "Emma", "Joey", "Justin", "Matthew", "Brian", "Geraint")
        # word = random.choice(WORDS)

        try:
            # Request speech synthesis; the sentence is wrapped in the SSML
            # prefix/suffix held in say_a / say_b.
            response = polly.synthesize_speech(Text= say_a+(out.lower())+say_b, OutputFormat="mp3",
                                                TextType="ssml", VoiceId="Brian")
        except (BotoCoreError, ClientError) as error:
            # The service returned an error, exit gracefully
            print(error)
            sys.exit(-1)
Exemple #8
0

# Load the second corpus (the first, `lovecraft`, is presumably loaded
# above this chunk — confirm against the full file).
with open("../readings/satyricon.txt") as s:

    satyricon = s.read()



# build and combine the models

lovecraft_model = markovify.Text(lovecraft)

satyricon_model = markovify.Text(satyricon)

# Lovecraft is weighted 1.5x relative to the Satyricon.
model_synthesis = markovify.combine([lovecraft_model, satyricon_model], 

    [ 1.5, 1 ])



# generate a sentence from the markov model

markov_text = model_synthesis.make_sentence()

  

# get raw text as string with the write (w) or append (a) option

    # NOTE(review): the indented line below references an `m` handle that is
    # never defined in this chunk, and its indentation has no enclosing
    # block — this looks like a truncated paste; write a new line in markdown

    m.write("\n")
Exemple #9
0
def combine_models(models=None, weights=None):
    """Combine Markov *models* using *weights*, one weight per model.

    Defaults are None instead of mutable lists: mutable default arguments
    are shared across calls.  Raises ValueError on a length mismatch —
    ``assert`` is stripped under ``python -O`` so it must not be used for
    input validation.
    """
    models = [] if models is None else models
    weights = [] if weights is None else weights
    if len(models) != len(weights):
        raise ValueError("models and weights must have the same length")
    return markovify.combine(models, weights)
Exemple #10
0
import markovify

# Load the two corpora, ignoring undecodable bytes.
with open("beowulf.txt", errors="ignore") as source_a:
    text_a = source_a.read()

with open("metamorphosis.txt", errors="ignore") as source_b:
    text_b = source_b.read()

# One model per corpus, merged with equal weight.
model_a = markovify.Text(text_a)
model_b = markovify.Text(text_b)

model_combo = markovify.combine([model_a, model_b], [.5, .5])

# Five full sentences, then three tweet-length ones.
for _ in range(5):
    print(model_combo.make_sentence())

print("\n")

for _ in range(3):
    print(model_combo.make_short_sentence(280))
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 16 19:33:35 2018

@author: Windows 10 Pro
"""
# import libraries
import markovify
"""
Combine text from Kaiya Smith's Letter to Hansel and F*ckboys
"""
# get raw text as string
with open("../poems/HashtagHansel.txt") as f:
    Hansel = f.read()

with open("../poems/Fboi.txt") as f:
    boys = f.read()

# build and combine the models (Hansel weighted 1.5x)
Hansel_model = markovify.Text(Hansel)
boys_model = markovify.Text(boys)
model_synthesis = markovify.combine([Hansel_model, boys_model], [1.5, 1])

# print five randomly-generated sentences
for i in range(5):
    # print() as a function call: the bare Python-2 print statement is a
    # SyntaxError on Python 3.
    print(model_synthesis.make_sentence())
Exemple #12
0
 def initializeModel(self):
     """Build self.model by combining one sub-model per entry of data['Text']."""
     print('TRAINER BUILDING MODEL')
     # One Markov model per tweet text, all merged with default weights.
     tweet_models = [
         markovify.Text(str(entry), self.stateSize)
         for entry in self.data['Text']
     ]
     self.model = markovify.combine(models=tweet_models)
def generate_text(tweet):
    """Generate a short sentence blending several book corpora with *tweet*.

    Loads nine corpus files (building POSified models for eight of them
    unless cached JSON models exist), combines them with hand-tuned weights
    plus a model of the tweet's own text, and returns a sentence of at most
    280 characters.
    """
    num_texts = 0
    with open("./the_picture_of_dorian_gray.txt", encoding="utf8") as f:
        the_picture_of_dorian_gray = f.read()
        num_texts += 1
    with open("./declaration_of_independence.txt", encoding="utf8") as f2:
        declaration_of_independence = f2.read()
        num_texts += 1
    with open("./jane_eyre.txt", encoding="utf8") as f3:
        # BUG FIX: this read previously assigned to declaration_of_independence,
        # silently replacing that corpus with Jane Eyre's text and corrupting
        # model_b below.
        jane_eyre = f3.read()
        num_texts += 1
    with open("./war_and_peace.txt", encoding="utf8") as f4:
        war_and_peace = f4.read()
        num_texts += 1
    # NOTE(review): jane_eyre is read a second time here; kept so that
    # num_texts still counts nine reads for read_JSON below.
    with open("./jane_eyre.txt", encoding="utf8") as f5:
        jane_eyre = f5.read()
        num_texts += 1
    with open("./jokes.txt", encoding="utf8") as f6:
        jokes = f6.read()
        num_texts += 1
    with open("./moby_dick.txt", encoding="utf8") as f7:
        moby_dick = f7.read()
        num_texts += 1
    with open("./pride_and_prejudice.txt", encoding="utf8") as f8:
        pride_and_prejudice = f8.read()
        num_texts += 1
    with open("./art_of_love.txt", encoding="utf8") as f9:
        art_of_love = f9.read()
        num_texts += 1

    state_size = 4
    max_overlap_ratio = 50
    num_tries = 100

    models = read_JSON(num_texts)
    model_tweet = POSifiedText(tweet.full_text, state_size=state_size)
    if len(models) == 0:
        print("Creating language model")
        #creates custom models instead of using naive ones
        model_a = POSifiedText(the_picture_of_dorian_gray,
                               state_size=state_size)
        model_b = POSifiedText(declaration_of_independence,
                               state_size=state_size)
        model_c = POSifiedText(jane_eyre, state_size=state_size)
        model_d = POSifiedText(war_and_peace, state_size=state_size)
        model_e = POSifiedText(jokes, state_size=state_size)
        model_f = POSifiedText(pride_and_prejudice, state_size=state_size)
        model_g = POSifiedText(moby_dick, state_size=state_size)
        model_h = POSifiedText(art_of_love, state_size=state_size)

        # List literal instead of eight separate append calls.
        models = [model_a, model_b, model_c, model_d,
                  model_e, model_f, model_g, model_h]

        write_JSON(models)

    else:
        print("Sucessfully created models from existing JSON files")

    # Hand-tuned weights; the tweet's own model is the final 1.0 entry.
    model_combo = markovify.combine([
        models[0], models[1], models[2], models[3], models[4], models[5],
        models[6], models[7], model_tweet
    ], [1.4, 1.25, 1.25, 1.4, 1.25, 1.0, 1.25, 2.0, 1.0])
    return model_combo.make_short_sentence(280,
                                           max_overlap_ratio=max_overlap_ratio,
                                           tries=num_tries)
 def test_combine_chains(self):
     """Combining two raw Chain objects should succeed without error."""
     base_chain = sherlock_model.chain
     combo = markovify.combine([base_chain, base_chain])
 def test_combine_no_retain(self):
     """A combo of no-retain models must not retain the original corpus."""
     base = sherlock_model_no_retain
     merged = markovify.combine([base, base])
     assert not merged.retain_original
 def test_combine_no_retain_on_retain(self):
     """Mixing retain and no-retain models keeps the retaining side's text."""
     no_retain = sherlock_model_no_retain
     retain = sherlock_model
     merged = markovify.combine([retain, no_retain])
     assert merged.retain_original
     assert merged.parsed_sentences == retain.parsed_sentences
 def test_mismatched_model_types(self):
     """Combining a Text model with a NewlineText model must raise."""
     with self.assertRaises(Exception) as context:
         model_one = sherlock_model
         model_two = markovify.NewlineText(sherlock)
         combo = markovify.combine([model_one, model_two])
 def test_mismatched_state_sizes(self):
     """Models built with different state sizes cannot be combined."""
     with self.assertRaises(Exception) as context:
         small = markovify.Text(sherlock, state_size=2)
         large = markovify.Text(sherlock, state_size=3)
         combo = markovify.combine([small, large])
 def test_bad_weights(self):
     """A weights list shorter than the models list must raise."""
     with self.assertRaises(Exception) as context:
         base = sherlock_model
         combo = markovify.combine([base, base], [0.5])
Exemple #20
0
 def test_double_weighted(self):
     """Default combination of a model with itself changes the chain counts."""
     base = markovify.Text(self.sherlock)
     merged = markovify.combine([base, base])
     assert merged.chain.model != base.chain.model
 def test_combine_dicts(self):
     """combine() accepts raw chain-model dicts, not just model objects."""
     model_dict = sherlock_model.chain.model
     combo = markovify.combine([model_dict, model_dict])
 def test_bad_types(self):
     """Plain strings are not combinable models and must be rejected."""
     with self.assertRaises(Exception) as context:
         combo = markovify.combine(["testing", "testing"])
Exemple #23
0
    async def run(self, ctx):
        """Build or refresh a per-channel (optionally per-user) Markov model
        from Discord message history, send one generated sentence, and cache
        the model as JSON alongside a timestamp for incremental updates.
        """
        if isinstance(ctx.channel, discord.DMChannel):
            # NOTE(review): this send() is not awaited — the coroutine is
            # never executed; confirm and add await.
            ctx.channel.send("cannot use commands in dm channels")
            return None
        """\n Genrate sentence based upon messsages from channel or user.\nTo scan a users or a channels messages menton them """
        msg = await ctx.channel.send("Processing")
        try:
            #get channel wanted and member wanted
            member = ctx.message.mentions
            channel = ctx.message.channel_mentions or [ctx.message.channel]
            print(member)
            print(channel)

            #format file location (Windows-style path separators)
            storage_loc = os.getcwd() + "\\data\\{ch}".format(ch=channel[0].id)
            print(storage_loc)
            #check if directoy exist if not make one
            if not os.path.exists(Path(storage_loc)):

                os.makedirs(storage_loc)
                print('dirmade')
            #add file final: per-user file when a member is mentioned,
            #otherwise one shared file for the whole channel
            if member:
                storage_loc += "\\{}.json".format(member[0].id)
                print('eyy embmer')
            else:
                storage_loc += "\\main.json"
            print(storage_loc)
            storage_loc = Path(storage_loc)
            #check if file exists if not do main else load unread messages
            newdata = None
            newappend = ""
            jsonmodel = None
            filemodel = None
            date = None

            if os.path.exists(storage_loc):
                print('path exists')
                # Cached model: reload it and only fetch messages newer
                # than the stored timestamp.
                with open(storage_loc, "r") as f:
                    data = json.load(f)
                    date = data["time"]
                    jsonmodel = markovify.NewlineText.from_json(data["model"])
                newdata = channel[0].history(
                    after=datetime.datetime.fromtimestamp(date))
                print(newdata)

            else:
                print('path doesnt eyyy')
                newdata = channel[0].history(limit=15000)

            # One line per message so NewlineText treats each as a sentence.
            async for i in newdata:
                if member:
                    if i.author == member[0]:
                        newappend += i.content + "\n"
                else:
                    newappend += i.content + "\n"
            if newappend:
                filemodel = markovify.NewlineText(newappend)
            if jsonmodel and newappend:
                # Merge cached model with the newly scraped messages.
                filemodel = markovify.combine([jsonmodel, filemodel], [1, 1])
            elif jsonmodel:
                filemodel = jsonmodel

            # Retry sentence generation up to 100 times.
            temp = None
            for i in range(100):
                temp = filemodel.make_sentence()
                if temp:
                    break
            await ctx.channel.send(temp.replace("@", "") or "somthign wrong")
            del temp

            # Persist the (possibly merged) model plus a refresh timestamp.
            tempdata = {"time": time.time(), "model": filemodel.to_json()}
            with open(storage_loc, "w+") as f:
                f.write(json.dumps(tempdata, separators=(',', ':')))
                print("writine")
            del filemodel
            await ctx.message.delete()
            await msg.delete()
        except:
            # NOTE(review): bare except hides the real failure; at minimum
            # log the exception before replying.
            await ctx.message.delete()
            await msg.delete()
            await ctx.channel.send("somthings wrong")
        # NOTE(review): `rows` is never defined in this method — the two
        # lines below look like a paste artifact from another function.
        text = '\n'.join([r for r in rows if r != ''])
        return text


if __name__ == "__main__":

    progress = 0  # out of the max value on progress bar
    bar = progressbar.ProgressBar().start(max_value=4)

    # Build one model per cleaned book.
    texteroni = markovify.Text(cleanBook('./theDunwichHorror.txt'))
    book2 = markovify.Text(cleanBook('./theShunnedHouse.txt'))
    progress += 1
    bar.update(progress)

    # COMBINE THE BOOKS
    allText = markovify.combine([texteroni, book2])
    progress += 1
    bar.update(progress)

    # print() calls: the Python-2 print statements below were SyntaxErrors
    # on Python 3.
    print("Here are 3 random sentences:\n\n")
    for i in range(3):
        string = allText.make_short_sentence(70)
        print(string)
        '''TODO
        for word in string.split(' '):
            print "Syllables in " + str( word ) + ": " + str( countSyllables(word))
        '''
    print("\n")

    progress += 1
    bar.update(progress)
Exemple #25
0
def combine(models):
    """Combine *models*, weighting each inversely to its corpus length.

    With exactly one model the inverse-length formula would give it weight
    0 (producing an empty combination), so that case returns the model
    unchanged.
    """
    if len(models) == 1:
        return models[0]
    total_length = sum(m.length for m in models)
    # weights are inversely proportional to size, so a large text doesn't
    # drown out a small one
    weights = [(total_length - m.length) / total_length for m in models]
    return markovify.combine(models, weights)
Exemple #26
0
import markovify

# Get raw text as string.  Use context managers so the files are always
# closed, and read-only mode since nothing is written back (the original
# used bare open() in 'r+' and never closed either handle).
with open('parsedBible.txt', 'r') as source_a:
    text = source_a.read()
with open('IStestfile.txt', 'r') as source_b:
    text2 = source_b.read()

# Build the model.
modelHP = markovify.Text(text)
modelIS = markovify.Text(text2)
modelX  = markovify.combine([modelHP, modelIS],[1,1])

# Print five randomly-generated sentences
for i in range(5):
    print(modelX.make_short_sentence(200))

# Print three randomly-generated sentences of no more than 140 characters
#for i in range(3):
#   print(text_model.make_short_sentence(140))
Exemple #27
0
import sys
import markovify

# Rehydrate the two pre-trained models from their JSON dumps.
with open('ico.json') as handle:
    text = handle.read()
ico_model = markovify.Text.from_json(text)

with open('erowid.json') as handle:
    text = handle.read()
erowid_model = markovify.Text.from_json(text)

# Combine models, favouring the ICO corpus slightly (1.25 : 1).
combo = markovify.combine([ico_model, erowid_model], [1.25, 1])

# Sentence count comes from the first CLI argument.
for _ in range(int(sys.argv[1])):
    print(combo.make_sentence())
Exemple #28
0
 def addToModel(self, text):
     """Train a model on *text* and merge it into self.model."""
     print('TRAINER ADDING TO MODEL')
     addition = markovify.Text(text, self.stateSize)
     self.model = markovify.combine(models=[self.model, addition])
Exemple #29
0
def combine(combined_model, model):
    """Merge *model* into *combined_model*.

    When no accumulator exists yet (None), *model* becomes the new
    accumulator unchanged.
    """
    if combined_model is not None:
        return markovify.combine(models=[combined_model, model])
    return model
Exemple #30
0
def main():
    """Entry point: read config, build the clickbait model, tweet forever.

    Loads Twitter/Google credentials from the config file given as the sole
    CLI argument, combines clickbait and non-clickbait headline models
    (weighted toward clickbait), then loops: generate a headline, decorate
    it with punctuation and hashtags, attach an image, and post it.
    """
    if len(sys.argv) != 2:
        print("Path to config.ini must be passed as argument.")
        exit_on_error()

    # read config file
    config = configparser.ConfigParser()
    # BUG FIX: ConfigParser.read() does not raise IOError for a missing or
    # unreadable file — it returns the list of files successfully parsed.
    # The previous try/except IOError was dead code; check the list instead.
    if not config.read(sys.argv[1]):
        print("Failed to read config file: " + sys.argv[1])
        exit_on_error()

    # set up apis
    twitter_api = twitter_setup(config)
    google_api = google_setup(config)
    cse_id = config.get('GOOGLE', 'CSE_ID')

    # create markov chain model; clickbait corpus weighted 1.6x
    model_clickbait = markov_chain_setup("clickbait_data_filtered.txt")
    model_non_clickbait = markov_chain_setup("non_clickbait_data_filtered.txt")
    model = markovify.combine([model_clickbait, model_non_clickbait],
                              [1.60, 1])

    # set nltk path
    if './nltk_data' not in nltk.data.path:
        nltk.data.path.append('./nltk_data')

    while True:
        # create headline of 40-180 characters
        tweet = create_headline(model, randint(40, 180))
        if tweet is None:
            print(
                str(datetime.datetime.now()) +
                " Markov chain model failed to generate a tweet.")
        else:
            print(str(datetime.datetime.now()) + " Headline: " + tweet)

            # tokenized headline
            tokens = nltk.word_tokenize(str(tweet))

            # add question mark punctuation for interrogative headlines
            interrogative = [
                'Does', 'Do', 'Can', 'Should', 'Would', 'Could', 'How',
                'Which', "Is", "Are", "Was"
            ]
            if tokens[0] in interrogative:
                tweet += "?"

            # identify key words in tweet to use for hashtags using parts-of-speech tags
            # nouns only, tagger isn't accurate so remove helping verbs
            verbs = [
                "am", "are", "is", "was", "were", "be", "being", "been",
                "have", "has", "had", "shall", "will", "do", "does", "did",
                "may", "must", "might", "can", "could", "would", "should",
                "who", "what", "why", "your", "you", "their", "or"
            ]
            tags = nltk.pos_tag(tokens)
            keywords_list = [
                word for word, pos in tags
                if (pos.startswith('N') and word.lower() not in verbs)
            ]
            query_words = [
                word for word, pos in tags
                if (pos.startswith('N') or pos.startswith('J'))
            ]

            print(
                str(datetime.datetime.now()) + " Identified key words: " +
                str(keywords_list))
            print(
                str(datetime.datetime.now()) + " Identified query words: " +
                str(query_words))

            # add key words as hash tags, removing any punctuations in the hashtags
            for kw in keywords_list:
                tweet += " #" + kw.translate(
                    str.maketrans('', '', string.punctuation))

            # full tweet
            print(str(datetime.datetime.now()) + " Tweet: " + tweet)

            # search for image to post with tweet message
            image = search_image(google_api, cse_id, ' '.join(query_words))

            # post to twitter
            tweet_fake_buzz(twitter_api, tweet, image)

            print(str(datetime.datetime.now()) + " Done.")

            sleep(300)  # tweet every 5 minutes
Exemple #31
0
def scrape_data():
    """
    Pull the tweets from each congressman and save the markov chain to a file
    corresponding to their party.

    For each party: collect member usernames (folding third parties into the
    nearest major party), scrape each member's recent timeline, build two
    models (tweets within vs. outside the last week), combine them weighted
    toward recent tweets, and write the compiled model plus engagement data
    to disk.
    """
    # connect to the twitter api
    twitter_keys = config['twitter']
    auth = tweepy.OAuthHandler(twitter_keys['consumer_key'],
                               twitter_keys['consumer_secret'])
    auth.set_access_token(twitter_keys['access_token'],
                          twitter_keys['access_token_secret'])
    api = tweepy.API(auth, wait_on_rate_limit=True)

    for party in ['Republican', 'Democratic']:
        # get all the twitter urls into a list
        with congress_file.open(encoding='utf-8') as file:
            profiles = json.load(file)
        usernames = []
        for profile in profiles:
            if (profile['party'] == party or
                    # libertarians go with republicans because
                    # there's not enough of them for a whole dataset
                    profile['party'] in third_parties['Republican']
                    and party == 'Republican' or
                    # independents go with democratics
                    # for the same reason as above
                    profile['party'] in third_parties['Democratic']
                    and party == 'Democratic'):
                for username in profile['twitter']:
                    usernames.append(username)

        # set date from a week ago so we can separate tweets
        # (604800 seconds = 7 days)
        week_ago_timestamp = datetime.now().timestamp() - 604800

        tweets_within_week = []
        tweets_outside_week = []
        retweets = []
        likes = []
        for username in usernames:
            try:
                page = api.user_timeline(screen_name=username,
                                         tweet_mode='extended')
            except tweepy.TweepyException as exc:
                # Skip accounts we can't read (suspended, protected, etc.)
                print(f"Skipping @{username} - {exc}")
                logging.info(f"@{username} - {exc}", )
                continue
            print(f"Grabbing {len(page)} tweets from @{username}")
            for tweet in page:
                if tweet.created_at.timestamp() > week_ago_timestamp:
                    tweets_within_week.append(tweet.full_text)
                else:
                    tweets_outside_week.append(tweet.full_text)
                # get likes and retweets from retweeted status or likes will be 0
                if hasattr(tweet, 'retweeted_status'):
                    retweets.append(tweet.retweeted_status.retweet_count)
                    likes.append(tweet.retweeted_status.favorite_count)
                else:
                    retweets.append(tweet.retweet_count)
                    likes.append(tweet.favorite_count)
            # have it wait so we don't rate limit the api
            time.sleep(2)

        data = {'retweets': retweets, 'likes': likes}

        tweets_within_model = markovify.Text(' '.join(tweets_within_week),
                                             retain_original=False)
        tweets_outside_model = markovify.Text(' '.join(tweets_outside_week),
                                              retain_original=False)
        # place 75% more weight on tweets made within the week
        combined_model = markovify.combine(
            [tweets_within_model, tweets_outside_model], [1.75, 1])
        # Pre-compile the chain so generation from the saved model is fast.
        combined_model.compile(inplace=True)

        tweets_file = data_file_path(party, 'tweets')
        tweets_file.write_text(combined_model.to_json())

        data_file = data_file_path(party, 'data')
        data_file.write_text(json.dumps(data))

        print(
            f"{len(tweets_within_week) + len(tweets_outside_week)} tweets scraped from {len(usernames)} {party} accounts"
        )
Exemple #32
0
import markov_novel
import re
import spacy

nlp = spacy.load("en")


class POSifiedText(markovify.Text):
    """markovify.Text variant whose chain states carry spaCy POS tags."""

    def word_split(self, sentence):
        # Each token becomes "<orth>::<POS>" so the chain distinguishes
        # homographs with different parts of speech.
        return ["::".join((token.orth_, token.pos_)) for token in nlp(sentence)]

    def word_join(self, words):
        # Strip the "::<POS>" suffix and rebuild the plain sentence.
        return " ".join(item.split("::")[0] for item in words)


# Build the model: one markovify.Text per file under texts/, folded into a
# single running combination.
combined_model = None
for (dirpath, _, filenames) in os.walk("texts"):
    for filename in filenames:
        with open(os.path.join(dirpath, filename)) as ind_file:
            ind_text = ind_file.read()
            model = markovify.Text(ind_text)
            # First file seeds the accumulator; later files are merged in.
            if combined_model:
                combined_model = markovify.combine(
                    models=[combined_model, model])
            else:
                combined_model = model

# Generate a one-chapter novel in markdown from the combined model.
novel = markov_novel.Novel(combined_model, chapter_count=1)
novel.write(novel_title='my-novel', filetype='md')
Exemple #33
0
# Markov Chain Model

# NOTE(review): df_text appears to come from pandas; NewlineText is handed
# the 'headline_text' column directly — confirm markovify accepts it.
df_text2 = df_text[:]
mcm_model1 = markovify.NewlineText(df_text2['headline_text'], state_size = 2)

for i in range(10):
    print(mcm_model1.make_sentence())


# Ensembling 3 Markov models

# All three models are trained on the same column, so the ensemble only
# changes the relative weighting, not the underlying data.
mcm_model1 = markovify.Text(df_text2['headline_text'], state_size = 2)
mcm_model2 = markovify.Text(df_text2['headline_text'], state_size = 2)
mcm_model3 = markovify.Text(df_text2['headline_text'], state_size = 2)
model_combo = markovify.combine([ mcm_model1, mcm_model2, mcm_model3 ], [ 1.5, 1.5, 1 ])

for i in range(5):
    print(model_combo.make_sentence())

# Part-Of-Speech tagging
import re
!pip install spacy
import spacy

!python -m spacy download en_core_web_lg

nlp = spacy.load('en_core_web_lg')

class POSifiedText(markovify.Text):
    def word_split(self, sentence):
Exemple #34
0
# Pull the hot submissions and their full comment trees from r/me_irl.
subreddit = reddit.subreddit('me_irl')
submissions = []
for submission in subreddit.hot(limit=100):
    submissions.append(submission)
comments = []
for x in submissions:
    for y in x.comments.list():
        comments.append(y)
# Glue all comment bodies into one period-separated corpus string.
toMark = ""
for comm in comments:
    try:
        toMark += '. ' + comm.body
    except:
        # NOTE(review): bare except — presumably skips items without a
        # .body (e.g. MoreComments); confirm and narrow to AttributeError.
        pass

# Blend reddit comments with the Ayn Rand and Marx corpora, equal weights.
redditModel = markovify.Text(toMark)
aynModel = markovify.Text(arand)
marxModel = markovify.Text(marx)
model = markovify.combine([redditModel, aynModel, marxModel])


def output(char=140, mod=model):
    """Print one generated sentence of at most *char* characters from *mod*."""
    sentence = mod.make_short_sentence(char)
    print(sentence)


def tweet():
    """Post a tweet-length generated sentence and echo it locally."""
    sentence = model.make_short_sentence(140)
    status = api.PostUpdate(sentence)
    print(sentence)
Exemple #35
0
]

printable = set(string.printable)

def filter_printable(text):
    """Drop characters outside string.printable (was a lambda assignment,
    which PEP 8 discourages in favour of def)."""
    return filter(lambda ch: ch in printable, text)

if os.path.exists(dump_file):
    with open(dump_file, 'rb') as handle:
        # NOTE: pickle is only safe here because dump_file is our own cache.
        model = markovify.Text.from_dict(pickle.load(handle))
else:
    print("Generating combined model (once-off)...")
    model = None
    for source_file in sources:
        with open(source_file, 'r') as handle:
            print("Loading", source_file)
            text = filter_printable(handle.read())
            new_model = markovify.Text(text, retain_original=False)
            # Equal-weight merge with whatever has been built so far.
            model = markovify.combine( [model, new_model], [ 1.0, 1.0 ] ) if model else new_model
    with open(dump_file, 'wb') as handle:
        pickle.dump(model.to_dict(), handle)
        handle.flush()

if __name__ == "__main__":
    for i in range(0, 100):
        x = model.make_sentence()
        #x = model.make_sentence_with_start('AIBU')
        #x = model.make_short_sentence(50)
        if x:
            # print() as a function: the bare Python-2 `print x` statement
            # is a SyntaxError on Python 3.
            print(x)
    
Exemple #36
0
 def test_simple(self):
     """Equal 0.5/0.5 self-combination reproduces the original chain model."""
     base = markovify.Text(self.sherlock)
     merged = markovify.combine([base, base], [0.5, 0.5])
     assert merged.chain.model == base.chain.model
Exemple #37
0
def gen_model(state_size = 2):
    """Blend all per-platform models into one, with hand-tuned weights."""
    # (model, weight) pairs, in the same build order as before.
    sources = [
        (gen_facebook_model(state_size), 0.5),
        (gen_whatsapp_model(state_size), 0.5),
        (gen_reddit_model(state_size), 1.5),
        (gen_slack_model(state_size), 3.0),
        (gen_thl_model(state_size), 0.2),
    ]
    models = [mdl for mdl, _ in sources]
    weights = [wt for _, wt in sources]
    return markovify.combine(models, weights)
Exemple #38
0
 def update(self, corpus, contribution=1):
     """Train a chain on *corpus* and merge it into self.model.

     *contribution* weights the new corpus relative to the existing model.
     Returns self so calls can be chained.
     """
     fresh = Chain(corpus, self.state_size)
     self.model = combine([self.model, fresh], [1, contribution])
     return self
 def test_combine_lists(self):
     """combine() accepts chain models given as lists of (state, dist) pairs."""
     pairs = list(sherlock_model.chain.model.items())
     combo = markovify.combine([pairs, pairs])
Exemple #40
0
def gen_proper_model(state_size = 2):
    """Blend the reddit and slack models, weighting slack three times heavier."""
    reddit_model = gen_reddit_model(state_size)
    slack_model = gen_slack_model(state_size)
    return markovify.combine([reddit_model, slack_model], [1.0, 3.0])
Exemple #41
0
outputs = []

# Weight each book by its sentence count relative to the largest book.
max_len = max([len(record['single_sentence_models']) for record in data])
weights = [len(record['single_sentence_models']) / max_len for record in data]

for i in range(sentence_target):
    # Slide a window of per-sentence models through each book in step with
    # overall progress, so early output draws on early chapters.
    progress = i / sentence_target
    end_window_norm = (i + 50) / sentence_target
    book_models = []
    for record in data:
        sentence_count = len(record['single_sentence_models'])
        start = int(progress * sentence_count)
        end = int(end_window_norm * sentence_count)
        # Guarantee a non-empty slice.
        end = end if end > start else start + 1
        combined_model = markovify.combine(
            record['single_sentence_models'][start:end])
        book_models.append(combined_model)
    multi_model = markovify.combine(book_models, weights)
    new_sent = multi_model.make_sentence(tries=1000)
    if new_sent:
        outputs.append(new_sent)

output_text = ' '.join(outputs)

# Timestamped output filename, e.g. novel_1700000000.txt
timestamp = str(int(datetime.datetime.now().timestamp()))
filename = "novel_" + timestamp + ".txt"

with open(filename, "w") as text_file:
    text_file.write(output_text)

print('words', len(output_text.split(' ')))
Exemple #42
0
    if "[" in string:
        return False
    return True


# Apply the cleanups from above
cookie_text_split[:] = [x for x in cookie_text_split if excluded(x)]
# BUG FIX: this filter previously called the undefined name ``exwifted``
# (a typo for ``excluded``), which raised NameError at runtime.
const_text_split[:] = [x for x in const_text_split if excluded(x)]

# Merge the text back into one big blob like markovify expects. (There's probably a better way to do this, but again, fun project. Efficiency's not that important...
cookie_text_model = POSifiedText("\n".join(cookie_text_split))
const_text_model = POSifiedText("\n".join(const_text_split))
tweet_text_model = POSifiedText("\n".join(tweet_text_split))

# Combine them into a terrifying structure; the cookie corpus is weighted
# 4x wherever tweets are involved.
const_and_cookie_model = markovify.combine(
    [cookie_text_model, const_text_model])
tweet_and_cookie_model = markovify.combine(
    [cookie_text_model, tweet_text_model], [4, 1])
everything_model = markovify.combine(
    [cookie_text_model, const_text_model, tweet_text_model], [4, 1, 1])

# Print a couple lines to the terminal to show that everything's working...

print("Examples:")
for i in range(5):
    print(const_and_cookie_model.make_short_sentence(240, tries=25))

# Now, open a temporary file and write some javascript surrounding our story.
with open(datadir + "/cookie.js.new", "w+") as file:

    # NOTE: I don't escape anything here... with bad seed text it'd be quite possible to inject weird js, etc.
Exemple #43
0
 def generate_combined_tweet(self, tweet_sets, reply_to):
     """Combine one model per tweet set (equal weights) and build a reply."""
     models = [self.generate_text_model(ts) for ts in tweet_sets]
     weights = [1.0] * len(tweet_sets)
     blended = markovify.combine(models, weights)
     sentence = blended.make_short_sentence(self.available_length(reply_to))
     return self.generate_reply(sentence, reply_to)