def main():
    """Build a Markov model from the module-level ``names`` corpus, generate a
    short sentence, and post it to Twitter.

    Relies on module globals: ``names`` (list of strings), the tweepy
    credentials (API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_SECRET), and
    ``Markov`` / ``shuffle`` / ``randint`` imported elsewhere in the file.

    Returns:
        The tweepy status response on success, or the caught TweepError
        on failure (errors are printed, never raised).
    """
    auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
    client = tweepy.API(auth)

    print("building model...")
    shuffle(names)
    chain = Markov([' '.join(names)])

    print("making sentence...")
    # Target length in generated tokens; retry until the chain yields
    # something non-empty.
    target_len = randint(20, 35)
    tweet = ''
    while not tweet:
        tweet = ''.join(chain.generate_text()[:target_len])
    tweet = tweet.rstrip() + '!'

    print('tweeting...')
    print(tweet)
    try:
        response = client.update_status(status=tweet)
        print(response)
        return response
    except tweepy.error.TweepError as e:
        print(e)
        return e
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Minimal smarkov demo: train a character-level chain on one DNA-like
string and print a generated sequence."""
from smarkov import Markov

dna_chain = Markov(["AGACAGACGAC"])
generated = dna_chain.generate_text()
print("".join(generated))
def expanding_words(words):
    """Transform words into their expanded form.

    Replaces abbreviation tokens like "'ll" or "n't" with their full
    words. There are special cases like "can't" (tokenized as
    ("ca", "n't")) or "won't" where both resulting tokens are replaced.

    Args:
        words: iterator of word tokens to search and replace.

    Yields:
        Tokens with abbreviations replaced via the module-level
        REPLACE_WORDS mapping; other tokens pass through unchanged.
    """
    # FIX: the original fragment was missing this `def` header, leaving a
    # floating docstring and a module-level loop over an undefined name.
    for word in words:
        if word in REPLACE_WORDS:
            yield REPLACE_WORDS[word]
        else:
            yield word


def tokenize(s):
    """Split *s* into word tokens (acronyms, CamelCase parts, hyphenated
    and apostrophe words) and expand abbreviations."""
    return expanding_words(
        re.findall("[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+", s))


# Build a sentence-level corpus from the bundled Project Gutenberg text
# (pg1342.txt = Pride and Prejudice), splitting on sentence-ending
# punctuation while avoiding abbreviations and initials.
scriptDir = os.path.dirname(os.path.realpath(__file__))
inputFile = os.path.join(scriptDir, "./pg1342.txt")
with open(inputFile, "r") as inFile:
    corpus = re.split('(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s',
                      inFile.read())

chain = Markov(corpus, tokenize=tokenize)
print(join_tokens_to_sentences(chain.generate_text()))
from smarkov import Markov
from glob import glob
import random
import markovify  # https://github.com/jsvine/markovify

# Corpus A: per-line token lists for the smarkov model, pulled from one
# randomly chosen season file. Each CSV line's quoted dialogue is kept
# (everything from the first '"' on), stripped of quotes and newlines.
token_lists = []
season_files = glob('SouthParkData/Season*.csv')
with open(random.choice(season_files)) as infile:
    for line in infile:
        dialogue = line[line.find('"'):].replace('\n', '').replace('"', '')
        tokens = dialogue.split(' ')
        if tokens != ['']:
            token_lists.append(tokens)

# Corpus B: raw text of another (independently chosen) season file for
# the markovify model.
with open(random.choice(season_files)) as infile:
    raw_text = infile.read()

chain = Markov(token_lists)
print(" ".join(chain.generate_text()))

text_model = markovify.Text(raw_text)
for _ in range(5):
    print(text_model.make_sentence())
def expanding_words(words):
    """Transform words into their expanded form.

    Replaces abbreviation tokens like "'ll" or "n't" with their full
    words. There are special cases like "can't" (tokenized as
    ("ca", "n't")) or "won't" where both resulting tokens are replaced.

    Args:
        words: iterator of word tokens to search and replace.

    Yields:
        Tokens with abbreviations replaced; unmapped tokens unchanged.
    """
    for token in words:
        # Equivalent to: yield REPLACE_WORDS[token] if present, else token.
        yield REPLACE_WORDS.get(token, token)


def tokenize(s):
    """Split *s* into word tokens and expand abbreviations."""
    word_pattern = "[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+"
    return expanding_words(re.findall(word_pattern, s))


# Split the bundled Gutenberg text into sentences, avoiding false splits
# on abbreviations ("e.g.") and initials ("J. Smith").
scriptDir = os.path.dirname(os.path.realpath(__file__))
inputFile = os.path.join(scriptDir, "./pg1342.txt")
with open(inputFile, "r") as inFile:
    raw_text = inFile.read()
corpus = re.split('(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', raw_text)

chain = Markov(corpus, tokenize=tokenize)
print(join_tokens_to_sentences(chain.generate_text()))