lang='en')
 for t in tweets['statuses']:
     if EXCLUDE_WORDS.search(t['full_text']) is None:
         tweet = TEXT_ONLY.sub(' ', t['full_text'])
         tweet = RETWEET.sub(' ', tweet)
         tweet = USER_NAME.sub(' ', tweet)
         tweet = LINKS.sub(' ', tweet)
         tweet = TYPO_HASHTAGS.sub(fix_hashtag, tweet)
         tweet = TYPO_PERIOD.sub(fix_period, tweet)
         tweet = TYPO_QUESTION.sub(fix_question, tweet)
         tweet = TYPO_EXCLAMATION.sub(fix_exclamation, tweet)
         tweet = LONE_PUNCTUATION.sub(' ', tweet)
         tweet = AMPERSAND.sub('and', tweet)
         tweet = GT.sub('>', tweet)
         tweet = LT.sub('<', tweet)
         chain.train(tweet)
 for i in range(3):
     if 'next_results' not in tweets['search_metadata']:
         break
     next_id = re.split(r'\D+',
                        tweets['search_metadata']['next_results'])[1]
     tweets = twit.search.tweets(q=trend,
                                 count=100,
                                 tweet_mode='extended',
                                 max_id=next_id,
                                 lang='en')
     for t in tweets['statuses']:
         if EXCLUDE_WORDS.search(t['full_text']) is None:
             tweet = TEXT_ONLY.sub(' ', t['full_text'])
             tweet = RETWEET.sub(' ', tweet)
             tweet = USER_NAME.sub(' ', tweet)
Example #2
0
import sqlite3
from sqlite3 import Error
from markov_chain import MarkovChain

# Train a MarkovChain on every row of `lines` that belongs to one character
# in the sqlite database, then save the trained state under bin/star_trek/.
chain = MarkovChain()

database = '/home/drue/Deployment/star_trek_club/star_trek_db.sqlite3'
char_name = 'PICARD'

connection = sqlite3.connect(database)
try:
    cursor = connection.cursor()

    # Resolve the character name to its primary key.
    cursor.execute('SELECT id FROM characters WHERE name=?', (char_name, ))
    row = cursor.fetchone()
    if row is None:
        # fetchone() returns None when no row matched; fail with a clear
        # message instead of an opaque "NoneType is not subscriptable".
        raise SystemExit(
            f'No character named {char_name!r} found in {database}')
    char_id = row[0]

    # Iterate the cursor directly so rows are streamed rather than all
    # loaded into a list first.
    cursor.execute('SELECT line FROM lines WHERE character_id=?', (char_id, ))
    for (line, ) in cursor:
        # Ellipses and double dashes are stripped before training
        # (presumably so they are not learned as tokens -- confirm).
        chain.train(line.replace('...', '').replace('--', ''))
finally:
    # Always release the database handle, even if training fails part-way.
    connection.close()

chain.save_training(f'bin/star_trek/{char_name}.bin')
Example #3
0
def generate_text(author_id):
    """Build a JSON response containing a generated quote for *author_id*.

    A new MarkovChain is created on every call and trained from the
    ``corpora.xml`` file located under ``app.root_path``, with *author_id*
    passed through to ``train``. The response, produced by ``jsonify``,
    carries two keys: ``author`` (the chain's ``name`` attribute) and
    ``generated_text`` (the result of ``generate_quote()``).
    """
    corpus_path = app.root_path + "/corpora.xml"

    generator = MarkovChain()
    generator.train(corpus_path, author_id)

    payload = {
        'author': generator.name,
        'generated_text': generator.generate_quote(),
    }
    return jsonify(payload)