lang='en') for t in tweets['statuses']: if EXCLUDE_WORDS.search(t['full_text']) is None: tweet = TEXT_ONLY.sub(' ', t['full_text']) tweet = RETWEET.sub(' ', tweet) tweet = USER_NAME.sub(' ', tweet) tweet = LINKS.sub(' ', tweet) tweet = TYPO_HASHTAGS.sub(fix_hashtag, tweet) tweet = TYPO_PERIOD.sub(fix_period, tweet) tweet = TYPO_QUESTION.sub(fix_question, tweet) tweet = TYPO_EXCLAMATION.sub(fix_exclamation, tweet) tweet = LONE_PUNCTUATION.sub(' ', tweet) tweet = AMPERSAND.sub('and', tweet) tweet = GT.sub('>', tweet) tweet = LT.sub('<', tweet) chain.train(tweet) for i in range(3): if 'next_results' not in tweets['search_metadata']: break next_id = re.split(r'\D+', tweets['search_metadata']['next_results'])[1] tweets = twit.search.tweets(q=trend, count=100, tweet_mode='extended', max_id=next_id, lang='en') for t in tweets['statuses']: if EXCLUDE_WORDS.search(t['full_text']) is None: tweet = TEXT_ONLY.sub(' ', t['full_text']) tweet = RETWEET.sub(' ', tweet) tweet = USER_NAME.sub(' ', tweet)
# Train a Markov chain on every stored line of dialogue for one character
# and save the resulting training data under bin/star_trek/.
import sqlite3
from sqlite3 import Error

from markov_chain import MarkovChain

chain = MarkovChain()

database = '/home/drue/Deployment/star_trek_club/star_trek_db.sqlite3'
char_name = 'PICARD'

connection = sqlite3.connect(database)
try:
    cursor = connection.cursor()

    # Resolve the character name to its primary key.
    cursor.execute('SELECT id FROM characters WHERE name=?', (char_name, ))
    row = cursor.fetchone()
    if row is None:
        # fetchone() returns None when no row matches; fail with a clear
        # message instead of "'NoneType' object is not subscriptable".
        raise LookupError(
            f'character {char_name!r} not found in {database}'
        )
    char_id = row[0]

    cursor.execute('SELECT line FROM lines WHERE character_id=?', (char_id, ))
    # Iterate the cursor directly so rows are consumed one at a time rather
    # than being loaded into a list first.
    for result in cursor:
        # Drop ellipses and double dashes before feeding the line to the chain.
        chain.train(result[0].replace('...', '').replace('--', ''))
finally:
    # A sqlite3 connection is not closed automatically; release it even if
    # a query or the training step raises.
    connection.close()

chain.save_training(f'bin/star_trek/{char_name}.bin')
def generate_text(author_id):
    """Build a JSON response with a generated quote for *author_id*.

    A new ``MarkovChain`` is trained from ``corpora.xml`` under the
    application's root path for the given author. The response carries the
    chain's ``name`` under ``'author'`` and a freshly generated quote under
    ``'generated_text'``.
    """
    corpus_path = app.root_path + "/corpora.xml"

    model = MarkovChain()
    model.train(corpus_path, author_id)

    # Read the name before generating, matching the original evaluation order.
    author = model.name
    quote = model.generate_quote()

    payload = {'author': author, 'generated_text': quote}
    return jsonify(payload)