max_id=next_id, lang='en') for t in tweets['statuses']: if EXCLUDE_WORDS.search(t['full_text']) is None: tweet = TEXT_ONLY.sub(' ', t['full_text']) tweet = RETWEET.sub(' ', tweet) tweet = USER_NAME.sub(' ', tweet) tweet = LINKS.sub(' ', tweet) tweet = AMPERSAND.sub('and', tweet) tweet = TYPO_HASHTAGS.sub(fix_hashtag, tweet) tweet = TYPO_PERIOD.sub(fix_period, tweet) tweet = TYPO_QUESTION.sub(fix_question, tweet) tweet = TYPO_EXCLAMATION.sub(fix_exclamation, tweet) tweet = LONE_PUNCTUATION.sub(' ', tweet) tweet = GT.sub('>', tweet) tweet = LT.sub('<', tweet) chain.train(tweet) print(f'len(chain.tree): {len(chain.tree)}') chain.bulk_adjust_weights(fitness_functions=[ aw_mult(aw_favor_complexity, .001), aw_mult(aw_favor_punctuation, .00015), aw_mult(aw_favor_alternating_complexity, .1) ], iterations=len(chain.tree)) chain.save_training('bin/twitter/trending.bin') print( f'Sample tweet: {chain.generate_tweet(append_tag="Category: #trending")}')
# Train the chain on one account's timeline.
# NOTE(review): `user` is bound outside the visible text and the original
# indentation was lost; the nesting below is reconstructed from the statement
# order -- confirm against the full script.
print(f'search_term: {user}')

tweets = twit.statuses.user_timeline(
    screen_name=user,
    count=200,
    tweet_mode='extended',
    include_rts=False,
    trim_user=True,
)

# Substitutions are applied in exactly the order listed here.
timeline_scrubbers = (
    (TEXT_ONLY, ' '),
    (USER_NAME, ' '),
    (LINKS, ' '),
    (TYPO_HASHTAGS, fix_hashtag),
    (TYPO_PERIOD, fix_period),
    (TYPO_QUESTION, fix_question),
    (TYPO_EXCLAMATION, fix_exclamation),
    (LONE_PUNCTUATION, ' '),
    (AMPERSAND, 'and'),
    (GT, '>'),
    (LT, '<'),
)
for status in tweets:
    raw_text = status['full_text']
    # A status whose text matches EXCLUDE_WORDS is not used for training.
    if EXCLUDE_WORDS.search(raw_text) is not None:
        continue
    cleaned = raw_text
    for pattern, replacement in timeline_scrubbers:
        cleaned = pattern.sub(replacement, cleaned)
    # The original kept a disabled alternative that trained on the
    # unscrubbed 'full_text' instead of the cleaned text.
    chain.train(cleaned)

print(f'len(chain.tree): {len(chain.tree)}')

# Adjust the chain's weights with three scaled fitness functions, running
# one iteration per entry currently in chain.tree.
timeline_fitness = [
    aw_mult(aw_favor_complexity, 0.001),
    aw_mult(aw_favor_punctuation, 0.00015),
    aw_mult(dg_disfavor_consecutive_hashtags, 0.001),
]
chain.bulk_adjust_weights(fitness_functions=timeline_fitness,
                          iterations=len(chain.tree))

print('Sample tweet:', chain.generate_tweet())

# Only the "news" corpus is written. The original also listed these
# disabled targets under bin/twitter/: apologists.bin, atheists.bin,
# newagers.bin, churches.bin, trumpsterfire.bin, meta.bin.
chain.save_training('bin/twitter/news.bin')