max_id=next_id,
                                    lang='en')
        # Ordered clean-up passes run on every accepted tweet. Each entry is
        # (pattern, replacement); the replacement is either a literal string
        # or a callable, and is passed straight to the pattern's ``sub``.
        # The sequence matters: every pass works on the previous pass's output.
        cleanup_passes = (
            (TEXT_ONLY, ' '),
            (RETWEET, ' '),
            (USER_NAME, ' '),
            (LINKS, ' '),
            (AMPERSAND, 'and'),
            (TYPO_HASHTAGS, fix_hashtag),
            (TYPO_PERIOD, fix_period),
            (TYPO_QUESTION, fix_question),
            (TYPO_EXCLAMATION, fix_exclamation),
            (LONE_PUNCTUATION, ' '),
            # NOTE(review): GT / LT presumably match escaped angle brackets
            # in the API text -- confirm against their definitions.
            (GT, '>'),
            (LT, '<'),
        )
        for status in tweets['statuses']:
            raw_text = status['full_text']
            # Tweets on which EXCLUDE_WORDS finds a match are not trained on.
            if EXCLUDE_WORDS.search(raw_text) is not None:
                continue
            cleaned = raw_text
            for pattern, replacement in cleanup_passes:
                cleaned = pattern.sub(replacement, cleaned)
            chain.train(cleaned)
    # Progress output: the current len() of chain.tree.
    print(f'len(chain.tree): {len(chain.tree)}')

# Weight adjustment after training. Each entry wraps a fitness function and a
# numeric factor with aw_mult (presumably a multiplier on that function's
# influence -- confirm against aw_mult's definition).
weight_adjusters = [
    aw_mult(aw_favor_complexity, 0.001),
    aw_mult(aw_favor_punctuation, 0.00015),
    aw_mult(aw_favor_alternating_complexity, 0.1),
]
# The iteration count is tied to the current size of the chain's tree.
chain.bulk_adjust_weights(
    fitness_functions=weight_adjusters,
    iterations=len(chain.tree),
)

# Store the training data before producing the sample output.
chain.save_training('bin/twitter/trending.bin')

sample_tweet = chain.generate_tweet(append_tag="Category: #trending")
print(f'Sample tweet: {sample_tweet}')
Beispiel #2
0
	# Progress output: the screen name whose timeline is fetched next.
	print(f'search_term: {user}')
	tweets = twit.statuses.user_timeline(
		screen_name=user,
		count=200,
		tweet_mode='extended',
		include_rts=False,
		trim_user=True,
	)
	# Ordered clean-up passes run on every accepted tweet. Each entry is
	# (pattern, replacement); the replacement is either a literal string or a
	# callable, and is passed straight to the pattern's ``sub``. The sequence
	# matters: every pass works on the previous pass's output.
	cleanup_passes = (
		(TEXT_ONLY, ' '),
		(USER_NAME, ' '),
		(LINKS, ' '),
		(TYPO_HASHTAGS, fix_hashtag),
		(TYPO_PERIOD, fix_period),
		(TYPO_QUESTION, fix_question),
		(TYPO_EXCLAMATION, fix_exclamation),
		(LONE_PUNCTUATION, ' '),
		(AMPERSAND, 'and'),
		# NOTE(review): GT / LT presumably match escaped angle brackets in
		# the API text -- confirm against their definitions.
		(GT, '>'),
		(LT, '<'),
	)
	for status in tweets:
		raw_text = status['full_text']
		# Tweets on which EXCLUDE_WORDS finds a match are not trained on.
		if EXCLUDE_WORDS.search(raw_text) is not None:
			continue
		cleaned = raw_text
		for pattern, replacement in cleanup_passes:
			cleaned = pattern.sub(replacement, cleaned)
		chain.train(cleaned)
		# Disabled alternative: train on the unprocessed text instead,
		# i.e. chain.train(status['full_text']).
	# Progress output: the current len() of chain.tree.
	print(f'len(chain.tree): {len(chain.tree)}')

# Weight adjustment after training. Each entry wraps a fitness function and a
# numeric factor with aw_mult (presumably a multiplier on that function's
# influence -- confirm against aw_mult's definition).
weight_adjusters = [
	aw_mult(aw_favor_complexity, 0.001),
	aw_mult(aw_favor_punctuation, 0.00015),
	aw_mult(dg_disfavor_consecutive_hashtags, 0.001),
]
# The iteration count is tied to the current size of the chain's tree.
chain.bulk_adjust_weights(
	fitness_functions=weight_adjusters,
	iterations=len(chain.tree),
)

sample_tweet = chain.generate_tweet()
print('Sample tweet:', sample_tweet)

# Active output file for the training data.
chain.save_training('bin/twitter/news.bin')
# Alternative output paths (disabled):
# chain.save_training('bin/twitter/apologists.bin')
# chain.save_training('bin/twitter/atheists.bin')
# chain.save_training('bin/twitter/newagers.bin')
# chain.save_training('bin/twitter/churches.bin')
# chain.save_training('bin/twitter/trumpsterfire.bin')
# chain.save_training('bin/twitter/meta.bin')