def main():
    """Scrape the past day's tweets for every configured account and save them.

    The window runs from yesterday's UTC date to today's UTC date, both
    formatted as ``YYYY-MM-DD``. Account names come from
    ``util.read_tweet_accounts()``; tweets for each account are fetched with
    ``TweetScraper.get_tweets_from_user_timeline`` and stored one at a time
    as ``Tweet`` documents keyed by the tweet's ``id``.

    Saving is best-effort: a tweet that fails to save is reported and
    skipped so the remaining tweets are still processed.
    """
    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    start_date_str = yesterday.strftime('%Y-%m-%d')
    end_date_str = now.strftime('%Y-%m-%d')

    tweet_accounts = util.read_tweet_accounts()

    # Tweet.init() takes no per-account arguments, so run it once up front
    # instead of once per account.
    # NOTE(review): assumes init() is idempotent one-time setup (it looks like
    # an elasticsearch-dsl Document) -- confirm.
    Tweet.init()

    for screen_name in tweet_accounts:
        tweets = TweetScraper.get_tweets_from_user_timeline(
            screen_name, start_date_str, end_date_str)
        print("Tweet account name: %s" % screen_name)
        print("Total length of tweets: %s" % len(tweets))

        for tweet in tweets:
            try:
                obj = Tweet(meta={'id': tweet['id']})
                obj.screen_name = tweet['screen_name']
                obj.full_text = tweet['full_text']
                obj.created_at = tweet['created_at']
                obj.save()
            except Exception as exc:
                # Keep going on a bad tweet, but make the failure visible.
                # Catching Exception (not a bare except) lets Ctrl-C and
                # SystemExit stop the run.
                print("Failed to save tweet for %s: %r" % (screen_name, exc))
def _scrape_tweets(start_date_str, num_of_date_project, max_count_per_day):
    """Scrape and save tweets for every configured account, one day at a time.

    Starting at ``start_date_str``, the function walks forward
    ``num_of_date_project`` times. On each step it fetches, for every account
    returned by ``util.read_tweet_accounts()``, the tweets between the current
    date string and the next one (computed with
    ``_add_one_day_to_date_string``), then saves each tweet as a ``Tweet``
    document keyed by the tweet's ``id``.

    Args:
        start_date_str: Date string of the first day to scrape, in whatever
            format ``_add_one_day_to_date_string`` accepts.
        num_of_date_project: Number of consecutive one-day windows to process.
        max_count_per_day: Passed through to
            ``TweetScraper.get_tweets_from_user_timeline`` as its fourth
            positional argument.

    Saving is best-effort: a tweet that fails to save is reported and skipped.
    """
    tweet_accounts = util.read_tweet_accounts()

    # Tweet.init() does not depend on the day or the account, so run it once
    # rather than once per (day, account) pair.
    # NOTE(review): assumes init() is idempotent one-time setup (it looks like
    # an elasticsearch-dsl Document) -- confirm.
    Tweet.init()

    for _ in range(num_of_date_project):
        print(datetime.now())
        end_date_str = _add_one_day_to_date_string(start_date_str)
        print(start_date_str)

        for screen_name in tweet_accounts:
            tweets = TweetScraper.get_tweets_from_user_timeline(
                screen_name, start_date_str, end_date_str, max_count_per_day)
            print("Total length of tweets: %s" % len(tweets))

            # Count only tweets that were actually saved, so the progress
            # message below is accurate.
            no_of_tweets_saved = 0
            for tweet in tweets:
                try:
                    obj = Tweet(meta={'id': tweet['id']})
                    obj.screen_name = tweet['screen_name']
                    obj.full_text = tweet['full_text']
                    obj.created_at = tweet['created_at']
                    obj.save()
                except Exception as exc:
                    # Keep going on a bad tweet, but make the failure visible.
                    # Catching Exception (not a bare except) lets Ctrl-C and
                    # SystemExit stop the run.
                    print("Failed to save tweet for %s: %r"
                          % (screen_name, exc))
                    continue

                no_of_tweets_saved += 1
                if no_of_tweets_saved % 1000 == 0:
                    print("%s tweets have been saved to database."
                          % no_of_tweets_saved)

        # Advance the window by one day for the next iteration.
        # NOTE(review): placed at the per-day loop level; confirm this matches
        # the intended nesting.
        start_date_str = _add_one_day_to_date_string(start_date_str)