def update_source(self, twitter_users=twitter_users, max_pages=max_pages):
    """Scrape tweets for the configured handles and store them as Articles.

    A marker row (an ``Article`` whose ``article_author`` is
    ``'twitter_last_update'``) records when this method last ran; its
    ``article_publishdate`` holds that timestamp.  The marker is created
    with a date far in the past the first time the method runs.

    For every handle in ``twitter_users``:

    * if no ``Article`` with that author exists yet, every tweet returned
      by ``get_tweets`` (up to ``max_pages`` pages) is stored;
    * otherwise only tweets whose ``'time'`` is later than the previous
      update timestamp are stored.

    Finally the marker is moved to the current UTC time and the session is
    committed.

    Args:
        twitter_users: Iterable of Twitter handles to scrape.
        max_pages: Passed to ``get_tweets`` as ``pages``.

    Raises:
        Nothing is caught here: any exception raised by ``get_tweets`` (the
        original notes mention invalid accounts and accounts with too few
        tweets) propagates to the caller, and tweets added to the session
        so far are left uncommitted.
    """
    # Create/recall the marker row used to identify the last update time.
    marker = Article.query.filter_by(
        article_author='twitter_last_update').first()
    if marker is None:
        print('did not find last_update')
        # Same instant as the former strptime('Jun 1 2005 1:33PM', ...),
        # built directly so it does not depend on the process locale
        # (%b and %p are locale-sensitive).
        last_update = datetime(2005, 6, 1, 13, 33)
        marker = Article(source_type='Social Media',
                         source_name='Twitter',
                         article_author='twitter_last_update',
                         article_publishdate=last_update,
                         article_wordcount=0,
                         article_title=None,
                         article_summary=None,
                         article_fulltext=None,
                         article_url=None)
        db.session.add(marker)
        db.session.commit()
    else:
        print('found last_update')
        last_update = marker.article_publishdate

    for user in twitter_users:
        print(user)
        tweets = get_tweets(user, pages=max_pages)
        # A handle with no stored Article is new: back-fill all of its
        # tweets instead of only those newer than the last update.
        is_new_user = Article.query.filter_by(
            article_author=user).first() is None

        for tweet in tweets:
            if not (is_new_user or tweet['time'] > last_update):
                continue
            text = tweet['text']
            # split() with no argument splits on any run of whitespace, so
            # double spaces and newlines do not inflate the count and an
            # empty tweet counts as 0 words (split(" ") reported 1).
            wordcount = len(text.split())
            db.session.add(Article(source_type='Social Media',
                                   source_name='Twitter',
                                   article_author=user,
                                   article_publishdate=tweet['time'],
                                   article_wordcount=wordcount,
                                   article_title=None,
                                   article_summary=text,
                                   article_fulltext=text,
                                   article_url=None))

    # NOTE(review): the marker is naive UTC; this assumes tweet['time'] is
    # also naive UTC -- confirm against get_tweets.
    marker.article_publishdate = datetime.utcnow()
    db.session.commit()
    print('twitter source updated!')