def initial_import(user_id, max_id=None):
    logging.critical('Importing all tweets older than %s for user %d...' %
                     (max_id, user_id))

    # We should just quit if the user doesn't exist or if their initial import
    # has already finished.
    user = User.get_by_key_name(str(user_id))
    if not user:
        return logging.error('User not found.')
    if user.import_finished:
        return logging.error('Import already finished for user.')

    # Get a batch of tweets to work on.
    tweets = user.api.user_timeline(max_id=max_id, count=settings.BATCH_SIZE)

    # Are there any tweets in the batch?
    if tweets:
        logging.info('Importing %d tweets in this batch' % len(tweets))
        entities, max_id = [], None
        for tweet in tweets:
            entities.extend(make_tweet(user, tweet))
            max_id = tweet.id
        user.tweet_count += len(tweets)
        db.put(entities + [user])

        # Spawn another instance of this task to continue the import
        # process. The max_id needs to be decremented here because Twitter's
        # API will include the tweet with that ID, even though you might
        # expect it to only include tweets *older* than that one, like the
        # documentation says.
        max_id -= 1
        deferred.defer(initial_import, user_id, max_id, _queue='import')

    # Otherwise, the import has finished. Update the user accordingly.
    else:
        logging.critical('Initial import finished!')

        # Note the latest tweet, so we know where to start importing new ones.
        last_tweet = user.tweets.order('-created_at').get()
        user.latest_tweet_id = last_tweet.id if last_tweet else None

        # Note the date of the oldest tweet for the user, for graphs. Since
        # max_id was decremented past the oldest imported tweet, adding one
        # back gives that tweet's key name. max_id is still None if the user
        # had no tweets at all.
        if max_id is not None:
            old_key = db.Key.from_path('Tweet', str(max_id + 1),
                                       parent=user.key())
            old_tweet = db.get(old_key)
            if old_tweet:
                user.oldest_tweet_at = old_tweet.created_at
        user.put()

        # Now start the post-processing.
        deferred.defer(post_process_tweets, user_id, initial_import=True,
                       _queue='postprocess')
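

# The chain above never starts itself: something outside these tasks has to
# queue the first initial_import once a user has authorised the app. Below is
# a minimal sketch of that kick-off, not part of the original code; it assumes
# (as get_by_key_name above implies) that each User entity's key name is the
# string form of the Twitter user id.
def start_initial_import(user):
    """Queue the first initial_import task for a newly authorised user."""
    if user.import_finished:
        return  # The archive is already complete; nothing to queue.
    # initial_import will re-queue itself with a decreasing max_id until
    # Twitter returns an empty batch.
    deferred.defer(initial_import, int(user.key().name()), _queue='import')

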
def fetch_new_tweets(user_id, token_key, token_secret, since_id=None):
    logging.info('Fetching new tweets for user %d...' % user_id)

    user = User.get_by_key_name(str(user_id))
    if not user:
        return logging.error('User not found.')

    # If we weren't told where to start, pick up from the most recent tweet
    # we already have stored for this user.
    if since_id is None:
        last_tweet = user.tweets.order('-created_at').get()
        since_id = last_tweet.id if last_tweet else None
        logging.info('Found last tweet by date: %s' % since_id)

    api = make_api(token_key, token_secret)
    tweets = api.user_timeline(since_id=since_id, count=settings.BATCH_SIZE)
    if tweets:
        entities = []
        for tweet in tweets:
            entities.extend(make_tweet(user, tweet))
        db.put(entities)

        # Keep going until Twitter returns an empty batch, continuing from
        # the newest tweet we just stored.
        deferred.defer(fetch_new_tweets, user_id, token_key, token_secret,
                       max(tweet.id for tweet in tweets))
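

# fetch_new_tweets relies on a make_api(token_key, token_secret) helper that
# is not shown in this section. A minimal sketch of what it might look like
# using tweepy, assuming the app's consumer credentials live in settings
# (CONSUMER_KEY / CONSUMER_SECRET are placeholder names):
import tweepy

def make_api(token_key, token_secret):
    """Build an authenticated tweepy client for a user's stored access token."""
    auth = tweepy.OAuthHandler(settings.CONSUMER_KEY, settings.CONSUMER_SECRET)
    auth.set_access_token(token_key, token_secret)
    return tweepy.API(auth)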