def collect_tweets():
    """Collect new tweets about Firefox.

    Runs a Twitter search for Firefox-related terms and stores each result
    that survives ``_filter_tweet`` as a ``Tweet`` row. When a tweet is
    already stored, its id is sent as ``since_id`` so only newer tweets are
    requested. Returns ``None`` in every case; does nothing at all when
    ``settings.STAGE`` is truthy.
    """
    # Don't (ab)use the twitter API from dev and stage.
    if settings.STAGE:
        return

    with statsd.timer('customercare.tweets.time_elapsed'):
        t = Twython(settings.TWITTER_CONSUMER_KEY,
                    settings.TWITTER_CONSUMER_SECRET,
                    settings.TWITTER_ACCESS_TOKEN,
                    settings.TWITTER_ACCESS_TOKEN_SECRET)

        search_options = {
            'q': ('firefox OR #fxinput OR @firefoxbrasil OR #firefoxos '
                  'OR @firefox_es'),
            'count': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we
        # have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.',
                      settings.CC_TWEETS_PERPAGE)
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            # since_id is exclusive on Twitter's side, hence ">" not ">=".
            log.info('Retrieving tweets with id > %s', latest_tweet.tweet_id)

        # Retrieve Tweets
        results = t.search(**search_options)
        statuses = results['statuses']
        if not statuses:
            # Twitter returned 0 results.
            return

        # Drop tweets into DB
        for item in statuses:
            statsd.incr('customercare.tweet.collected')

            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue

            # 'created_at' is parsed as an RFC 822 date and stored as a
            # naive UTC datetime.
            created_date = datetime.utcfromtimestamp(calendar.timegm(
                rfc822.parsedate(item['created_at'])))

            item_lang = item['metadata'].get('iso_language_code', 'en')

            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # Deliberately ignored; presumably the tweet is already
                # stored (duplicate tweet_id) -- verify against the model.
                pass
def handle(self, **options):
    """Collect new tweets about Firefox.

    Runs a Twitter search for Firefox-related terms and stores each result
    that survives ``_filter_tweet`` as a ``Tweet`` row. When a tweet is
    already stored, its id is sent as ``since_id`` so only newer tweets are
    requested. ``options`` is accepted but not read. Returns ``None`` in
    every case; does nothing at all when ``settings.STAGE`` is truthy.
    """
    # Don't (ab)use the twitter API from dev and stage.
    if settings.STAGE:
        return

    t = get_twitter_api()

    search_options = {
        "q": "firefox OR #fxinput OR @firefoxbrasil OR #firefoxos OR @firefox_es",
        "count": settings.CC_TWEETS_PERPAGE,  # Items per page.
        "result_type": "recent",  # Retrieve tweets by date.
    }

    # If we already have some tweets, collect nothing older than what we
    # have.
    try:
        latest_tweet = Tweet.latest()
    except Tweet.DoesNotExist:
        log.debug(
            "No existing tweets. Retrieving %d tweets from search.",
            settings.CC_TWEETS_PERPAGE,
        )
    else:
        search_options["since_id"] = latest_tweet.tweet_id
        # since_id is exclusive on Twitter's side, hence ">" not ">=".
        log.info("Retrieving tweets with id > %s", latest_tweet.tweet_id)

    # Retrieve Tweets
    results = t.search(**search_options)
    statuses = results["statuses"]
    if not statuses:
        # Twitter returned 0 results.
        return

    # Drop tweets into DB
    for item in statuses:
        # Apply filters to tweet before saving
        # Allow links in #fxinput tweets
        item = _filter_tweet(item, allow_links="#fxinput" in item["text"])
        if not item:
            continue

        # "created_at" is parsed as an email-style date and stored as a
        # naive UTC datetime.
        created_date = datetime.utcfromtimestamp(
            calendar.timegm(email_utils.parsedate(item["created_at"]))
        )

        item_lang = item["metadata"].get("iso_language_code", "en")

        tweet = Tweet(
            tweet_id=item["id"],
            raw_json=json.dumps(item),
            locale=item_lang,
            created=created_date,
        )
        try:
            tweet.save()
        except IntegrityError:
            # Deliberately ignored; presumably the tweet is already stored
            # (duplicate tweet_id) -- verify against the model.
            pass
def tweet(**kwargs):
    """Build a Tweet from default values, overridden by the given kwargs.

    :arg text: the `text` attribute of the Tweet's raw_json; defaults to
        'Hey #Firefox' and is not forwarded to the Tweet constructor

    Every other keyword argument is passed to the Tweet constructor and
    replaces the default of the same name. When no `tweet_id` is given, the
    module-level `next_tweet_id` counter supplies one and is incremented.

    NOTE(review): earlier documentation listed a `save` argument ("whether
    to save the Tweet before returning it"); nothing in this function treats
    it specially -- confirm whether a wrapper outside this block handles it.
    """
    global next_tweet_id

    # Taken out up front so that it only ends up inside raw_json.
    text = kwargs.pop('text', 'Hey #Firefox')

    # TODO: Escape quotes and such
    user = {
        'screen_name': '__jimcasey__',
        'profile_image_url': 'http://a1.twimg.com/profile_images/'
                             '1117809237/cool_cat_normal.jpg',
        'profile_image_url_https': 'http://si0.twimg.com/'
                                   'profile_images/1117809237/'
                                   'cool_cat_normal.jpg',
    }
    # Key order is kept as-is so the serialized JSON does not change.
    payload = {
        'iso_language_code': 'en',
        'text': text,
        'created_at': tweet_created,
        'source': '<a href="http://www.tweetdeck.com" '
                  'rel="nofollow">TweetDeck</a>',
        'user': user,
        'to_user_id': None,
        'geo': None,
        'id': 25309168521,
        'metadata': {
            'results_type': 'recent',
        },
    }

    fields = {
        'locale': 'en',
        'raw_json': json.dumps(payload),
    }
    fields.update(kwargs)

    # Only hand out (and advance) an auto id when the caller gave none.
    if 'tweet_id' not in kwargs:
        fields['tweet_id'] = next_tweet_id
        next_tweet_id += 1

    return Tweet(**fields)