def tweet(save=False, **kwargs): """Return a Tweet with valid default values or the ones passed in. Args: save: whether to save the Tweet before returning it text: the `text` attribute of the Tweet's raw_json """ global next_tweet_id defaults = { 'locale': 'en', 'raw_json': '{"iso_language_code": "en", "text": "%s", ' '"created_at": "Thu, 23 Sep 2010 13:58:06 +0000", ' '"profile_image_url": ' '"http://a1.twimg.com/profile_images/1117809237/cool_cat_normal.jpg", ' '"source": "<a href="http://www.tweetdeck.com" ' 'rel="nofollow">TweetDeck</a>", ' '"from_user": "******", "from_user_id": 142651388, ' '"to_user_id": null, "geo": null, "id": 25309168521, ' '"metadata": {"result_type": "recent"}}' % kwargs.pop('text', 'Hey #Firefox') } # TODO: Escape quotes and such defaults.update(kwargs) if 'tweet_id' not in kwargs: defaults['tweet_id'] = next_tweet_id next_tweet_id += 1 t = Tweet(**defaults) if save: t.save() return t
def collect_tweets():
    """Fetch recent Firefox-related tweets from Twitter and store new ones."""
    with statsd.timer('customercare.tweets.time_elapsed'):
        # Authenticate against Twitter with our app and access credentials.
        auth = tweepy.OAuthHandler(settings.TWITTER_CONSUMER_KEY,
                                   settings.TWITTER_CONSUMER_SECRET,
                                   secure=True)
        auth.set_access_token(settings.TWITTER_ACCESS_TOKEN,
                              settings.TWITTER_ACCESS_TOKEN_SECRET)
        api = tweepy.API(auth, parser=RawParser())

        options = {
            'q': 'firefox OR #fxinput OR @firefoxbrasil OR #firefoxos',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Newest tweets first.
        }

        # Only ask for tweets newer than the most recent one we hold.
        try:
            newest = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' %
                      (settings.CC_TWEETS_PERPAGE))
        else:
            options['since_id'] = newest.tweet_id
            log.info('Retrieving tweets with id >= %s' % newest.tweet_id)

        # Hit the search API; give up quietly on any Twitter-side failure.
        try:
            raw_data = json.loads(str(api.search(**options)))
        except tweepy.TweepError as e:
            log.warning('Twitter request failed: %s' % e)
            return

        results = raw_data['results'] if 'results' in raw_data else None
        if not results:
            # Twitter returned nothing new.
            return

        for raw_tweet in results:
            statsd.incr('customercare.tweet.collected')
            # Links are stripped unless the tweet is tagged #fxinput.
            raw_tweet = _filter_tweet(raw_tweet,
                                      allow_links='#fxinput' in raw_tweet['text'])
            if not raw_tweet:
                continue
            posted = datetime.utcfromtimestamp(calendar.timegm(
                rfc822.parsedate(raw_tweet['created_at'])))
            locale = raw_tweet.get('iso_language_code', 'en')
            row = Tweet(tweet_id=raw_tweet['id'],
                        raw_json=json.dumps(raw_tweet),
                        locale=locale, created=posted)
            try:
                row.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # Duplicate tweet_id -- already stored.
                pass
def tweet(save=False, **kwargs):
    """Return a Tweet with valid default values or the ones passed in.

    Args:
        save: whether to save the Tweet before returning it
        text: the `text` attribute of the Tweet's raw_json

    Other keyword arguments override the corresponding Tweet fields.
    """
    global next_tweet_id
    # Build the payload as a dict and serialize with json.dumps so the
    # result is valid JSON.  The old %-interpolated literal had unescaped
    # quotes inside the "source" value and interpolated `text` verbatim,
    # so it was never parseable (the old "TODO: Escape quotes and such").
    raw = {
        'iso_language_code': 'en',
        'text': kwargs.pop('text', 'Hey #Firefox'),
        'created_at': 'Thu, 23 Sep 2010 13:58:06 +0000',
        'profile_image_url': ('http://a1.twimg.com/profile_images/'
                              '1117809237/cool_cat_normal.jpg'),
        'source': ('<a href="http://www.tweetdeck.com" '
                   'rel="nofollow">TweetDeck</a>'),
        'from_user': '******',
        'from_user_id': 142651388,
        'to_user_id': None,
        'geo': None,
        'id': 25309168521,
        'metadata': {'result_type': 'recent'},
    }
    defaults = {'locale': 'en', 'raw_json': json.dumps(raw)}
    defaults.update(kwargs)
    if 'tweet_id' not in kwargs:
        # Sequential ids keep repeated fixtures from colliding.
        defaults['tweet_id'] = next_tweet_id
        next_tweet_id += 1
    t = Tweet(**defaults)
    if save:
        t.save()
    return t
def collect_tweets():
    """Grab a fresh batch of Firefox tweets and persist the new ones."""
    with statsd.timer('customercare.tweets.time_elapsed'):
        params = {
            'q': 'firefox OR #fxinput',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Page size.
            'result_type': 'recent',  # Order by date, newest first.
        }

        # Skip anything older than the newest tweet we already have.
        try:
            newest = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' %
                      (settings.CC_TWEETS_PERPAGE))
        else:
            params['since_id'] = newest.tweet_id
            log.info('Retrieving tweets with id >= %s' % newest.tweet_id)

        # Query the search endpoint; bail out quietly on any failure.
        url = '%s?%s' % (SEARCH_URL, urllib.urlencode(params))
        try:
            raw_data = json.load(urllib.urlopen(url))
        except Exception as e:
            log.warning('Twitter request failed: %s' % e)
            return

        results = raw_data['results'] if 'results' in raw_data else None
        if not results:
            # Twitter returned 0 results.
            return

        for entry in results:
            statsd.incr('customercare.tweet.collected')
            # Links survive filtering only for #fxinput tweets.
            entry = _filter_tweet(entry,
                                  allow_links='#fxinput' in entry['text'])
            if not entry:
                continue
            timestamp = calendar.timegm(rfc822.parsedate(entry['created_at']))
            when = datetime.utcfromtimestamp(timestamp)
            lang = entry.get('iso_language_code', 'en')
            record = Tweet(tweet_id=entry['id'], raw_json=json.dumps(entry),
                           locale=lang, created=when)
            try:
                record.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # Duplicate tweet_id -- already in the DB.
                pass
def collect_tweets():
    """Collect new tweets about Firefox.

    Searches Twitter for recent 'firefox'/'#fxinput' tweets newer than
    the latest Tweet we have stored, filters each result, and saves the
    survivors to the DB.  Timing and per-tweet counters go to statsd.
    """
    with statsd.timer('customercare.tweets.time_elapsed'):
        search_options = {
            'q': 'firefox OR #fxinput',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we
        # have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' % (
                settings.CC_TWEETS_PERPAGE))
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

        # Retrieve Tweets.  Any failure (network, bad JSON) just logs a
        # warning and skips this run -- the next run will catch up.
        try:
            raw_data = json.load(urllib.urlopen('%s?%s' % (
                SEARCH_URL, urllib.urlencode(search_options))))
        except Exception, e:
            log.warning('Twitter request failed: %s' % e)
            return

        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return

        # Drop tweets into DB.
        for item in raw_data['results']:
            # Counted before filtering so we can see the filter's drop rate.
            statsd.incr('customercare.tweet.collected')
            # Apply filters to tweet before saving.
            # Allow links in #fxinput tweets.
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue
            # created_at is an RFC 2822 date string; convert to a UTC datetime.
            created_date = datetime.utcfromtimestamp(calendar.timegm(
                rfc822.parsedate(item['created_at'])))
            item_lang = item.get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # tweet_id already stored; safe to ignore.
                pass
def collect_tweets():
    """Collect new tweets about Firefox.

    NOTE(review): in this excerpt `raw_data` is fetched but never used --
    the processing loop presumably continues past the visible chunk;
    confirm against the full file.
    """
    search_options = {
        'q': 'firefox',
        'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
        'result_type': 'recent',  # Retrieve tweets by date.
    }

    # If we already have some tweets, collect nothing older than what we have.
    try:
        latest_tweet = Tweet.latest()
    except Tweet.DoesNotExist:
        log.debug('No existing tweets. Retrieving %d tweets from search.' % (
            settings.CC_TWEETS_PERPAGE))
    else:
        search_options['since_id'] = latest_tweet.tweet_id
        log.debug('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

    # Retrieve Tweets; a failed request only logs a warning and aborts
    # this run.
    try:
        raw_data = json.load(urllib.urlopen('%s?%s' % (
            SEARCH_URL, urllib.urlencode(search_options))))
    except Exception, e:
        log.warning('Twitter request failed: %s' % e)
        return
def collect_tweets():
    """Collect new tweets about Firefox.

    NOTE(review): `raw_data` is fetched but unused in this excerpt -- the
    save/filter loop appears to continue beyond the visible chunk.
    """
    search_options = {
        'q': 'firefox',
        'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
        'result_type': 'recent',  # Retrieve tweets by date.
    }

    # If we already have some tweets, collect nothing older than what we have.
    try:
        latest_tweet = Tweet.latest()
    except Tweet.DoesNotExist:
        log.debug('No existing tweets. Retrieving %d tweets from search.' %
                  (settings.CC_TWEETS_PERPAGE))
    else:
        search_options['since_id'] = latest_tweet.tweet_id
        log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

    # Retrieve Tweets; any failure logs a warning and skips this run.
    try:
        raw_data = json.load(
            urllib.urlopen('%s?%s' % (SEARCH_URL,
                                      urllib.urlencode(search_options))))
    except Exception, e:
        log.warning('Twitter request failed: %s' % e)
        return
def test_count_replies(self):
    """Test filtering when counting tweets.

    One tweet gets two replies inside the one-day window; a third reply
    is just outside it.  Only the answered tweet inside the window should
    be counted (i.e. the count is of answered tweets, not of replies).
    """
    tweet(save=True)
    # Renamed from `id`, which shadowed the builtin of the same name.
    tweet_id = Tweet.latest().tweet_id
    reply(reply_to_tweet_id=tweet_id, created=datetime.now(), save=True)
    reply(reply_to_tweet_id=tweet_id, created=datetime.now(), save=True)
    # A reply just past the one-day cutoff must not be counted.
    reply(created=datetime.now() - timedelta(days=1, minutes=1), save=True)
    yesterday = datetime.now() - timedelta(days=1)
    count_recent_answered = _count_answered_tweets(since=yesterday)
    eq_(count_recent_answered, 1)
return # Drop tweets into DB for item in raw_data['results']: log.debug('Handling tweet %d: %s...' % (item['id'], smart_str(item['text'][:50]))) # Apply filters to tweet before saving item = _filter_tweet(item) if not item: continue created_date = datetime.utcfromtimestamp(calendar.timegm( rfc822.parsedate(item['created_at']))) item_lang = item.get('iso_language_code', 'en') tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item), locale=item_lang, created=created_date) try: tweet.save() except IntegrityError: continue else: log.debug('Tweet %d saved.' % item['id']) @cronjobs.register def purge_tweets(): """Periodically purge old tweets for each locale. This does a lot of DELETEs on master, so it shouldn't run too frequently. Probably once every hour or more.
def collect_tweets():
    """Collect new tweets about Firefox.

    Authenticates with tweepy, searches for recent Firefox-related tweets
    newer than the latest one stored, filters each result, and saves the
    survivors.  Timing and per-tweet counters are reported to statsd.
    """
    with statsd.timer('customercare.tweets.time_elapsed'):
        # OAuth credentials come from Django settings.
        auth = tweepy.OAuthHandler(settings.TWITTER_CONSUMER_KEY,
                                   settings.TWITTER_CONSUMER_SECRET,
                                   secure=True)
        auth.set_access_token(settings.TWITTER_ACCESS_TOKEN,
                              settings.TWITTER_ACCESS_TOKEN_SECRET)
        # RawParser keeps the response as a raw string so we can json-parse
        # it ourselves below.
        api = tweepy.API(auth, parser=RawParser())

        search_options = {
            'q': 'firefox OR #fxinput OR @firefoxbrasil OR #firefoxos',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we
        # have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' %
                      (settings.CC_TWEETS_PERPAGE))
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

        # Retrieve Tweets.  A failed request logs a warning and skips this
        # run; the next run will catch up via since_id.
        try:
            raw_data = json.loads(str(api.search(**search_options)))
        except tweepy.TweepError, e:
            log.warning('Twitter request failed: %s' % e)
            return

        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return

        # Drop tweets into DB.
        for item in raw_data['results']:
            # Counted before filtering so the filter drop rate is visible.
            statsd.incr('customercare.tweet.collected')
            # Apply filters to tweet before saving.
            # Allow links in #fxinput tweets.
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue
            # created_at is an RFC 2822 date string; convert to UTC datetime.
            created_date = datetime.utcfromtimestamp(
                calendar.timegm(rfc822.parsedate(item['created_at'])))
            item_lang = item.get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # Duplicate tweet_id; already saved, safe to ignore.
                pass
# Twitter returned 0 results. return # Drop tweets into DB for item in raw_data['results']: # Apply filters to tweet before saving item = _filter_tweet(item) if not item: continue created_date = datetime.utcfromtimestamp( calendar.timegm(rfc822.parsedate(item['created_at']))) item_lang = item.get('iso_language_code', 'en') tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item), locale=item_lang, created=created_date) try: tweet.save() except IntegrityError: pass @cronjobs.register def purge_tweets(): """Periodically purge old tweets for each locale. This does a lot of DELETEs on master, so it shouldn't run too frequently. Probably once every hour or more. """
def test_latest_does_not_exist(self):
    """latest() should throw DoesNotExist when there are no tweets.

    NOTE(review): the bare call is the whole assertion -- presumably a
    @raises(Tweet.DoesNotExist)-style decorator sits outside this
    excerpt; verify against the full test file.
    """
    Tweet.latest()
def test_latest(self):
    """latest() returns the most recently created tweet."""
    # Save a couple of tweets; only the last one created should be
    # reported as the latest.
    NUM = 2
    last = None
    for _ in xrange(NUM):
        last = tweet(save=True)
    eq_(last.tweet_id, Tweet.latest().tweet_id)