Exemple #1
0
def tweet(save=False, **kwargs):
    """Build a Tweet populated with sensible defaults, overridden by kwargs.

    Args:
        save: whether to save the Tweet before returning it
        text: the `text` attribute of the Tweet's raw_json
    """
    global next_tweet_id
    # Pull the tweet text out first so it doesn't leak into the model kwargs.
    text = kwargs.pop('text', 'Hey #Firefox')
    raw_json = (
        '{"iso_language_code": "en", "text": "%s", '
        '"created_at": "Thu, 23 Sep 2010 13:58:06 +0000", '
        '"profile_image_url": '
        '"http://a1.twimg.com/profile_images/1117809237/cool_cat_normal.jpg", '
        '"source": "<a href="http://www.tweetdeck.com" '
        'rel="nofollow">TweetDeck</a>", '
        '"from_user": "******", "from_user_id": 142651388, '
        '"to_user_id": null, "geo": null, "id": 25309168521, '
        '"metadata": {"result_type": "recent"}}' % text
    )  # TODO: Escape quotes and such
    defaults = {'locale': 'en', 'raw_json': raw_json}
    defaults.update(kwargs)
    # Assign a fresh auto-incrementing id unless the caller supplied one.
    if 'tweet_id' not in kwargs:
        defaults['tweet_id'] = next_tweet_id
        next_tweet_id += 1
    instance = Tweet(**defaults)
    if save:
        instance.save()
    return instance
Exemple #2
0
def collect_tweets():
    """Collect new tweets about Firefox.

    Searches Twitter (via tweepy) for Firefox-related tweets newer than
    the latest one already stored, filters them, and saves each as a
    Tweet row. Timing and counters are reported through statsd.
    """
    with statsd.timer('customercare.tweets.time_elapsed'):
        # OAuth credentials come from Django-style settings.
        auth = tweepy.OAuthHandler(settings.TWITTER_CONSUMER_KEY,
                                   settings.TWITTER_CONSUMER_SECRET,
                                   secure=True)

        auth.set_access_token(settings.TWITTER_ACCESS_TOKEN,
                              settings.TWITTER_ACCESS_TOKEN_SECRET)

        # RawParser makes api.search() return the raw response body,
        # which is then parsed with json.loads below.
        api = tweepy.API(auth, parser=RawParser())

        search_options = {
            'q': 'firefox OR #fxinput OR @firefoxbrasil OR #firefoxos',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we
        # have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' % (
                settings.CC_TWEETS_PERPAGE))
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

        # Retrieve Tweets
        try:
            raw_data = json.loads(str(api.search(**search_options)))
        except tweepy.TweepError, e:
            # Network/API failure: log and bail; the next run will retry.
            log.warning('Twitter request failed: %s' % e)
            return

        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return

        # Drop tweets into DB
        for item in raw_data['results']:
            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue

            # Twitter's RFC 822 timestamp -> naive UTC datetime.
            created_date = datetime.utcfromtimestamp(calendar.timegm(
                rfc822.parsedate(item['created_at'])))

            item_lang = item.get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # Presumably a duplicate tweet_id already stored — skip it.
                # TODO confirm the unique constraint this relies on.
                pass
Exemple #3
0
def tweet(save=False, **kwargs):
    """Build a Tweet populated with sensible defaults, overridden by kwargs.

    Args:
        save: whether to save the Tweet before returning it
        text: the `text` attribute of the Tweet's raw_json
    """
    global next_tweet_id
    # Extract the tweet text before the remaining kwargs become model fields.
    payload = (
        '{"iso_language_code": "en", "text": "%s", '
        '"created_at": "Thu, 23 Sep 2010 13:58:06 +0000", '
        '"profile_image_url": '
        '"http://a1.twimg.com/profile_images/1117809237/cool_cat_normal.jpg", '
        '"source": "<a href="http://www.tweetdeck.com" '
        'rel="nofollow">TweetDeck</a>", '
        '"from_user": "******", "from_user_id": 142651388, '
        '"to_user_id": null, "geo": null, "id": 25309168521, '
        '"metadata": {"result_type": "recent"}}'
        % kwargs.pop('text', 'Hey #Firefox')
    )  # TODO: Escape quotes and such
    fields = {'locale': 'en', 'raw_json': payload}
    fields.update(kwargs)
    if 'tweet_id' not in kwargs:
        # No explicit id supplied: hand out the next sequential one.
        fields['tweet_id'] = next_tweet_id
        next_tweet_id += 1
    result = Tweet(**fields)
    if save:
        result.save()
    return result
Exemple #4
0
def collect_tweets():
    """Collect new tweets about Firefox.

    Queries the Twitter search endpoint over plain HTTP (urllib) for
    Firefox-related tweets newer than the latest one stored, filters
    them, and saves each as a Tweet row. Elapsed time and per-tweet
    counters are reported through statsd.
    """
    with statsd.timer('customercare.tweets.time_elapsed'):
        search_options = {
            'q': 'firefox OR #fxinput',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' %
                      (settings.CC_TWEETS_PERPAGE))
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

        # Retrieve Tweets
        try:
            # Broad catch is deliberate: any network/parse failure just
            # skips this run; the next cron run will retry.
            raw_data = json.load(
                urllib.urlopen('%s?%s' %
                               (SEARCH_URL, urllib.urlencode(search_options))))
        except Exception, e:
            log.warning('Twitter request failed: %s' % e)
            return

        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return

        # Drop tweets into DB
        for item in raw_data['results']:
            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue

            # Twitter's RFC 822 timestamp -> naive UTC datetime.
            created_date = datetime.utcfromtimestamp(
                calendar.timegm(rfc822.parsedate(item['created_at'])))

            item_lang = item.get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'],
                          raw_json=json.dumps(item),
                          locale=item_lang,
                          created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # Presumably a duplicate tweet_id already stored — skip it.
                # TODO confirm the unique constraint this relies on.
                pass
Exemple #5
0
def collect_tweets():
    """Collect new tweets about Firefox.

    Fetches recent Firefox-related tweets from the Twitter search URL
    (newer than the latest stored tweet, when one exists), filters them,
    and stores each as a Tweet row. Metrics go through statsd.
    """
    with statsd.timer('customercare.tweets.time_elapsed'):
        search_options = {
            'q': 'firefox OR #fxinput',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' % (
                settings.CC_TWEETS_PERPAGE))
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

        # Retrieve Tweets
        try:
            # Broad catch is deliberate: any network/parse failure just
            # skips this run; the next cron run will retry.
            raw_data = json.load(urllib.urlopen('%s?%s' % (
                SEARCH_URL, urllib.urlencode(search_options))))
        except Exception, e:
            log.warning('Twitter request failed: %s' % e)
            return

        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return

        # Drop tweets into DB
        for item in raw_data['results']:
            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue

            # Twitter's RFC 822 timestamp -> naive UTC datetime.
            created_date = datetime.utcfromtimestamp(calendar.timegm(
                rfc822.parsedate(item['created_at'])))

            item_lang = item.get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # Presumably a duplicate tweet_id already stored — skip it.
                pass
Exemple #6
0
def collect_tweets():
    """Collect new tweets about Firefox.

    NOTE(review): this snippet appears truncated — it ends inside the
    retrieval error handler; the processing of `raw_data` is not shown.
    """
    search_options = {
        'q': 'firefox',
        'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
        'result_type': 'recent',  # Retrieve tweets by date.
    }

    # If we already have some tweets, collect nothing older than what we have.
    try:
        latest_tweet = Tweet.latest()
    except Tweet.DoesNotExist:
        log.debug('No existing tweets. Retrieving %d tweets from search.' % (
            settings.CC_TWEETS_PERPAGE))
    else:
        search_options['since_id'] = latest_tweet.tweet_id
        log.debug('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

    # Retrieve Tweets
    try:
        # Broad catch is deliberate: any network/parse failure just
        # skips this run; the next cron run will retry.
        raw_data = json.load(urllib.urlopen('%s?%s' % (
            SEARCH_URL, urllib.urlencode(search_options))))
    except Exception, e:
        log.warning('Twitter request failed: %s' % e)
        return
Exemple #7
0
def collect_tweets():
    """Collect new tweets about Firefox.

    NOTE(review): this snippet appears truncated — it ends inside the
    retrieval error handler; the processing of `raw_data` is not shown.
    """
    search_options = {
        'q': 'firefox',
        'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
        'result_type': 'recent',  # Retrieve tweets by date.
    }

    # If we already have some tweets, collect nothing older than what we have.
    try:
        latest_tweet = Tweet.latest()
    except Tweet.DoesNotExist:
        log.debug('No existing tweets. Retrieving %d tweets from search.' %
                  (settings.CC_TWEETS_PERPAGE))
    else:
        search_options['since_id'] = latest_tweet.tweet_id
        log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

    # Retrieve Tweets
    try:
        # Broad catch is deliberate: any network/parse failure just
        # skips this run; the next cron run will retry.
        raw_data = json.load(
            urllib.urlopen('%s?%s' %
                           (SEARCH_URL, urllib.urlencode(search_options))))
    except Exception, e:
        log.warning('Twitter request failed: %s' % e)
        return
Exemple #8
0
    def test_count_replies(self):
        """Test filtering when counting tweets.

        Creates one tweet with two recent replies plus one stand-alone
        reply older than the cutoff, then checks that only one answered
        tweet is counted for the last day.
        """
        tweet(save=True)
        # Renamed from `id`, which shadowed the builtin of the same name.
        latest_tweet_id = Tweet.latest().tweet_id

        reply(reply_to_tweet_id=latest_tweet_id, created=datetime.now(),
              save=True)
        reply(reply_to_tweet_id=latest_tweet_id, created=datetime.now(),
              save=True)
        # This reply is older than the `since` cutoff and must be ignored.
        reply(created=datetime.now() - timedelta(days=1, minutes=1), save=True)

        yesterday = datetime.now() - timedelta(days=1)
        count_recent_answered = _count_answered_tweets(since=yesterday)
        eq_(count_recent_answered, 1)
Exemple #9
0
    def test_count_replies(self):
        """Test filtering when counting tweets.

        Creates one tweet with two recent replies plus one stand-alone
        reply older than the cutoff, then checks that only one answered
        tweet is counted for the last day.
        """
        tweet(save=True)
        # Renamed from `id`, which shadowed the builtin of the same name.
        latest_tweet_id = Tweet.latest().tweet_id

        reply(reply_to_tweet_id=latest_tweet_id, created=datetime.now(),
              save=True)
        reply(reply_to_tweet_id=latest_tweet_id, created=datetime.now(),
              save=True)
        # This reply is older than the `since` cutoff and must be ignored.
        reply(created=datetime.now() - timedelta(days=1, minutes=1), save=True)

        yesterday = datetime.now() - timedelta(days=1)
        count_recent_answered = _count_answered_tweets(since=yesterday)
        eq_(count_recent_answered, 1)
Exemple #10
0
        return

    # Drop tweets into DB
    for item in raw_data['results']:
        log.debug('Handling tweet %d: %s...' % (item['id'],
                                                smart_str(item['text'][:50])))
        # Apply filters to tweet before saving
        item = _filter_tweet(item)
        if not item:
            continue

        created_date = datetime.utcfromtimestamp(calendar.timegm(
            rfc822.parsedate(item['created_at'])))

        item_lang = item.get('iso_language_code', 'en')
        tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                      locale=item_lang, created=created_date)
        try:
            tweet.save()
        except IntegrityError:
            continue
        else:
            log.debug('Tweet %d saved.' % item['id'])


@cronjobs.register
def purge_tweets():
    """Periodically purge old tweets for each locale.

    This does a lot of DELETEs on master, so it shouldn't run too frequently.
    Probably once every hour or more.
Exemple #11
0
def collect_tweets():
    """Collect new tweets about Firefox.

    Searches Twitter (via tweepy) for Firefox-related tweets newer than
    the latest one already stored, filters them, and saves each as a
    Tweet row. Timing and counters are reported through statsd.
    """
    with statsd.timer('customercare.tweets.time_elapsed'):
        # OAuth credentials come from Django-style settings.
        auth = tweepy.OAuthHandler(settings.TWITTER_CONSUMER_KEY,
                                   settings.TWITTER_CONSUMER_SECRET,
                                   secure=True)

        auth.set_access_token(settings.TWITTER_ACCESS_TOKEN,
                              settings.TWITTER_ACCESS_TOKEN_SECRET)

        # RawParser makes api.search() return the raw response body,
        # which is then parsed with json.loads below.
        api = tweepy.API(auth, parser=RawParser())

        search_options = {
            'q': 'firefox OR #fxinput OR @firefoxbrasil OR #firefoxos',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we
        # have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' %
                      (settings.CC_TWEETS_PERPAGE))
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

        # Retrieve Tweets
        try:
            raw_data = json.loads(str(api.search(**search_options)))
        except tweepy.TweepError, e:
            # Network/API failure: log and bail; the next run will retry.
            log.warning('Twitter request failed: %s' % e)
            return

        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return

        # Drop tweets into DB
        for item in raw_data['results']:
            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue

            # Twitter's RFC 822 timestamp -> naive UTC datetime.
            created_date = datetime.utcfromtimestamp(
                calendar.timegm(rfc822.parsedate(item['created_at'])))

            item_lang = item.get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'],
                          raw_json=json.dumps(item),
                          locale=item_lang,
                          created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # Presumably a duplicate tweet_id already stored — skip it.
                pass
Exemple #12
0
        # Twitter returned 0 results.
        return

    # Drop tweets into DB
    for item in raw_data['results']:
        # Apply filters to tweet before saving
        item = _filter_tweet(item)
        if not item:
            continue

        created_date = datetime.utcfromtimestamp(
            calendar.timegm(rfc822.parsedate(item['created_at'])))

        item_lang = item.get('iso_language_code', 'en')
        tweet = Tweet(tweet_id=item['id'],
                      raw_json=json.dumps(item),
                      locale=item_lang,
                      created=created_date)
        try:
            tweet.save()
        except IntegrityError:
            pass


@cronjobs.register
def purge_tweets():
    """Periodically purge old tweets for each locale.

    This does a lot of DELETEs on master, so it shouldn't run too frequently.
    Probably once every hour or more.

    """
Exemple #13
0
 def test_latest_does_not_exist(self):
     """latest() should throw DoesNotExist when there are no tweets.

     NOTE(review): no assertion appears here — presumably an
     expected-exception decorator (e.g. @raises(Tweet.DoesNotExist))
     sits above this method, outside this view; confirm before editing.
     """
     Tweet.latest()
Exemple #14
0
 def test_latest(self):
     """Test the latest() class method when there is a latest tweet.

     Saves NUM tweets and checks that latest() returns the same
     tweet_id as the last one saved.
     """
     NUM = 2
     # The loop counter is unused; `_` signals that intentionally
     # (was `x`).
     for _ in xrange(NUM):
         last = tweet(save=True)
     eq_(last.tweet_id, Tweet.latest().tweet_id)