Example #1
0
    def post(self):
        """Crawl new timeline statuses for the session named in the request.

        On a Twitter API error a 500 is written and the handler bails out.
        If the request carries `expected_status_id`/`update_retry_count`
        parameters (i.e. it was triggered by a hub ping), the handler checks
        whether the expected status was actually fetched and, if not,
        re-enqueues itself with a growing countdown until the retry budget
        (PING_UPDATE_RETRY_MAX) is exhausted.
        """
        session = data.Session.from_request(self.request)
        result, had_error = twitterappengine.exec_twitter_api(
            lambda: update_timeline(session),
            error_detail='updating timeline for %s' % session.twitter_id)

        if had_error:
            self._write_error(500)
            return

        had_updates, status_ids = result

        if had_updates:
            # Let the hub know there is fresh content, and make sure the
            # on-demand crawl task exists for this session.
            ping_hub([session.get_timeline_feed_url()])
            if not session.crawled_on_demand:
                session.enqueue_crawl_on_demand_task()
        self.response.out.write(
            'Updated %s, %s updates' %
                (session.twitter_id, 'had' if had_updates else 'didn\'t have'))

        # If this update was triggered in response to a ping, see if we actually
        # got the status that we were looking for, otherwise we have to try
        # again.
        try:
            # Both int() calls raise ValueError when the parameter is missing
            # or malformed, which is how non-ping requests exit this section.
            expected_status_id = int(self.request.get('expected_status_id'))
            update_retry_count = int(self.request.get('update_retry_count'))

            logging.info('Looking for expected status %d...' % expected_status_id)

            if expected_status_id in status_ids:
                logging.info('...found')
                return

            if update_retry_count == PING_UPDATE_RETRY_MAX:
                logging.info('...not found, and no retries left')
                return

            update_retry_count += 1

            logging.info('...not found, queuing the %d-th retry' %
                update_retry_count)

            # Back off linearly: the Nth retry waits N * PING_UPDATE_DELAY_SEC.
            session.enqueue_update_task(
                countdown=update_retry_count * PING_UPDATE_DELAY_SEC,
                expected_status_id=expected_status_id,
                update_retry_count=update_retry_count)
        except ValueError:
            # Ignore missing/invalid values
            return
Example #2
0
    def _update():
        """Load the following map from the datastore, re-crawling it if stale.

        Populates FollowingData._following_map (follower id -> list of
        follower Twitter ids) and FollowingData._last_update_time.  Aborts
        without touching stored state if any friend-ID fetch fails, leaving
        the previously loaded (stale) data in place.
        """
        stored_data = FollowingData.get_by_key_name(FollowingData._SINGLETON_ID)

        if stored_data:
            # The serialized following data ends up having its keys converted to
            # strings; convert them to numbers when deserializing.
            deserialized_map = {}
            for serialized_id, follower_ids in stored_data.following_map.items():
                deserialized_map[long(serialized_id)] = follower_ids
            FollowingData._following_map = deserialized_map
            FollowingData._last_update_time = stored_data.last_update_time
            if not FollowingData._is_stale():
                return

        fresh_map = {}
        for session in Session.all():
            twitter_id = long(session.twitter_id)
            following_twitter_ids, had_error = twitterappengine.exec_twitter_api(
                lambda: session.create_api().GetFriendIDs(user_id=twitter_id),
                error_detail="can't get friend IDs for %s, using stale data" % session.twitter_id,
            )

            # A single failed fetch aborts the whole refresh; stale data is
            # better than a partially rebuilt map.
            if had_error:
                return

            # TODO(mihaip): remove this and replace with rate-limitting, the
            # real problem is not how many people are followed, but how often
            # they tweet.
            if len(following_twitter_ids) > 200:
                logging.warning(
                    "Not including followers for %d, following too "
                    "many people (%d)" % (twitter_id, len(following_twitter_ids))
                )
                continue

            for followed_twitter_id in following_twitter_ids:
                fresh_map.setdefault(followed_twitter_id, []).append(twitter_id)
            # Users are also considered to be following themselves (since their
            # updates update their timeline).
            fresh_map.setdefault(twitter_id, []).append(twitter_id)

        stored_data = FollowingData(
            key_name=FollowingData._SINGLETON_ID, following_map=fresh_map)
        stored_data.put()

        FollowingData._following_map = fresh_map
        FollowingData._last_update_time = stored_data.last_update_time
Example #3
0
    def _update():
        """Refresh FollowingData's in-memory following map.

        Deserializes the stored singleton (restoring numeric keys) and, if the
        data is stale, rebuilds it by fetching friend IDs for every session.
        Any fetch error aborts the rebuild, keeping the stale data.
        """
        stored_data = FollowingData.get_by_key_name(FollowingData._SINGLETON_ID)

        if stored_data:
            # The serialized following data ends up having its keys converted to
            # strings; convert them to numbers when deserializing.
            restored_map = {}
            for serialized_id, follower_ids in stored_data.following_map.items():
                restored_map[long(serialized_id)] = follower_ids
            FollowingData._following_map = restored_map
            FollowingData._last_update_time = stored_data.last_update_time
            if not FollowingData._is_stale():
                return

        rebuilt_map = {}
        for session in Session.all():
            twitter_id = long(session.twitter_id)
            following_twitter_ids, had_error = twitterappengine.exec_twitter_api(
                lambda: session.create_api().GetFriendIDs(user_id=twitter_id),
                error_detail='can\'t get friend IDs for %s, using stale data' %
                                session.twitter_id)

            # Bail out entirely on error so the stale data stays in effect.
            if had_error:
                return

            for followed_twitter_id in following_twitter_ids:
                rebuilt_map.setdefault(followed_twitter_id, []).append(twitter_id)
            # Users are also considered to be following themselves (since their
            # updates update their timeline).
            rebuilt_map.setdefault(twitter_id, []).append(twitter_id)

        stored_data = FollowingData(
            key_name=FollowingData._SINGLETON_ID,
            following_map=rebuilt_map)
        stored_data.put()

        FollowingData._following_map = rebuilt_map
        FollowingData._last_update_time = stored_data.last_update_time
Example #4
0
def get_digest_for_list(list_owner, list_id, dev_mode):
    """Assemble the digest of statuses for the given Twitter list.

    Fetches the list owner's profile (for timezone rendering, falling back to
    None on error), pulls statuses posted since the digest window opened, and
    hands everything to _process_digest_statuses.
    """
    digest_start_time, digest_end_time, max_cache_age = _get_digest_timestamps()

    api = _get_digest_twitter_api(
        max_cache_age, key='%s/%s' % (list_owner, list_id))

    user, had_error = twitterappengine.exec_twitter_api(
        lambda: api.GetUser(list_owner),
        error_detail='user %s' % list_owner)
    # Only resolve a timezone when the owner lookup succeeded.
    timezone = None if had_error else twitterdisplay.get_timezone_for_user(user)

    fetcher = ListTwitterFetcher(api, list_owner, list_id, digest_start_time)
    statuses, had_error = fetcher.fetch()

    return _process_digest_statuses(
        statuses,
        digest_start_time,
        digest_end_time,
        had_error,
        dev_mode,
        timezone=timezone)
Example #5
0
 def fetch(self):
     """Run this fetcher's request through the Twitter API wrapper.

     Returns a (statuses, had_error) pair; a falsy result is normalized to
     an empty list so callers can always iterate it.
     """
     fetched, had_error = twitterappengine.exec_twitter_api(
         self._fetch, error_detail=self._id())
     if not fetched:
         fetched = []
     return fetched, had_error
Example #6
0
    def _get_signed_in(self):
        """Serve the signed-in user's timeline as an Atom feed.

        Fetches the user's profile (500 on error), selects recent status IDs
        from the stored stream -- windowed down for hub requests -- and renders
        them one-status-per-item through the feed template.
        """
        twitter_id = self._session.twitter_id
        logging.info('Serving feed for %s' % twitter_id)

        user, had_error = twitterappengine.exec_twitter_api(
            lambda: self._api.GetUser(twitter_id),
            error_detail='user %s' % twitter_id)

        if had_error:
            self._write_error(500)
            return

        stream = data.StreamData.get_timeline_for_user(twitter_id)

        # Default window: everything from the last FEED_STATUS_INTERVAL_SEC.
        threshold_time = time.time() - FEED_STATUS_INTERVAL_SEC

        # It's wasteful to serve the hub the full set of items in the feed, so
        # we use a variant of the feed windowing technique described at
        # http://code.google.com/p/pubsubhubbub/wiki/PublisherEfficiency#Feed_windowing
        # to only give it new items. We treat the If-Modified-Since header as
        # an indication of the items that the hub already has, but we allow one
        # hour of overlap, in case of items getting dropped, replication delay,
        # cosmic rays, etc.
        if self._user_agent_contains('appid: pubsubhubbub'):
            if_modified_since = self._get_if_modified_since()
            if if_modified_since:
                logging.info('If-Modified-Since: %d' % if_modified_since)
                threshold_time = if_modified_since - IF_MODIFIED_SINCE_INTERVAL_SEC
                # Since we're serving a partial response, we don't want proxies
                # caching it.
                self.response.headers['Cache-Control'] = 'private'

        # We want the feed to have all tweets from the past day, but also
        # at least 10 items.
        feed_status_ids = []
        if stream:
            # status_pairs() appears to be newest-first: we stop once items are
            # older than the threshold AND the minimum count is met.
            for status_id, status_timestamp_sec in stream.status_pairs():
                if status_timestamp_sec < threshold_time and \
                        len(feed_status_ids) >= MIN_FEED_ITEMS:
                    break
                feed_status_ids.append(status_id)

        logging.info('  Feed has %d items' % len(feed_status_ids))

        status_data = data.StatusData.get_by_status_ids(feed_status_ids)
        statuses = [s.to_status() for s in status_data]

        timezone = twitterdisplay.get_timezone_for_user(user)

        # We don't actually want statuses grouped, instead we want one status
        # per item.
        status_groups = [
            twitterdisplay.DisplayStatusGroup(
                user=status.user,
                statuses=[status],
                thumbnail_size=thumbnails.LARGE_THUMBNAIL,
                timezone=timezone)
            for status in statuses
        ]

        updated_date = datetime.datetime.utcnow()

        self._write_template('birdfeeder/feed.atom', {
              'feed_title': '@%s Twitter Timeline' % user.screen_name,
              'updated_date_iso': updated_date.isoformat(),
              'feed_url': self.request.url,
              'status_groups': status_groups,
            },
            content_type='application/atom+xml')

        self._add_last_modified_header(updated_date)
Example #7
0
    def _get_signed_in(self):
        """Serve the signed-in user's timeline as an Atom feed.

        Variant that uses the caching API and adds a shorter crawl window for
        hub requests that no longer send If-Modified-Since.  Fetches the
        user's profile (500 on error), selects recent status IDs from the
        stored stream, and renders them one-status-per-item.
        """
        twitter_id = self._session.twitter_id
        logging.info('Serving feed for %s' % twitter_id)

        user, had_error = twitterappengine.exec_twitter_api(
            lambda: self._caching_api.GetUser(user_id=twitter_id,
                                              include_entities=False),
            error_detail='user %s' % twitter_id)

        if had_error:
            self._write_error(500)
            return

        stream = data.StreamData.get_timeline_for_user(twitter_id)

        # Default window: everything from the last FEED_STATUS_INTERVAL_SEC.
        threshold_time = time.time() - FEED_STATUS_INTERVAL_SEC

        if self._should_use_feed_windowing():
            if_modified_since = self._get_if_modified_since()
            if if_modified_since:
                logging.info('If-Modified-Since: %d' % if_modified_since)
                # Allow overlap so dropped/delayed items are still re-served.
                threshold_time = if_modified_since - IF_MODIFIED_SINCE_INTERVAL_SEC
                # Since we're serving a partial response, we don't want proxies
                # caching it.
                self.response.headers['Cache-Control'] = 'private'
            elif 'pubsubhubbub' in self.request.headers['User-Agent']:
                # Google's PubSubHubbub hub no longer seems to send an
                # If-Modified-Since header, but it crawls often enough that we
                # can use a shorter interval for it.
                threshold_time = time.time() - PUBSUBHUBBUB_HUB_INTERVAL_SEC
                self.response.headers['Cache-Control'] = 'private'

        # We want the feed to have all tweets from the past day, but also
        # at least 10 items.
        feed_status_ids = []
        if stream:
            # status_pairs() appears to be newest-first: stop once items are
            # older than the threshold AND the minimum count is met.
            for status_id, status_timestamp_sec in stream.status_pairs():
                if status_timestamp_sec < threshold_time and \
                        len(feed_status_ids) >= MIN_FEED_ITEMS:
                    break
                feed_status_ids.append(status_id)

        logging.info('  Feed has %d items' % len(feed_status_ids))

        status_data = data.StatusData.get_by_status_ids(feed_status_ids)
        statuses = [s.to_status() for s in status_data]

        timezone = twitterdisplay.get_timezone_for_auth_user(self._caching_api)

        # We don't actually want statuses grouped, instead we want one status
        # per item.
        status_groups = [
            twitterdisplay.DisplayStatusGroup(
                user=status.user,
                statuses=[status],
                thumbnail_size=thumbnails.LARGE_THUMBNAIL,
                timezone=timezone)
            for status in statuses
        ]

        updated_date = datetime.datetime.utcnow()

        self._write_template('birdfeeder/feed.atom', {
              'feed_title': '@%s Twitter Timeline' % user.screen_name,
              'updated_date_iso': updated_date.isoformat(),
              'feed_url': self.request.url,
              'status_groups': status_groups,
            },
            content_type='application/atom+xml')

        self._add_last_modified_header(updated_date)