Code example #1 (score 0)
File: util_test.py — Project: kylewm/webutil
  def test_cache_dict(self):
    """CacheDict should act like a plain dict and support get_multi().

    get_multi() must accept any iterable of keys — list, set, generator,
    range — and return only the entries whose keys are present.
    """
    data = {1: 2, 3: 4}
    cd = util.CacheDict(data)
    self.assert_equals(data, cd)

    # missing keys are silently omitted from the result
    self.assert_equals({}, cd.get_multi([]))
    self.assert_equals({}, cd.get_multi({9}))
    self.assert_equals({1: 2}, cd.get_multi({1, 9}))
    self.assert_equals(data, cd.get_multi({1, 3}))

    # get_multi should handle a generator args ok
    self.assert_equals(data, cd.get_multi(k for k in [1, 3]))
    # range works on both Python 2 (returns a list) and Python 3 (lazy);
    # xrange exists only on Python 2, so use range for portability.
    self.assert_equals(data, cd.get_multi(range(4)))
Code example #2 (score 0)
File: util_test.py — Project: kylewm/webutil
  def test_if_changed(self):
    """if_changed() should normalize falsy values to None, return the new
    value only when it differs from the cached one, and record it in updates.
    """
    cache = util.CacheDict()
    updates = {}

    for val in (0, '', []):  # should all be normalized to None
      # Bug fix: the original called assertIsNone(None, <result>), which
      # passed None as the object under test and the actual result as the
      # msg argument — so the assertion always succeeded without checking
      # anything. Assert on the real return value instead.
      self.assertIsNone(util.if_changed(cache, updates, 'x', val))
      cache['x'] = 0
      self.assertIsNone(util.if_changed(cache, updates, 'x', val))
      del cache['x']

    self.assertEquals(1, util.if_changed(cache, updates, 'x', 1))
    self.assertEquals(1, updates['x'])
    cache['x'] = 1
    self.assertIsNone(util.if_changed(cache, updates, 'x', 1))
    self.assertEquals(2, util.if_changed(cache, updates, 'x', 2))
    self.assertEquals(2, updates['x'])

    # setting back to None is itself a change and must be recorded
    self.assertIsNone(util.if_changed(cache, updates, 'x', None))
    self.assertEquals(None, updates['x'])
Code example #3 (score 0)
    def poll(self, source):
        """Actually runs the poll.

        Fetches the source's recent activities plus search results for links
        to the user's domains, updates the stored last-activity id / ETag /
        activities cache, backfeeds responses, and optionally refetches the
        author's h-feed for new rel=syndication URLs.

        Stores property names and values to update in source.updates.
        """
        if source.last_activities_etag or source.last_activity_id:
            logging.debug('Using ETag %s, last activity id %s',
                          source.last_activities_etag, source.last_activity_id)

        #
        # Step 1: fetch activities:
        # * posts by the user
        # * search all posts for the user's domain URLs to find links
        #
        # Warm the cache from the previously stored JSON so the silo fetch can
        # skip unchanged items.
        cache = util.CacheDict()
        if source.last_activities_cache_json:
            cache.update(json_loads(source.last_activities_cache_json))

        # search for links first so that the user's activities and responses
        # override them if they overlap
        links = source.search_for_links()

        # this user's own activities (and user mentions)
        resp = source.get_activities_response(fetch_replies=True,
                                              fetch_likes=True,
                                              fetch_shares=True,
                                              fetch_mentions=True,
                                              count=50,
                                              etag=source.last_activities_etag,
                                              min_id=source.last_activity_id,
                                              cache=cache)
        etag = resp.get('etag')  # used later
        user_activities = resp.get('items', [])

        # these map ids to AS objects
        # (user_activities comes second so it wins when ids overlap with links)
        responses = {a['id']: a for a in links}
        activities = {a['id']: a for a in links + user_activities}

        # extract silo activity ids, update last_activity_id
        silo_activity_ids = set()
        last_activity_id = source.last_activity_id
        for id, activity in activities.items():
            # maybe replace stored last activity id
            # parse_tag_uri() presumably yields (domain, id) — TODO confirm;
            # the second element is used as the bare silo id here.
            parsed = util.parse_tag_uri(id)
            if parsed:
                id = parsed[1]
            silo_activity_ids.add(id)
            try:
                # try numeric comparison first
                greater = int(id) > int(last_activity_id)
            except (TypeError, ValueError):
                # fall back to lexicographic comparison for non-numeric ids
                greater = str(id) > str(last_activity_id)
            if greater:
                last_activity_id = id

        # only record the new high-water mark if it actually advanced
        if last_activity_id and last_activity_id != source.last_activity_id:
            source.updates['last_activity_id'] = last_activity_id

        # trim cache to just the returned activity ids, so that it doesn't grow
        # without bound. (WARNING: depends on get_activities_response()'s cache key
        # format, e.g. 'PREFIX ACTIVITY_ID'!)
        source.updates['last_activities_cache_json'] = json_dumps({
            k: v
            for k, v in cache.items() if k.split()[-1] in silo_activity_ids
        })

        # propagate discovered responses/activities back to the user's site
        self.backfeed(source, responses, activities=activities)

        source.updates.update({
            'last_polled': source.last_poll_attempt,
            'poll_status': 'ok'
        })
        # only persist the ETag when the silo returned a new one
        if etag and etag != source.last_activities_etag:
            source.updates['last_activities_etag'] = etag

        #
        # Possibly refetch updated syndication urls.
        #
        # if the author has added syndication urls since the first time
        # original_post_discovery ran, we'll miss them. this cleanup task will
        # periodically check for updated urls. only kicks in if the author has
        # *ever* published a rel=syndication url
        if source.should_refetch():
            logging.info('refetching h-feed for source %s', source.label())
            relationships = original_post_discovery.refetch(source)

            now = util.now_fn()
            source.updates['last_hfeed_refetch'] = now

            if relationships:
                logging.info(
                    'refetch h-feed found new rel=syndication relationships: %s',
                    relationships)
                try:
                    self.repropagate_old_responses(source, relationships)
                except BaseException as e:
                    # Swallow only datastore/HTTP timeout-ish failures (matched
                    # by class name to avoid importing their modules here);
                    # anything else propagates.
                    if ('BadRequestError' in str(e.__class__)
                            or 'Timeout' in str(e.__class__)
                            or util.is_connection_failure(e)):
                        logging.info('Timeout while repropagating responses.',
                                     stack_info=True)
                    else:
                        raise
        else:
            logging.info(
                'skipping refetch h-feed. last-syndication-url %s, last-refetch %s',
                source.last_syndication_url, source.last_hfeed_refetch)
Code example #4 (score 0)
File: tasks.py — Project: LennonFlores/bridgy
    def poll(self, source):
        """Actually runs the poll.

        Python 2 variant (note the `except Exception, e` syntax below); this
        excerpt shows only the fetch step and its error handling — the method
        continues past the visible span.

        Stores property names and values to update in source.updates.
        """
        if source.last_activities_etag or source.last_activity_id:
            logging.debug('Using ETag %s, last activity id %s',
                          source.last_activities_etag, source.last_activity_id)

        #
        # Step 1: fetch activities:
        # * posts by the user
        # * search all posts for the user's domain URLs to find links
        #
        # Warm the cache from the previously stored JSON so the silo fetch can
        # skip unchanged items.
        cache = util.CacheDict()
        if source.last_activities_cache_json:
            cache.update(json.loads(source.last_activities_cache_json))

        try:
            # search for links first so that the user's activities and responses
            # override them if they overlap
            links = source.search_for_links()

            # this user's own activities (and user mentions)
            resp = source.get_activities_response(
                fetch_replies=True,
                fetch_likes=True,
                fetch_shares=True,
                fetch_mentions=True,
                count=50,
                etag=source.last_activities_etag,
                min_id=source.last_activity_id,
                cache=cache)
            etag = resp.get('etag')  # used later
            user_activities = resp.get('items', [])

            # these map ids to AS objects
            # (user_activities comes second so it wins when ids overlap)
            responses = {a['id']: a for a in links}
            activities = {a['id']: a for a in links + user_activities}

        except Exception, e:
            # Python 2 only. Classify the HTTP failure and react accordingly.
            code, body = util.interpret_http_exception(e)
            if code == '401':
                # auth revoked: mark poll ok but disable the source
                msg = 'Unauthorized error: %s' % e
                logging.warning(msg, exc_info=True)
                source.updates['poll_status'] = 'ok'
                raise models.DisableSource(msg)
            elif code in util.HTTP_RATE_LIMIT_CODES:
                # rate limited: record the error state and stop this poll
                logging.warning(
                    'Rate limited. Marking as error and finishing. %s', e)
                source.updates.update({
                    'poll_status': 'error',
                    'rate_limited': True
                })
                return
            elif (code
                  and int(code) / 100 == 5) or util.is_connection_failure(e):
                # 5xx or connection failure: abort the task so it gets retried
                logging.error(
                    'API call failed. Marking as error and finishing. %s: %s\n%s',
                    code, body, e)
                self.abort(ERROR_HTTP_RETURN_CODE)
            else:
                raise