Esempio n. 1
0
 def extract_id(entry):
     """Return an identifier for a feed entry.

     The first truthy value among ``entry['entry_id']`` and ``entry['id']``
     is returned as-is. Failing that, the SHA-1 digest of ``entry['link']``
     is used, and as a last resort the SHA-1 digest of the whole entry
     serialized as JSON with sorted keys.
     """
     for key in ('entry_id', 'id'):
         identifier = entry.get(key)
         if identifier:
             return identifier
     link = entry.get('link')
     if link:
         # entry_id is part of an index: hash the link to keep its size bounded
         return digest(link, alg='sha1')
     # sort_keys makes the serialization independent of key insertion order
     return digest(json.dumps(entry, sort_keys=True), alg='sha1')
Esempio n. 2
0
    def test_matching_etag(self):
        """Run the crawler against three feed/response etag combinations.

        The article count must stay at ``BASE_COUNT`` for the first two
        combinations (identical plain etag, then identical ``jarr/`` etag
        built from the digest of ``self._content``) and must differ from
        ``BASE_COUNT`` after the third one, where the two etags differ.
        """
        def article_count():
            return ArticleController().read().count()

        def content_etag():
            return 'jarr/"%s"' % digest(self._content)

        # 1. feed etag and response etag are the same plain value
        self._reset_feeds_freshness(etag='fake etag')
        self.resp_headers = {'etag': 'fake etag'}
        self.assertEqual(BASE_COUNT, article_count())

        crawler()

        self.assertEqual(BASE_COUNT, article_count())

        # 2. both etags are the jarr-prefixed digest of the content
        self._reset_feeds_freshness(etag=content_etag())
        self.resp_headers = {'etag': content_etag()}

        crawler()
        self.assertEqual(BASE_COUNT, article_count())

        # 3. feed etag and response etag no longer agree
        self._reset_feeds_freshness(etag='jarr/fake etag')
        self.resp_headers = {'etag': '########################'}

        crawler()
        self.assertNotEqual(BASE_COUNT, article_count())
Esempio n. 3
0
 def create(self, **attrs):
     """Create an article attached to the feed named by ``attrs['feed_id']``.

     ``user_id`` and ``category_id`` are always overwritten with the
     feed's own values, ``vector`` is computed with ``to_vector`` and,
     when a ``link`` is given without a ``link_hash``, the hash is
     derived from the link.

     Raises:
         Unauthorized: ``feed_id`` is missing from ``attrs``.
         Forbidden: a ``user_id`` was passed that is not the feed's owner
             while this controller is bound to a user
             (``self.user_id`` is not ``None``).
     """
     # handling special denorm for article rights
     if 'feed_id' not in attrs:
         raise Unauthorized("must provide feed_id when creating article")

     acting_user_id = attrs.get('user_id', self.user_id)
     feed = FeedController(acting_user_id).get(id=attrs['feed_id'])

     if 'user_id' in attrs:
         owns_feed = feed.user_id == attrs['user_id']
         if not owns_feed and self.user_id is not None:
             raise Forbidden("no right on feed %r" % feed.id)

     # denormalize ownership from the feed onto the article
     attrs['user_id'] = feed.user_id
     attrs['category_id'] = feed.category_id
     attrs['vector'] = to_vector(attrs)

     if not attrs.get('link_hash'):
         link = attrs.get('link')
         if link:
             attrs['link_hash'] = digest(link, alg='sha1', out='bytes')
     return super().create(**attrs)
Esempio n. 4
0
def extract_feed_info(headers, text=None):
    """
    Build the cache-control information of a feed from its response headers.

    The returned dict holds the ``etag`` header (empty string when absent)
    and the ``last-modified`` header (``rfc_1123_utc()`` when absent).
    If the response carries no etag but ``text`` is provided, an etag of
    the form ``jarr/"<digest of text>"`` is generated instead.

    Expiration is then delegated to ``_extract_max_age`` and, only if that
    left no ``expires`` key, to ``_extract_expires``; the resulting value is
    limited by the FEED_MIN_EXPIRES and FEED_MAX_EXPIRES configuration
    settings.
    """
    etag = headers.get('etag', '')
    last_modified = headers.get('last-modified', rfc_1123_utc())
    if text and not etag:
        # no etag from the server: derive one from the content itself
        etag = 'jarr/"%s"' % digest(text)

    feed_info = {'etag': etag, 'last_modified': last_modified}

    _extract_max_age(headers, feed_info)
    if 'expires' in feed_info:
        return feed_info
    _extract_expires(headers, feed_info)
    return feed_info
Esempio n. 5
0
 def to_hash(link):
     """Return the SHA-1 digest of ``link`` after ``remove_utm_tags``.

     The digest is requested with ``out='bytes'``.
     """
     cleaned_link = remove_utm_tags(link)
     return digest(cleaned_link, alg='sha1', out='bytes')
Esempio n. 6
0
def response_calculated_etag_match(feed, resp):
    """Tell whether the etag computed from ``resp.text`` equals ``feed.etag``.

    The computed etag has the form ``jarr/"<digest of resp.text>"``.
    A match is logged at info level together with the response status code.
    """
    calculated_etag = 'jarr/"%s"' % digest(resp.text)
    if calculated_etag != feed.etag:
        return False
    logger.info("%r: calculated hash matches (%d)", feed, resp.status_code)
    return True
Esempio n. 7
0
 def test_etag_matching_w_constructed_etag(self):
     """A jarr-constructed feed etag matches only the calculated-etag check.

     NOTE(review): presumably ``self.resp.text`` is ``'text'`` in the
     fixture, which is why the calculated etag matches - confirm in setUp.
     """
     constructed_etag = 'jarr/"%s"' % digest('text')
     self.feed.etag = constructed_etag
     self.assertFalse(response_etag_match(self.feed, self.resp))
     self.assertTrue(response_calculated_etag_match(self.feed, self.resp))