def extract_id(entry):
    if entry.get('entry_id'):
        return entry['entry_id']
    if entry.get('id'):
        return entry['id']
    if entry.get('link'):
        # entry_id is part of an index, limiting size here
        return digest(entry['link'], alg='sha1')
    return digest(json.dumps(entry, sort_keys=True), alg='sha1')
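
# A minimal, hypothetical usage sketch (not part of the original module; it
# assumes extract_id, digest and json are available here). It illustrates the
# fallback chain: explicit ids are returned as-is, otherwise a sha1 digest of
# the link, or of the whole serialized entry, keeps the id small.
entry_with_id = {'entry_id': 'tag:example.org,2024:entry-1'}
entry_with_link = {'title': 'A post', 'link': 'http://example.org/a-post'}
entry_bare = {'title': 'no id and no link'}

assert extract_id(entry_with_id) == 'tag:example.org,2024:entry-1'
# sha1 of the link keeps entry_id short enough for the index mentioned above
assert extract_id(entry_with_link) == digest('http://example.org/a-post',
                                             alg='sha1')
# last resort: hash the whole entry serialized with a stable key order
assert extract_id(entry_bare) == digest(
        json.dumps(entry_bare, sort_keys=True), alg='sha1')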
def test_matching_etag(self):
    self._reset_feeds_freshness(etag='fake etag')
    self.resp_headers = {'etag': 'fake etag'}
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
    crawler()
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
    self._reset_feeds_freshness(etag='jarr/"%s"' % digest(self._content))
    self.resp_headers = {'etag': 'jarr/"%s"' % digest(self._content)}
    crawler()
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
    self._reset_feeds_freshness(etag='jarr/fake etag')
    self.resp_headers = {'etag': '########################'}
    crawler()
    self.assertNotEqual(BASE_COUNT, ArticleController().read().count())
def create(self, **attrs):
    # handling special denorm for article rights
    if 'feed_id' not in attrs:
        raise Unauthorized("must provide feed_id when creating article")
    feed = FeedController(
            attrs.get('user_id', self.user_id)).get(id=attrs['feed_id'])
    if 'user_id' in attrs and not (feed.user_id == attrs['user_id']
                                   or self.user_id is None):
        raise Forbidden("no right on feed %r" % feed.id)
    attrs['user_id'], attrs['category_id'] = feed.user_id, feed.category_id
    attrs['vector'] = to_vector(attrs)
    if not attrs.get('link_hash') and attrs.get('link'):
        attrs['link_hash'] = digest(attrs['link'], alg='sha1', out='bytes')
    return super().create(**attrs)
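
# Hypothetical call-site sketch (the controller name appears in the test
# above, but current_user_id and the field values here are assumptions):
# only feed_id is required, the denormalized fields are filled in by
# create() itself.
article = ArticleController(current_user_id).create(
        feed_id=42,
        link='http://example.org/a-post',
        title='A post')
# article.user_id and article.category_id now mirror the parent feed,
# article.vector holds the computed search vector and article.link_hash
# the sha1 digest of the link.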
def extract_feed_info(headers, text=None):
    """Given the headers of a feed response, compute the values needed for
    basic cache control: extract etag and last-modified, and calculate
    expires, bounded by FEED_MIN_EXPIRES and FEED_MAX_EXPIRES from the
    configuration file.
    """
    feed_info = {'etag': headers.get('etag', ''),
                 'last_modified': headers.get('last-modified',
                                              rfc_1123_utc())}
    if text and not feed_info['etag']:
        feed_info['etag'] = 'jarr/"%s"' % digest(text)
    _extract_max_age(headers, feed_info)
    if 'expires' not in feed_info:
        _extract_expires(headers, feed_info)
    return feed_info
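
# Hedged example (the header values are made up): when the server sends no
# ETag, the feed body is hashed into a "jarr/"-prefixed etag so the next
# fetch can still be short-circuited, and the caching headers are turned
# into an expires value within the configured bounds.
headers = {'last-modified': 'Wed, 01 May 2024 10:00:00 GMT',
           'cache-control': 'max-age=3600'}
info = extract_feed_info(headers, text='<rss>...</rss>')
# info['etag'] == 'jarr/"%s"' % digest('<rss>...</rss>')
# info['last_modified'] == 'Wed, 01 May 2024 10:00:00 GMT'
# info['expires'] is derived from max-age, clamped by FEED_MIN_EXPIRES
# and FEED_MAX_EXPIRES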
def to_hash(link):
    return digest(remove_utm_tags(link), alg='sha1', out='bytes')
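
# Illustrative sketch (an assumption about remove_utm_tags, not original
# code): tracking parameters are dropped before hashing, so the same article
# shared with different utm_* tags should map to a single link_hash.
h1 = to_hash('http://example.org/a-post?utm_source=twitter')
h2 = to_hash('http://example.org/a-post?utm_source=feed')
# h1 == h2 if remove_utm_tags strips the utm_* query string entirely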
def response_calculated_etag_match(feed, resp):
    if ('jarr/"%s"' % digest(resp.text)) == feed.etag:
        logger.info("%r: calculated hash matches (%d)",
                    feed, resp.status_code)
        return True
    return False
def test_etag_matching_w_constructed_etag(self):
    self.feed.etag = 'jarr/"%s"' % digest('text')
    self.assertFalse(response_etag_match(self.feed, self.resp))
    self.assertTrue(response_calculated_etag_match(self.feed, self.resp))