def callback(self, response):
    """Handle the fetched feed response.

    Interprets cache-validation results (HTTP 304, etag match) or, on a
    cache miss, parses the feed and challenges jarr to compare the
    fetched entries against the existing ones.

    response -- a future resolving to an HTTP response for the feed URL.
    """
    try:
        response = response.result()
        response.raise_for_status()
    except Exception as error:
        error_count = self.feed['error_count'] + 1
        # fixed typo: "occured" -> "occurred"
        logger.exception('%r %r - an error occurred while fetching '
                         'feed; bumping error count to %r',
                         self.feed['id'], self.feed['title'], error_count)
        # Fire-and-forget persistence of the bumped error count; the
        # returned future was previously bound to an unused local, so it
        # is deliberately dropped here.
        self.query_jarr('put', 'feed/%d' % self.feed['id'],
                        {'error_count': error_count,
                         'last_error': str(error),
                         'user_id': self.feed['user_id']})
        return

    if response.status_code == 304:
        logger.info("%r %r - feed responded with 304",
                    self.feed['id'], self.feed['title'])
        self.clean_feed()
        return

    if 'etag' not in response.headers:
        # Server sent no etag: synthesize one from the body hash so the
        # next fetch can still short-circuit on unchanged content.
        logger.debug('%r %r - manually generating etag',
                     self.feed['id'], self.feed['title'])
        response.headers['etag'] = 'jarr/"%s"' % to_hash(response.text)
    if response.headers['etag'] and self.feed['etag'] \
            and response.headers['etag'] == self.feed['etag']:
        # 'jarr' marker means the stored etag was one we generated above.
        if 'jarr' in self.feed['etag']:
            logger.info("%r %r - calculated hash matches (%d)",
                        self.feed['id'], self.feed['title'],
                        response.status_code)
        else:
            logger.info("%r %r - feed responded with same etag (%d)",
                        self.feed['id'], self.feed['title'],
                        response.status_code)
        self.clean_feed()
        return
    else:
        logger.debug('%r %r - etag mismatch %r != %r',
                     self.feed['id'], self.feed['title'],
                     response.headers['etag'], self.feed['etag'])

    logger.info('%r %r - cache validation failed, challenging entries',
                self.feed['id'], self.feed['title'])
    ids, entries = [], {}
    parsed_response = feedparser.parse(response.content)
    for entry in parsed_response['entries']:
        entry_ids = extract_id(entry)
        entry_ids['feed_id'] = self.feed['id']
        entry_ids['user_id'] = self.feed['user_id']
        # Key entries by their sorted id-tuple so the updater can match
        # them against the ids echoed back by the challenge endpoint.
        entries[tuple(sorted(entry_ids.items()))] = entry
        ids.append(entry_ids)
    logger.debug('%r %r - found %d entries %r',
                 self.feed['id'], self.feed['title'], len(ids), ids)

    future = self.query_jarr('get', 'articles/challenge', {'ids': ids})
    updater = JarrUpdater(self.feed, entries, response.headers,
                          parsed_response, self.auth, self.pool,
                          self.session)
    future.add_done_callback(updater.callback)
def callback(self, response):
    """Handle the fetched feed response.

    Interprets cache-validation results (HTTP 304, etag match) or, on a
    cache miss, parses the feed and challenges pyagg to compare the
    fetched entries against the existing ones.

    response -- a future resolving to an HTTP response for the feed URL.
    """
    try:
        response = response.result()
        response.raise_for_status()
    except Exception as error:
        error_count = self.feed['error_count'] + 1
        # fixed typo: "occured" -> "occurred"
        logger.exception('%r %r - an error occurred while fetching '
                         'feed; bumping error count to %r',
                         self.feed['id'], self.feed['title'], error_count)
        # Fire-and-forget persistence of the bumped error count; the
        # returned future was previously bound to an unused local, so it
        # is deliberately dropped here.
        self.query_pyagg('put', 'feed/%d' % self.feed['id'],
                         {'error_count': error_count,
                          'last_error': str(error),
                          'user_id': self.feed['user_id']})
        return

    if response.status_code == 304:
        logger.info("%r %r - feed responded with 304",
                    self.feed['id'], self.feed['title'])
        self.clean_feed()
        return

    if 'etag' not in response.headers:
        # Server sent no etag: synthesize one from the body hash so the
        # next fetch can still short-circuit on unchanged content.
        logger.debug('%r %r - manually generating etag',
                     self.feed['id'], self.feed['title'])
        response.headers['etag'] = 'pyagg/"%s"' % to_hash(response.text)
    if response.headers['etag'] and self.feed['etag'] \
            and response.headers['etag'] == self.feed['etag']:
        # 'pyagg' marker means the stored etag was one we generated above.
        if 'pyagg' in self.feed['etag']:
            logger.info("%r %r - calculated hash matches (%d)",
                        self.feed['id'], self.feed['title'],
                        response.status_code)
        else:
            logger.info("%r %r - feed responded with same etag (%d)",
                        self.feed['id'], self.feed['title'],
                        response.status_code)
        self.clean_feed()
        return
    else:
        logger.debug('%r %r - etag mismatch %r != %r',
                     self.feed['id'], self.feed['title'],
                     response.headers['etag'], self.feed['etag'])

    logger.info('%r %r - cache validation failed, challenging entries',
                self.feed['id'], self.feed['title'])
    ids, entries = [], {}
    parsed_response = feedparser.parse(response.content)
    for entry in parsed_response['entries']:
        entry_ids = extract_id(entry)
        entry_ids['feed_id'] = self.feed['id']
        entry_ids['user_id'] = self.feed['user_id']
        # Key entries by their sorted id-tuple so the updater can match
        # them against the ids echoed back by the challenge endpoint.
        entries[tuple(sorted(entry_ids.items()))] = entry
        ids.append(entry_ids)
    logger.debug('%r %r - found %d entries %r',
                 self.feed['id'], self.feed['title'], len(ids), ids)

    future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
    updater = PyAggUpdater(self.feed, entries, response.headers,
                           parsed_response, self.auth, self.pool,
                           self.session)
    future.add_done_callback(updater.callback)
def wrapper(*args, **kwargs):
    """Wrap *func* with HTTP etag cache validation.

    Hashes the response payload into an etag; when it equals the
    client's ``If-None-Match`` header, a 304 response is returned
    instead of the full body. Results that are neither a Response nor a
    str pass through untouched.
    """
    response = func(*args, **kwargs)
    if isinstance(response, Response):
        etag = to_hash(response.data)
        headers = response.headers
    elif isinstance(response, str):
        # was `type(response) is str`; isinstance also accepts str
        # subclasses, which the exact-type check wrongly rejected
        etag = to_hash(response)
        headers = {}
    else:
        # Unhashable payload type: nothing to validate, pass through.
        return response
    if request.headers.get('if-none-match') == etag:
        response = Response(status=304)
        # NOTE(review): 'pragma: no-cache' is not a valid Cache-Control
        # value (Pragma is a separate header; the value alone would be
        # 'no-cache') — kept as-is, confirm intent before changing.
        response.headers['Cache-Control'] \
            = headers.get('Cache-Control', 'pragma: no-cache')
    elif not isinstance(response, Response):
        response = make_response(response)
    response.headers['etag'] = etag
    return response
def extract_id(entry, keys=(('link', 'link'),), force_id=False):
    """For a given entry will return a dict that allows to identify it.

    The dict will be constructed on the uid of the entry ('entry_id' or
    'id'). If that identifier is absent, the dict will be constructed
    upon the values of *keys* (pairs of (entry key, output key)).

    With force_id=True and no uid present, a hash of the joined *keys*
    values is returned instead of a dict (legacy behavior preserved).

    Note: the default for *keys* is now an immutable tuple instead of a
    list — same iteration behavior, but no shared mutable default.
    """
    entry_id = entry.get('entry_id') or entry.get('id')
    if entry_id:
        return {'entry_id': entry_id}
    # entry_id is falsy here, so the original `not entry_id and force_id`
    # reduces to just force_id
    if force_id:
        return to_hash("".join(entry[entry_key]
                               for _, entry_key in keys
                               if entry_key in entry).encode('utf8'))
    ids = {}
    for entry_key, key in keys:
        if entry_key in entry and key not in ids:
            ids[key] = entry[entry_key]
            if 'date' in key:
                # Normalize date-ish fields to ISO 8601 so identifiers
                # compare stably across differing source formats.
                ids[key] = dateutil.parser.parse(ids[key]).isoformat()
    return ids