Example #1
0
    def callback(self, response):
        """Handle the completed fetch of the feed.

        ``response`` is a future: its ``result()`` is resolved and checked
        with ``raise_for_status()``. Any exception raised while doing so is
        logged and the feed's error count is bumped through ``query_jarr``.

        When the fetch succeeded, the feed is considered unchanged (and
        ``clean_feed()`` is called) if the status code is 304 or if the etag
        of the response equals the one stored on the feed. A response
        without etag gets a locally computed one, prefixed with ``jarr/``.

        Otherwise the content is parsed, an identifier is extracted for every
        entry and the identifiers are sent to ``articles/challenge``; a
        ``JarrUpdater`` is registered as callback of that request."""
        feed = self.feed
        try:
            fetched = response.result()
            fetched.raise_for_status()
        except Exception as error:
            failures = feed['error_count'] + 1
            logger.exception('%r %r - an error occured while fetching '
                             'feed; bumping  error count to %r',
                             feed['id'], feed['title'], failures)
            path = 'feed/%d' % feed['id']
            payload = {'error_count': failures,
                       'last_error': str(error),
                       'user_id': feed['user_id']}
            # the future returned by this request is not needed
            self.query_jarr('put', path, payload)
            return

        if fetched.status_code == 304:
            logger.info("%r %r - feed responded with 304",
                        feed['id'], feed['title'])
            self.clean_feed()
            return

        headers = fetched.headers
        if 'etag' not in headers:
            # no etag from the server: derive one from the body
            logger.debug('%r %r - manually generating etag',
                         feed['id'], feed['title'])
            headers['etag'] = 'jarr/"%s"' % to_hash(fetched.text)

        new_etag, known_etag = headers['etag'], feed['etag']
        if new_etag and known_etag and new_etag == known_etag:
            # a stored etag containing 'jarr' was computed locally
            if 'jarr' in known_etag:
                message = "%r %r - calculated hash matches (%d)"
            else:
                message = "%r %r - feed responded with same etag (%d)"
            logger.info(message, feed['id'], feed['title'],
                        fetched.status_code)
            self.clean_feed()
            return

        logger.debug('%r %r - etag mismatch %r != %r',
                     feed['id'], feed['title'], new_etag, known_etag)
        logger.info('%r %r - cache validation failed, challenging entries',
                    feed['id'], feed['title'])

        challenged, entries_by_key = [], {}
        parsed = feedparser.parse(fetched.content)
        for item in parsed['entries']:
            identity = extract_id(item)
            identity['feed_id'] = feed['id']
            identity['user_id'] = feed['user_id']
            # dicts are not hashable: index entries by their sorted items
            entries_by_key[tuple(sorted(identity.items()))] = item
            challenged.append(identity)
        logger.debug('%r %r - found %d entries %r',
                     feed['id'], feed['title'], len(challenged), challenged)

        pending = self.query_jarr('get', 'articles/challenge',
                                  {'ids': challenged})
        updater = JarrUpdater(feed, entries_by_key, headers, parsed,
                              self.auth, self.pool, self.session)
        pending.add_done_callback(updater.callback)
Example #2
0
    def callback(self, response):
        """Process the outcome of the feed request.

        The ``response`` future is resolved first; if resolving it or
        ``raise_for_status()`` raises, the error is logged, the feed's
        ``error_count`` is incremented through ``query_pyagg`` and the
        method stops there.

        A 304 status, or an etag identical to the one stored on the feed
        (an etag prefixed with ``pyagg/`` is computed locally when the
        response carries none), ends the processing with ``clean_feed()``.

        In every other case the parsed entries are identified and those
        identifiers are submitted to ``articles/challenge``, the answer being
        handed to a ``PyAggUpdater``."""
        try:
            result = response.result()
            result.raise_for_status()
        except Exception as error:
            bumped = self.feed['error_count'] + 1
            logger.exception('%r %r - an error occured while fetching '
                             'feed; bumping  error count to %r',
                             self.feed['id'], self.feed['title'], bumped)
            # fire and forget: the returned future is not used
            self.query_pyagg('put', 'feed/%d' % self.feed['id'],
                             {'error_count': bumped,
                              'last_error': str(error),
                              'user_id': self.feed['user_id']})
            return

        feed_id, title = self.feed['id'], self.feed['title']
        status = result.status_code

        if status == 304:
            logger.info("%r %r - feed responded with 304", feed_id, title)
            self.clean_feed()
            return

        if 'etag' not in result.headers:
            logger.debug('%r %r - manually generating etag', feed_id, title)
            # fall back on a hash of the body when the server sent no etag
            result.headers['etag'] = 'pyagg/"%s"' % to_hash(result.text)

        fresh, stored = result.headers['etag'], self.feed['etag']
        same_etag = bool(fresh and stored and fresh == stored)
        if same_etag:
            # 'pyagg' in the stored etag means it was generated locally
            if 'pyagg' in stored:
                logger.info("%r %r - calculated hash matches (%d)",
                            feed_id, title, status)
            else:
                logger.info("%r %r - feed responded with same etag (%d)",
                            feed_id, title, status)
            self.clean_feed()
            return

        logger.debug('%r %r - etag mismatch %r != %r',
                     feed_id, title, fresh, stored)
        logger.info('%r %r - cache validation failed, challenging entries',
                    feed_id, title)

        parsed_feed = feedparser.parse(result.content)
        id_dicts = []
        entry_index = {}
        for parsed_entry in parsed_feed['entries']:
            id_dict = extract_id(parsed_entry)
            id_dict['feed_id'] = feed_id
            id_dict['user_id'] = self.feed['user_id']
            # key each entry by the sorted items of its identifying dict
            index_key = tuple(sorted(id_dict.items()))
            entry_index[index_key] = parsed_entry
            id_dicts.append(id_dict)
        logger.debug('%r %r - found %d entries %r',
                     feed_id, title, len(id_dicts), id_dicts)

        challenge = self.query_pyagg('get', 'articles/challenge',
                                     {'ids': id_dicts})
        updater = PyAggUpdater(self.feed, entry_index, result.headers,
                               parsed_feed,
                               self.auth, self.pool, self.session)
        challenge.add_done_callback(updater.callback)
Example #3
0
 def wrapper(*args, **kwargs):
     """Call ``func`` and attach an etag computed from its result.

     Only ``Response`` objects and plain ``str`` results are handled; any
     other result is returned untouched. When the ``if-none-match`` header
     of the current request equals the computed etag, an empty 304
     ``Response`` replaces the result."""
     result = func(*args, **kwargs)
     if isinstance(result, Response):
         hashed_body, original_headers = result.data, result.headers
     elif type(result) is str:
         hashed_body, original_headers = result, {}
     else:
         # neither a Response nor a plain string: nothing to hash
         return result
     etag = to_hash(hashed_body)

     client_has_it = request.headers.get('if-none-match') == etag
     if client_has_it:
         result = Response(status=304)
         # NOTE(review): 'pragma: no-cache' is not a usual Cache-Control
         # value - confirm this fallback is intended
         cache_control = original_headers.get('Cache-Control',
                                              'pragma: no-cache')
         result.headers['Cache-Control'] = cache_control
     elif not isinstance(result, Response):
         result = make_response(result)
     result.headers['etag'] = etag
     return result
Example #4
0
 def wrapper(*args, **kwargs):
     """Run ``func`` and answer with an etag-tagged response.

     The etag is the hash of the ``Response`` data or of the returned
     ``str``; results of any other type are passed through unchanged. If
     the request's ``if-none-match`` header matches the etag, a bodiless
     304 ``Response`` is sent instead of the result."""
     produced = func(*args, **kwargs)
     is_response = isinstance(produced, Response)
     if not is_response and type(produced) is not str:
         # unsupported result type: hand it back as is
         return produced

     if is_response:
         etag = to_hash(produced.data)
         inherited = produced.headers
     else:
         etag = to_hash(produced)
         inherited = {}

     if request.headers.get('if-none-match') == etag:
         not_modified = Response(status=304)
         # carry over the Cache-Control of the original response, if any
         not_modified.headers['Cache-Control'] = inherited.get(
                 'Cache-Control', 'pragma: no-cache')
         produced = not_modified
     elif not is_response:
         produced = make_response(produced)
     produced.headers['etag'] = etag
     return produced
Example #5
0
def extract_id(entry, keys=(('link', 'link'),), force_id=False):
    """Return what allows to identify the given entry.

    entry -- mapping holding the fields of a feed entry
    keys -- iterable of ``(entry_key, key)`` pairs: ``entry_key`` is the name
        of the field read from the entry, ``key`` the name under which its
        value is stored in the returned dict
    force_id -- when true and the entry has no uid, return a hash instead of
        a dict

    If the entry has a truthy 'entry_id' or 'id', ``{'entry_id': <uid>}`` is
    returned. Otherwise:

    * with ``force_id``, the values found in the entry for every
      ``entry_key`` are concatenated, utf8 encoded and passed to ``to_hash``
      whose result is returned (so not a dict);
    * without it, a dict is built from ``keys``; the first pair present in
      the entry wins for a given ``key``, and values stored under a ``key``
      containing 'date' are normalized to ISO format through dateutil.
    """
    entry_id = entry.get('entry_id') or entry.get('id')
    if entry_id:
        return {'entry_id': entry_id}
    if force_id:
        # pairs are (entry_key, key): the entry is read with the first member
        return to_hash("".join(entry[entry_key] for entry_key, _ in keys
                               if entry_key in entry).encode('utf8'))
    ids = {}
    for entry_key, key in keys:
        if entry_key in entry and key not in ids:
            value = entry[entry_key]
            if 'date' in key:
                value = dateutil.parser.parse(value).isoformat()
            ids[key] = value
    return ids
Example #6
0
def extract_id(entry, keys=(('link', 'link'),), force_id=False):
    """For a given entry will return what identifies it.

    The uid of the entry ('entry_id', or 'id' as a fallback) is used when it
    is present and truthy: the result is then ``{'entry_id': <uid>}``.

    When the uid is absent the result depends on ``force_id``:

    * false (default): a dict built upon ``keys``, an iterable of
      ``(entry_key, key)`` pairs. For each pair whose ``entry_key`` is in
      the entry, the value is stored under ``key`` unless an earlier pair
      already filled that ``key``. If ``key`` contains 'date' the value is
      parsed with dateutil and stored in ISO format.
    * true: the result of ``to_hash`` applied on the utf8 encoded
      concatenation of the entry values designated by each ``entry_key``.
      Note that this is not a dict.
    """
    uid = entry.get('entry_id') or entry.get('id')
    if uid:
        return {'entry_id': uid}

    if force_id:
        # the field to read from the entry is the FIRST member of each pair,
        # exactly as in the dict construction below
        present_values = [entry[entry_key] for entry_key, _ in keys
                          if entry_key in entry]
        return to_hash("".join(present_values).encode('utf8'))

    ids = {}
    for entry_key, key in keys:
        if entry_key not in entry or key in ids:
            continue
        ids[key] = entry[entry_key]
        if 'date' in key:
            ids[key] = dateutil.parser.parse(ids[key]).isoformat()
    return ids