# NOTE(review): collapsed fragment — resumes mid-try/except inside what appears
# to be PageImporter.fetch_page (the enclosing `def` and `try:` are outside
# this view).  Indentation below is reconstructed; confirm against the full file.
        # Requests-level failure: retry the whole fetch once through the
        # urllib fallback path, forwarding the original exception.
        except (requests.exceptions.RequestException,
                requests.packages.urllib3.exceptions.HTTPError), e:
            logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed, e))
            # mail_feed_error_to_admin(self.feed, e, local_vars=locals())
            return self.fetch_page(urllib_fallback=True, requests_exception=e)
        except Exception, e:
            # Unexpected error: record a 500 in the page history, mail the
            # traceback to the admins, then retry once with urllib.
            logging.debug('[%d] ! -------------------------' % (self.feed.id, ))
            tb = traceback.format_exc()
            logging.debug(tb)
            logging.debug('[%d] ! -------------------------' % (self.feed.id, ))
            self.feed.save_page_history(500, "Error", tb)
            mail_feed_error_to_admin(self.feed, e, local_vars=locals())
            if not urllib_fallback:
                # NOTE(review): the fallback's result is discarded here (no
                # `return`), unlike the requests-exception branch above —
                # confirm this is intended.
                self.fetch_page(urllib_fallback=True)
        else:
            # Success path: the try body raised nothing.
            self.feed.save_page_history(200, "OK")
            return html

    def save_no_page(self):
        """Mark the feed as having no original page and record a 404."""
        self.feed.has_page = False
        self.feed.save()
        self.feed.save_page_history(404, "Feed has no original page.")

    def rewrite_page(self, response):
        # Inject a <base href> so relative URLs in the fetched page resolve
        # against the feed's site.  NOTE(review): fragment is truncated here —
        # the rest of this method lies outside the visible span.
        BASE_RE = re.compile(r'<head(.*?\>)', re.I)
        base_code = u'<base href="%s" />' % (self.feed.feed_link, )
# NOTE(review): collapsed fragment — resumes mid-try/except inside a feed
# fetch wrapper (the enclosing `def`/`try:` are not visible).  Indentation
# below is reconstructed; confirm against the full file.
        except TimeoutError, e:
            logging.debug(" ---> [%-30s] ~FRFeed fetch timed out..." % (feed.title[:30]))
            feed.save_feed_history(505, "Timeout", "")
            feed_code = 505
            fetched_feed = None
        except Exception, e:
            # Unexpected error: log the traceback, re-load the feed row,
            # record a 500 in its history, and mail the admins.
            logging.debug("[%d] ! -------------------------" % (feed_id,))
            tb = traceback.format_exc()
            logging.error(tb)
            logging.debug("[%d] ! -------------------------" % (feed_id,))
            ret_feed = FEED_ERREXC
            feed = self.refresh_feed(feed.pk)
            feed.save_feed_history(500, "Error", tb)
            feed_code = 500
            fetched_feed = None
            mail_feed_error_to_admin(feed, e, local_vars=locals())

        # Map the fetch-outcome constant to an HTTP-ish status code when the
        # handlers above did not already set one.
        if not feed_code:
            if ret_feed == FEED_OK:
                feed_code = 200
            elif ret_feed == FEED_SAME:
                feed_code = 304
            elif ret_feed == FEED_ERRHTTP:
                feed_code = 400
            # NOTE(review): a fresh `if` restarts the chain here, and the
            # second FEED_ERRPARSE branch below is unreachable dead code —
            # likely a copy/paste slip; confirm and collapse into one chain.
            if ret_feed == FEED_ERREXC:
                feed_code = 500
            elif ret_feed == FEED_ERRPARSE:
                feed_code = 550
            elif ret_feed == FEED_ERRPARSE:
                feed_code = 550
# NOTE(review): collapsed fragment — resumes mid-try/except inside a per-feed
# fetch loop (`continue` implies an enclosing loop; the `def`/`try:` are not
# visible).  Indentation below is reconstructed.
                except Feed.DoesNotExist, e:
                    # Feed row deleted while we were working — skip it.
                    logging.debug(' ---> [%-30s] Feed is now gone...' % (unicode(feed_id)[:30]))
                    continue
                except TimeoutError, e:
                    logging.debug(' ---> [%-30s] Feed fetch timed out...' % (unicode(feed)[:30]))
                    feed.save_feed_history(505, 'Timeout', '')
                    fetched_feed = None
                except Exception, e:
                    # Unexpected error: log traceback, record a 500, mail admins.
                    logging.debug('[%d] ! -------------------------' % (feed_id,))
                    tb = traceback.format_exc()
                    logging.error(tb)
                    logging.debug('[%d] ! -------------------------' % (feed_id,))
                    ret_feed = FEED_ERREXC
                    feed.save_feed_history(500, "Error", tb)
                    fetched_feed = None
                    mail_feed_error_to_admin(feed, e)

                # Re-load the feed, then fetch its original page when forced,
                # or when the feed fetch succeeded (or was unchanged but the
                # feed has recent story activity).
                feed = self.refresh_feed(feed_id)
                if ((self.options['force']) or
                    (fetched_feed and
                     feed.feed_link and
                     (ret_feed == FEED_OK or
                      (ret_feed == FEED_SAME and feed.stories_last_month > 10)))):
                    logging.debug(u' ---> [%-30s] Fetching page: %s' % (unicode(feed)[:30], feed.feed_link))
                    page_importer = PageImporter(feed.feed_link, feed)
                    try:
                        page_importer.fetch_page()
                    except TimeoutError, e:
                        # Page fetches get their own timeout bookkeeping (555).
                        logging.debug(' ---> [%-30s] Page fetch timed out...' % (unicode(feed)[:30]))
                        feed.save_page_history(555, 'Timeout', '')
# NOTE(review): collapsed fragment — starts with a dangling `continue` that
# belongs to a handler cut off above, and ends mid-statement below; the
# enclosing loop/`try:` are not visible.  Indentation is reconstructed.
                    continue
                except TimeoutError, e:
                    logging.debug(' ---> [%-30s] Feed fetch timed out...' % (unicode(feed)[:30]))
                    feed.save_feed_history(505, 'Timeout', '')
                    fetched_feed = None
                except Exception, e:
                    # Unexpected error: log traceback, record a 500, mail admins.
                    logging.debug('[%d] ! -------------------------' % (feed_id, ))
                    tb = traceback.format_exc()
                    logging.error(tb)
                    logging.debug('[%d] ! -------------------------' % (feed_id, ))
                    ret_feed = FEED_ERREXC
                    # Re-load the row before writing history so the error is
                    # saved against fresh state.
                    feed = self.refresh_feed(feed_id)
                    feed.save_feed_history(500, "Error", tb)
                    fetched_feed = None
                    mail_feed_error_to_admin(feed, e)

                feed = self.refresh_feed(feed_id)
                # Fetch the original page when forced, or when the feed fetch
                # succeeded (or was unchanged but recently active) and the
                # feed actually has a page to fetch.
                if ((self.options['force']) or
                    (fetched_feed and
                     feed.feed_link and
                     feed.has_page and
                     (ret_feed == FEED_OK or
                      (ret_feed == FEED_SAME and feed.stories_last_month > 10)))):
                    logging.debug(u' ---> [%-30s] ~FYFetching page: %s' % (unicode(feed)[:30], feed.feed_link))
                    page_importer = PageImporter(feed)
                    try:
                        page_importer.fetch_page()
                    except TimeoutError, e:
                        # NOTE(review): fragment truncated mid-call below —
                        # the right operand of `%` lies outside the span.
                        logging.debug(
                            ' ---> [%-30s] ~FRPage fetch timed out...' %
# NOTE(review): collapsed fragment — resumes mid-try inside what appears to be
# PageImporter.fetch_page; the opening `def`/`try:` are outside this view.
            # Recover the page link from the feed itself via feedparser.
            fp = feedparser.parse(self.feed.feed_address)
            self.feed.feed_link = fp.feed.get('link', "")
            self.feed.save()
        except (urllib2.HTTPError), e:
            # HTTP-level failure: record the server's own status and body.
            self.feed.save_page_history(e.code, e.msg, e.fp.read())
            return
        except (httplib.IncompleteRead), e:
            self.feed.save_page_history(500, "IncompleteRead", e)
            return
        except Exception, e:
            # Unexpected error: log traceback, record a 500, mail the admins.
            logging.debug('[%d] ! -------------------------' % (self.feed.id,))
            tb = traceback.format_exc()
            logging.debug(tb)
            logging.debug('[%d] ! -------------------------' % (self.feed.id,))
            self.feed.save_page_history(500, "Error", tb)
            mail_feed_error_to_admin(self.feed, e)
            return
        self.feed.save_page_history(200, "OK")

    def rewrite_page(self, response):
        # Inject a <base href> right after <head> so relative URLs in the
        # fetched page resolve against the feed's site.
        BASE_RE = re.compile(r'<head(.*?\>)', re.I)
        base_code = u'<base href="%s" />' % (self.feed.feed_link,)
        try:
            html = BASE_RE.sub(r'<head\1 '+base_code, response)
        except:
            # NOTE(review): bare except — presumably a UnicodeError on mixed
            # encodings; re-encodes via latin1 and retries.  Confirm and
            # narrow the exception type.
            response = response.decode('latin1').encode('utf-8')
            html = BASE_RE.sub(r'<head\1 '+base_code, response)
        if '<base href' not in html:
            # No <head> matched — prepend the base tag instead.
            html = "%s %s" % (base_code, html)
# NOTE(review): collapsed fragment — resumes mid-try/except inside what
# appears to be PageImporter.fetch_page (`def`/`try:` outside this view).
            return
        except (ValueError, urllib2.URLError, httplib.BadStatusLine, httplib.InvalidURL,
                requests.exceptions.ConnectionError), e:
            # Bad/unreachable URL: record it, then try to recover the page
            # link from the feed itself via feedparser.
            self.feed.save_page_history(401, "Bad URL", e)
            fp = feedparser.parse(self.feed.feed_address)
            # NOTE(review): result is bound to a local and never used — the
            # sibling variant assigns `self.feed.feed_link` here before
            # save(); this looks like a lost assignment.  Confirm against the
            # full file.
            feed_link = fp.feed.get('link', "")
            self.feed.save()
        except (urllib2.HTTPError), e:
            # HTTP-level failure: record the server's own status and body.
            self.feed.save_page_history(e.code, e.msg, e.fp.read())
        except (httplib.IncompleteRead), e:
            self.feed.save_page_history(500, "IncompleteRead", e)
        except (requests.exceptions.RequestException, LookupError,
                requests.packages.urllib3.exceptions.HTTPError), e:
            # Requests-level failure: retry once through the urllib fallback,
            # forwarding the original exception.
            logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed, e))
            mail_feed_error_to_admin(self.feed, e, locals())
            return self.fetch_page(urllib_fallback=True, requests_exception=e)
        except Exception, e:
            # Unexpected error: record a 500, mail the traceback, then retry
            # once with urllib (result discarded — see sibling variant).
            logging.debug('[%d] ! -------------------------' % (self.feed.id,))
            tb = traceback.format_exc()
            logging.debug(tb)
            logging.debug('[%d] ! -------------------------' % (self.feed.id,))
            self.feed.save_page_history(500, "Error", tb)
            mail_feed_error_to_admin(self.feed, e, locals())
            if not urllib_fallback:
                self.fetch_page(urllib_fallback=True)
        else:
            # Success path: the try body raised nothing.
            self.feed.save_page_history(200, "OK")

    def save_no_page(self):
        # NOTE(review): fragment truncated — the rest of this method lies
        # outside the visible span.
        self.feed.has_page = False
# NOTE(review): collapsed fragment — resumes mid-try inside what appears to be
# PageImporter.fetch_page, and ends mid-try inside rewrite_page below.
            self.feed.save()
        except (urllib2.HTTPError), e:
            # HTTP-level failure: record the server's own status and body.
            self.feed.save_page_history(e.code, e.msg, e.fp.read())
            return
        except (httplib.IncompleteRead), e:
            self.feed.save_page_history(500, "IncompleteRead", e)
            return
        except Exception, e:
            # Unexpected error: log traceback, record a 500, mail the admins.
            logging.debug('[%d] ! -------------------------' % (self.feed.id, ))
            tb = traceback.format_exc()
            logging.debug(tb)
            logging.debug('[%d] ! -------------------------' % (self.feed.id, ))
            self.feed.save_page_history(500, "Error", tb)
            mail_feed_error_to_admin(self.feed, e)
            return
        self.feed.save_page_history(200, "OK")

    def save_no_page(self):
        """Mark the feed as having no original page and record a 404."""
        self.feed.has_page = False
        self.feed.save()
        self.feed.save_page_history(404, "Feed has no original page.")

    def rewrite_page(self, response):
        # Inject a <base href> right after <head> so relative URLs resolve
        # against the feed's site.
        BASE_RE = re.compile(r'<head(.*?\>)', re.I)
        base_code = u'<base href="%s" />' % (self.feed.feed_link, )
        try:
            html = BASE_RE.sub(r'<head\1 ' + base_code, response)
        # NOTE(review): fragment truncated at the bare `except:` below — its
        # handler body lies outside the visible span.
        except: