self.save_page(html) except (ValueError, urllib2.URLError), e: self.feed.save_page_history(401, "Bad URL", e) fp = feedparser.parse(self.feed.feed_address) self.feed.feed_link = fp.feed.get('link', "") self.feed.save() except urllib2.HTTPError, e: self.feed.save_page_history(e.code, e.msg, e.fp.read()) return except Exception, e: logging.debug('[%d] ! -------------------------' % (self.feed.id,)) tb = traceback.format_exc() logging.debug(tb) logging.debug('[%d] ! -------------------------' % (self.feed.id,)) self.feed.save_page_history(500, "Error", tb) mail_error_to_admin(self.feed, e) return self.feed.save_page_history(200, "OK") def rewrite_page(self, response): BASE_RE = re.compile(r'<head(.*?\>)', re.I) base_code = u'<base href="%s" />' % (self.feed.feed_link,) try: html = BASE_RE.sub(r'<head\1 '+base_code, response) except: response = response.decode('latin1').encode('utf-8') html = BASE_RE.sub(r'<head\1 '+base_code, response) # html = self.fix_urls(html)
except Feed.DoesNotExist, e: logging.debug(' ---> [%-30s] Feed is now gone...' % (unicode(feed_id)[:30])) continue except TimeoutError, e: logging.debug(' ---> [%-30s] Feed fetch timed out...' % (unicode(feed)[:30])) feed.save_feed_history(505, 'Timeout', '') fetched_feed = None except Exception, e: logging.debug('[%d] ! -------------------------' % (feed_id,)) tb = traceback.format_exc() logging.error(tb) logging.debug('[%d] ! -------------------------' % (feed_id,)) ret_feed = FEED_ERREXC feed.save_feed_history(500, "Error", tb) fetched_feed = None mail_error_to_admin(feed, e) feed = self.refresh_feed(feed_id) if ((self.options['force']) or (fetched_feed and feed.feed_link and (ret_feed == FEED_OK or (ret_feed == FEED_SAME and feed.stories_last_month > 10)))): logging.debug(u' ---> [%-30s] Fetching page: %s' % (unicode(feed)[:30], feed.feed_link)) page_importer = PageImporter(feed.feed_link, feed) try: page_importer.fetch_page() except TimeoutError, e: logging.debug(' ---> [%-30s] Page fetch timed out...' % (unicode(feed)[:30])) feed.save_page_history(555, 'Timeout', '')