# Module-level imports these methods rely on (Python 2-era code; `log` is
# assumed to be the module's logger and TIMEFMT_ISO its ISO-8601 strftime
# format, both defined elsewhere in the module):
import os
import time
import dbhash

import feedparser

import cache    # planet's cache module: filename() and CachedInfo


def update(self):
    """Download the feed to refresh the information.

    This does the actual work of pulling down the feed and if it
    changes updates the cached information about the feed and entries
    within it.
    """
    info = feedparser.parse(self.url,
                            etag=self.url_etag, modified=self.url_modified,
                            agent=self._planet.user_agent)

    # Work out an HTTP-style status code even when feedparser supplies none.
    if info.has_key("status"):
        self.url_status = str(info.status)
    elif info.has_key("entries") and len(info.entries) > 0:
        self.url_status = str(200)
    elif info.bozo and info.bozo_exception.__class__.__name__ == 'Timeout':
        self.url_status = str(408)
    else:
        from pprint import pprint
        pprint({'error': 'Feedparser failed for some reason', 'info': info})
        self.url_status = str(500)

    if self.url_status == '301' and \
       (info.has_key("entries") and len(info.entries) > 0):
        log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
        try:
            os.link(cache.filename(self._planet.cache_directory, self.url),
                    cache.filename(self._planet.cache_directory, info.url))
        except OSError:
            # the cache file may already exist for the new URL
            pass
        self.url = info.url
    elif self.url_status == '304':
        log.info("Feed %s unchanged", self.feed_information())
        return
    elif self.url_status == '410':
        log.info("Feed %s gone", self.feed_information())
        self.cache_write()
        return
    elif self.url_status == '408':
        log.warning("Feed %s timed out", self.feed_information())
        return
    elif int(self.url_status) >= 400:
        log.error("Error %s while updating feed %s",
                  self.url_status, self.feed_information())
        return
    else:
        log.info("Updating feed %s", self.feed_information())

    # Remember the validators for the next conditional GET.
    self.url_etag = info.has_key("etag") and info.etag or None
    self.url_modified = info.has_key("modified") and info.modified or None
    if self.url_etag is not None:
        log.debug("E-Tag: %s", self.url_etag)
    if self.url_modified is not None:
        log.debug("Last Modified: %s",
                  time.strftime(TIMEFMT_ISO, self.url_modified))

    self.update_info(info.feed)
    self.update_entries(info.entries)
    self.cache_write()
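# A note on the `X and Y or Z` idiom used above: it predates Python 2.5's
# conditional expression and yields Z whenever Y is falsy, so an empty
# etag string would be stored as None here.  The modern equivalent:
#
#     self.url_etag = info.etag if info.has_key("etag") else None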
# Variant of update(): identical to the version above except that it drops
# the pprint() debugging on failure and logs the Last-Modified value as-is
# (likely because newer feedparser releases return `modified` as a string
# rather than a time tuple).
def update(self):
    """Download the feed to refresh the information.

    This does the actual work of pulling down the feed and if it
    changes updates the cached information about the feed and entries
    within it.
    """
    info = feedparser.parse(self.url,
                            etag=self.url_etag, modified=self.url_modified,
                            agent=self._planet.user_agent)
    if info.has_key("status"):
        self.url_status = str(info.status)
    elif info.has_key("entries") and len(info.entries) > 0:
        self.url_status = str(200)
    elif info.bozo and info.bozo_exception.__class__.__name__ == 'Timeout':
        self.url_status = str(408)
    else:
        self.url_status = str(500)

    if self.url_status == '301' and \
       (info.has_key("entries") and len(info.entries) > 0):
        log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
        try:
            os.link(cache.filename(self._planet.cache_directory, self.url),
                    cache.filename(self._planet.cache_directory, info.url))
        except OSError:
            # the cache file may already exist for the new URL
            pass
        self.url = info.url
    elif self.url_status == '304':
        log.info("Feed %s unchanged", self.feed_information())
        return
    elif self.url_status == '410':
        log.info("Feed %s gone", self.feed_information())
        self.cache_write()
        return
    elif self.url_status == '408':
        log.warning("Feed %s timed out", self.feed_information())
        return
    elif int(self.url_status) >= 400:
        log.error("Error %s while updating feed %s",
                  self.url_status, self.feed_information())
        return
    else:
        log.info("Updating feed %s", self.feed_information())

    self.url_etag = info.has_key("etag") and info.etag or None
    self.url_modified = info.has_key("modified") and info.modified or None
    if self.url_etag is not None:
        log.debug("E-Tag: %s", self.url_etag)
    if self.url_modified is not None:
        #log.debug("Last Modified: %s",
        #          time.strftime(TIMEFMT_ISO, self.url_modified))
        log.debug("Last Modified: %s", self.url_modified)

    self.update_info(info.feed)
    self.update_entries(info.entries)
    self.cache_write()
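# Hypothetical usage sketch (not part of this module): it assumes the
# enclosing class is planet's Channel, that Planet(config) is the
# aggregator object exposing cache_directory, user_agent and config, and
# that 'planet.ini' exists -- all of these names are assumptions here.
import ConfigParser

config = ConfigParser.ConfigParser()
config.read('planet.ini')
my_planet = Planet(config)                  # assumed constructor signature
channel = Channel(my_planet, 'http://example.com/feed.xml')
channel.update()   # first call fetches the feed, caching etag/modified
channel.update()   # second call is a conditional GET; 304 logs "unchanged"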
def __init__(self, planet, url):
    if not os.path.isdir(planet.cache_directory):
        os.makedirs(planet.cache_directory)
    cache_filename = cache.filename(planet.cache_directory, url)
    cache_file = dbhash.open(cache_filename, "c", 0666)

    cache.CachedInfo.__init__(self, cache_file, url, root=1)

    self._items = {}
    self._planet = planet
    self._expired = []
    self.url = url
    # retain the original URL for error reporting
    self.configured_url = url
    self.url_etag = None
    self.url_status = None
    self.url_modified = None
    self.name = None
    self.updated = None
    self.last_updated = None
    self.filter = None
    self.exclude = None
    self.next_order = "0"
    self.relevant_tags = None
    self.cache_read()
    self.cache_read_entries()

    if planet.config.has_section(url):
        for option in planet.config.options(url):
            value = planet.config.get(url, option)
            self.set_as_string(option, value, cached=0)
# Variant of the constructor above, without the relevant_tags attribute.
def __init__(self, planet, url):
    if not os.path.isdir(planet.cache_directory):
        os.makedirs(planet.cache_directory)
    cache_filename = cache.filename(planet.cache_directory, url)
    cache_file = dbhash.open(cache_filename, "c", 0666)

    cache.CachedInfo.__init__(self, cache_file, url, root=1)

    self._items = {}
    self._planet = planet
    self._expired = []
    self.url = url
    # retain the original URL for error reporting
    self.configured_url = url
    self.url_etag = None
    self.url_status = None
    self.url_modified = None
    self.name = None
    self.updated = None
    self.last_updated = None
    self.filter = None
    self.exclude = None
    self.next_order = "0"
    self.cache_read()
    self.cache_read_entries()

    if planet.config.has_section(url):
        for option in planet.config.options(url):
            value = planet.config.get(url, option)
            self.set_as_string(option, value, cached=0)
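# A sketch of how per-feed configuration reaches the constructors above:
# when the config file has a section named after the feed URL, every
# option in it is handed to set_as_string().  The ini content and option
# names below are illustrative assumptions, not part of this module.
import ConfigParser
import StringIO

ini = StringIO.StringIO("""\
[http://example.com/feed.xml]
name = Example Feed
filter = example
""")
config = ConfigParser.ConfigParser()
config.readfp(ini)

url = 'http://example.com/feed.xml'
if config.has_section(url):
    for option in config.options(url):
        # __init__ would call self.set_as_string(option, value, cached=0)
        print option, '=', config.get(url, option)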
# Simpler variant of update() that branches on the integer info.status
# directly and has no special handling for timeouts or 410 Gone.
def update(self):
    """Download the feed to refresh the information.

    This does the actual work of pulling down the feed and if it
    changes updates the cached information about the feed and entries
    within it.
    """
    info = feedparser.parse(self.url,
                            etag=self.url_etag, modified=self.url_modified,
                            agent=self._planet.user_agent)
    if not info.has_key("status"):
        log.info("Updating feed <%s>", self.url)
    elif info.status == 301 or info.status == 302:
        log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
        try:
            os.link(cache.filename(self._planet.cache_directory, self.url),
                    cache.filename(self._planet.cache_directory, info.url))
        except OSError:
            # the cache file may already exist for the new URL
            pass
        self.url = info.url
    elif info.status == 304:
        log.info("Feed <%s> unchanged", self.url)
        return
    elif info.status >= 400:
        log.error("Error %d while updating feed <%s>", info.status, self.url)
        return
    else:
        log.info("Updating feed <%s>", self.url)

    self.url_etag = info.has_key("etag") and info.etag or None
    self.url_modified = info.has_key("modified") and info.modified or None
    if self.url_etag is not None:
        log.debug("E-Tag: %s", self.url_etag)
    if self.url_modified is not None:
        log.debug("Last Modified: %s",
                  time.strftime(TIMEFMT_ISO, self.url_modified))

    self.update_info(info.feed)
    self.update_entries(info.entries)
    self.cache_write()
def __init__(self, planet, url):
    if not os.path.isdir(planet.cache_directory):
        os.makedirs(planet.cache_directory)
    cache_filename = cache.filename(planet.cache_directory, url)
    cache_file = dbhash.open(cache_filename, "c", 0666)

    cache.CachedInfo.__init__(self, cache_file, url, root=1)

    self._items = {}
    self._planet = planet
    self._expired = []
    self.url = url
    self.url_etag = None
    self.url_modified = None
    self.name = None
    self.updated = None
    self.last_updated = None
    self.next_order = "0"
    self.cache_read()
    self.cache_read_entries()
def cache_basename(self):
    return cache.filename("", self._id)
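# cache.filename() maps a (directory, key) pair to a filesystem path.  The
# stub below is only a guess at its behaviour, assuming it escapes
# characters that are unsafe in filenames; the real implementation lives
# in planet's cache module and may differ.
import os
import re

def filename(directory, key):
    """Return a filename-safe cache path for the given key (sketch)."""
    safe = re.sub(r'[^A-Za-z0-9_.-]', ',', key)
    return os.path.join(directory, safe)

# filename('cache', 'http://example.com/feed.xml')
# -> 'cache/http,,,example.com,feed.xml'   (illustrative output only)
# cache_basename() above passes '' as the directory to get just the
# escaped basename for self._id.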