Ejemplo n.º 1
0
    def update(self):
        """Download the feed to refresh the information.

        This does the actual work of pulling down the feed and if it changes
        updates the cached information about the feed and entries within it.
        """
        info = feedparser.parse(self.url,
                                etag=self.url_etag, modified=self.url_modified,
                                agent=self._planet.user_agent)
        # Synthesize an HTTP-style status string even when feedparser
        # reports none (e.g. a local file or a parse failure).
        if "status" in info:
            self.url_status = str(info.status)
        elif "entries" in info and len(info.entries) > 0:
            # Entries came back without a status: treat as a normal fetch.
            self.url_status = str(200)
        elif info.bozo and info.bozo_exception.__class__.__name__ == 'Timeout':
            self.url_status = str(408)
        else:
            # Was a debug pprint() dumping the result dict to stdout; route
            # the diagnostic through the module logger instead so it lands
            # with the rest of the feed messages.
            log.error("Feedparser failed for some reason: %r", info)
            self.url_status = str(500)

        if self.url_status == '301' and \
           ("entries" in info and len(info.entries) > 0):
            log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
            try:
                # Link the old cache file under the new URL's cache name so
                # cached state survives the redirect.
                os.link(cache.filename(self._planet.cache_directory, self.url),
                        cache.filename(self._planet.cache_directory, info.url))
            except OSError:
                # Best effort: the target may already exist, or the
                # filesystem may not support hard links.
                pass
            self.url = info.url
        elif self.url_status == '304':
            log.info("Feed %s unchanged", self.feed_information())
            return
        elif self.url_status == '410':
            log.info("Feed %s gone", self.feed_information())
            self.cache_write()
            return
        elif self.url_status == '408':
            log.warning("Feed %s timed out", self.feed_information())
            return
        elif int(self.url_status) >= 400:
            log.error("Error %s while updating feed %s",
                      self.url_status, self.feed_information())
            return
        else:
            log.info("Updating feed %s", self.feed_information())

        # get()-with-or preserves the old "has_key and value or None"
        # behaviour: a missing OR falsy header collapses to None.
        self.url_etag = info.get("etag") or None
        self.url_modified = info.get("modified") or None
        if self.url_etag is not None:
            log.debug("E-Tag: %s", self.url_etag)
        if self.url_modified is not None:
            log.debug("Last Modified: %s",
                      time.strftime(TIMEFMT_ISO, self.url_modified))

        self.update_info(info.feed)
        self.update_entries(info.entries)
        self.cache_write()
Ejemplo n.º 2
0
    def update(self):
        """Download the feed to refresh the information.

        This does the actual work of pulling down the feed and if it changes
        updates the cached information about the feed and entries within it.
        """
        info = feedparser.parse(self.url,
                                etag=self.url_etag, modified=self.url_modified,
                                agent=self._planet.user_agent)
        # Synthesize an HTTP-style status string even when feedparser
        # reports none (e.g. a local file or a parse failure).
        if "status" in info:
            self.url_status = str(info.status)
        elif "entries" in info and len(info.entries) > 0:
            # Entries came back without a status: treat as a normal fetch.
            self.url_status = str(200)
        elif info.bozo and info.bozo_exception.__class__.__name__ == 'Timeout':
            self.url_status = str(408)
        else:
            self.url_status = str(500)

        if self.url_status == '301' and \
           ("entries" in info and len(info.entries) > 0):
            log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
            try:
                # Link the old cache file under the new URL's cache name so
                # cached state survives the redirect.
                os.link(cache.filename(self._planet.cache_directory, self.url),
                        cache.filename(self._planet.cache_directory, info.url))
            except OSError:
                # Best effort: the target may already exist, or the
                # filesystem may not support hard links.
                pass
            self.url = info.url
        elif self.url_status == '304':
            log.info("Feed %s unchanged", self.feed_information())
            return
        elif self.url_status == '410':
            log.info("Feed %s gone", self.feed_information())
            self.cache_write()
            return
        elif self.url_status == '408':
            log.warning("Feed %s timed out", self.feed_information())
            return
        elif int(self.url_status) >= 400:
            log.error("Error %s while updating feed %s",
                      self.url_status, self.feed_information())
            return
        else:
            log.info("Updating feed %s", self.feed_information())

        # get()-with-or preserves the old "has_key and value or None"
        # behaviour: a missing OR falsy header collapses to None.
        self.url_etag = info.get("etag") or None
        self.url_modified = info.get("modified") or None
        if self.url_etag is not None:
            log.debug("E-Tag: %s", self.url_etag)
        if self.url_modified is not None:
            # Logged directly; the strftime() call used elsewhere was
            # commented out here (presumably url_modified is not always a
            # time tuple in this variant — TODO confirm).
            log.debug("Last Modified: %s", self.url_modified)

        self.update_info(info.feed)
        self.update_entries(info.entries)
        self.cache_write()
Ejemplo n.º 3
0
    def __init__(self, planet, url):
        if not os.path.isdir(planet.cache_directory):
            os.makedirs(planet.cache_directory)
        cache_filename = cache.filename(planet.cache_directory, url)
        cache_file = dbhash.open(cache_filename, "c", 0666)

        cache.CachedInfo.__init__(self, cache_file, url, root=1)

        self._items = {}
        self._planet = planet
        self._expired = []
        self.url = url
        # retain the original URL for error reporting
        self.configured_url = url
        self.url_etag = None
        self.url_status = None
        self.url_modified = None
        self.name = None
        self.updated = None
        self.last_updated = None
        self.filter = None
        self.exclude = None
        self.next_order = "0"
        self.relevant_tags = None
        self.cache_read()
        self.cache_read_entries()

        if planet.config.has_section(url):
            for option in planet.config.options(url):
                value = planet.config.get(url, option)
                self.set_as_string(option, value, cached=0)
Ejemplo n.º 4
0
    def __init__(self, planet, url):
        if not os.path.isdir(planet.cache_directory):
            os.makedirs(planet.cache_directory)
        cache_filename = cache.filename(planet.cache_directory, url)
        cache_file = dbhash.open(cache_filename, "c", 0666)

        cache.CachedInfo.__init__(self, cache_file, url, root=1)

        self._items = {}
        self._planet = planet
        self._expired = []
        self.url = url
        # retain the original URL for error reporting
        self.configured_url = url
        self.url_etag = None
        self.url_status = None
        self.url_modified = None
        self.name = None
        self.updated = None
        self.last_updated = None
        self.filter = None
        self.exclude = None
        self.next_order = "0"
        self.cache_read()
        self.cache_read_entries()

        if planet.config.has_section(url):
            for option in planet.config.options(url):
                value = planet.config.get(url, option)
                self.set_as_string(option, value, cached=0)
Ejemplo n.º 5
0
    def update(self):
        """Download the feed to refresh the information.

        This does the actual work of pulling down the feed and if it changes
        updates the cached information about the feed and entries within it.
        """
        info = feedparser.parse(self.url,
                                etag=self.url_etag,
                                modified=self.url_modified,
                                agent=self._planet.user_agent)
        if "status" not in info:
            # No HTTP status (e.g. a local file); proceed with the update.
            log.info("Updating feed <%s>", self.url)
        elif info.status == 301 or info.status == 302:
            log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
            try:
                # Link the old cache file under the new URL's cache name so
                # cached state survives the redirect.
                os.link(cache.filename(self._planet.cache_directory, self.url),
                        cache.filename(self._planet.cache_directory, info.url))
            except OSError:
                # Best effort: the target may already exist, or the
                # filesystem may not support hard links.  Previously an
                # exception here aborted the whole update.
                pass
            self.url = info.url
        elif info.status == 304:
            log.info("Feed <%s> unchanged", self.url)
            return
        elif info.status >= 400:
            log.error("Error %d while updating feed <%s>", info.status,
                      self.url)
            return
        else:
            log.info("Updating feed <%s>", self.url)

        # get()-with-or preserves the old "has_key and value or None"
        # behaviour: a missing OR falsy header collapses to None.
        self.url_etag = info.get("etag") or None
        self.url_modified = info.get("modified") or None
        if self.url_etag is not None:
            log.debug("E-Tag: %s", self.url_etag)
        if self.url_modified is not None:
            log.debug("Last Modified: %s",
                      time.strftime(TIMEFMT_ISO, self.url_modified))

        self.update_info(info.feed)
        self.update_entries(info.entries)
        self.cache_write()
Ejemplo n.º 6
0
    def __init__(self, planet, url):
        if not os.path.isdir(planet.cache_directory):
            os.makedirs(planet.cache_directory)
        cache_filename = cache.filename(planet.cache_directory, url)
        cache_file = dbhash.open(cache_filename, "c", 0666)

        cache.CachedInfo.__init__(self, cache_file, url, root=1)

        self._items = {}
        self._planet = planet
        self._expired = []
        self.url = url
        self.url_etag = None
        self.url_modified = None
        self.name = None
        self.updated = None
        self.last_updated = None
        self.next_order = "0"
        self.cache_read()
        self.cache_read_entries()
Ejemplo n.º 7
0
 def cache_basename(self):
     """Return the cache file's basename for this object's id
     (cache.filename called with an empty directory)."""
     basename = cache.filename("", self._id)
     return basename
Ejemplo n.º 8
0
 def cache_basename(self):
     """Return the cache file's basename for this object's id
     (cache.filename called with an empty directory)."""
     return cache.filename('', self._id)