def ishtml(self):
    """Return True if the resource at ``self.url`` is served as HTML.

    The response headers are looked up in a pickle cache keyed by
    ``self.hash + "_head"``. If that lookup fails, the URL is fetched,
    the response body is handed to ``get_page`` and the headers are
    saved to the cache.

    Only the media type of the ``Content-Type`` header is compared, so
    parameters such as ``; charset=utf-8`` and differences in letter
    case do not cause an HTML response to be reported as non-HTML.
    """
    cache = picklecache.picklecache(self.hash + "_head")
    try:
        headers = cache.serve()
    except Exception:
        # Presumably a cache miss (the failure mode of serve() is not
        # visible here -- confirm against picklecache). Exception rather
        # than a bare except so Ctrl-C / SystemExit still propagate.
        # TODO: re-enable the exceptions, test with a site that doesn't exist
        u = urllib.urlopen(self.url)
        try:
            headers = u.info()
            # The body is already being downloaded, so pass it along to
            # get_page instead of discarding it.
            self.get_page(u.read())
            cache.save(headers)
        finally:
            # Release the connection even if reading or caching fails.
            u.close()
    content_type = headers.get("Content-Type", "")
    # Drop any parameters ("; charset=...") and normalise case before
    # comparing; media types are case-insensitive.
    media_type = content_type.split(";", 1)[0].strip().lower()
    return media_type == "text/html"
def get_page(self, html=None):
    """Return the ``Pagey`` object for ``self.url``, caching it.

    The page cache is keyed by ``self.hash + "_page"``.

    When ``html`` is given, a new ``Pagey`` is built from it, saved to
    the cache (replacing whatever was stored) and returned. When ``html``
    is None, the cached object is returned if it can be served;
    otherwise ``Pagey(self.url)`` is built, saved and returned.

    NOTE(review): the original author suspected a logic error in this
    method; none has been pinned down from the code visible here.
    """
    cache = picklecache.picklecache(self.hash + "_page")

    if html is not None:
        # Caller already has the markup: build from it and refresh the cache.
        data = Pagey(self.url, html)
        cache.save(data)
        return data

    try:
        data = cache.serve()
    except Exception:
        # Presumably a cache miss (confirm against picklecache). Exception
        # rather than a bare except so Ctrl-C / SystemExit still propagate.
        # Pagey is given only the URL here -- presumably it fetches the
        # content itself; verify against the Pagey constructor.
        data = Pagey(self.url)
        cache.save(data)
    return data