Exemple #1
0
 def ishtml(self):
     """Return True when the response headers for ``self.url`` declare HTML.

     Headers are first requested from a pickle cache keyed by
     ``self.hash + "_head"``.  If serving from the cache fails, the URL
     is fetched, the response body is passed to ``get_page`` and the
     headers are saved to the cache before being inspected.

     Only the media type of ``Content-Type`` is compared, so values such
     as ``text/html; charset=utf-8`` or ``Text/HTML`` count as HTML.
     """
     cache = picklecache.picklecache(self.hash + "_head")
     try:
         headers = cache.serve()
     except Exception:
         # Presumably serve() raises on a cache miss -- TODO confirm which
         # exception type so this handler can be narrowed further.
         # ``Exception`` (rather than a bare except) lets Ctrl-C and
         # SystemExit propagate instead of starting a download.
         #TODO: re-enable the exceptions, test with a site that doesn't exist
         u = urllib.urlopen(self.url)
         try:
             headers = u.info()
             self.get_page(u.read())
         finally:
             # Release the connection even if reading or get_page fails.
             u.close()

         cache.save(headers)

     # Content-Type may carry parameters after ';' and is case-insensitive;
     # strip both before comparing.
     content_type = headers.get("Content-Type", "")
     media_type = content_type.split(";", 1)[0].strip().lower()
     return media_type == "text/html"
Exemple #2
0
 def get_page(self, html=None):
     """Return the ``Pagey`` for ``self.url``, caching it as it goes.

     The cache entry is keyed by ``self.hash + "_page"``.

     * If ``html`` is given, a new ``Pagey(self.url, html)`` is built,
       saved to the cache (without consulting any existing entry) and
       returned.
     * Otherwise the cached object is served; if serving fails, a new
       ``Pagey(self.url)`` is built, saved and returned.

     NOTE(review): the original author suspected a logic error in this
     method without saying where.  One candidate is that passing ``html``
     always overwrites an existing cache entry -- confirm intent.
     """
     cache = picklecache.picklecache(self.hash + "_page")

     if html is None:
         try:
             return cache.serve()
         except Exception:
             # Presumably a cache miss -- TODO confirm the exception type.
             # ``Exception`` instead of a bare except so that Ctrl-C and
             # SystemExit are not swallowed here.
             data = Pagey(self.url)
     else:
         data = Pagey(self.url, html)

     cache.save(data)
     return data