def get(self, uri, sha1=None):
    """Return the absolute cache filename for a cache key or URL.

    A cached entry is returned directly (hit); a URL that is not cached is
    downloaded into the cache first (miss).

    Args:
        uri: a cache key, or a URI/URL that maps to a cache key.
        sha1: optional expected SHA1 of the cached file. When given, the file
            is verified and, on mismatch, discarded and fetched again.

    Returns:
        Absolute filename of the cached object.

    Raises:
        CacheError: if `uri` is neither cached nor a URL.
    """
    if self.iscached(uri) and self._iskey(uri):
        # URI is already a cache key: resolve it directly
        quietprint('[bobo.cache][HIT]: key "%s" ' % (uri))
        filename = self.abspath(uri)
    elif self.iscached(uri):
        # URI is cached under its derived key: convert, then resolve
        quietprint('[bobo.cache][HIT]: "%s" key "%s" ' % (uri, self.key(uri)))
        filename = self.abspath(self.key(uri))
    elif bobo.util.isurl(uri):
        quietprint('[bobo.cache][MISS]: downloading "%s"... ' % (uri))
        self.discard(uri)
        filename = self.abspath(self.put(uri))
    else:
        raise CacheError('[bobo.cache][ERROR]: invalid uri "%s"' % uri)

    # Optional integrity check
    if sha1 is not None:
        quietprint('[bobo.cache]: Verifying SHA1... ')
        if not bobo.viset.download.verify_sha1(filename, sha1):
            quietprint('[bobo.cache][ERROR]: invalid SHA1 - discarding and refetching... ')
            self.discard(uri)
            # Return what the retry produced instead of the filename of the
            # entry that was just discarded.
            # NOTE(review): the retry is unbounded; a source whose SHA1 never
            # matches will keep refetching until the recursion limit is hit.
            return self.get(uri, sha1)

    return filename
def put(self, obj, key=None, timeout=None, sha1=None):
    """Put a URL or numpy object into the cache under a cache key.

    Args:
        obj: a numpy object (exported to an HDF5 file) or a URL (downloaded).
        key: cache key to store under; derived with self.key(obj) when None.
        timeout: forwarded to self._download for URL objects.
        sha1: accepted for interface compatibility; not used by this method.

    Returns:
        The cache key the object was stored under.

    Raises:
        CacheError: if the key is already cached, or `obj` is neither a numpy
            object nor a URL.
    """
    if key is None:
        key = self.key(obj)
    if self.iscached(key):
        raise CacheError('[bobo.cache][Error]: Key collision! Existing object in cache with key "%s"' % key)

    if isnumpy(obj):
        # Numpy object - export to a file in the cache with the provided key
        quietprint('[bobo.cache][PUT]: Exporting numpy object to cache with key "' + key + '"')
        f = h5py.File(self.abspath(key), 'a')
        try:
            f[key] = obj
        finally:
            # Always release the file handle, even if the write fails
            f.close()
    elif isurl(obj):
        # URL - download, then move the downloaded file into the cache
        quietprint('[bobo.cache][PUT]: "%s" key "%s"' % (obj, key))
        filename = self._download(obj, timeout=timeout)
        shutil.move(filename, self.abspath(key))
    else:
        raise CacheError('[bobo.cache][ERROR]: Unsupported object type for PUT')

    return key
def load(self):
    """Load the image, going through the cached image when necessary.

    Returns:
        self.image if it is already set; otherwise the result of
        self.cachedimage.load(). None is returned when there is no cached
        image or when the cached file is smaller than 10000 (as reported by
        self.cachedimage.size()), in which case the file is removed. When a
        download, cache or IO error occurs the cached entry is discarded and
        the current value of self.image is returned.
    """
    if self.image is not None:
        return self.image
    elif self.cachedimage is not None:
        try:
            quietprint('[bobo.image]: loading "%s"'% self.cachedimage.uri, True)
            self.image = self.cachedimage.load()
            if self.cachedimage.size() < 10000:
                quietprint('[bobo.image][WARNING]: invalid download size - ignoring image', True)
                os.remove(self.cachedimage.filename())
                # Forget the rejected image so a later load() does not
                # return it from the in-memory shortcut above.
                self.image = None
                return None
        except (httplib.BadStatusLine, urllib2.URLError, urllib2.HTTPError):
            quietprint('[bobo.image][WARNING]: download failed - ignoring image', True)
            self.cachedimage.discard()
        except CacheError:
            quietprint('[bobo.image][WARNING]: cache error during download - ignoring image', True)
            self.cachedimage.discard()
        except IOError:
            quietprint('[bobo.image][WARNING]: IO error during download - ignoring image', True)
            self.cachedimage.discard()
        # Any other exception propagates to the caller unchanged.
        return self.image
    else:
        return None
def _free(self):
    """FIXME: Garbage collection

    Each call decrements the countdown self._free_ctr. When the countdown is
    zero on entry, the previously stored cache size result (if any) is
    compared against self._maxsize and a warning is printed when it is
    larger; a new size computation is then submitted to a one-worker Pool and
    the countdown is reset to self._free_maxctr. Nothing is deleted here.
    """
    # Only do the work when the countdown has expired
    if self._free_ctr == 0:
        if self._cachesize is not None:
            # NOTE(review): _cachesize is presumably the async result stored
            # below, so .get() would block until it is ready - confirm.
            if self._cachesize.get() > self._maxsize:
                print '[bobo.cache][WARNING]: cachesize is larger than maximum. Clean resources!'
        quietprint('[bobo.cache]: spawning cache garbage collection process')
        # NOTE(review): this passes the *result* of self.size() as the function
        # and self.root() as the argument sequence. If Pool is
        # multiprocessing.Pool, apply_async expects a callable plus a tuple of
        # arguments - confirm the intended call before relying on this.
        # The Pool created here is not closed or joined in this method.
        self._cachesize = Pool(1).apply_async(self.size(), self.root())
        self._free_ctr = self._free_maxctr
    self._free_ctr -= 1
def discard(self, uri):
    """Delete a single entry from the cache.

    If `uri` is cached, the file stored under its cache key is removed (when
    it exists as a regular file). Otherwise, if `uri` names a directory
    inside the cache, that directory tree is removed. Anything else is
    silently ignored.

    Args:
        uri: cache key, URI/URL, or cache-relative directory to remove.
    """
    if self.iscached(uri):
        key = self.key(uri)
        quietprint('[bobo.cache]: Removing key "%s" ' % (key))
        cached_file = self.abspath(key)
        if os.path.isfile(cached_file):
            os.remove(cached_file)
    elif os.path.isdir(self.abspath(uri)):
        quietprint('[bobo.cache]: Removing cached directory "%s" ' % (uri))
        # Remove exactly the directory that was tested above. The previous
        # code referenced an undefined name (`url`) and raised NameError here.
        shutil.rmtree(self.abspath(uri))
    # Not cached and not a directory: nothing to discard
def unpack(self, pkgkey, unpackto=None, sha1=None, cleanup=False):
    """Extract a cached archive and return the directory it was unpacked to.

    Args:
        pkgkey: cache key of the archive file.
        unpackto: cache-relative directory to extract into; the cache root is
            used when None. The directory is created if it does not exist.
        sha1: forwarded to bobo.viset.download.extract.
        cleanup: when true, the archive file is deleted after extraction.

    Returns:
        The directory the archive was extracted into.

    Raises:
        CacheError: if `pkgkey` is not cached or is not an archive.
    """
    if not self.iscached(pkgkey):
        raise CacheError('[bobo.cache][ERROR]: Key not found "%s" ' % pkgkey)

    archive = self.abspath(pkgkey)
    if not isarchive(archive):
        raise CacheError('[bobo.cache][ERROR]: Key not archive "%s" ' % pkgkey)

    # Default destination is the cache root
    target = self.root() if unpackto is None else self.abspath(unpackto)
    if not path.exists(target):
        os.makedirs(target)

    bobo.viset.download.extract(archive, target, sha1=sha1, verbose=self._verbose)

    if cleanup:
        quietprint('[bobo.cache]: Deleting archive "%s" ' % (pkgkey))
        os.remove(archive)
    return target
def load(self):
    """Load the image, going through the cached image when necessary.

    Loading is best effort: any error raised while loading is logged as a
    warning and the image is ignored.

    Returns:
        self.image if it is already set; otherwise the result of
        self.cachedimage.load(), or the current value of self.image when
        loading failed. None is returned when there is no cached image.
    """
    if self.image is not None:
        return self.image
    elif self.cachedimage is not None:
        try:
            quietprint('[bobo.image]: loading "%s"'% self.cachedimage.uri, True)
            self.image = self.cachedimage.load()
        except (httplib.BadStatusLine, urllib2.URLError, urllib2.HTTPError):
            quietprint('[bobo.image][WARNING]: download failed - ignoring image', True)
        except CacheError:
            quietprint('[bobo.image][WARNING]: cache error during download - ignoring image', True)
        except IOError:
            quietprint('[bobo.image][WARNING]: IO error during download - ignoring image', True)
        except Exception:
            # Still best effort for ordinary errors, but no longer swallows
            # KeyboardInterrupt / SystemExit as the bare except did.
            quietprint('[bobo.image][WARNING]: error during download - ignoring image', True)
        return self.image
    else:
        return None
def delete(self):
    """Wipe the whole cache: remove the root directory tree and recreate it empty."""
    cache_root = self.root()
    message = '[bobo.cache]: Deleting all cached data in "' + cache_root + '"'
    quietprint(message)

    # Remove everything under the root, then put back an empty root directory
    shutil.rmtree(cache_root)
    os.makedirs(cache_root)