class Cache(object):
    """Thin wrapper around ``diskcache.Cache`` that builds compound keys by
    colon-joining string parts, with JSON (de)serialization helpers."""

    def __init__(self):
        # Imported lazily so the module can be imported without diskcache installed.
        from diskcache import Cache
        self.cache = Cache('/tmp/navan')
        # Enable hit/miss statistics collection on the underlying cache.
        self.cache.stats(enable=True)

    def get(self, *args):
        """Return the cached value for the colon-joined key parts, or None on a miss."""
        return self.cache.get(':'.join(args))

    def set(self, *args, **kwargs):
        """Store a value under a compound key.

        All positional args except the last are joined with ':' to form the
        key; the last positional arg is the value.  Optional keyword
        ``expire`` is a TTL in seconds (passed through to diskcache).

        Raises:
            ValueError: if fewer than two positional args are supplied.
        """
        expire = kwargs.get('expire')
        if len(args) < 2:
            # ValueError (a subclass of Exception) replaces the old generic
            # Exception, so existing `except Exception` callers still work.
            raise ValueError('cache set must contain `key` and `value`')
        key, value = args[:-1], args[-1]
        key = ':'.join(key)
        return self.cache.set(key, value, expire)

    def get_json(self, *args):
        """Like get(), but JSON-decode the stored string.

        A miss (or falsy cached value) is returned as-is without decoding.
        """
        ret = self.get(*args)
        if not ret:
            return ret
        return json.loads(ret)

    def set_json(self, *args, **kwargs):
        """Like set(), but JSON-encode the value (last positional arg) first."""
        args = list(args)
        args[-1] = json.dumps(args[-1])
        return self.set(*args, **kwargs)
class CachedClient(Client):
    """A Client that memoizes per-file results in a local diskcache.

    Full records are stored under their sha256 with tag 'object'; md5 and
    sha1 entries are aliases pointing at the sha256 key; API errors are
    cached under tag 'error'.
    """

    def __init__(self, apikey, agent="unknown", host=None, cache_dir=None):
        super().__init__(apikey, agent=agent, host=host)
        self.cache = Cache(cache_dir, disk=VtCache, disk_compress_level=6, tag_index=True)
        self.cache_dir = cache_dir
        self.logger = logging.getLogger('kfinny.cachedvt.CachedClient')

    def _get(self, resource):
        """Look up *resource* in the cache, following md5/sha1 alias entries.

        Returns a (value, tag) pair; cached 'object' payloads are rebuilt
        into Object instances.
        """
        value, kind = self.cache.get(resource, tag=True)
        if value and kind in ('sha1', 'md5'):
            # Alias entry: the stored value is the sha256 key of the real record.
            value, kind = self.cache.get(value, tag=True)
        if value and kind == 'object':
            value = Object.from_dict(value)
        return value, kind

    def _put_object(self, obj):
        """Cache the full record under sha256 and alias entries for sha1/md5."""
        self.cache.set(obj.sha256, obj.to_dict(), tag='object')
        self.cache.set(obj.sha1, obj.sha256, tag='sha1')
        self.cache.set(obj.md5, obj.sha256, tag='md5')

    def _put_error(self, resource, error):
        """Cache an API error response for *resource* under tag 'error'."""
        payload = {
            'resource': resource,
            'code': error.code,
            'message': error.message
        }
        self.cache.set(resource, payload, tag='error')

    def yield_file_report(self, resource, include_notfound=False):
        """Yield a report for each hash in *resource*, serving from cache when possible.

        *resource* may be a comma-separated string or any iterable of hashes.
        Cache misses are fetched from the API and cached (objects and errors
        alike); cached errors are yielded only when include_notfound is True.
        """
        if isinstance(resource, str):
            resource = resource.split(',')
        misses = set()
        if isinstance(resource, (tuple, list, set, frozenset)):
            for item in resource:
                cached, kind = self._get(item)
                if cached is None:
                    misses.add(item)
                elif kind == 'object' or include_notfound:
                    yield cached
        for item in sorted(misses):
            try:
                obj = self.get_object(f'/files/{item}')
                self._put_object(obj)
                yield obj
            except APIError as e:
                self._put_error(item, e)
        self.logger.debug("hits = {}, misses = {}".format(*self.cache.stats()))
class ReadCacheDataBackend(DataBackend):
    """DataBackend wrapper that adds an optional disk-based read cache."""

    def __init__(self, config):
        """Create the disk read cache if (and only if) both config keys are set.

        Raises:
            ConfigurationError: if exactly one of dataBackend.readCache.directory
                and dataBackend.readCache.maximumSize is configured.
        """
        read_cache_directory = config.get('dataBackend.readCache.directory', None, types=str)
        read_cache_maximum_size = config.get(
            'dataBackend.readCache.maximumSize', None, types=int)

        # The two options must be set together: setting only one is a
        # configuration error.  (Explicit boolean XOR replaces the original
        # hard-to-read `A and not B or not A and B` chain.)
        if bool(read_cache_directory) != bool(read_cache_maximum_size):
            raise ConfigurationError(
                'Both dataBackend.readCache.directory and dataBackend.readCache.maximumSize need to be set ' +
                'to enable disk based caching.')

        if read_cache_directory and read_cache_maximum_size:
            os.makedirs(read_cache_directory, exist_ok=True)
            try:
                self._read_cache = Cache(
                    read_cache_directory,
                    size_limit=read_cache_maximum_size,
                    eviction_policy='least-frequently-used',
                    statistics=1,
                )
            except Exception:
                # Best effort: continue without the cache, but keep the
                # traceback in the log so the failure can be diagnosed
                # (previously the cause was silently discarded).
                logger.warning(
                    'Unable to enable disk based read caching. Continuing without it.',
                    exc_info=True,
                )
                self._read_cache = None
            else:
                logger.debug(
                    'Disk based read caching instantiated (cache size {}).'.
                    format(read_cache_maximum_size))
        else:
            self._read_cache = None
        self._use_read_cache = True

        # Start reader and writer threads after the disk cache is created,
        # so that they see it.
        super().__init__(config)

    def _read(self, block, metadata_only):
        """Read a block, preferring the disk cache; populate the cache on a miss.

        Returns the (block, data, metadata) triple from the cache or from the
        wrapped backend; data is None when metadata_only is requested.
        """
        key = self._block_uid_to_key(block.uid)
        metadata_key = key + self._META_SUFFIX

        # Serve from cache only when caching is enabled AND reads are allowed.
        if self._read_cache is not None and self._use_read_cache:
            metadata = self._read_cache.get(metadata_key)
            if metadata and metadata_only:
                return block, None, metadata
            elif metadata:
                data = self._read_cache.get(key)
                if data:
                    return block, data, metadata

        block, data, metadata = super()._read(block, metadata_only)

        # We always put blocks into the cache even when self._use_read_cache
        # is False, so a later re-enable benefits from earlier reads.
        if self._read_cache is not None:
            self._read_cache.set(metadata_key, metadata)
            if not metadata_only:
                self._read_cache.set(key, data)

        return block, data, metadata

    def use_read_cache(self, enable):
        """Toggle cache *reads* (writes always happen); return the previous setting."""
        old_value = self._use_read_cache
        self._use_read_cache = enable
        return old_value

    def close(self):
        """Close the wrapped backend, log cache statistics, then close the cache."""
        super().close()
        if self._read_cache is not None:
            (cache_hits, cache_misses) = self._read_cache.stats()
            logger.debug(
                'Disk based cache statistics (since cache creation): {} hits, {} misses.'
                .format(cache_hits, cache_misses))
            self._read_cache.close()