Example #1
import json


class Cache(object):
    def __init__(self):
        # Imported locally because this wrapper shadows the `Cache` name.
        from diskcache import Cache
        self.cache = Cache('/tmp/navan')
        self.cache.stats(enable=True)

    def get(self, *args):
        # Positional arguments are joined with ':' to form the cache key.
        return self.cache.get(':'.join(args))

    def set(self, *args, **kwargs):
        expire = kwargs.get('expire')
        if len(args) < 2:
            raise ValueError('cache set must contain `key` and `value`')
        # Everything but the last argument is the key; the last is the value.
        key, value = args[:-1], args[-1]
        key = ':'.join(key)
        return self.cache.set(key, value, expire)

    def get_json(self, *args):
        ret = self.get(*args)
        if not ret:
            return ret
        return json.loads(ret)

    def set_json(self, *args, **kwargs):
        args = list(args)
        args[-1] = json.dumps(args[-1])
        return self.set(*args, **kwargs)
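
The wrapper above turns every leading positional argument into one colon-delimited key, so callers get cheap namespacing for free. A minimal usage sketch (the key segments and values below are illustrative only):

cache = Cache()

# 'user' and '42' are joined into the key 'user:42'.
cache.set('user', '42', 'Ada Lovelace', expire=300)
print(cache.get('user', '42'))  # -> 'Ada Lovelace'

# The *_json helpers round-trip structured values through json.dumps/loads.
cache.set_json('user', '42', 'prefs', {'theme': 'dark'})
print(cache.get_json('user', '42', 'prefs'))  # -> {'theme': 'dark'}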
Example #2
class CachedClient(Client):
    def __init__(self, apikey, agent="unknown", host=None, cache_dir=None):
        super().__init__(apikey, agent=agent, host=host)
        self.cache = Cache(cache_dir,
                           disk=VtCache,
                           disk_compress_level=6,
                           tag_index=True)
        self.cache_dir = cache_dir
        self.logger = logging.getLogger('kfinny.cachedvt.CachedClient')

    def _get(self, resource):
        data, tag = self.cache.get(resource, tag=True)
        # sha1 and md5 entries are aliases that hold the object's sha256 key.
        if data and tag in ('sha1', 'md5'):
            data, tag = self.cache.get(data, tag=True)
        if data and tag == 'object':
            data = Object.from_dict(data)
        return data, tag

    def _put_object(self, obj):
        self.cache.set(obj.sha256, obj.to_dict(), tag='object')
        self.cache.set(obj.sha1, obj.sha256, tag='sha1')
        self.cache.set(obj.md5, obj.sha256, tag='md5')

    def _put_error(self, resource, error):
        self.cache.set(resource, {
            'resource': resource,
            'code': error.code,
            'message': error.message
        }, tag='error')

    def yield_file_report(self, resource, include_notfound=False):
        queryset = set()
        if isinstance(resource, str):
            resource = resource.split(',')
        if isinstance(resource, (tuple, list, set, frozenset)):
            for r in resource:
                data, tag = self._get(r)
                if data is not None:
                    if tag == 'object' or include_notfound:
                        yield data
                else:
                    queryset.add(r)
        # Anything not found in the cache is fetched from the API and cached.
        resource = sorted(queryset)
        for i in resource:
            try:
                obj = self.get_object(f'/files/{i}')
                self._put_object(obj)
                yield obj
            except APIError as e:
                self._put_error(i, e)
        self.logger.debug("hits = {}, misses = {}".format(*self.cache.stats()))
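
The interesting trick here is the alias scheme: the full report is stored once under its sha256, while the sha1 and md5 keys store only a pointer to that sha256 entry, so a lookup by any of the three hashes lands on the same record. A stripped-down sketch of the same pattern with a bare diskcache Cache (the directory and hash values are placeholders):

from diskcache import Cache

cache = Cache('/tmp/vt-cache')
report = {'sha256': 'c0ffee...', 'sha1': 'deadbeef...', 'md5': 'abad1dea...'}

# The canonical record lives under the sha256; weaker hashes are aliases.
cache.set(report['sha256'], report, tag='object')
cache.set(report['sha1'], report['sha256'], tag='sha1')
cache.set(report['md5'], report['sha256'], tag='md5')

# One extra hop resolves an alias to the canonical record.
value, tag = cache.get(report['md5'], tag=True)
if tag in ('sha1', 'md5'):
    value, tag = cache.get(value, tag=True)
assert tag == 'object'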
Example #3
class ReadCacheDataBackend(DataBackend):
    def __init__(self, config):
        read_cache_directory = config.get('dataBackend.readCache.directory',
                                          None,
                                          types=str)
        read_cache_maximum_size = config.get(
            'dataBackend.readCache.maximumSize', None, types=int)

        # Setting exactly one of the two options is a configuration error.
        if bool(read_cache_directory) != bool(read_cache_maximum_size):
            raise ConfigurationError(
                'Both dataBackend.readCache.directory and dataBackend.readCache.maximumSize need to be set '
                + 'to enable disk based caching.')

        if read_cache_directory and read_cache_maximum_size:
            os.makedirs(read_cache_directory, exist_ok=True)
            try:
                self._read_cache = Cache(
                    read_cache_directory,
                    size_limit=read_cache_maximum_size,
                    eviction_policy='least-frequently-used',
                    statistics=1,
                )
            except Exception:
                logger.warning(
                    'Unable to enable disk based read caching. Continuing without it.'
                )
                self._read_cache = None
            else:
                logger.debug(
                    'Disk based read caching instantiated (cache size {}).'.format(
                        read_cache_maximum_size))
        else:
            self._read_cache = None
        self._use_read_cache = True

        # Start reader and writer threads after the disk cache is created, so that they see it.
        super().__init__(config)

    def _read(self, block, metadata_only):
        key = self._block_uid_to_key(block.uid)
        metadata_key = key + self._META_SUFFIX
        if self._read_cache is not None and self._use_read_cache:
            metadata = self._read_cache.get(metadata_key)
            if metadata and metadata_only:
                return block, None, metadata
            elif metadata:
                data = self._read_cache.get(key)
                if data:
                    return block, data, metadata

        block, data, metadata = super()._read(block, metadata_only)

        # We always put blocks into the cache even when self._use_read_cache is False
        if self._read_cache is not None:
            self._read_cache.set(metadata_key, metadata)
            if not metadata_only:
                self._read_cache.set(key, data)

        return block, data, metadata

    def use_read_cache(self, enable):
        old_value = self._use_read_cache
        self._use_read_cache = enable
        return old_value

    def close(self):
        super().close()
        if self._read_cache is not None:
            (cache_hits, cache_misses) = self._read_cache.stats()
            logger.debug(
                'Disk based cache statistics (since cache creation): {} hits, {} misses.'
                .format(cache_hits, cache_misses))
            self._read_cache.close()
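
The _read path is a textbook read-through cache: consult the cache first, fall back to the real backend on a miss, and repopulate so the next read hits. A stripped-down sketch of the same idiom with a bare diskcache Cache (fetch_from_backend and the directory are hypothetical stand-ins):

from diskcache import Cache

cache = Cache('/tmp/read-cache', size_limit=2**30,
              eviction_policy='least-frequently-used', statistics=1)

def read_through(key, fetch_from_backend):
    """Return the cached value for key, falling back to the backend on a miss."""
    data = cache.get(key)
    if data is not None:
        return data
    data = fetch_from_backend(key)
    cache.set(key, data)  # repopulate so the next read is a hit
    return data

hits, misses = cache.stats()  # tracking enabled by statistics=1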