def __init__(self, *, config: Config, name: str, storage_id: int, module_configuration: ConfigDict) -> None:
    read_cache_directory = Config.get_from_dict(module_configuration, 'readCache.directory', None, types=str)
    read_cache_maximum_size = Config.get_from_dict(module_configuration, 'readCache.maximumSize', None, types=int)
    read_cache_shards = Config.get_from_dict(module_configuration, 'readCache.shards', None, types=int)

    if read_cache_directory and read_cache_maximum_size:
        os.makedirs(read_cache_directory, exist_ok=True)
        try:
            self._read_cache = FanoutCache(
                read_cache_directory,
                size_limit=read_cache_maximum_size,
                shards=read_cache_shards,
                eviction_policy='least-frequently-used',
                statistics=1,
            )
        except Exception:
            logger.warning('Unable to enable disk based read caching. Continuing without it.')
            self._read_cache = None
        else:
            logger.debug('Disk based read caching instantiated (cache size {}, shards {}).'.format(
                read_cache_maximum_size, read_cache_shards))
    else:
        self._read_cache = None
    self._use_read_cache = True

    # Start reader and writer threads after the disk cache is created, so that they see it.
    super().__init__(config=config, name=name, storage_id=storage_id, module_configuration=module_configuration)
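# A minimal, self-contained sketch of the read-through pattern such a read cache
# enables; fetch_from_storage, the cache directory, and the size limit are
# illustrative stand-ins, not part of the backend above.
from diskcache import FanoutCache

read_cache = FanoutCache('/tmp/read-cache-demo',
                         size_limit=64 * 1024 * 1024,
                         eviction_policy='least-frequently-used')

def fetch_from_storage(key):
    # Placeholder for the slow backend read the cache is meant to avoid.
    return ('payload-for-%s' % key).encode()

def cached_read(key):
    data = read_cache.get(key)
    if data is None:  # miss: go to storage and remember the result
        data = fetch_from_storage(key)
        read_cache.set(key, data)
    return data

assert cached_read('block-0') == cached_read('block-0')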
def __init__(self, *args, directory=None, shards=8, **kwargs: Any) -> None:
    self.__is_init = False
    self._set_locks: Dict[str, asyncio.Lock] = {}
    self._sharded = shards > 1
    if not self._sharded:
        self._cache = Cache(directory=directory, **kwargs)
    else:
        self._cache = FanoutCache(directory=directory, shards=shards, **kwargs)
    super().__init__()
def worker(queue, eviction_policy, processes, threads):
    timings = {'get': [], 'set': [], 'delete': []}
    cache = FanoutCache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        if action == 'set':
            cache.set(key, value, expire=EXPIRE)
        elif action == 'get':
            result = cache.get(key)
        else:
            assert action == 'delete'
            cache.delete(key)

        stop = time.time()

        if (action == 'get' and processes == 1 and threads == 1 and EXPIRE is None):
            assert result == value

        if index > WARMUP:
            timings[action].append(stop - start)

    queue.put(timings)
    cache.close()
def cache_pop(key, timeout_seconds=0.3, cache_dirname='/tmp/lccserver-cache'):
    '''
    This pops the value of the specified key from the cache and removes it.
    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)
    val = cache.pop(key)
    cache.close()
    return val
def cache_flush(timeout_seconds=0.3, cache_dirname='/tmp/lccserver-cache'):
    '''
    This removes all keys from the cache.
    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)
    items_removed = cache.clear()
    cache.close()
    return items_removed
def __init__(self):
    """
    Function used to initialise the data manager.

    ** Modifications **

        1. Modify the '_transmission_keys' set to specify which values should be
           transmitted to each component.
    """

    # Declare dictionaries of data
    self._surface = FanoutCache(path.join("cache", "surface"), shards=2)
    self._arduino_T = FanoutCache(path.join("cache", "arduino_t"), shards=2)
    self._arduino_A = FanoutCache(path.join("cache", "arduino_a"), shards=2)
    self._arduino_M = FanoutCache(path.join("cache", "arduino_m"), shards=2)
    self._arduino_I = FanoutCache(path.join("cache", "arduino_i"), shards=2)

    # Create a dictionary mapping each index to corresponding location
    self._data = {
        SURFACE: self._surface,
        ARDUINO_T: self._arduino_T,
        ARDUINO_A: self._arduino_A,
        ARDUINO_M: self._arduino_M,
        ARDUINO_I: self._arduino_I
    }

    # Create a dictionary mapping each index to a set of networking keys
    self._transmission_keys = {
        SURFACE: {
            "status_T", "status_A", "status_M", "status_I",
            "error_T", "error_A", "error_M", "error_I",
            "Sen_IMU_X", "Sen_IMU_Y", "Sen_IMU_Z", "Sen_IMU_Temp"
        },
        ARDUINO_T: {
            "Thr_FP", "Thr_FS", "Thr_AP", "Thr_AS",
            "Thr_TFP", "Thr_TFS", "Thr_TAP", "Thr_TAS"
        },
        ARDUINO_A: {"Mot_R", "Mot_G", "Mot_F", "LED_M"},
        ARDUINO_M: {"Thr_M"},
        ARDUINO_I: set()
    }

    # Create a key to ID lookup for performance reasons
    self._keys_lookup = {
        v: k for k, values in self._transmission_keys.items() if k != SURFACE for v in values
    }
def cache_delete(key, timeout_seconds=0.3, cache_dirname='/tmp/authnzerver-cache'):
    '''
    This deletes the specified key from the cache.
    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)
    deleted = cache.delete(key)
    cache.close()
    return deleted
class LocalDiskCache(CacheBase):
    def __init__(self, path, size_limit_bytes, expected_row_size_bytes, shards=6, cleanup=False, **settings):
        """LocalDiskCache is an adapter to a diskcache implementation.

        LocalDiskCache can be used by a petastorm Reader class to temporarily keep parts of the dataset
        on a local file system storage.

        :param path: Path where the dataset cache is being stored.
        :param size_limit_bytes: Maximal size of the disk-space to be used by cache. The size of the cache
          may actually grow somewhat above the size_limit_bytes, so the limit is not very strict.
        :param expected_row_size_bytes: Approximate size of a single row. This argument is used to perform a
          sanity check on the capacity of individual shards.
        :param shards: Cache can be sharded. Larger number of shards improve writing parallelism.
        :param cleanup: If set to True, cache directory would be removed when cleanup() method is called.
        :param settings: these parameters passed-through to the diskcache.Cache class.
          For details, see: http://www.grantjenks.com/docs/diskcache/tutorial.html#settings
        """
        if size_limit_bytes / shards < 5 * expected_row_size_bytes:
            raise ValueError(
                'Condition \'size_limit_bytes / shards >= 5 * expected_row_size_bytes\' needs to hold, '
                'otherwise, newly added cached values might end up being immediately evicted.'
            )

        default_settings = {
            'size_limit': size_limit_bytes,
            'eviction_policy': 'least-recently-stored',
        }
        default_settings.update(settings)

        self._cleanup = cleanup
        self._path = path
        self._cache = FanoutCache(path, shards, **default_settings)

    def get(self, key, fill_cache_func):
        value = self._cache.get(key, default=None)
        if value is None:
            value = fill_cache_func()
            self._cache.set(key, value)
        return value

    def cleanup(self):
        if self._cleanup:
            shutil.rmtree(self._path)
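# A short usage sketch of the LocalDiskCache adapter defined above; the cache
# path, the size figures, and the loader function are illustrative only.
def load_row_from_storage():
    # Placeholder for the expensive read the cache is meant to avoid.
    return {'id': 1, 'payload': b'row-bytes'}

cache = LocalDiskCache('/tmp/petastorm_cache_demo',
                       size_limit_bytes=2 ** 30,         # 1 GiB total
                       expected_row_size_bytes=2 ** 20,  # roughly 1 MiB per row
                       shards=6,
                       cleanup=True)

row = cache.get('row-1', load_row_from_storage)        # first call fills the cache
row_again = cache.get('row-1', load_row_from_storage)  # second call is a hit
cache.cleanup()  # removes the cache directory because cleanup=True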
def worker(queue, eviction_policy):
    timings = {'get': [], 'set': [], 'delete': []}
    cache = FanoutCache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        if action == 'set':
            cache.set(key, value, expire=EXPIRE)
        elif action == 'get':
            result = cache.get(key)
        else:
            assert action == 'delete'
            cache.delete(key)

        stop = time.time()

        if action == 'get' and PROCESSES == 1 and THREADS == 1 and EXPIRE is None:
            assert result == value

        if index > WARMUP:
            timings[action].append(stop - start)

    queue.put(timings)
    cache.close()
def cache_add(key, value, timeout_seconds=0.3, expires_seconds=None,
              cache_dirname='/tmp/authnzerver-cache'):
    '''
    This adds the key with the specified value to the cache, if the key is not
    already present.
    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)
    added = cache.add(key, value, expire=expires_seconds)
    cache.close()
    return added
def __init__(self, model, cache_dir, chunk_size=1000, max_workers=None):
    self.model = model
    self.chunk_size = chunk_size
    self.cache_dir = cache_dir
    self.cache = FanoutCache(cache_dir, shards=24, size_limit=2**32)  # 4GB cache
    self.max_workers = max_workers
def init_cache(self, shards: int = 0, timeout: int = 0, cache: FanoutCache = None):
    """
    Init or reset (NOT clear!) the underlying DiskCache based on parameters and object attributes.
    If an old cache already exists and the number of shards has changed, this COPIES values into the
    new cache to duplicate them.

    Does NOT change the cache_path! That means the old cache will be REPLACED by the new one on disk.

    If you supply a new `diskcache.FanoutCache` object it MUST point to `self.cache_path`!
    Otherwise leave `cache` unset and this method will create a new `FanoutCache` for you.

    :param int shards: If `0` then use `self.shards`
    :param int timeout: If `0` then use `self.timeout`
    :param cache: If `None` then create new `FanoutCache`
    """
    shards = shards or self.shards
    timeout = timeout or self.timeout

    # Allows the calling function to supply a FanoutCache to REPLACE the one on disk
    if cache and cache.directory != self.cache_path:
        raise Exception("Given DiskCache and DataStore paths do not match!")
    cache = cache or FanoutCache(self.cache_path, shards=shards, timeout=timeout, tag_index=self.tag_index)

    # Already have a cache and number of shards changed? We'll have to copy.
    if self.cache and shards != self.shards:
        DataStore.copy_cache(self.cache, cache)

    self.cache = cache
    self.close()
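# The copy step used above (DataStore.copy_cache) is not shown in this snippet.
# Below is a minimal, illustrative sketch of such a key-by-key copy between two
# differently-sharded FanoutCache instances; the helper name copy_entries and
# the directory names are assumptions, not the project's actual API.
import time
from diskcache import FanoutCache

def copy_entries(src, dst):
    """Re-insert every key from src into dst, carrying over tags and remaining TTL."""
    for key in src:
        value, expire_at, tag = src.get(key, expire_time=True, tag=True)
        # get() reports an absolute expiry timestamp; set() expects seconds from now.
        ttl = None if expire_at is None else max(0.0, expire_at - time.time())
        dst.set(key, value, expire=ttl, tag=tag)

old_cache = FanoutCache('cache-old', shards=4)
new_cache = FanoutCache('cache-new', shards=16)
old_cache.set('answer', 42)
copy_entries(old_cache, new_cache)
assert new_cache.get('answer') == 42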
def stress_test(
    create=True,
    delete=True,
    eviction_policy=u'least-recently-stored',
    processes=1,
    threads=1,
):
    shutil.rmtree('tmp', ignore_errors=True)

    if processes == 1:
        # Use threads.
        func = threading.Thread
    else:
        func = mp.Process

    subprocs = [
        func(target=dispatch, args=(num, eviction_policy, processes, threads))
        for num in range(processes)
    ]

    if create:
        operations = list(all_ops())
        process_queue = [[] for _ in range(processes)]

        for index, ops in enumerate(operations):
            process_queue[index % processes].append(ops)

        for num in range(processes):
            with open('input-%s.pkl' % num, 'wb') as writer:
                pickle.dump(process_queue[num], writer, protocol=2)

    for process in subprocs:
        process.start()

    for process in subprocs:
        process.join()

    with FanoutCache('tmp') as cache:
        warnings.simplefilter('error')
        warnings.simplefilter('ignore', category=UnknownFileWarning)
        warnings.simplefilter('ignore', category=EmptyDirWarning)
        cache.check()

    timings = {'get': [], 'set': [], 'delete': [], 'self': 0.0}

    for num in range(processes):
        with open('output-%s.pkl' % num, 'rb') as reader:
            data = pickle.load(reader)
            for key in data:
                timings[key] += data[key]

    if delete:
        for num in range(processes):
            os.remove('input-%s.pkl' % num)
            os.remove('output-%s.pkl' % num)

    display(eviction_policy, timings)

    shutil.rmtree('tmp', ignore_errors=True)
def getCache(scope_str):
    return FanoutCache(
        'data/cache/' + scope_str,
        disk=GzipDisk,
        shards=64,
        timeout=1,
        size_limit=3e11,
        # disk_min_file_size=2**20,
    )

# def disk_cache(base_path, memsize=2):
#     def disk_cache_decorator(f):
#         @functools.wraps(f)
#         def wrapper(*args, **kwargs):
#             args_str = repr(args) + repr(sorted(kwargs.items()))
#             file_str = hashlib.md5(args_str.encode('utf8')).hexdigest()
#
#             cache_path = os.path.join(base_path, f.__name__, file_str + '.pkl.gz')
#
#             if not os.path.exists(os.path.dirname(cache_path)):
#                 os.makedirs(os.path.dirname(cache_path), exist_ok=True)
#
#             if os.path.exists(cache_path):
#                 return pickle_loadgz(cache_path)
#             else:
#                 ret = f(*args, **kwargs)
#                 pickle_dumpgz(cache_path, ret)
#                 return ret
#
#         return wrapper
#
#     return disk_cache_decorator
#
#
# def pickle_dumpgz(file_path, obj):
#     log.debug("Writing {}".format(file_path))
#     with open(file_path, 'wb') as file_obj:
#         with gzip.GzipFile(mode='wb', compresslevel=1, fileobj=file_obj) as gz_file:
#             pickle.dump(obj, gz_file, pickle.HIGHEST_PROTOCOL)
#
#
# def pickle_loadgz(file_path):
#     log.debug("Reading {}".format(file_path))
#     with open(file_path, 'rb') as file_obj:
#         with gzip.GzipFile(mode='rb', fileobj=file_obj) as gz_file:
#             return pickle.load(gz_file)
#
#
# def dtpath(dt=None):
#     if dt is None:
#         dt = datetime.datetime.now()
#
#     return str(dt).rsplit('.', 1)[0].replace(' ', '--').replace(':', '.')
#
#
# def safepath(s):
#     s = s.replace(' ', '_')
#     return re.sub('[^A-Za-z0-9_.-]', '', s)
def getCache(scope_str):
    return FanoutCache('../data-luna/cache/' + scope_str,
                       disk=GzipDisk,
                       shards=64,
                       timeout=1,
                       size_limit=3e11,
                       # disk_min_file_size=2**20,
                       )
def make_matcher():
    lookup = get_title_lookup()
    cache = FanoutCache('title_cache', shards=24)
    matcher = layered_matcher([
        exact_matcher(lookup),
        title_matcher(lookup, punct_lookup(cache, lookup))
    ])
    return matcher
def get_cache(value: str) -> FanoutCache:
    return FanoutCache(
        get_cache_path(value),
        size_limit=int(1e12),  # 1000 GB
        cull_limit=0,
        shards=8,
        sqlite_mmap_size=256,
    )
def getCache(scope_str):
    return FanoutCache(
        "data-unversioned/cache/" + scope_str,
        disk=GzipDisk,
        shards=64,
        timeout=1,
        size_limit=3e11,
        # disk_min_file_size=2**20,
    )
def getCache(scope_str):
    return FanoutCache(
        'data-unversioned/cache/' + scope_str,
        disk=GzipDisk,
        shards=128,
        timeout=1,
        size_limit=2e11,
        disk_min_file_size=2**20,
    )
def use_diskcache(self, **kwargs):
    """
    use the PyPi package 'diskcache' as the main store
    """
    try:
        from diskcache import FanoutCache as Cache

        # patch the class so it has "exists" method
        Cache.exists = Cache.__contains__

        cachedir = kwargs.pop("cachedir", "cache")
        self._cache = Cache(cachedir, **kwargs)

        file_cachedir = kwargs.pop("file_cachedir", "files_cache")
        self._file_cache = Cache(file_cachedir, **kwargs)

        self._cache_engine = "diskcache"
        return
    except Exception as exc:
        warning = "diskcache connection failed with:\n\t" + str(exc)
        warning += "\nFalling back to in-memory cache."
        warnings.warn(warning)
        self.use_memory()
def __init__(self):
    """
    Constructor function used to initialise the data manager.

    Adjust the `shards` amount in the cache constructor to increase or decrease the amount of
    parallelism in data-related computations, as well as modify the `self._transmission_keys`
    set to specify which data should be networked to the middle-level software.
    """

    # Initialise the data cache
    self._data = FanoutCache(CACHE_PATH, shards=8)

    # Create a set of keys matching data which should be sent over the network
    self._transmission_keys = {
        "Thr_FP", "Thr_FS", "Thr_AP", "Thr_AS",
        "Thr_TFP", "Thr_TFS", "Thr_TAP", "Thr_TAS",
        "Mot_R", "Mot_G", "Mot_F"
    }

    # Initialise safeguard-related fields
    self._init_safeguards()
class ExpiringCache:

    __slots__ = ["cache"]

    def __init__(self, cache_path):
        self.cache = FanoutCache(directory=cache_path, timeout=2, retry=True)

    def set(self, domain, domain_record, seconds_to_live, tag=None):
        self.cache.set(domain, domain_record.json, expire=seconds_to_live, tag=tag)

    def get(self, key):
        return self.cache.get(key)

    def cache_dump(self, offset=0, limit=100):
        res = []
        for eachkey in self.cache:
            if offset != 0:
                offset -= 1
                continue
            if limit == 0:
                break
            val, expires = self.cache.get(eachkey, expire_time=True)
            if expires:
                exp_date = datetime.datetime.fromtimestamp(expires)
            else:
                exp_date = "Never"
            res.append({
                "domain": eachkey,
                "cache_value": val,
                "cache_expires": exp_date
            })
            limit -= 1
        return res

    def cache_get(self, key):
        val, expires = self.cache.get(key, expire_time=True)
        if not val:
            return {"text": "That domain is not in the database."}
        if expires:
            exp_date = datetime.datetime.fromtimestamp(expires)
        else:
            exp_date = "Never"
        res = {"domain": key, "cache_value": val, "cache_expires": exp_date}
        return res

    def cache_info(self):
        hits, miss = self.cache.stats()
        size = self.cache.volume()
        warnings = self.cache.check(fix=True)
        return {"hit": hits, "miss": miss, "size": size, "warnings": warnings}
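# A short usage sketch of the ExpiringCache class above. The record object here
# is a stand-in for whatever type the caller stores (only its .json attribute is
# read by set()), and the cache path and TTL are illustrative.
from types import SimpleNamespace

record = SimpleNamespace(json='{"domain": "example.com", "rank": 1}')

cache = ExpiringCache("/tmp/expiring-cache-demo")
cache.set("example.com", record, seconds_to_live=3600, tag="demo")

print(cache.get("example.com"))        # the stored JSON string
print(cache.cache_get("example.com"))  # value plus its expiry timestamp
print(cache.cache_info())              # hit/miss counters, volume, warnings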
def cache_getrate(key, timeout_seconds=0.3, cache_dirname='/tmp/lccserver-cache'):
    '''This gets the rate of increment for the key by looking at the time of
    insertion stored at the key itself and the number of times it was
    incremented in key-counter.

    The rate is then:

    key-counter_val/((time_now - time_insertion)/60.0)

    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)

    # get the counter value
    counter_val = cache.get('%s-counter' % key, default=0)

    # get the time of insertion that we stored at the key itself
    time_of_insertion = cache.get(key, default=None)

    if time_of_insertion is not None:
        rate = (counter_val / (time.time() - time_of_insertion)) * 60.0
        insertion_isoformat = datetime.fromtimestamp(time_of_insertion).isoformat()
    else:
        rate = 0.0
        insertion_isoformat = None

    cache.close()

    return (rate, counter_val, insertion_isoformat)
def __init__(self, vec_path, cache_dir, sep=' ', chunk_size=1000, max_workers=None):
    self.embedding = Embedding(vec_path, sep=sep)
    self.chunk_size = chunk_size
    self.vec_path = vec_path
    self.sep = sep
    self.cache = FanoutCache(cache_dir, shards=24, size_limit=2**32)
    self.cache_dir = cache_dir
    self.max_workers = max_workers
def cache_decrement(key, timeout_seconds=0.3, cache_dirname='/tmp/lccserver-cache'):
    '''
    This decrements the counter for key.
    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)

    decremented_val = cache.decr('%s-counter' % key)

    # if the counter hits zero, delete the key entirely from the cache
    if decremented_val == 0:
        cache.delete(key)
        cache.delete('%s-counter' % key)
        decremented_val = 0

    cache.close()
    return decremented_val
def getCache(scope_str):
    # Built atop Cache is diskcache.FanoutCache which automatically
    # shards the underlying database. Sharding is the practice of
    # horizontally partitioning data. Here it is used to decrease
    # blocking writes. While readers and writers do not block each
    # other, writers block other writers. Therefore a shard for every
    # concurrent writer is suggested. This will depend on your scenario.
    # The default value is 8.

    # timeout sets a limit on how long to wait for database
    # transactions.

    # size_limit is used as the total size of the cache. The size limit
    # of individual cache shards is the total size divided by the number
    # of shards.
    return FanoutCache("data-unversioned/cache/" + scope_str,
                       disk=GzipDisk,
                       shards=64,
                       timeout=1,
                       size_limit=2e11)
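# A minimal sketch of how a cache built this way is typically used: memoizing an
# expensive loading function so repeat calls are served from disk. The scope name
# and the loader below are illustrative, and a plain FanoutCache is used here so
# the sketch does not depend on the project's GzipDisk subclass.
from diskcache import FanoutCache

demo_cache = FanoutCache('data-unversioned/cache/demo', shards=4, timeout=1)

@demo_cache.memoize(typed=True)
def load_expensive(series_uid):
    # Placeholder for an expensive load; only executed on a cache miss.
    return series_uid.upper()

print(load_expensive('abc'))  # computed
print(load_expensive('abc'))  # served from the on-disk cache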
def initialize_cache(self, shards=None, timeout=1, queue=False):
    self.reset_cache()

    # Create a temporary directory for the cache
    self.cache_directory = mkdtemp()

    # Create a queue?
    if queue:
        self.cache = Deque(directory=self.cache_directory)
        self.queue = True
    elif shards:
        self.cache = FanoutCache(self.cache_directory, shards=shards, timeout=timeout)
        self.queue = False
    else:
        self.cache = Cache(self.cache_directory, timeout=timeout)
        self.queue = False

    return self.cache
def cache_increment(key, timeout_seconds=0.3, cache_dirname='/tmp/lccserver-cache'):
    '''
    This sets up a counter for the key in the cache.

    Sets the key -> time of initial insertion.
    Then increments 'key-counter'.
    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)

    # add the key and its insertion time if not already present
    cache.add(key, time.time())

    # increment the counter in either case
    key_count = cache.incr('%s-counter' % key)

    cache.close()
    return key_count
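# Taken together, the increment/decrement/getrate helpers above can back a simple
# per-key rate limit. A minimal sketch, assuming the three functions live in the
# same module; the 120-requests-per-minute threshold is illustrative.
def check_rate_limit(ip_address, max_per_minute=120):
    # Record this request, then read back the per-minute rate for the key.
    cache_increment(ip_address)
    rate, counter, first_seen = cache_getrate(ip_address)

    # Reject the request if the observed rate exceeds the threshold.
    return rate <= max_per_minute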
# coding:utf-8
from timeit import Timer
from diskcache import FanoutCache
import sys

sys.setrecursionlimit(3000)

# Location of the cache's temporary files on disk
cache = FanoutCache('tmp/diskcache/fanoutcache')

@cache.memoize(typed=True, expire=None, tag='fib_disk')
def fib_disk(n):
    if n <= 2:
        return 1
    else:
        return fib_disk(n - 1) + fib_disk(n - 2)

if __name__ == "__main__":
    t1 = Timer("fib_disk(100)", "from __main__ import fib_disk")
    print("fib_disk(100)", t1.timeit(number=1000), "seconds")

# Measured results:
# fib_disk(100) 0.12914266199999996 seconds
# fib_disk(100) 0.129672597 seconds
def getCache(scope_str):
    return FanoutCache('data/cache/' + scope_str,
                       disk=GzipDisk,
                       shards=32,
                       timeout=1,
                       size_limit=2e11)
# -*- coding: utf-8 -*-
"""
Helper functions.
"""
import re
import os
import errno

import pandas

from diskcache import FanoutCache

CACHE = FanoutCache('../cache')


def mkdirs(path):
    """Make directories if they do not exist."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def write_to_csv(df, *path_parts):
    """Save a dataframe to CSV."""
    here = os.path.abspath(os.path.dirname(__file__))
    _dir = os.path.join(os.path.dirname(here), *path_parts[:-1])
    fn = path_parts[-1]
    mkdirs(_dir)
    out_path = os.path.join(_dir, fn)
    df.to_csv(out_path, encoding='utf-8', index=False)
    print('CSV file saved to {}'.format(out_path))