class CacheManager(object):
    """Callable facade over a joblib.Memory object with a caching-level filter.

    An instance is used as a decorator factory: ``@cache(level=N)`` caches
    the decorated function only when the manager's own ``level`` is at least
    ``N``.  This lets users trade speed against storage by choosing how much
    gets cached.
    """

    def __init__(self, *args, **kwargs):
        """Create the underlying memory object and record the caching level.

        ``level`` (keyword only, default 10) is consumed here; every other
        argument is handed to ``Memory`` unchanged.
        """
        # `level` must be removed before the remaining kwargs reach Memory.
        cache_level = kwargs.pop("level", 10)

        self.memory = Memory(*args, **kwargs)

        # Smaller values mean fewer functions qualify for caching.
        self.level = cache_level

    def __call__(self, level):
        """Return a decorator that caches a function when `level` permits.

        Example usage::

            @cache(level=2)
            def semi_important_function(some_arguments):
                ...
        """

        def decorate(function):
            """Wrap `function` with the memory cache, or return it untouched."""
            # No cache location configured: caching is switched off.
            if self.memory.location is None:
                return function

            if self.level >= level:
                return _decorator_apply(self.memory.cache, function)

            return function

        return decorate

    def clear(self, *args, **kwargs):
        """Delegate to ``self.memory.clear`` with the same arguments."""
        return self.memory.clear(*args, **kwargs)

    def eval(self, *args, **kwargs):
        """Delegate to ``self.memory.eval`` with the same arguments."""
        return self.memory.eval(*args, **kwargs)

    def format(self, *args, **kwargs):
        """Delegate to ``self.memory.format`` with the same arguments."""
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args, **kwargs):
        """Delegate to ``self.memory.reduce_size`` with the same arguments."""
        return self.memory.reduce_size(*args, **kwargs)

    def warn(self, *args, **kwargs):
        """Delegate to ``self.memory.warn`` with the same arguments."""
        return self.memory.warn(*args, **kwargs)
# 'type': 'audio'}, # For use during debugging # from pprint import pprint NOW = datetime.datetime.now() TMPDIR = "/var/tmp" CACHE_DIR_PREFIX = os.getenv("SNAP_USER_COMMON") or TMPDIR TUNEIN_CACHE_LOCATION = os.path.join( CACHE_DIR_PREFIX, "tizonia-" + getpass.getuser() + "-tunein") MEMORY = Memory(TUNEIN_CACHE_LOCATION, compress=9, verbose=0, bytes_limit=10485760) MEMORY.reduce_size() FORMAT = ("[%(asctime)s] [%(levelname)5s] [%(thread)d] " "[%(module)s:%(funcName)s:%(lineno)d] - %(message)s") logging.captureWarnings(True) logging.getLogger().setLevel(logging.DEBUG) if os.environ.get("TIZONIA_TUNEINPROXY_DEBUG"): logging.basicConfig(format=FORMAT) from traceback import print_exception else: logging.getLogger().addHandler(logging.NullHandler()) class ConfigColors:
class Context(object):
    """Handle function disk and memory caching with joblib.

    Memcaching a function is used to save *in memory* the output of the
    function for all passed inputs. All arguments, positional and keyword,
    are used as a dictionary key and therefore must be hashable.
    The contents of the memcache in memory can be persisted to disk with
    `context.save_memcache()` and `context.load_memcache()`.

    Caching a function is used to save *on disk* the output of the function
    for all passed inputs. Input should be hashable. NumPy arrays are
    supported. This is to be preferred over memcache when the inputs or
    outputs are large, and when the computations are longer than loading the
    result from disk.

    Constructor
    -----------

    cache_dir : str
        The directory in which the cache will be created.
    verbose : int
        The verbosity level passed to joblib Memory.

    Examples
    --------

    ```python
    @context.memcache
    def my_function(x):
        return x * x

    @context.cache
    def my_function(x):
        return x * x
    ```

    """

    # Maximum cache size, in bytes.
    cache_limit = 2 * 1024**3  # 2 GB

    # Marker inserted between positional and keyword arguments in memcache
    # keys. A plain string keeps the keys picklable for `save_memcache()`.
    _KWARGS_MARK = '__kwargs__'

    def __init__(self, cache_dir, verbose=0):
        """Create the cache directories and the joblib Memory instance."""
        self.verbose = verbose
        # Make sure the cache directory exists.
        self.cache_dir = Path(cache_dir).expanduser()
        if not self.cache_dir.exists():
            logger.debug("Create cache directory `%s`.", self.cache_dir)
            # exist_ok: another process may create it between check and call.
            os.makedirs(str(self.cache_dir), exist_ok=True)

        # Ensure the memcache directory exists.
        (self.cache_dir / 'memcache').mkdir(exist_ok=True)

        self._set_memory(self.cache_dir)
        self._memcache = {}

    def _set_memory(self, cache_dir):
        """Create the joblib Memory instance rooted at `cache_dir`.

        Sets `self._memory` to None (and logs a warning) when joblib cannot
        be imported.
        """
        # Try importing joblib.
        try:
            from joblib import Memory
            # NOTE(review): recent joblib releases reportedly deprecate
            # passing `bytes_limit` to the constructor in favor of
            # `reduce_size(bytes_limit=...)` — confirm against the joblib
            # version this project pins before changing the call.
            self._memory = Memory(
                location=cache_dir, mmap_mode=None, verbose=self.verbose,
                bytes_limit=self.cache_limit)
            logger.debug("Initialize joblib cache dir at `%s`.", cache_dir)
            logger.debug("Reducing the size of the cache if needed.")
            self._memory.reduce_size()
        except ImportError:  # pragma: no cover
            logger.warning(
                "Joblib is not installed. Install it with `conda install joblib`."
            )
            self._memory = None

    def cache(self, f):
        """Cache a function using the context's cache directory.

        Returns `f` unchanged when joblib is unavailable.
        """
        if self._memory is None:  # pragma: no cover
            logger.debug("Joblib is not installed: skipping caching.")
            return f
        assert f
        # NOTE: discard self in instance methods, so that the instance itself
        # is not part of the cache key.
        has_self = 'self' in inspect.getfullargspec(f).args
        ignore = ['self'] if has_self else None
        return self._memory.cache(f, ignore=ignore)

    def load_memcache(self, name):
        """Load the memcache from disk (pickle file), if it exists.

        The loaded (or new, empty) dictionary is registered under `name` so
        that `save_memcache()` will persist it, and is returned.
        """
        path = self.cache_dir / 'memcache' / (name + '.pkl')
        if path.exists():
            logger.debug("Load memcache for `%s`.", name)
            # NOTE: pickle executes arbitrary code on load; the memcache
            # directory must only contain files written by `save_memcache()`.
            with open(str(path), 'rb') as fd:
                cache = load(fd)
        else:
            cache = {}
        self._memcache[name] = cache
        return cache

    def save_memcache(self):
        """Save the memcache to disk using pickle."""
        for name, cache in self._memcache.items():
            path = self.cache_dir / 'memcache' / (name + '.pkl')
            logger.debug("Save memcache for `%s`.", name)
            with open(str(path), 'wb') as fd:
                dump(cache, fd)

    @classmethod
    def _memcache_key(cls, args, kwargs):
        """Build the dictionary key identifying one call in the memcache.

        Calls without keyword arguments keep the bare `args` tuple as key,
        so memcaches persisted with that layout stay valid. Keyword arguments
        are appended in name order, so their call-site order is irrelevant.
        """
        if not kwargs:
            return args
        # Names are unique, so sorting never has to compare the values.
        return args + (cls._KWARGS_MARK,) + tuple(sorted(kwargs.items()))

    def memcache(self, f):
        """Cache a function in memory using an internal dictionary.

        All arguments (positional and keyword) must be hashable; a
        `TypeError` is raised by the dictionary lookup otherwise.
        """
        name = _fullname(f)
        cache = self.load_memcache(name)

        @wraps(f)
        def memcached(*args, **kwargs):
            """Cache the function in memory."""
            # Keyword arguments are part of the key: two calls differing only
            # in their kwargs must not share a result.
            key = self._memcache_key(args, kwargs)
            # Membership is tested through KeyError rather than a None
            # default, so that a legitimately cached None is a hit.
            try:
                return cache[key]
            except KeyError:
                pass
            out = f(*args, **kwargs)
            cache[key] = out
            return out
        return memcached

    def _get_path(self, name, location, file_ext='.json'):
        """Get the path to the cache file.

        Raises
        ------

        ValueError
            If `location` is neither `local` nor `global`.

        """
        if location == 'local':
            return self.cache_dir / (name + file_ext)
        elif location == 'global':
            return phy_config_dir() / (name + file_ext)
        # Fail here with a clear message instead of returning None and
        # letting the caller crash on `None.parent` / `None.exists()`.
        raise ValueError(
            "Unknown location `%s`: expected `local` or `global`." % (location,))

    def save(self, name, data, location='local', kind='json'):
        """Save a dictionary in a JSON/pickle file within the cache directory.

        Parameters
        ----------

        name : str
            The name of the object to save to disk.
        data : dict
            Any serializable dictionary that will be persisted to disk.
        location : str
            Can be `local` or `global`.
        kind : str
            Can be `json` or `pickle`. Any value other than `json` selects
            pickle.

        """
        file_ext = '.json' if kind == 'json' else '.pkl'
        path = self._get_path(name, location, file_ext=file_ext)
        ensure_dir_exists(path.parent)
        logger.debug("Save data to `%s`.", path)
        if kind == 'json':
            save_json(path, data)
        else:
            save_pickle(path, data)

    def load(self, name, location='local'):
        """Load a dictionary saved in the cache directory.

        The JSON file is tried first, then the pickle file. An empty
        dictionary is returned when neither exists.

        Parameters
        ----------

        name : str
            The name of the object to load from disk.
        location : str
            Can be `local` or `global`.

        """
        path = self._get_path(name, location, file_ext='.json')
        if path.exists():
            return load_json(path)
        path = self._get_path(name, location, file_ext='.pkl')
        if path.exists():
            return load_pickle(path)
        logger.debug("The file `%s` doesn't exist.", path)
        return {}

    def __getstate__(self):
        """Make sure that this class is picklable."""
        state = self.__dict__.copy()
        # The joblib Memory instance is dropped here and rebuilt in
        # `__setstate__`.
        state['_memory'] = None
        return state

    def __setstate__(self, state):
        """Make sure that this class is picklable."""
        self.__dict__ = state
        # Recreate the joblib Memory instance.
        self._set_memory(state['cache_dir'])
class CacheManager(object):
    """Function-like wrapper around joblib.Memory with a level filter.

    Calling an instance with a ``level`` yields a decorator.  Whether that
    decorator actually caches depends on the manager's configured level, so
    users can pick their own balance between speed and storage usage.
    """

    def __init__(self, *args, **kwargs):
        """Build the memory object; ``level`` (default 10) is kept here."""
        # Strip `level` so that only Memory's own arguments are forwarded.
        threshold = kwargs.pop('level', 10)

        self.memory = Memory(*args, **kwargs)

        # Lower thresholds cache less.
        self.level = threshold

    def __call__(self, level):
        """Return a caching decorator gated on `level`.

        Example usage::

            @cache(level=2)
            def semi_important_function(some_arguments):
                ...
        """

        def wrapper(function):
            """Add an input/output cache to `function` when enabled."""
            # Imported on every decoration, before the enable check.
            from decorator import FunctionMaker

            if self.memory.location is None:
                return function

            if self.level >= level:
                # Signature-preserving application of `self.memory.cache`,
                # following the recipe at
                # http://micheles.googlecode.com/hg/decorator/documentation.html#id14
                return FunctionMaker.create(
                    function,
                    'return decorated(%(signature)s)',
                    {'decorated': self.memory.cache(function)},
                    __wrapped__=function,
                )

            return function

        return wrapper

    def clear(self, *args, **kwargs):
        """Pass the call through to ``self.memory.clear``."""
        return self.memory.clear(*args, **kwargs)

    def eval(self, *args, **kwargs):
        """Pass the call through to ``self.memory.eval``."""
        return self.memory.eval(*args, **kwargs)

    def format(self, *args, **kwargs):
        """Pass the call through to ``self.memory.format``."""
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args, **kwargs):
        """Pass the call through to ``self.memory.reduce_size``."""
        return self.memory.reduce_size(*args, **kwargs)

    def warn(self, *args, **kwargs):
        """Pass the call through to ``self.memory.warn``."""
        return self.memory.warn(*args, **kwargs)
class CacheManager(object):
    """Cache manager that makes a joblib.Memory object usable as a decorator.

    The manager carries a caching level.  Each decorated function declares
    its own level, and is cached only if the manager's level is high enough,
    letting users choose between speed and storage usage.
    """

    def __init__(self, *args, **kwargs):
        """Store the caching level (keyword ``level``, default 10).

        All remaining arguments go to ``Memory``.
        """
        requested_level = kwargs.pop('level', 10)

        # Backing memory object, built from the remaining arguments.
        self.memory = Memory(*args, **kwargs)

        # Which data gets cached: smaller numbers mean less caching.
        self.level = requested_level

    def __call__(self, level):
        """Produce a decorator for functions of the given caching `level`.

        Example usage::

            @cache(level=2)
            def semi_important_function(some_arguments):
                ...
        """

        def wrapper(function):
            """Return a cached version of `function`, or `function` itself."""
            from decorator import FunctionMaker

            def decorator_apply(dec, func):
                """Apply `dec` to `func` while keeping `func`'s signature.

                Works even if `dec` is not signature-preserving.  Recipe from
                http://micheles.googlecode.com/hg/decorator/documentation.html#id14
                """
                namespace = dict(decorated=dec(func))
                return FunctionMaker.create(
                    func,
                    'return decorated(%(signature)s)',
                    namespace,
                    __wrapped__=func,
                )

            caching_enabled = (
                self.memory.location is not None and self.level >= level
            )
            return (
                decorator_apply(self.memory.cache, function)
                if caching_enabled
                else function
            )

        return wrapper

    def clear(self, *args, **kwargs):
        """Forward to ``self.memory.clear`` and return its result."""
        return self.memory.clear(*args, **kwargs)

    def eval(self, *args, **kwargs):
        """Forward to ``self.memory.eval`` and return its result."""
        return self.memory.eval(*args, **kwargs)

    def format(self, *args, **kwargs):
        """Forward to ``self.memory.format`` and return its result."""
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args, **kwargs):
        """Forward to ``self.memory.reduce_size`` and return its result."""
        return self.memory.reduce_size(*args, **kwargs)

    def warn(self, *args, **kwargs):
        """Forward to ``self.memory.warn`` and return its result."""
        return self.memory.warn(*args, **kwargs)
"""Feature extraction turn texts into matrices.""" from content_extraction import get_text from basilica import Connection from joblib import Memory from nltk.data import load as nltk_load from numpy import array from os import environ _sent_detector = nltk_load("tokenizers/punkt/english.pickle") _memory = Memory(cachedir="feature-cache-basilica", verbose=1, bytes_limit=10 ** 9) _memory.reduce_size() @_memory.cache(ignore=["entry"]) def entry2mat(entry, url): """Transform a feed entry into a matrix. Only one of the two parameters needs to be supplied. Parameters ---------- entry : feedcache feed entry The entry to transform. url : type The url pointing to the content of the entry . Returns ------- numpy matrix