def trim_caches(caches, path, min_free_space, max_age_secs):
    """Trims multiple caches.

    The goal here is to coherently trim all caches in a coherent LRU fashion,
    deleting older items independent of which container they belong to.

    Two policies are enforced first:
    - max_age_secs
    - min_free_space

    Once that's done, then we enforce each cache's own policies.

    Arguments:
      caches: iterable of cache objects; each must support len(),
          get_oldest() (timestamp of its oldest item), remove_oldest() and
          trim().
      path: location handed to file_path.get_free_space() to measure the free
          disk space.
      min_free_space: free space to reach; a falsy value disables the check
          and the disk is never queried.
      max_age_secs: maximum item age in seconds; a falsy value disables the
          check.

    Returns:
      List containing the size of all items evicted: first the values returned
      by remove_oldest() during the global pass, then whatever each cache's
      trim() returned.
    """
    min_ts = time.time() - max_age_secs if max_age_secs else 0
    free_disk = file_path.get_free_space(path) if min_free_space else 0
    total = []
    # Gate on the policies themselves rather than on the measured values, so
    # that a completely full disk (free_disk == 0) still gets trimmed.
    if max_age_secs or min_free_space:
        while True:
            oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
            if not oldest:
                break
            # Only the single oldest entry is needed. min() returns the first
            # minimal element, which is what a stable sort would put first.
            c, ts = min(oldest, key=lambda item: item[1])
            if ts >= min_ts and free_disk >= min_free_space:
                break
            total.append(c.remove_oldest())
            if min_free_space:
                free_disk = file_path.get_free_space(path)
    # Evaluate each cache's own policies.
    for c in caches:
        total.extend(c.trim())
    return total
def trim(self, min_free_space):
    """Purges cache.

    Removes cache directories that were not accessed for a long time
    until there is enough free space and the number of caches is sane.

    If min_free_space is None, disk free space is not checked.

    Requires NamedCache to be open.

    Arguments:
      min_free_space: free space to reach on the volume holding
          self.root_dir, or None to only enforce MAX_CACHE_SIZE.

    Returns:
      None.
    """
    self._lock.assert_locked()
    if not os.path.isdir(self.root_dir):
        return

    free_space = 0
    if min_free_space is not None:
        # Keep the measurement: without it free_space stays at 0 and the loop
        # below always deletes at least one cache, even on an empty disk.
        free_space = file_path.get_free_space(self.root_dir)
    while ((min_free_space is not None and free_space < min_free_space)
           or len(self._lru) > MAX_CACHE_SIZE):
        try:
            name, (path, _) = self._lru.get_oldest()
        except KeyError:
            # Nothing left to delete.
            return
        # Drop the named symlink first, then the directory it refers to.
        named_dir = self._get_named_path(name)
        if fs.islink(named_dir):
            fs.unlink(named_dir)
        path_abs = os.path.join(self.root_dir, path)
        if os.path.isdir(path_abs):
            file_path.rmtree(path_abs)
        if min_free_space is not None:
            free_space = file_path.get_free_space(self.root_dir)
        self._lru.pop(name)
def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
    """Sets up the cache rooted at |cache_dir| and loads its saved state.

    Arguments:
      cache_dir: directory where to place the cache.
      policies: CachePolicies instance, cache retention policies.
      hash_algo: hashing algorithm used.
      trim: if True to enforce |policies| right away. It can be done later by
          calling trim() explicitly.
      time_fn: optional value forwarded unchanged to self._load().
    """
    # Every protected method (leading '_') other than _path expects the
    # caller to hold self._lock.
    super(DiskContentAddressedCache, self).__init__(cache_dir)
    self.policies = policies
    self.hash_algo = hash_algo
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)

    # LRU lookup dict(digest: size) of the cached items.
    self._lru = lru.LRUDict()
    # First LRU item that must survive this run because it was referenced.
    # Anything more recent than it is implicitly protected too. A set() of all
    # referenced items would also work but costs memory without a use case.
    self._protected = None
    # Cleanup operations done by self._load(), if any.
    self._operations = []

    # Cached free disk space; self._trim() refreshes it. The directory must
    # exist before it can be measured.
    file_path.ensure_tree(self.cache_dir)
    self._free_disk = file_path.get_free_space(self.cache_dir)

    with tools.Profiler('Setup'), self._lock:
        self._load(trim, time_fn)
def trim(self):
    """Evicts entries until every configured policy is satisfied.

    Policies are applied in this order: max_items, max_age_secs,
    min_free_space, max_cache_size. A policy whose value is falsy is skipped.
    The state is saved through self._save() once all of them ran.

    Returns:
      List of the sizes reported by self._remove_lru_item(), one per evicted
      entry, in eviction order. When self.cache_dir is not a directory the
      list is empty and nothing is saved.
    """
    evicted = []

    def evict(message, limit):
        # Drops the oldest entry, records its size and logs the reason.
        name, size = self._remove_lru_item()
        evicted.append(size)
        logging.info(message, name, size, limit)

    with self._lock:
        if not fs.isdir(self.cache_dir):
            return evicted

        # Maximum number of items.
        max_items = self._policies.max_items
        if max_items:
            while len(self._lru) > max_items:
                evict(
                    'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
                    max_items)

        # Maximum age.
        max_age_secs = self._policies.max_age_secs
        if max_age_secs:
            cutoff = self._lru.time_fn() - max_age_secs
            while self._lru:
                _name, (_data, oldest_ts) = self._lru.get_oldest()
                if oldest_ts >= cutoff:
                    break
                evict(
                    'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
                    max_age_secs)

        # Minimum free space; measured again before every eviction.
        min_free_space = self._policies.min_free_space
        if min_free_space:
            while (self._lru and
                   file_path.get_free_space(self.cache_dir) < min_free_space):
                evict(
                    'NamedCache.trim(): Removed %r(%d) due to '
                    'min_free_space(%d)',
                    min_free_space)

        # Maximum total size; recomputed from the LRU before every eviction.
        max_cache_size = self._policies.max_cache_size
        if max_cache_size:
            while (self._lru and
                   sum(size for _rel_cache, size in self._lru.values()) >
                   max_cache_size):
                evict(
                    'NamedCache.trim(): Removed %r(%d) due to '
                    'max_cache_size(%d)',
                    max_cache_size)

        self._save()
    return evicted
def trim(self, min_free_space):
    """Deletes the oldest named caches until the limits are respected.

    Caches are removed, oldest first, while the free space on the volume
    holding self.root_dir is below |min_free_space| or while more than
    MAX_CACHE_SIZE caches are tracked.

    A falsy min_free_space (None or 0) disables the free space check.

    Requires NamedCache to be open and self._lock to be held.

    Returns:
      Number of caches deleted.
    """
    self._lock.assert_locked()
    if not os.path.isdir(self.root_dir):
        return 0

    removed = 0
    available = 0
    if min_free_space:
        available = file_path.get_free_space(self.root_dir)

    while True:
        short_on_space = min_free_space and available < min_free_space
        if not short_on_space and len(self._lru) <= MAX_CACHE_SIZE:
            break
        logging.info(
            'Making space for named cache %s > %s or %s > %s',
            available, min_free_space, len(self._lru), MAX_CACHE_SIZE)
        try:
            name, (path, _) = self._lru.get_oldest()
        except KeyError:
            # The LRU is empty; there is nothing more to delete.
            return removed

        # The named symlink goes first, then the directory it refers to.
        link = self._get_named_path(name)
        if fs.islink(link):
            fs.unlink(link)
        cache_path = os.path.join(self.root_dir, path)
        if os.path.isdir(cache_path):
            logging.info('Removing named cache %s', cache_path)
            file_path.rmtree(cache_path)

        if min_free_space:
            available = file_path.get_free_space(self.root_dir)
        self._lru.pop(name)
        removed += 1
    return removed
def _trim(self):
    """Trims anything we don't know, make sure enough free space exists.

    Policies are enforced in this order: max_age_secs, max_cache_size,
    max_items, min_free_space. A falsy policy value disables that policy.
    self._lock must be held. The state is saved via self._save() at the end.

    Returns:
      List of the values returned by self._remove_lru_file(True), one per
      evicted item; they are treated as sizes in bytes below.
    """
    self._lock.assert_locked()
    evicted = []

    # Trim old items.
    if self.policies.max_age_secs:
        cutoff = self._lru.time_fn() - self.policies.max_age_secs
        while self._lru:
            oldest = self._lru.get_oldest()
            # (key, (data, ts))
            if oldest[1][1] >= cutoff:
                break
            evicted.append(self._remove_lru_file(True))

    # Ensure maximum cache size.
    if self.policies.max_cache_size:
        total_size = sum(self._lru.itervalues())
        while total_size > self.policies.max_cache_size:
            e = self._remove_lru_file(True)
            evicted.append(e)
            total_size -= e

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and len(self._lru) > self.policies.max_items:
        for _ in xrange(len(self._lru) - self.policies.max_items):
            evicted.append(self._remove_lru_file(True))

    # Ensure enough free space.
    self._free_disk = file_path.get_free_space(self.cache_dir)
    while (self.policies.min_free_space and self._lru and
           self._free_disk < self.policies.min_free_space):
        # self._free_disk is updated by this call.
        evicted.append(self._remove_lru_file(True))

    if evicted:
        total_usage = sum(self._lru.itervalues())
        max_cache_size = self.policies.max_cache_size
        usage_percent = 0.
        # max_cache_size may be 0 (policy disabled, see above); reporting 0%
        # avoids a ZeroDivisionError in that configuration.
        if total_usage and max_cache_size:
            usage_percent = 100. * float(total_usage) / max_cache_size

        logging.warning(
            'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
            ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
            '%.1fkb)',
            len(evicted), sum(evicted) / 1024.,
            self._free_disk / 1024.,
            total_usage / 1024.,
            usage_percent,
            self.policies.max_cache_size / 1024.)
    self._save()
    return evicted
def test_get_disks_info(self):
    # get_disks_info() must list at least one disk, and the free space it
    # reports for the root volume must agree with file_path.get_free_space().
    info = os_utilities.get_disks_info()
    self.assertGreater(len(info), 0)

    if sys.platform == 'win32':
        root_path = u'C:\\'
    else:
        root_path = u'/'
    root = info[root_path]

    # Convert bytes to MB and round the same way as the value under test.
    free_bytes = float(file_path.get_free_space(root_path))
    free_disk = round(free_bytes / 1024. / 1024., 1)

    reported_mb = root['free_mb']
    delta = math.fabs(free_disk - reported_mb)
    # The two values only need to be mostly equal: disk I/O happening between
    # the two measurements causes some jitter.
    self.assertLess(delta, 2., (delta, free_disk, reported_mb))
def trim(self):
    """Purges cache entries that do not comply with the cache policies.

    Policies are enforced in this order: max_items, max_age_secs,
    min_free_space. A falsy policy value disables that policy.

    NamedCache must be open.

    Returns:
      Number of caches deleted; 0 when self.cache_dir is not a directory.
    """
    with self._lock:
        if not os.path.isdir(self.cache_dir):
            return 0

        removed = []

        def _remove_lru_file():
            """Removes the oldest LRU entry. LRU must not be empty."""
            name, _data = self._lru.get_oldest()
            logging.info('Removing named cache %r', name)
            self._remove(name)
            removed.append(name)

        # Trim according to maximum number of items. Like the other policies,
        # a falsy max_items means "no limit"; without this guard a value of 0
        # would wipe the whole cache.
        if self._policies.max_items:
            while len(self._lru) > self._policies.max_items:
                _remove_lru_file()

        # Trim according to maximum age.
        if self._policies.max_age_secs:
            cutoff = self._lru.time_fn() - self._policies.max_age_secs
            while self._lru:
                _name, (_content, timestamp) = self._lru.get_oldest()
                if timestamp >= cutoff:
                    break
                _remove_lru_file()

        # Trim according to minimum free space.
        if self._policies.min_free_space:
            while True:
                free_space = file_path.get_free_space(self.cache_dir)
                if (not self._lru or
                        free_space >= self._policies.min_free_space):
                    break
                _remove_lru_file()

        # TODO(maruel): Trim according to self._policies.max_cache_size. Do it
        # last as it requires counting the size of each entry.

        # TODO(maruel): Trim empty directories. An empty directory is not a
        # cache, something needs to be in it.

        return len(removed)
def trim(self, min_free_space):
    """Purges cache.

    Removes cache directories that were not accessed for a long time
    until there is enough free space and the number of caches is sane.

    If min_free_space is None (or otherwise falsy), disk free space is not
    checked.

    NamedCache must be open.

    Arguments:
      min_free_space: free space to reach on the volume holding
          self.root_dir, or None to only enforce MAX_CACHE_SIZE.

    Returns:
      Number of caches deleted.
    """
    self._lock.assert_locked()
    if not os.path.isdir(self.root_dir):
        return 0

    total = 0
    free_space = 0
    if min_free_space:
        free_space = file_path.get_free_space(self.root_dir)
    while ((min_free_space and free_space < min_free_space)
           or len(self._lru) > MAX_CACHE_SIZE):
        # min_free_space may legitimately be None, which '%d' cannot format;
        # log it as 0 so the record is not lost to a formatting error.
        logging.info(
            'Making space for named cache %d > %d or %d > %d',
            free_space, min_free_space or 0, len(self._lru), MAX_CACHE_SIZE)
        try:
            name, _ = self._lru.get_oldest()
        except KeyError:
            # Nothing left to delete.
            return total
        logging.info('Removing named cache %r', name)
        self._remove(name)
        if min_free_space:
            free_space = file_path.get_free_space(self.root_dir)
        total += 1
    return total
def _trim(self):
    """Trims anything we don't know, make sure enough free space exists.

    Policies are enforced in this order: max_age_secs, max_cache_size,
    max_items, min_free_space. A falsy policy value disables that policy.
    self._lock must be held. The state is saved via self._save() at the end.

    Returns:
      Number of items removed by the min_free_space policy only; evictions
      caused by the other policies are not counted.
    """
    self._lock.assert_locked()

    # Trim old items.
    if self.policies.max_age_secs:
        cutoff = self._lru.time_fn() - self.policies.max_age_secs
        while self._lru:
            oldest = self._lru.get_oldest()
            # oldest[1][1] is compared against the cutoff as a timestamp.
            if oldest[1][1] >= cutoff:
                break
            self._remove_lru_file(True)

    # Ensure maximum cache size.
    if self.policies.max_cache_size:
        total_size = sum(self._lru.itervalues())
        while total_size > self.policies.max_cache_size:
            total_size -= self._remove_lru_file(True)

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and len(self._lru) > self.policies.max_items:
        for _ in xrange(len(self._lru) - self.policies.max_items):
            self._remove_lru_file(True)

    # Ensure enough free space.
    self._free_disk = file_path.get_free_space(self.cache_dir)
    trimmed_due_to_space = 0
    # NOTE(review): this loop presumably relies on _remove_lru_file()
    # refreshing self._free_disk; otherwise it drains the whole LRU. Confirm.
    while (self.policies.min_free_space and self._lru and
           self._free_disk < self.policies.min_free_space):
        trimmed_due_to_space += 1
        self._remove_lru_file(True)

    if trimmed_due_to_space:
        total_usage = sum(self._lru.itervalues())
        max_cache_size = self.policies.max_cache_size
        usage_percent = 0.
        # max_cache_size may be 0 (policy disabled, see above); reporting 0%
        # avoids a ZeroDivisionError in that configuration.
        if total_usage and max_cache_size:
            usage_percent = 100. * float(total_usage) / max_cache_size

        logging.warning(
            'Trimmed %s file(s) due to not enough free disk space: %.1fkb free,'
            ' %.1fkb cache (%.1f%% of its maximum capacity of %.1fkb)',
            trimmed_due_to_space,
            self._free_disk / 1024.,
            total_usage / 1024.,
            usage_percent,
            self.policies.max_cache_size / 1024.)
    self._save()
    return trimmed_due_to_space
def _load(self, trim, time_fn):
    """Loads state of the cache from the state file.

    If the state file is missing and cache_dir does not exist on disk,
    cache_dir is created. If the state file exists but cannot be loaded
    (ValueError), the whole cache_dir is deleted and recreated, and
    self._free_disk is measured again.

    self._lock must be held.

    Arguments:
      trim: if truthy, self._trim() is called once the state is in place.
      time_fn: if truthy, installed as self._lru.time_fn.
    """
    self._lock.assert_locked()

    if not fs.isfile(self.state_file):
        if not fs.isdir(self.cache_dir):
            fs.makedirs(self.cache_dir)
    else:
        # Load state of the cache.
        try:
            self._lru = lru.LRUDict.load(self.state_file)
        except ValueError as err:
            # Let logging do the interpolation instead of formatting eagerly.
            logging.error('Failed to load cache state: %s', err)
            # Don't want to keep broken cache dir.
            file_path.rmtree(self.cache_dir)
            fs.makedirs(self.cache_dir)
            self._free_disk = file_path.get_free_space(self.cache_dir)
    if time_fn:
        self._lru.time_fn = time_fn
    if trim:
        self._trim()