def display_at(self, now, total_num_units, unit_abbrev_display, include_elapsed=False):
    # Guard against division by zero when the total is unknown;
    # fall back to displaying "1.00%", as before.
    if total_num_units:
        pct_complete = '%1.2f%%' % (self.units_copied * 100.0 / total_num_units)
    else:
        pct_complete = '%1.2f%%' % 1.0
    elapsed_total = now - self.begin_time
    rate_mb = rate_units = 0.0
    if elapsed_total:
        rate_mb = self.total_size / elapsed_total
        rate_units = self.units_copied / elapsed_total
    rate_mb_str = byte_display(rate_mb)
    rate_unit_str = '%1.2f' % rate_units

    result = "%d/%d,%7s, %6s/s %6s %s/s, %s" % (
        self.units_copied,
        total_num_units,
        pct_complete,
        rate_mb_str,
        rate_unit_str,
        unit_abbrev_display,
        byte_display(self.total_size),
    )
    if include_elapsed:
        result += ' %4.1f minutes' % (elapsed_total / 60.0)
    return result
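# byte_display() is used throughout this section but never defined in it.
# A minimal sketch of what such a helper might look like, assuming it
# scales a raw byte count into human-readable units; the unit names and
# cutoffs below are assumptions, not the actual implementation.
def byte_display(size):
    # Scale a raw byte count into a short human-readable string.
    if size < 1024:
        return '%d B' % size
    if size < 1024 * 1024:
        return '%0.2f KB' % (size / 1024.0)
    return '%0.2f MB' % (size / (1024.0 * 1024))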
def do_minor_log(self, trans, _num_txn_records, txn_byte_size, _num_txn_blobs, _copy_duration):
    logger.debug(
        "Copied %d objects of size %s (%d blobs) (Most recent oid=%s size=%s)",
        trans.num_records_since_last_log,
        byte_display(trans.record_size_since_last_log),
        trans.num_blobs_since_last_log,
        trans.last_oid,
        byte_display(txn_byte_size))
    trans.reset_interval()
def loaded(self, byte_count):
    with self._lock:
        self.bytes_loaded_since_last_check += byte_count
        if self.bytes_loaded_since_last_check >= self.bytes_loaded_check_threshold:
            logger.debug(
                "Loaded %s bytes (>= %s) into %s, may need to check.",
                byte_display(self.bytes_loaded_since_last_check),
                byte_display(self.bytes_loaded_check_threshold),
                self.blob_dir)
            self._check()
def __run_with_lock(self):
    while 1:
        size, files_by_atime = self.__size_blob_dir()
        self.blob_dir_size = size
        if size <= self.target_size:
            logger.info(
                'Traversed %s to compute size %s (<= %s); quitting.',
                self.blob_dir,
                byte_display(self.blob_dir_size),
                byte_display(self.target_size))
            break
        self.__shrink_blob_dir(size, files_by_atime)
def bulk_update(self, keys_and_values, source='<unknown>', log_count=None, mem_usage_before=None):
    """
    Insert all the ``(key, value)`` pairs found in *keys_and_values*.

    This will permute the most-recently-used status of any existing entries.
    Entries in the *keys_and_values* iterable should be returned from
    least recent to most recent, as the items at the end will be
    considered to be the most recent. (Alternately, you can think of
    them as needing to be in order from lowest priority to highest
    priority.)

    This will never evict existing entries from the cache.
    """
    now = time.time()
    mem_usage_before = mem_usage_before if mem_usage_before is not None else get_memory_usage()
    mem_usage_before_this = get_memory_usage()
    log_count = log_count or len(keys_and_values)

    data = self._cache
    if data:
        # Loading more data into an existing bucket.
        # Load only the *new* keys, trying to get the newest ones
        # because LRU is going to get messed up anyway.
        #
        # If we were empty, then take what they give us, LRU
        # first, so that as we iterate the last item in the list
        # becomes the MRU item.
        new_entries_newest_first = [t for t in keys_and_values
                                    if t[0] not in data]
        new_entries_newest_first.reverse()
        keys_and_values = new_entries_newest_first
        del new_entries_newest_first

    stored = data.add_MRUs(keys_and_values, return_count_only=True)
    then = time.time()
    del keys_and_values  # For memory reporting.
    mem_usage_after = get_memory_usage()
    log.info(
        "Examined %d and stored %d items from %s in %s using %s. "
        "(%s local) (%s)",
        log_count, stored, getattr(source, 'name', source),
        then - now,
        byte_display(mem_usage_after - mem_usage_before),
        byte_display(mem_usage_after - mem_usage_before_this),
        self)
    return log_count, stored
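# Hypothetical usage illustrating the ordering contract from the
# docstring above: pairs arrive least-recent first, so b'c' ends up as
# the MRU entry. The `cache` object, key/value shapes, and source name
# are assumptions for illustration only.
pairs = [
    (b'a', b'state-a'),  # least recent / lowest priority
    (b'b', b'state-b'),
    (b'c', b'state-c'),  # most recent / highest priority
]
examined, stored = cache.bulk_update(pairs, source='persistent-cache.sqlite3')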
def _when_done(self, checker, holding_clean_lock):
    """
    Callback to be run from the cleanup thread.

    Cleans up the state of *self*.
    """
    with self._lock:
        # checker is the BlobCacheSizeChecker, but self._checker
        # is the spawned thread.
        # This is the last thing the BlobCacheSizeChecker does, so by
        # definition it cannot be ready yet.
        self._checker_thread = None
        if not holding_clean_lock:
            self._reduced_event.set()
            return

        # In principle, if the checker finished sizing the directory and got
        # a cache size under its target and wanted to return to us,
        # but then some other threads immediately loaded a bunch of blobs,
        # we could go over that size. We prevent this by checking
        # the size again here, while we're holding our lock, and if we're
        # too big, we'll go again. This happens during the test cases.

        # checker can be None if we did this externally.
        dir_size = -1
        duration = 0.0
        if checker is not None:
            dir_size = checker.blob_dir_size
            duration = checker.duration

        logger.info(
            "Finished checking %s (in %.3fs) with size of %s (max: %s; target %s)",
            self.blob_dir, duration,
            byte_display(dir_size),
            byte_display(self.blob_cache_max_size),
            byte_display(self.blob_cache_target_cleanup_size)
        )
        if self._exceeded_counter or dir_size > self.blob_cache_target_cleanup_size:
            logger.debug(
                "Requesting new check for %s with size of %s (max: %s; target %s)",
                self.blob_dir,
                dir_size,
                self.blob_cache_max_size,
                byte_display(self.blob_cache_target_cleanup_size)
            )
            self._check()
        else:
            self._reduced_event.set()
def report(client, duration, extra=''):
    gc.collect()
    gc.collect()
    mem_before = client.b_mem_before
    objects_before = client.b_objects_before
    mem_after = get_memory_usage()
    mem_used = mem_after - mem_before
    objects_after = len(gc.get_objects())
    logger.info(
        "%s:%sExecuted in %s; Mem delta: %s; Object Delta: %s; "
        "Num keys: %d; Weight: %s; "
        "Mem before: %s; Mem after: %s",
        os.getpid(), extra, duration,
        byte_display(mem_used),
        objects_after - objects_before,
        len(client),
        byte_display(client.size),
        byte_display(mem_before),
        byte_display(mem_after),
    )
def __size_blob_dir(self, is_cache_dir_name=re.compile(r'\d+$').match):
    # Calculate the sizes of the blobs stored in the blob_dir.
    # Return the total size, and a BTree {atime: [full path to blob file]}

    # TODO: nti.zodb.containers has support for mapping
    # time.time() values into integers for use with the (smaller,
    # faster) IOBTree. Use that if we can prove that we can pop
    # the min atime successfully (that is, while the
    # nti.zodb.containers transformation is lossless and
    # reversible, we need to prove that it also maintains order;
    # I'm not sure it does).
    #
    # Other optimizations: Don't use a list until we get more than one
    # file with a matching atime. And/or use tuples and not lists:
    # tuples aren't tracked by the GC like lists are (after they survive one
    # collection, anyway).

    blob_dir = self.blob_dir
    blob_suffix = ZODB.blob.BLOB_SUFFIX
    files_by_atime = OOBTree.BTree()
    size = 0

    # Use os.walk() instead of os.listdir(); on 3.5+ this is much faster
    # thanks to the use of os.scandir(). When we're on Python 3.5+ *only*
    # we could use os.scandir ourself and maybe save some stat calls?
    for dirpath, dirnames, filenames in os.walk(blob_dir):
        # Walk top-down, only recursing into directories matching the
        # OID components (of which there should be one level).
        dirnames[:] = [d for d in dirnames if is_cache_dir_name(d)]
        # Examine blob files.
        blobfile_paths = [
            os.path.join(dirpath, f)
            for f in filenames
            if f.endswith(blob_suffix)
        ]

        for file_path in blobfile_paths:
            stat = os.stat(file_path)
            size += stat.st_size
            t = stat.st_atime
            if t not in files_by_atime:
                files_by_atime[t] = []

            # The ZEO version returns a weird version of the path,
            #
            #     os.path.join(dirname, file_name)
            #
            # which it must later re-combine to get an actual path:
            #
            #     os.path.join(blob_dir, file_name)
            #
            # It's not clear why it doesn't return the full path
            # that it already has. Temporary memory savings,
            # perhaps? If so, is that even a concern anymore?
            files_by_atime[t].append(file_path)

    logger.debug("Blob cache size for %s: %s",
                 self.blob_dir, byte_display(size))
    return size, files_by_atime
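# The TODO above asks whether a lossless float->int transform can also
# preserve ordering. A sketch of the property worth verifying; the
# struct-based bit reinterpretation below is an assumption about how
# such a transform could work, not necessarily what nti.zodb.containers
# actually does.
import struct

def time_to_int(t):
    # Reinterpret the float's IEEE-754 bits as an unsigned 64-bit int.
    # For non-negative finite floats (atimes qualify), the integer bit
    # pattern compares in the same order as the float values, so
    # minKey()/pop ordering would be preserved.
    return struct.unpack('>Q', struct.pack('>d', t))[0]

assert time_to_int(1.0) < time_to_int(2.0) < time_to_int(2.5)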
def __shrink_blob_dir(self, current_size, files_by_atime):
    size = current_size
    target_size = self.target_size
    remove = self.remove_blob_at_path

    while size > target_size and files_by_atime:
        for file_path in files_by_atime.pop(files_by_atime.minKey()):
            size -= remove(file_path)
            if size <= target_size:
                break

    logger.debug("Reduced blob cache size for %s: %s",
                 self.blob_dir, byte_display(size))
def populate_empty(loops, bucket_kind):
    client = makeOne(bucket_kind, populate=False)
    mem_before = get_memory_usage()

    begin = perf_counter()
    for _ in range(loops):
        for k, v in ALL_DATA:
            client[k] = v
    duration = perf_counter() - begin

    mem_used = get_memory_usage() - mem_before
    logger.info("Populated in %s; took %s mem; size: %d",
                duration, byte_display(mem_used), len(client))
    return duration
def display_at(self, now, total_num_txns, include_elapsed=False):
    # Guard against a zero total (e.g., an empty source storage),
    # which would otherwise raise ZeroDivisionError.
    pct_complete = '%1.2f%%' % (self.txns_copied * 100.0 / (total_num_txns or 1))
    elapsed_total = now - self.begin_time
    if elapsed_total:
        rate_mb = self.total_size / elapsed_total
        rate_tx = self.txns_copied / elapsed_total
    else:
        rate_mb = rate_tx = 0.0
    rate_mb_str = byte_display(rate_mb)
    rate_tx_str = '%1.3f' % rate_tx

    result = "%d/%d,%7s, %6s/s %6s TX/s, %s" % (
        self.txns_copied,
        total_num_txns,
        pct_complete,
        rate_mb_str,
        rate_tx_str,
        byte_display(self.total_size),
    )
    if include_elapsed:
        result += ' %4.1f minutes' % (elapsed_total / 60.0)
    return result
def _bulk_update(self, keys_and_values, source='<unknown>', log_count=None, mem_usage_before=None):
    """
    Insert all the ``(oid, (state, actual_tid, frozen, frequency))``
    pairs found in *keys_and_values*, rejecting entries once we get
    too full.

    *keys_and_values* should be ordered from least to most recently used.

    This can only be done in an empty cache.
    """
    now = time.time()
    mem_usage_before = mem_usage_before if mem_usage_before is not None else get_memory_usage()
    mem_usage_before_this = get_memory_usage()
    log_count = log_count or len(keys_and_values)

    data = self._cache
    if data:
        raise ValueError("Only bulk load into an empty cache.")

    stored = data.add_MRUs(keys_and_values, return_count_only=True)
    then = time.time()
    del keys_and_values  # For memory reporting.
    mem_usage_after = get_memory_usage()
    logger.info(
        "Examined %d and stored %d items from %s in %s using %s. "
        "(%s local) (%s)",
        log_count, stored, getattr(source, 'name', source),
        then - now,
        byte_display(mem_usage_after - mem_usage_before),
        byte_display(mem_usage_after - mem_usage_before_this),
        self)
    return log_count, stored
def __call__(self):
    with self:
        logger.info("Checking blob cache size for %s. (target: %s)",
                    self.blob_dir, byte_display(self.target_size))

        check_lock = self.__acquire_check_lock()
        try:
            if check_lock is None:
                logger.info(
                    "Failed to get filesystem clean lock (%s); quitting.",
                    self.blob_dir)
                return
            self.__run_with_lock()
        finally:
            if check_lock is not None:
                check_lock.close()
            self._finished_callback(self, check_lock is not None)
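# __acquire_check_lock() isn't shown in this section. A plausible
# sketch using zc.lockfile: return None when another process already
# holds the lock, otherwise an object with .close(), matching the usage
# above. The lock file name is an assumption.
import os
import zc.lockfile

def __acquire_check_lock(self):
    try:
        return zc.lockfile.LockFile(os.path.join(self.blob_dir, 'check.lock'))
    except zc.lockfile.LockError:
        return None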
def populate_not_equal(loops, bucket_kind):
    # Because we will populate when we make,
    # capture memory now to be able to include that.
    mem_before = get_memory_usage()
    client = makeOne(bucket_kind)

    begin = perf_counter()
    for _ in range(loops):
        for k, v in ALL_DATA:
            # Install a copy that's not quite equal.
            # This should require saving it.
            state = v[0]
            state = state + b'1'
            new_v = (state, v[1])
            client[k] = new_v
    duration = perf_counter() - begin

    mem_used = get_memory_usage() - mem_before
    logger.info("Populated in %s; took %s mem; size: %d",
                duration, byte_display(mem_used), len(client))
    return duration
def populate_equal(loops, bucket_kind):
    # Because we will populate when we make,
    # capture memory now to be able to include that.
    mem_before = get_memory_usage()
    client = makeOne(bucket_kind)

    begin = perf_counter()
    for _ in range(loops):
        for k, v in ALL_DATA:
            # Install a copy that's equal;
            # this should mean no extra copies.
            state = v[0]
            state = state[:-1] + state[-1:]
            new_v = (state, v[1])
            assert new_v == v
            assert new_v is not v
            client[k] = new_v
    duration = perf_counter() - begin

    mem_used = get_memory_usage() - mem_before
    logger.info("Populated in %s; took %s mem; size: %d",
                duration, byte_display(mem_used), len(client))
    return duration
def write_to_sqlite(self, connection):
    # pylint:disable=too-many-locals
    mem_before = get_memory_usage()

    cur = connection.cursor()

    db = Database.from_connection(connection)
    begin = time.time()

    # In a large benchmark, store_temp() accounts for 32%
    # of the total time, while move_from_temp accounts for 49%.

    # The batch size depends on how many params a stored proc can
    # have; if we go too big we get OperationalError: too many SQL
    # variables. The default is 999.
    # Note that the multiple-value syntax was added in
    # 3.7.11, 2012-03-20.
    with _timer() as batch_timer:
        cur.execute('BEGIN')
        stored_oid_tid = db.oid_to_tid
        fetch_current = time.time()
        count_written, _ = db.store_temp(self._items_to_write(stored_oid_tid))
        cur.execute("COMMIT")

    # Take out (reserved write) locks now; if we don't, we can get
    # 'OperationalError: database is locked' (the SQLITE_BUSY
    # error code; the SQLITE_LOCKED error code is about table
    # locks and has the string 'database table is locked'). But
    # beginning immediate lets us stand in line.
    #
    # Note that the SQLITE_BUSY error on COMMIT happens "if another
    # thread or process has a shared lock on the database
    # that prevented the database from being updated." But "When
    # COMMIT fails in this way, the transaction remains active and
    # the COMMIT can be retried later after the reader has had a
    # chance to clear." So we could retry the commit a couple
    # times and give up if we can't get there. It looks like one
    # production database takes about 2.5s to execute this entire
    # function; it seems like most instances that are shutting down
    # get to this place at roughly the same time and stack up waiting:
    #
    # Instance | Save Time | write_to_sqlite time
    # ---------+-----------+---------------------
    #     1    |   2.36s   |   2.35s
    #     2    |   5.31s   |   5.30s
    #     3    |   6.51s   |   6.30s
    #     4    |   7.91s   |   7.90s
    #
    # That either suggests that all waiting happens just to acquire this
    # lock and commit this transaction (and that subsequent operations
    # are inconsequential in terms of time) OR that the exclusive lock
    # isn't truly getting dropped, OR that some function in subsequent
    # processes is taking longer and longer. And indeed, our logging
    # showed that a gc.collect() at the end of this function was taking
    # more and more time:
    #
    # Instance | GC Time | Reported memory usage after GC
    # ---------+---------+-------------------------------
    #     1    |  2.00s  |   34.4MB
    #     2    |  3.50s  |   83.2MB
    #     3    |  4.94s  |  125.9MB
    #     4    |  6.44s  |  202.1MB
    #
    # The exclusive lock time did vary, but not as much; trim time
    # didn't really vary:
    #
    # Instance | Exclusive Write Time | Batch Insert Time | Fetch Current
    # ---------+----------------------+-------------------+--------------
    #     1    |        0.09s         |       0.12s       |    0.008s
    #     2    |        1.19s         |       0.44s       |    0.008s
    #     3    |        0.91s         |       0.43s       |    0.026s
    #     4    |        0.92s         |       0.34s       |    0.026s
    with _timer() as exclusive_timer:
        cur.execute('BEGIN IMMEDIATE')
        # During the time it took for us to commit, and the time that we
        # got the lock, it's possible that someone else committed and
        # changed the data in object_state.
        # Things might have changed in the database since our
        # snapshot where we put temp objects in, so we can't rely on
        # just assuming that's the truth anymore.
        rows_inserted = db.move_from_temp()
        if self.checkpoints:
            db.update_checkpoints(*self.checkpoints)
        cur.execute('COMMIT')

    # TODO: Maybe use BTrees.family.intersection to get the common keys?
    # Or maybe use a SQLite Python function?
    # 'DELETE from object_state where is_stale(zoid, tid)'
    # What's fastest? The custom function version would require scanning
    # the entire table; unfortunately this sqlite interface doesn't allow
    # defining virtual tables.
    with _timer() as trim_timer:
        # Delete anything we still know is stale, because we
        # saw a new TID for it, but we had nothing to replace it with.
        min_allowed_writeback = OID_TID_MAP_TYPE()
        for k, v in self._min_allowed_writeback.items():
            if stored_oid_tid.get(k, MAX_TID) < v:
                min_allowed_writeback[k] = v
        db.trim_to_size(self.limit, min_allowed_writeback)
        del min_allowed_writeback
    # Typically we write when we're closing and don't expect to do
    # anything else, so there's no need to keep tracking this info.
    self._min_allowed_writeback = OID_TID_MAP_TYPE()

    # Cleanups
    cur.close()
    del cur
    db.close()  # closes the connection.
    del db
    del stored_oid_tid

    # We're probably shutting down; don't perform a GC, as we've seen
    # that can get quite lengthy.

    end = time.time()
    stats = self.stats()
    mem_after = get_memory_usage()
    logger.info(
        "Wrote %d items to %s in %s "
        "(%s to fetch current, %s to insert batch (%d rows), %s to write, "
        "%s to trim). "
        "Used %s memory. (Storage: %s) "
        "Total hits %s; misses %s; ratio %s (stores: %s)",
        count_written, connection, end - begin,
        fetch_current - begin,
        batch_timer.duration,
        rows_inserted,
        exclusive_timer.duration,
        trim_timer.duration,
        byte_display(mem_after - mem_before),
        self._bucket0,
        stats['hits'], stats['misses'], stats['ratio'], stats['sets'])
    return count_written
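# _timer() is referenced above but not defined in this section. A
# minimal sketch consistent with how it's used: a context manager that
# records elapsed wall-clock time on a .duration attribute. The real
# implementation may differ.
import time

class _timer(object):
    def __enter__(self):
        # Record the start time; .duration stays 0.0 until exit.
        self.begin = time.time()
        self.duration = 0.0
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.duration = time.time() - self.begin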
def __transaction_display(self, trans, copy_result):
    num_txn_records, txn_byte_size, num_txn_blobs = copy_result
    return 'transaction %s <%4d records, %3d blobs, %9s>' % (
        readable_tid_repr(trans.tid),
        num_txn_records, num_txn_blobs,
        byte_display(txn_byte_size))
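# Illustrative output of the format above (all values made up, and the
# exact form of readable_tid_repr() is an assumption):
#
#   transaction 0x03d167d919a70499 < 128 records,   2 blobs, 512.00 KB>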