def _compact_hashX(self, hashX, hist_map, hist_list, write_items,
                   keys_to_delete):
    """Compress history for a hashX.

    hist_list is an ordered list of the histories to be compressed.
    The histories are concatenated and re-split into fixed-size rows;
    rows whose content already matches what hist_map holds under the
    same key are left untouched.

    Args:
        hashX: bytes prefix shared by every row key of this history;
            a row key is hashX followed by a 2-byte big-endian row index.
        hist_map: mapping of existing row key -> row value, used both to
            seed the deletion set and to detect unchanged rows.
        hist_list: ordered list of bytes objects to join and re-chunk.
        write_items: list, extended in place with (key, chunk) pairs for
            rows that must be (re)written.
        keys_to_delete: set, updated in place with the existing keys that
            are not reused unchanged by the compacted layout.

    Returns:
        Total number of bytes in the chunks appended to write_items.

    Side effects:
        Raises self.comp_flush_count to the highest row index produced,
        if that is larger than its current value.
    """
    # History entries (tx numbers) are 4 bytes each.  Distribute
    # over rows of up to 50KB in size.  A fixed row size means
    # future compactions will not need to update the first N - 1
    # rows.
    max_row_size = self.max_hist_row_entries * 4
    full_hist = b''.join(hist_list)
    # Ceiling division: number of rows needed to hold full_hist.
    nrows = (len(full_hist) + max_row_size - 1) // max_row_size
    if nrows > 4:
        self.logger.info('hashX {} is large: {:,d} entries across '
                         '{:,d} rows'.format(hash_to_hex_str(hashX),
                                             len(full_hist) // 4, nrows))

    # Find what history needs to be written, and what keys need to
    # be deleted.  Start by assuming all keys are to be deleted,
    # and then remove those that are the same on-disk as when
    # compacted.
    write_size = 0
    keys_to_delete.update(hist_map)

    if not full_hist:
        # No history bytes at all: there are no rows to produce, so every
        # existing key stays scheduled for deletion.  Returning here also
        # avoids reading the loop variable below, which would be unbound
        # because the loop body would never run.
        return write_size

    for n, chunk in enumerate(util.chunks(full_hist, max_row_size)):
        key = hashX + pack_be_uint16(n)
        if hist_map.get(key) == chunk:
            # Row is already stored with identical content; keep it.
            keys_to_delete.remove(key)
        else:
            write_items.append((key, chunk))
            write_size += len(chunk)

    # Sanity check: the chunker produced exactly the predicted row count.
    assert n + 1 == nrows
    self.comp_flush_count = max(self.comp_flush_count, n)

    return write_size
def _compact_history(self, limit):
    """Run one bounded pass of history compaction.

    Starting at self.comp_cursor, compacts one 2-byte key prefix at a
    time until at least `limit` bytes have been queued for writing or
    all 65536 prefixes have been visited, then hands the accumulated
    work to self._flush_compaction and logs a summary.

    Args:
        limit: byte threshold; the loop stops once the running total
            returned by self._compact_prefix reaches it.

    Returns:
        The total number of bytes reported by self._compact_prefix
        during this pass.
    """
    stale_keys = set()
    pending_rows = []  # (key, value) pairs awaiting the batch write
    bytes_queued = 0

    # Each cursor value stands for one 2-byte big-endian key prefix.
    position = self.comp_cursor
    while position < 65536 and bytes_queued < limit:
        bytes_queued += self._compact_prefix(
            pack_be_uint16(position), pending_rows, stale_keys)
        position += 1

    # Read the row high-water mark before flushing, as the original does.
    # NOTE(review): presumably _flush_compaction may reset
    # comp_flush_count -- confirm before reordering these two lines.
    largest = self.comp_flush_count + 1
    self._flush_compaction(position, pending_rows, stale_keys)

    summary = (
        'history compaction: wrote {:,d} rows ({:.1f} MB), '
        'removed {:,d} rows, largest: {:,d}, {:.1f}% complete'
    ).format(
        len(pending_rows),
        bytes_queued / 1000000,
        len(stale_keys),
        largest,
        100 * position / 65536,
    )
    self.logger.info(summary)
    return bytes_queued
def flush(self):
    """Write all unflushed history to the database in one batch.

    Increments self.flush_count and uses its 2-byte big-endian encoding
    as the key suffix for every row written in this flush.  Rows are
    written in sorted hashX order, followed by self.write_state(batch)
    inside the same write batch.  Afterwards the unflushed map is
    cleared and self.unflushed_count is reset to 0.  When
    self.db.for_sync is truthy, the elapsed time and number of flushed
    entries are logged.
    """
    began = time.time()
    self.flush_count += 1
    suffix = pack_be_uint16(self.flush_count)
    pending = self.unflushed

    with self.db.write_batch() as batch:
        for hashX in sorted(pending):
            # Row key is the hashX followed by this flush's id.
            batch.put(hashX + suffix, pending[hashX].tobytes())
        self.write_state(batch)

    # Capture the size before emptying the map; it is needed for the log.
    count = len(pending)
    pending.clear()
    self.unflushed_count = 0

    if not self.db.for_sync:
        return

    elapsed = time.time() - began
    self.logger.info(f'flushed history in {elapsed:.1f}s '
                     f'for {count:,d} addrs')