def from_checkpoints(self, chunk_size=1000):
    """Initial sync strategy: read from blocks on disk.

    This method scans for files matching ./checkpoints/*.json.lst
    and uses them for hive's initial sync. Each line must contain
    exactly one block in JSON format.
    """
    # pylint: disable=no-self-use
    last_block = Blocks.head_num()

    tuplize = lambda path: [int(path.split('/')[-1].split('.')[0]), path]
    basedir = os.path.dirname(os.path.realpath(__file__ + "/../.."))
    files = glob.glob(basedir + "/checkpoints/*.json.lst")
    tuples = sorted(map(tuplize, files), key=lambda f: f[0])

    last_read = 0
    for (num, path) in tuples:
        if last_block < num:
            log.info("[SYNC] Load %s. Last block: %d", path, last_block)
            with open(path) as f:
                # each line in the file represents one block;
                # we can skip the blocks we already have
                skip_lines = last_block - last_read
                remaining = drop(skip_lines, f)
                for lines in partition_all(chunk_size, remaining):
                    Blocks.process_multi(map(json.loads, lines), True)
            last_block = num
        last_read = num
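# A minimal, self-contained sketch of the checkpoint-loading pattern above,
# runnable against a temporary directory. `load_checkpoints_sketch` and
# `apply_blocks` are hypothetical stand-ins (apply_blocks plays the role of
# Blocks.process_multi); the filename convention (<last_block>.json.lst, one
# JSON block per line) mirrors the docstring above.
import glob
import json
import os
import tempfile

from toolz import drop, partition_all


def load_checkpoints_sketch(basedir, head_block, apply_blocks, chunk_size=2):
    # sort checkpoint files by the block number encoded in the filename
    tuplize = lambda path: (int(os.path.basename(path).split('.')[0]), path)
    files = sorted(map(tuplize, glob.glob(os.path.join(basedir, "*.json.lst"))))

    last_block, last_read = head_block, 0
    for num, path in files:
        if last_block < num:
            with open(path) as f:
                # skip lines for blocks we already have, then process in chunks
                for lines in partition_all(chunk_size, drop(last_block - last_read, f)):
                    apply_blocks([json.loads(line) for line in lines])
            last_block = num
        last_read = num


if __name__ == '__main__':
    with tempfile.TemporaryDirectory() as tmp:
        # fake checkpoint file covering blocks 1..5
        with open(os.path.join(tmp, "5.json.lst"), 'w') as f:
            for n in range(1, 6):
                f.write(json.dumps({"block_num": n}) + "\n")
        # head is at block 2, so only blocks 3..5 get processed
        load_checkpoints_sketch(tmp, head_block=2, apply_blocks=print)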
def sync_from_file(file_path, skip_lines, chunk_size=250):
    """Sync from a file of JSON blocks, one per line, skipping already-synced lines."""
    with open(file_path) as f:
        # each line in the file represents one block;
        # we can skip the blocks we already have
        remaining = drop(skip_lines, f)
        for batch in partition_all(chunk_size, remaining):
            Blocks.process_multi(map(json.loads, batch), True)
def sync_from_steemd():
    is_initial_sync = DbState.is_initial_sync()
    steemd = get_adapter()

    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    if ubound <= lbound:
        return

    _abort = False
    try:
        print("[SYNC] start block %d, +%d to sync" % (lbound, ubound - lbound + 1))
        timer = Timer(ubound - lbound, entity='block', laps=['rps', 'wps'])
        while lbound < ubound:
            to = min(lbound + 1000, ubound)
            timer.batch_start()
            blocks = steemd.get_blocks_range(lbound, to)
            timer.batch_lap()
            Blocks.process_multi(blocks, is_initial_sync)
            timer.batch_finish(len(blocks))
            date = blocks[-1]['timestamp']
            print(timer.batch_status("[SYNC] Got block %d @ %s" % (to - 1, date)))
            lbound = to
    except KeyboardInterrupt:
        traceback.print_exc()
        print("\n\n[SYNC] Aborted.. cleaning up..")
        _abort = True

    if not is_initial_sync:
        # Follow flushing may need to be moved closer to core (i.e. into the
        # main block transactions). Important to keep in sync since we need to
        # prevent expensive recounts. This will fail if we aborted in the middle
        # of a transaction, meaning data loss. Better than forcing it, however,
        # since the in-memory cache will be out of sync with db state.
        Follow.flush(trx=True)

        # This flush is low importance; accounts are swept regularly.
        if not _abort:
            Accounts.flush(trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(trx=True)

    if _abort:
        print("[SYNC] Aborted")
        exit()
def from_steemd(cls, is_initial_sync=False, chunk_size=1000):
    """Fast sync strategy: read/process blocks in batches."""
    steemd = SteemClient.instance()
    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    count = ubound - lbound
    if count < 1:
        return

    _abort = False
    try:
        print("[SYNC] start block %d, +%d to sync" % (lbound, count))
        timer = Timer(count, entity='block', laps=['rps', 'wps'])
        while lbound < ubound:
            timer.batch_start()

            # fetch blocks
            to = min(lbound + chunk_size, ubound)
            blocks = steemd.get_blocks_range(lbound, to)
            lbound = to
            timer.batch_lap()

            # process blocks
            Blocks.process_multi(blocks, is_initial_sync)
            timer.batch_finish(len(blocks))

            date = blocks[-1]['timestamp']
            print(timer.batch_status("[SYNC] Got block %d @ %s" % (to - 1, date)))
    except KeyboardInterrupt:
        traceback.print_exc()
        print("\n\n[SYNC] Aborted.. cleaning up..")
        _abort = True

    if not is_initial_sync:
        # This flush is low importance; accounts are swept regularly.
        if not _abort:
            Accounts.flush(trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(trx=True)

    if _abort:
        print("[SYNC] Aborted")
        exit()
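# A minimal sketch of the batched range-fetch loop used by from_steemd above:
# advance lbound in chunk_size steps, clamping the upper edge at ubound.
# `batched_sync_sketch`, `fetch_range`, and `process` are hypothetical stand-ins
# for the loop body, steemd.get_blocks_range, and Blocks.process_multi.
def batched_sync_sketch(lbound, ubound, chunk_size, fetch_range, process):
    while lbound < ubound:
        to = min(lbound + chunk_size, ubound)
        blocks = fetch_range(lbound, to)   # blocks lbound .. to-1
        process(blocks)
        lbound = to


if __name__ == '__main__':
    # fetch 25 blocks (100..124) in chunks of 10: batches of 10, 10, 5
    batched_sync_sketch(
        lbound=100, ubound=125, chunk_size=10,
        fetch_range=lambda lo, hi: list(range(lo, hi)),
        process=lambda blocks: print(len(blocks), "blocks:", blocks[0], "...", blocks[-1]))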
def from_checkpoints(cls, chunk_size=1000):
    """Initial sync strategy: read from blocks on disk."""
    last_block = Blocks.head_num()

    tuplize = lambda path: [int(path.split('/')[-1].split('.')[0]), path]
    basedir = os.path.dirname(os.path.realpath(__file__ + "/../.."))
    files = glob.glob(basedir + "/checkpoints/*.json.lst")
    tuples = sorted(map(tuplize, files), key=lambda f: f[0])

    last_read = 0
    for (num, path) in tuples:
        if last_block < num:
            print("[SYNC] Load %s -- last block: %d" % (path, last_block))
            with open(path) as f:
                # each line in the file represents one block;
                # we can skip the blocks we already have
                skip_lines = last_block - last_read
                remaining = drop(skip_lines, f)
                for lines in partition_all(chunk_size, remaining):
                    Blocks.process_multi(map(json.loads, lines), True)
            last_block = num
        last_read = num
def from_dpayd(self, is_initial_sync=False, chunk_size=1000):
    """Fast sync strategy: read/process blocks in batches."""
    # pylint: disable=no-self-use
    dpayd = self._dpay
    lbound = Blocks.head_num() + 1
    ubound = self._conf.get('test_max_block') or dpayd.last_irreversible()
    count = ubound - lbound
    if count < 1:
        return

    log.info("[SYNC] start block %d, +%d to sync", lbound, count)
    timer = Timer(count, entity='block', laps=['rps', 'wps'])
    while lbound < ubound:
        timer.batch_start()

        # fetch blocks
        to = min(lbound + chunk_size, ubound)
        blocks = dpayd.get_blocks_range(lbound, to)
        lbound = to
        timer.batch_lap()

        # process blocks
        Blocks.process_multi(blocks, is_initial_sync)
        timer.batch_finish(len(blocks))

        _prefix = ("[SYNC] Got block %d @ %s" % (to - 1, blocks[-1]['timestamp']))
        log.info(timer.batch_status(_prefix))

    if not is_initial_sync:
        # This flush is low importance; accounts are swept regularly.
        Accounts.flush(dpayd, trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(dpayd, trx=True)
def _block_consumer(blocks_data_provider, is_initial_sync, lbound, ubound):
    from hive.utils.stats import minmax
    is_debug = log.isEnabledFor(10)
    num = 0
    time_start = OPSM.start()
    rate = {}
    LIMIT_FOR_PROCESSED_BLOCKS = 1000

    rate = minmax(rate, 0, 1.0, 0)
    sync_type_prefix = "[INITIAL SYNC]" if is_initial_sync else "[FAST SYNC]"

    def print_summary():
        stop = OPSM.stop(time_start)
        log.info("=== TOTAL STATS ===")
        wtm = WSM.log_global("Total waiting times")
        ftm = FSM.log_global("Total flush times")
        otm = OPSM.log_global("All operations present in the processed blocks")
        ttm = ftm + otm + wtm
        log.info(f"Elapsed time: {stop :.4f}s. Calculated elapsed time: {ttm :.4f}s. Difference: {stop - ttm :.4f}s")
        if rate:
            log.info(f"Highest block processing rate: {rate['max'] :.4f} bps. From: {rate['max_from']} To: {rate['max_to']}")
            log.info(f"Lowest block processing rate: {rate['min'] :.4f} bps. From: {rate['min_from']} To: {rate['min_to']}")
        log.info("=== TOTAL STATS ===")

    try:
        Blocks.set_end_of_sync_lib(ubound)
        count = ubound - lbound
        timer = Timer(count, entity='block', laps=['rps', 'wps'])

        while lbound < ubound:
            number_of_blocks_to_proceed = min([LIMIT_FOR_PROCESSED_BLOCKS, ubound - lbound])
            time_before_waiting_for_data = perf()

            blocks = blocks_data_provider.get(number_of_blocks_to_proceed)

            if not can_continue_thread():
                break

            assert len(blocks) == number_of_blocks_to_proceed

            to = min(lbound + number_of_blocks_to_proceed, ubound)
            timer.batch_start()

            block_start = perf()
            Blocks.process_multi(blocks, is_initial_sync)
            block_end = perf()

            timer.batch_lap()
            timer.batch_finish(len(blocks))
            time_current = perf()

            prefix = ("%s Got block %d @ %s" % (sync_type_prefix, to - 1, blocks[-1].get_date()))
            log.info(timer.batch_status(prefix))
            log.info("%s Time elapsed: %fs", sync_type_prefix, time_current - time_start)
            log.info("%s Current system time: %s", sync_type_prefix, datetime.now().strftime("%H:%M:%S"))
            log.info(log_memory_usage())

            rate = minmax(rate, len(blocks), time_current - time_before_waiting_for_data, lbound)

            if block_end - block_start > 1.0 or is_debug:
                otm = OPSM.log_current("Operations present in the processed blocks")
                ftm = FSM.log_current("Flushing times")
                wtm = WSM.log_current("Waiting times")
                log.info(f"Calculated time: {otm+ftm+wtm :.4f} s.")

            OPSM.next_blocks()
            FSM.next_blocks()
            WSM.next_blocks()

            lbound = to
            PC.broadcast(BroadcastObject('sync_current_block', lbound, 'blocks'))

            num = num + 1

            if not can_continue_thread():
                break
    except Exception:
        log.exception("Exception caught during processing blocks...")
        set_exception_thrown()
        print_summary()
        raise

    print_summary()
    return num
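# A small, self-contained sketch of the per-batch rate bookkeeping done in
# _block_consumer above: time each batch with perf_counter and keep the best
# and worst blocks-per-second figures. `track_rate` is a hypothetical stand-in
# for hive.utils.stats.minmax; the real helper may differ in shape and keys.
from time import perf_counter as perf


def track_rate(rate, block_count, elapsed, lbound):
    if block_count == 0 or elapsed <= 0.0:
        return rate
    bps = block_count / elapsed
    if not rate or bps > rate['max']:
        rate.update(max=bps, max_from=lbound, max_to=lbound + block_count)
    if 'min' not in rate or bps < rate['min']:
        rate.update(min=bps, min_from=lbound, min_to=lbound + block_count)
    return rate


if __name__ == '__main__':
    rate = {}
    for lbound, batch in ((0, range(1000)), (1000, range(1000))):
        started = perf()
        sum(batch)                        # stand-in for Blocks.process_multi
        rate = track_rate(rate, len(batch), perf() - started, lbound)
    print(rate)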
def listen(self, trail_blocks, max_sync_block, do_stale_block_check):
    """Live (block following) mode.

    trail_blocks - how many blocks must be collected before the oldest one is
        processed (the delay between collecting and processing blocks)
    max_sync_block - limit of blocks to sync; the function returns once it is reached
    do_stale_block_check - check that the last collected block is not older than 60s
    """

    # debug: no max gap if disable_sync in effect
    max_gap = None if self._conf.get('test_disable_sync') else 100

    steemd = self._steem
    hive_head = Blocks.head_num()
    log.info("[LIVE SYNC] Entering listen with HM head: %d", hive_head)

    if hive_head >= max_sync_block:
        self.refresh_sparse_stats()
        log.info("[LIVE SYNC] Exiting due to block limit exceeded: synced block number: %d, max_sync_block: %d",
                 hive_head, max_sync_block)
        return

    for block in self._stream_blocks(hive_head + 1, can_continue_thread, set_exception_thrown,
                                     trail_blocks, max_gap, do_stale_block_check):
        if not can_continue_thread():
            break
        num = block.get_num()
        log.info("[LIVE SYNC] =====> About to process block %d with timestamp %s",
                 num, block.get_date())

        start_time = perf()

        Blocks.process_multi([block], False)
        otm = OPSM.log_current("Operations present in the processed blocks")
        ftm = FSM.log_current("Flushing times")

        ms = (perf() - start_time) * 1000
        log.info("[LIVE SYNC] <===== Processed block %d at %s --% 4d txs"
                 " --% 5dms%s", num, block.get_date(),
                 block.get_number_of_transactions(), ms, ' SLOW' if ms > 1000 else '')
        log.info("[LIVE SYNC] Current system time: %s", datetime.now().strftime("%H:%M:%S"))

        if num % 1200 == 0:  # 1 hour
            log.warning("head block %d @ %s", num, block.get_date())
            log.info("[LIVE SYNC] hourly stats")
            log.info("[LIVE SYNC] filling payout_stats_view executed")
            with ThreadPoolExecutor(max_workers=2) as executor:
                executor.submit(PayoutStats.generate)
                executor.submit(Mentions.refresh)
        if num % 200 == 0:  # 10 min
            update_communities_posts_and_rank(self._db)
        if num % 20 == 0:  # 1 min
            self._update_chain_state()

        PC.broadcast(BroadcastObject('sync_current_block', num, 'blocks'))
        FSM.next_blocks()
        OPSM.next_blocks()

        if num >= max_sync_block:
            log.info("Stopping [LIVE SYNC] because of specified block limit: %d", max_sync_block)
            break
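# A minimal sketch of the modulo-based scheduling used in listen() above: with
# 3-second blocks, every 20th block is roughly one minute, every 200th roughly
# ten minutes, and every 1200th roughly one hour. `run_periodic_tasks` and the
# task lambdas are placeholders for the real jobs (PayoutStats.generate,
# Mentions.refresh, update_communities_posts_and_rank, _update_chain_state).
def run_periodic_tasks(block_num, tasks):
    for interval, task in tasks:
        if block_num % interval == 0:
            task(block_num)


if __name__ == '__main__':
    tasks = [
        (20, lambda n: print(f"block {n}: update chain state (~1 min)")),
        (200, lambda n: print(f"block {n}: update community posts/rank (~10 min)")),
        (1200, lambda n: print(f"block {n}: hourly stats (~1 hour)")),
    ]
    for num in range(1190, 1210):
        run_periodic_tasks(num, tasks)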