def task_add_missing_blocks(missing_block_nums,
                            max_procs,
                            max_threads,
                            database_url,
                            steemd_http_url,
                            task_num=5):
    task_message = fmt_task_message(
        'Adding missing blocks to db, this may take a while',
        emoji_code_point=u'\U0001F4DD',
        task_num=task_num)
    click.echo(task_message)

    max_workers = max_procs or os.cpu_count() or 1

    chunksize = len(missing_block_nums) // max_workers
    if chunksize <= 0:
        chunksize = 1

    map_func = partial(
        block_adder_process_worker,
        database_url,
        steemd_http_url,
        max_threads=max_threads)

    # note: work is dispatched in fixed chunks of 10000 block_nums;
    # the `chunksize` computed above is not used here
    chunks = chunkify(missing_block_nums, 10000)

    with Pool(processes=max_workers) as pool:
        # results are currently unused; they could be inspected to retry failures
        results = pool.map(map_func, chunks)

    success_msg = fmt_success_message('added missing blocks')
    click.echo(success_msg)

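# `chunkify` is used throughout this section but not defined in it. A minimal
# sketch of the assumed behavior, inferred from its call sites (split an
# iterable into lists of at most `chunksize` items); the project's real
# implementation may differ:
import itertools

def chunkify(iterable, chunksize=10000):
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, chunksize))
        if not chunk:
            break
        yield chunk
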
def block_adder_process_worker(database_url,
                               rpc_url,
                               block_nums,
                               max_threads=5):
    with isolated_engine(database_url) as engine:
        session = Session(bind=engine)
        raw_blocks = block_fetcher_thread_worker(
            rpc_url, block_nums, max_threads=max_threads)
        for raw_blocks_chunk in chunkify(raw_blocks, 1000):
            # pylint: disable=unused-variable
            # we could do something here with results, like retry failures
            results = bulk_add(raw_blocks_chunk, session)

# alternative version of block_adder_process_worker that manages the engine
# explicitly via configure_engine instead of the isolated_engine context manager
def block_adder_process_worker(database_url,
                               rpc_url,
                               block_nums,
                               max_threads=5):
    try:
        engine_config = configure_engine(database_url)
        session = Session(bind=engine_config.engine)
        raw_blocks = block_fetcher_thread_worker(
            rpc_url, block_nums, max_threads=max_threads)
        for raw_blocks_chunk in chunkify(raw_blocks, 1000):
            # pylint: disable=unused-variable
            # we could do something here with results, like retry failures
            results = bulk_add(raw_blocks_chunk, session)
    except Exception as e:
        logger.exception(e)
    finally:
        Session.close_all()

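# `block_fetcher_thread_worker`, used by both worker variants above, is not
# defined in this section. A plausible sketch, assuming it mirrors the
# thread-pool fetch pattern of _get_blocks_fast below and that
# SimpleSteemAPIClient exposes get_block; treat the names and behavior here
# as assumptions, not the project's actual implementation:
import concurrent.futures

def block_fetcher_thread_worker(rpc_url, block_nums, max_threads=5):
    rpc = SimpleSteemAPIClient(rpc_url)  # assumed client, as used elsewhere
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_threads) as executor:
        for raw_block in executor.map(rpc.get_block, block_nums):
            # skip null results (e.g. from transient HTTP errors)
            if raw_block:
                yield raw_block
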
def bulk_add_blocks(ctx, blocks, chunksize):
    """Insert many blocks in the database"""
    engine = ctx.obj['engine']
    database_url = ctx.obj['database_url']
    metadata = ctx.obj['metadata']

    # init tables first
    init_tables(database_url, metadata)

    # configure session
    Session.configure(bind=engine)
    session = Session()

    click.echo("SQL: 'SET SESSION innodb_lock_wait_timeout=150'", err=True)
    session.execute('SET SESSION innodb_lock_wait_timeout=150')

    try:
        for chunk in chunkify(blocks, chunksize):
            bulk_add(chunk, session)
    except Exception:
        # bare raise preserves the original traceback
        raise
    finally:
        session.close_all()

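# `bulk_add` is called throughout this section but not defined in it. A
# minimal sketch, assuming raw blocks are mapped to Block ORM instances via
# a hypothetical Block.from_raw_block constructor (name assumed for
# illustration only) and written with SQLAlchemy's bulk insert:
def bulk_add(raw_blocks, session):
    try:
        session.bulk_save_objects(
            [Block.from_raw_block(raw) for raw in raw_blocks])  # hypothetical constructor
        session.commit()
    except Exception:
        session.rollback()
        raise
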
async def process_blocks(missing_block_nums,
                         url,
                         client,
                         pool,
                         db_meta,
                         blocks_pbar=None,
                         ops_pbar=None):
    CONCURRENCY_LIMIT = 5
    BATCH_SIZE = 100
    db_tables = db_meta.tables
    block_num_batches = chunkify(missing_block_nums, BATCH_SIZE)
    futures = (process_block_chunk(block_num_batch,
                                   url,
                                   client,
                                   pool,
                                   db_tables,
                                   blocks_pbar=blocks_pbar,
                                   ops_pbar=ops_pbar)
               for block_num_batch in block_num_batches)
    for results_future in as_completed_limit_concurrent(futures,
                                                        CONCURRENCY_LIMIT):
        results = await results_future

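# `as_completed_limit_concurrent` is not defined in this section. A minimal
# sketch of the assumed behavior: like asyncio.as_completed, but each input
# awaitable is wrapped in a semaphore so at most `limit` run at once:
import asyncio

def as_completed_limit_concurrent(futures, limit):
    sem = asyncio.Semaphore(limit)

    async def bounded(aw):
        async with sem:
            return await aw

    return asyncio.as_completed([bounded(aw) for aw in futures])
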
def _get_blocks_fast(start=None,
                     end=None,
                     chunksize=None,
                     max_workers=None,
                     rpc=None,
                     url=None):
    extra = dict(start=start,
                 end=end,
                 chunksize=chunksize,
                 max_workers=max_workers,
                 rpc=rpc,
                 url=url)
    logger.debug('get_blocks_fast', extra=extra)
    rpc = rpc or SimpleSteemAPIClient(url)
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_workers) as executor:
        for i, chunk in enumerate(
                chunkify(range(start, end), chunksize=chunksize), 1):
            logger.debug('get_block_fast loop', extra=dict(chunk_count=i))
            for b in executor.map(rpc.get_block, chunk):
                # don't yield anything when we encounter a null output
                # from an HTTP 503 error
                if b:
                    yield b

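# Example use of _get_blocks_fast; the endpoint URL is illustrative:
blocks = _get_blocks_fast(
    start=1, end=101, chunksize=10, max_workers=4,
    url='https://api.steemit.com')
print(sum(1 for _ in blocks))  # number of blocks actually fetched
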
def _populate(database_url, steemd_http_url, steemd_websocket_url, max_procs,
              max_threads):
    # pylint: disable=too-many-locals, too-many-statements
    rpc = SimpleSteemAPIClient(steemd_http_url)
    engine_config = configure_engine(database_url)

    db_name = engine_config.url.database
    db_user_name = engine_config.url.username

    Session.configure(bind=engine_config.engine)
    session = Session()

    # [1/7] confirm db connectivity
    task_message = fmt_task_message(
        'Confirm database connectivity',
        emoji_code_point=u'\U0001F4DE',
        counter=1)
    click.echo(task_message)

    url, table_count = test_connection(database_url)
    if url:
        success_msg = fmt_success_message(
            'connected to %s and found %s tables', repr(url), table_count)
        click.echo(success_msg)
    else:
        raise Exception('Unable to connect to database')
    del url
    del table_count

    # [2/7] kill existing db threads
    task_message = fmt_task_message(
        'Killing active db threads', emoji_code_point='\U0001F4A5', counter=2)
    click.echo(task_message)
    all_procs, killed_procs = kill_db_processes(database_url, db_name,
                                                db_user_name)
    if len(killed_procs) > 0:
        success_msg = fmt_success_message('killed %s processes',
                                          len(killed_procs))
        click.echo(success_msg)
    del all_procs
    del killed_procs

    # [3/7] init db if required
    task_message = fmt_task_message(
        'Initialising db if required',
        emoji_code_point=u'\U0001F50C',
        counter=3)
    click.echo(task_message)
    init_tables(database_url, Base.metadata)

    # [4/7] find last irreversible block
    last_chain_block = rpc.last_irreversible_block_num()
    task_message = fmt_task_message(
        'Finding highest blockchain block',
        emoji_code_point='\U0001F50E',
        counter=4)
    click.echo(task_message)
    success_msg = fmt_success_message(
        'learned highest irreversible block is %s', last_chain_block)
    click.echo(success_msg)

    # [5/7] get missing block_nums
    task_message = fmt_task_message(
        'Finding blocks missing from db',
        emoji_code_point=u'\U0001F52D',
        counter=5)
    click.echo(task_message)
    missing_block_nums_gen = Block.get_missing_block_num_iterator(
        session, last_chain_block, chunksize=100000)

    with click.progressbar(
            missing_block_nums_gen,
            label='Finding missing block_nums',
            color=True,
            show_eta=False,
            show_percent=False,
            empty_char='░',
            fill_char='█',
            show_pos=True,
            bar_template='%(bar)s %(info)s') as pbar:
        all_missing_block_nums = []
        for missing_gen in pbar:
            all_missing_block_nums.extend(missing_gen())

    success_msg = fmt_success_message('found %s missing blocks',
                                      len(all_missing_block_nums))
    click.echo(success_msg)
    del missing_block_nums_gen
    del pbar
    session.invalidate()

    # [6/7] adding missing blocks
    task_message = fmt_task_message(
        'Adding missing blocks to db, this may take a while',
        emoji_code_point=u'\U0001F4DD',
        counter=6)
    click.echo(task_message)

    max_workers = max_procs or os.cpu_count() or 1

    chunksize = len(all_missing_block_nums) // max_workers
    if chunksize <= 0:
        chunksize = 1

    map_func = partial(
        block_adder_process_worker,
        database_url,
        steemd_http_url,
        max_threads=max_threads)

    chunks = chunkify(all_missing_block_nums, 10000)

    with concurrent.futures.ProcessPoolExecutor(
            max_workers=max_workers) as executor:
        executor.map(map_func, chunks, chunksize=1)

    success_msg = fmt_success_message('added missing blocks')
    click.echo(success_msg)
    del all_missing_block_nums

    # [7/7] stream blocks
    task_message = fmt_task_message(
        'Streaming blocks', emoji_code_point=u'\U0001F4DD', counter=7)
    click.echo(task_message)

    highest_db_block = Block.highest_block(session)
    ws_rpc = SteemNodeRPC(steemd_websocket_url)
    blocks = ws_rpc.block_stream(highest_db_block)
    add_blocks(blocks, session)

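# fmt_task_message and fmt_success_message are not shown in this section.
# Minimal sketches consistent with their call sites; the step-number keyword
# appears as `task_num` in one snippet and `counter` in another, which these
# sketches treat as the same parameter under two names (an assumption):
def fmt_task_message(message, emoji_code_point=None, counter=None,
                     task_num=None):
    num = counter if counter is not None else task_num
    return '[{}] {} {}'.format(num, emoji_code_point or '', message)

def fmt_success_message(message, *args):
    # printf-style interpolation, matching calls like
    # fmt_success_message('found %s missing blocks', n)
    return message % args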