def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri,
                                   max_workers, blocks_output, transactions_output,
                                   chain='ethereum'):
    """Exports blocks and transactions."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    if blocks_output is None and transactions_output is None:
        raise ValueError(
            'Either --blocks-output or --transactions-output options must be provided')

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=blocks_and_transactions_item_exporter(blocks_output, transactions_output),
        export_blocks=blocks_output is not None,
        export_transactions=transactions_output is not None)
    job.run()
def export_blocks_and_transactions(
    self,
    start_block: int,
    end_block: int,
    export_blocks: bool = True,
    export_transactions: bool = True,
) -> Tuple[Iterable, Iterable]:
    """Export blocks and transactions for the specified block range."""
    blocks_and_transactions_item_exporter = InMemoryItemExporter(
        item_types=["block", "transaction"]
    )
    blocks_and_transactions_job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=self.batch_size,
        batch_web3_provider=self.batch_web3_provider,
        max_workers=self.max_workers,
        item_exporter=blocks_and_transactions_item_exporter,
        export_blocks=export_blocks,
        export_transactions=export_transactions,
    )
    blocks_and_transactions_job.run()

    blocks = blocks_and_transactions_item_exporter.get_items("block")
    transactions = blocks_and_transactions_item_exporter.get_items("transaction")
    return blocks, transactions
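# A minimal usage sketch for the method above, assuming it is defined on an
# adapter class that stores batch_size, batch_web3_provider, and max_workers
# on self. The EthStreamerAdapter name, constructor arguments, provider URI,
# and block numbers below are illustrative assumptions, not confirmed API;
# the import paths are the ones ethereum-etl uses for these helpers.
from blockchainetl.thread_local_proxy import ThreadLocalProxy
from ethereumetl.providers.auto import get_provider_from_uri

adapter = EthStreamerAdapter(  # hypothetical host class carrying the method
    batch_size=100,
    batch_web3_provider=ThreadLocalProxy(
        lambda: get_provider_from_uri('http://localhost:8545', batch=True)),
    max_workers=5)
blocks, transactions = adapter.export_blocks_and_transactions(
    start_block=1000000, end_block=1000099)
print(len(list(blocks)), 'blocks,', len(list(transactions)), 'transactions')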
def extractBlockData():
    # Poll the MongoDB config document and export any newly mined blocks.
    while True:
        blockConfig = eth_config.find_one({'config_id': 1})
        export_flag = blockConfig["export_flag"]
        if export_flag is False:
            blockid = blockConfig["blockid"]
            print(blockid)
            web3 = ThreadLocalProxy(
                lambda: Web3(IPCProvider(geth_ipc, timeout=300)))
            # web3 = ThreadLocalProxy(lambda: Web3(HTTPProvider(http_address)))
            blockidNow = web3.eth.blockNumber
            print(blockidNow)
            if blockidNow > blockid:
                # Mark the range as in progress and persist the new cursor
                # before starting the export.
                blockConfig["export_flag"] = True
                blockid += 1
                blockConfig["blockid"] = blockid
                t = time.time()
                blockConfig['timestamp'] = int(round(t * 1000))
                eth_config.save(blockConfig)
                job = ExportBlocksJob(
                    start_block=blockid,
                    end_block=blockidNow,
                    batch_size=100,
                    ipc_wrapper=ThreadLocalProxy(
                        lambda: IPCWrapper(geth_ipc, timeout=300)),
                    web3=ThreadLocalProxy(
                        lambda: Web3(IPCProvider(geth_ipc, timeout=300))),
                    max_workers=5,
                    blocks_output="",
                    transactions_output="")
                job.run()
        time.sleep(3)
def test_export_blocks_job(tmpdir, start_block, end_block, batch_size, resource_group,
                           web3_provider_type):
    blocks_output_file = str(tmpdir.join('actual_blocks.csv'))
    transactions_output_file = str(tmpdir.join('actual_transactions.csv'))

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_web3_provider(
            web3_provider_type,
            lambda file: read_resource(resource_group, file),
            batch=True)),
        max_workers=5,
        item_exporter=blocks_and_transactions_item_exporter(
            blocks_output_file, transactions_output_file),
        export_blocks=blocks_output_file is not None,
        export_transactions=transactions_output_file is not None)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_blocks.csv'),
        read_file(blocks_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_transactions.csv'),
        read_file(transactions_output_file))
def test_export_blocks_job(tmpdir, start_block, end_block, batch_size, resource_group):
    blocks_output_file = tmpdir.join('actual_blocks.csv')
    transactions_output_file = tmpdir.join('actual_transactions.csv')

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        ipc_wrapper=ThreadLocalProxy(lambda: MockIPCWrapper(
            lambda file: read_resource(resource_group, file))),
        max_workers=5,
        item_exporter=export_blocks_job_item_exporter(
            blocks_output_file, transactions_output_file),
        export_blocks=blocks_output_file is not None,
        export_transactions=transactions_output_file is not None)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_blocks.csv'),
        read_file(blocks_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_transactions.csv'),
        read_file(transactions_output_file))
def _export_blocks_and_transactions(self, start_block, end_block):
    blocks_and_transactions_item_exporter = InMemoryItemExporter(
        item_types=['block', 'transaction'])
    blocks_and_transactions_job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=self.batch_size,
        batch_web3_provider=self.batch_web3_provider,
        max_workers=self.max_workers,
        item_exporter=blocks_and_transactions_item_exporter,
        export_blocks=self._should_export(EntityType.BLOCK),
        export_transactions=self._should_export(EntityType.TRANSACTION)
    )
    blocks_and_transactions_job.run()
    blocks = blocks_and_transactions_item_exporter.get_items('block')
    transactions = blocks_and_transactions_item_exporter.get_items('transaction')
    return blocks, transactions
parser.add_argument(
    '--blocks-output',
    default=None,
    type=str,
    help='The output file for blocks. If not provided, blocks will not be exported. '
         'Use "-" for stdout.')
parser.add_argument(
    '--transactions-output',
    default=None,
    type=str,
    help='The output file for transactions. If not provided, transactions will not be exported. '
         'Use "-" for stdout.')

args = parser.parse_args()

job = ExportBlocksJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    ipc_wrapper=ThreadLocalProxy(
        lambda: IPCWrapper(args.ipc_path, timeout=args.ipc_timeout)),
    max_workers=args.max_workers,
    item_exporter=export_blocks_job_item_exporter(args.blocks_output, args.transactions_output),
    export_blocks=args.blocks_output is not None,
    export_transactions=args.transactions_output is not None)
job.run()
def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_size):
    for batch_start_block, batch_end_block, partition_dir in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info('Exporting transactions from blocks {block_range} to {transactions_file}'.format(
            block_range=block_range,
            transactions_file=transactions_file,
        ))

        job = ExportBlocksJob(
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        # # # token_transfers # # #

        token_transfers_file = None
        if is_log_filter_supported(provider_uri):
            token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(token_transfers_output_dir), exist_ok=True)

            token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
                token_transfers_output_dir=token_transfers_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info('Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'.format(
                block_range=block_range,
                token_transfers_file=token_transfers_file,
            ))

            job = ExportTokenTransfersJob(
                start_block=batch_start_block,
                end_block=batch_end_block,
                batch_size=batch_size,
                web3=ThreadLocalProxy(
                    lambda: Web3(get_provider_from_uri(provider_uri))),
                item_exporter=token_transfers_item_exporter(token_transfers_file),
                max_workers=max_workers)
            job.run()

        # # # receipts_and_logs # # #

        cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)

        transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Extracting hash column from transaction file {transactions_file}'.format(
            transactions_file=transactions_file,
        ))
        extract_csv_column_unique(transactions_file, transaction_hashes_file, 'hash')

        receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)

        logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)

        receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
            receipts_output_dir=receipts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
            logs_output_dir=logs_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'.format(
            block_range=block_range,
            receipts_file=receipts_file,
            logs_file=logs_file,
        ))

        with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
            job = ExportReceiptsJob(
                transaction_hashes_iterable=(
                    transaction_hash.strip() for transaction_hash in transaction_hashes),
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                max_workers=max_workers,
                item_exporter=receipts_and_logs_item_exporter(receipts_file, logs_file),
                export_receipts=receipts_file is not None,
                export_logs=logs_file is not None)
            job.run()

        # # # contracts # # #

        contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Extracting contract_address from receipt file {receipts_file}'.format(
            receipts_file=receipts_file))
        extract_csv_column_unique(receipts_file, contract_addresses_file, 'contract_address')

        contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)

        contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
            contracts_output_dir=contracts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting contracts from blocks {block_range} to {contracts_file}'.format(
            block_range=block_range,
            contracts_file=contracts_file,
        ))

        with smart_open(contract_addresses_file, 'r') as contract_addresses_file:
            contract_addresses = (
                contract_address.strip() for contract_address in contract_addresses_file
                if contract_address.strip())
            job = ExportContractsJob(
                contract_addresses_iterable=contract_addresses,
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                item_exporter=contracts_item_exporter(contracts_file),
                max_workers=max_workers)
            job.run()

        # # # tokens # # #

        if token_transfers_file is not None:
            token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
                cache_output_dir=cache_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info('Extracting token_address from token_transfers file {token_transfers_file}'.format(
                token_transfers_file=token_transfers_file,
            ))
            extract_csv_column_unique(token_transfers_file, token_addresses_file, 'token_address')

            tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)

            tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
                tokens_output_dir=tokens_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info('Exporting tokens from blocks {block_range} to {tokens_file}'.format(
                block_range=block_range,
                tokens_file=tokens_file,
            ))

            with smart_open(token_addresses_file, 'r') as token_addresses:
                job = ExportTokensJob(
                    token_addresses_iterable=(
                        token_address.strip() for token_address in token_addresses),
                    web3=ThreadLocalProxy(
                        lambda: Web3(get_provider_from_uri(provider_uri))),
                    item_exporter=tokens_item_exporter(tokens_file),
                    max_workers=max_workers)
                job.run()

        # # # finish # # #

        shutil.rmtree(os.path.dirname(cache_output_dir))
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info('Exporting blocks {block_range} took {time_diff} seconds'.format(
            block_range=block_range,
            time_diff=time_diff,
        ))
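# export_all_common expects `partitions` to yield
# (batch_start_block, batch_end_block, partition_dir) tuples, where
# partition_dir is appended to each output path. A minimal sketch of such a
# generator, assuming fixed-size batches and Hive-style partition directories;
# the helper name and directory layout below are illustrative assumptions,
# not the library's own partitioning logic.
def fixed_size_partitions(start_block, end_block, partition_batch_size=10000):
    # Yield (batch_start, batch_end, partition_dir) tuples for export_all_common.
    for batch_start in range(start_block, end_block + 1, partition_batch_size):
        batch_end = min(batch_start + partition_batch_size - 1, end_block)
        partition_dir = '/start_block={start}/end_block={end}'.format(
            start=str(batch_start).zfill(8),
            end=str(batch_end).zfill(8))
        yield batch_start, batch_end, partition_dir

# Example invocation (URI and ranges are placeholder values):
export_all_common(
    partitions=fixed_size_partitions(0, 99999),
    output_dir='output',
    provider_uri='http://localhost:8545',
    max_workers=5,
    batch_size=100)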
parser.add_argument(
    '--blocks-output',
    default=None,
    type=str,
    help='The output file for blocks. If not provided, blocks will not be exported. '
         'Use "-" for stdout.')
parser.add_argument(
    '--transactions-output',
    default=None,
    type=str,
    help='The output file for transactions. If not provided, transactions will not be exported. '
         'Use "-" for stdout.')

args = parser.parse_args()

job = ExportBlocksJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    batch_web3_provider=ThreadLocalProxy(
        lambda: get_provider_from_uri(args.provider_uri, batch=True)),
    max_workers=args.max_workers,
    item_exporter=blocks_and_transactions_item_exporter(
        args.blocks_output, args.transactions_output),
    export_blocks=args.blocks_output is not None,
    export_transactions=args.transactions_output is not None)
job.run()
parser.add_argument(
    '--max-workers',
    default=5,
    type=int,
    help='The maximum number of workers.')
parser.add_argument(
    '--blocks-output',
    default=None,
    type=str,
    help='The output file for blocks. If not provided, blocks will not be exported. '
         'Use "-" for stdout.')
parser.add_argument(
    '--transactions-output',
    default=None,
    type=str,
    help='The output file for transactions. If not provided, transactions will not be exported. '
         'Use "-" for stdout.')

args = parser.parse_args()

job = ExportBlocksJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    ipc_wrapper=ThreadLocalProxy(
        lambda: IPCWrapper(args.ipc_path, args.ipc_timeout)),
    max_workers=args.max_workers,
    blocks_output=args.blocks_output,
    transactions_output=args.transactions_output)
job.run()