def export_contracts(batch_size, receipts, output, max_workers, provider_uri, chain='ethereum'):
    """Exports contracts bytecode and sighashes.

    :param batch_size: number of contracts per batch RPC request
    :param receipts: path to a JSON-lines receipts file (one receipt object per line)
    :param output: destination for the exported contract items
    :param max_workers: number of worker threads
    :param provider_uri: URI of the web3 provider endpoint
    :param chain: chain name; validated against classic provider URIs
    """
    check_classic_provider_uri(chain, provider_uri)
    with smart_open(receipts, 'r') as receipts_file:
        # Parse each receipt line exactly once; the previous version called
        # json.loads three times per line (two field reads + the filter).
        parsed_receipts = (json.loads(line) for line in receipts_file)
        contracts_iterable = (
            {
                "contract_address": receipt["contract_address"].strip(),
                "block_number": receipt["block_number"],
            }
            for receipt in parsed_receipts
            # Receipts of plain transfers have no contract_address; skip them.
            if receipt["contract_address"] is not None
        )
        job = ExportContractsJob(
            contracts_iterable=contracts_iterable,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            item_exporter=contracts_item_exporter(output),
            max_workers=max_workers)
        job.run()
def export_events(start_block, end_block, block_list, provider_uri, batch_size, max_workers,
                  event_hash, events_output, timeout=60, chain='ethereum'):
    """Exports events matching event_hash, for a block range or an explicit block list.

    :param start_block: first block of the range (used with end_block)
    :param end_block: last block of the range, inclusive; mutually usable with block_list
    :param block_list: colon-separated block numbers, e.g. "1:5:9"
    :param provider_uri: URI of the web3 provider endpoint
    :param batch_size: number of blocks per batch RPC request
    :param max_workers: number of worker threads
    :param event_hash: topic hash of the event to export
    :param events_output: destination for the exported event items
    :param timeout: RPC request timeout in seconds
    :param chain: chain name; validated against classic provider URIs
    :raises ValueError: when neither end_block nor block_list is provided
    """
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    if end_block is not None:
        validate_range(start_block, end_block)
        block_iterator = range(start_block, end_block + 1)
    elif block_list is not None:
        # Set comprehension instead of set([...]) — avoids building a throwaway
        # list (ruff C403) and de-duplicates repeated block numbers as before.
        block_iterator = {int(block) for block in block_list.split(':')}
    else:
        raise ValueError(
            'Either --end-block or --block-list options must be provided')
    job = ExportEventsJob(
        block_iterator=block_iterator,
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(
            provider_uri, timeout=timeout, batch=True)),
        batch_size=batch_size,
        max_workers=max_workers,
        item_exporter=events_item_exporter(events_output),
        event_hash=event_hash)
    job.run()
def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers,
                                   blocks_output, transactions_output, chain='ethereum'):
    """Exports blocks and transactions."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    # At least one output target must be given, otherwise there is nothing to do.
    if blocks_output is None and transactions_output is None:
        raise ValueError(
            'Either --blocks-output or --transactions-output options must be provided'
        )
    item_exporter = blocks_and_transactions_item_exporter(blocks_output, transactions_output)
    batch_provider = ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True))
    export_job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=batch_provider,
        max_workers=max_workers,
        item_exporter=item_exporter,
        # Each entity is exported only when its output target was supplied.
        export_blocks=blocks_output is not None,
        export_transactions=transactions_output is not None)
    export_job.run()
def _extract_tokens(self, contracts):
    """Run ExtractTokensJob over the given contracts and return the token items."""
    in_memory_exporter = InMemoryItemExporter(item_types=['token'])
    w3 = Web3(self.batch_web3_provider)
    w3.middleware_stack.inject(geth_poa_middleware, layer=0)
    extract_job = ExtractTokensJob(
        contracts_iterable=contracts,
        web3=ThreadLocalProxy(lambda: w3),
        max_workers=self.max_workers,
        item_exporter=in_memory_exporter)
    extract_job.run()
    return in_memory_exporter.get_items('token')
def export_geth_traces(start_block, end_block, batch_size, output, max_workers, provider_uri, timeout):
    """Exports traces from geth node."""
    # Thread-local provider so each worker thread gets its own batch RPC connection.
    batch_provider = ThreadLocalProxy(
        lambda: get_provider_from_uri(provider_uri, timeout=timeout, batch=True))
    traces_job = ExportGethTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=batch_provider,
        max_workers=max_workers,
        item_exporter=geth_traces_item_exporter(output))
    traces_job.run()
def export_receipts_and_logs(batch_size, transactions, provider_uri, max_workers,
                             receipts_output, logs_output, chain='ethereum'):
    """Exports receipts and logs."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(transactions, 'r') as transactions_file:
        # Each input line is a JSON transaction; only its 'hash' field is needed.
        hash_iterable = (json.loads(line)['hash'].strip() for line in transactions_file)
        receipts_job = ExportReceiptsJob(
            transaction_hashes_iterable=hash_iterable,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=receipts_and_logs_item_exporter(receipts_output, logs_output),
            # Each entity is exported only when its output target was supplied.
            export_receipts=receipts_output is not None,
            export_logs=logs_output is not None)
        receipts_job.run()
def _export_traces(self, start_block, end_block):
    """Run ExportTracesJob over the block range and return the trace items."""
    in_memory_exporter = InMemoryItemExporter(item_types=['trace'])
    w3 = Web3(self.batch_web3_provider)
    w3.middleware_stack.inject(geth_poa_middleware, layer=0)
    traces_job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=self.batch_size,
        web3=ThreadLocalProxy(lambda: w3),
        max_workers=self.max_workers,
        item_exporter=in_memory_exporter)
    traces_job.run()
    return in_memory_exporter.get_items('trace')
def export_token_transfers(start_block, end_block, batch_size, output, max_workers,
                           provider_uri, tokens):
    """Exports ERC20/ERC721 transfers."""
    w3 = Web3(get_provider_from_uri(provider_uri))
    # Inject the geth PoA middleware at layer 0, matching the other exporters.
    w3.middleware_stack.inject(geth_poa_middleware, layer=0)
    transfers_job = ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: w3),
        item_exporter=token_transfers_item_exporter(output),
        max_workers=max_workers,
        tokens=tokens)
    transfers_job.run()
def extract_tokens(contracts, provider_uri, output, max_workers):
    """Extracts tokens from contracts file."""
    set_max_field_size_limit()
    w3 = Web3(get_provider_from_uri(provider_uri))
    w3.middleware_stack.inject(geth_poa_middleware, layer=0)
    with smart_open(contracts, 'r') as contracts_file:
        # A '.json' file is read as JSON lines; any other extension is parsed as CSV.
        if contracts.endswith('.json'):
            contract_rows = (json.loads(row) for row in contracts_file)
        else:
            contract_rows = csv.DictReader(contracts_file)
        extract_job = ExtractTokensJob(
            contracts_iterable=contract_rows,
            web3=ThreadLocalProxy(lambda: w3),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output))
        extract_job.run()
def export_traces(start_block, end_block, batch_size, output, max_workers, provider_uri,
                  genesis_traces, daofork_traces, timeout=60, chain='ethereum'):
    """Exports traces from parity node.

    :param start_block: first block of the range
    :param end_block: last block of the range, inclusive
    :param batch_size: number of blocks per batch request
    :param output: destination for the exported trace items
    :param max_workers: number of worker threads
    :param provider_uri: URI of the web3 provider endpoint
    :param genesis_traces: whether to include synthetic genesis traces
    :param daofork_traces: whether to include synthetic DAO-fork traces
    :param timeout: RPC request timeout in seconds
    :param chain: chain name; 'classic' forbids daofork traces
    :raises ValueError: when daofork traces are requested for the classic chain
    """
    # Truthiness check replaces the non-idiomatic `daofork_traces == True` (PEP 8 E712).
    if chain == 'classic' and daofork_traces:
        raise ValueError(
            'Classic chain does not include daofork traces. Disable daofork traces with --no-daofork-traces option.')
    web3 = Web3(get_provider_from_uri(provider_uri, timeout=timeout))
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: web3),
        item_exporter=traces_item_exporter(output),
        max_workers=max_workers,
        include_genesis_traces=genesis_traces,
        include_daofork_traces=daofork_traces)
    job.run()
def stream(last_synced_block_file, lag, provider_uri, domain, output, start_block, entity_types,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5,
           log_file=None, pid_file=None):
    """Streams all data types to console or Google Pub/Sub."""
    configure_logging(log_file)
    configure_signals()
    entity_types = parse_entity_types(entity_types)
    # Lazy imports: streaming dependencies are loaded only when this command runs.
    from blockchainetl.streaming.streaming_utils import get_item_exporter
    from klaytnetl.streaming.eth_streamer_adapter import EthStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer
    adapter = EthStreamerAdapter(
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(provider_uri, batch=True)),
        item_exporter=get_item_exporter(domain, output),
        batch_size=batch_size,
        max_workers=max_workers,
        entity_types=entity_types)
    streamer = Streamer(
        blockchain_streamer_adapter=adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file)
    streamer.stream()
def export_tokens(contracts, output, max_workers, provider_uri, chain='ethereum'):
    """Exports ERC20/ERC721 tokens."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    w3 = Web3(get_provider_from_uri(provider_uri))
    w3.middleware_stack.inject(geth_poa_middleware, layer=0)
    with smart_open(contracts, 'r') as contracts_file:
        parsed_contracts = (json.loads(line) for line in contracts_file)
        # Keep only token contracts; everything else is skipped.
        tokens_iterable = (
            {
                "contract_address": contract["address"].strip(),
                "block_number": contract["block_number"],
            }
            for contract in parsed_contracts
            if contract["is_erc20"] or contract["is_erc721"]
        )
        tokens_job = ExportTokensJob(
            tokens_iterable=tokens_iterable,
            web3=ThreadLocalProxy(lambda: w3),
            item_exporter=tokens_item_exporter(output),
            max_workers=max_workers)
        tokens_job.run()
def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_size):
    """Runs the full export pipeline for each partition of blocks.

    For every (start_block, end_block, partition_dir) triple this exports, in order:
    blocks+transactions, token transfers (when the node supports log filters),
    receipts+logs, contracts, and tokens, writing CSV files under output_dir and
    using a .tmp cache directory for intermediate column extractions.

    :param partitions: iterable of (batch_start_block, batch_end_block, partition_dir)
    :param output_dir: root directory for all exported CSV files
    :param provider_uri: URI of the web3 provider endpoint
    :param max_workers: number of worker threads per job
    :param batch_size: batch size passed to the batch-capable jobs
    """
    for batch_start_block, batch_end_block, partition_dir in partitions:
        # # # start # # #
        start_time = time()
        # Zero-padded block numbers keep file names lexicographically sortable.
        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        # # # blocks_and_transactions # # #
        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        # NOTE(review): dirname() creates the PARENT of blocks_output_dir, not the
        # directory itself — presumably partition_dir contributes trailing path
        # components so the parent is what must exist; confirm against callers.
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)
        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)
        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info('Exporting transactions from blocks {block_range} to {transactions_file}'.format(
            block_range=block_range,
            transactions_file=transactions_file,
        ))
        job = ExportBlocksJob(
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(blocks_file, transactions_file),
            # Both file paths are always set above, so both flags are always True here.
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()
        # # # token_transfers # # #
        # Remains None when the provider lacks log-filter support; the tokens
        # section below is skipped in that case.
        token_transfers_file = None
        if is_log_filter_supported(provider_uri):
            token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(token_transfers_output_dir), exist_ok=True)
            token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
                token_transfers_output_dir=token_transfers_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info('Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'.format(
                block_range=block_range,
                token_transfers_file=token_transfers_file,
            ))
            web3 = Web3(get_provider_from_uri(provider_uri))
            web3.middleware_stack.inject(geth_poa_middleware, layer=0)
            job = ExportTokenTransfersJob(
                start_block=batch_start_block,
                end_block=batch_end_block,
                batch_size=batch_size,
                web3=ThreadLocalProxy(lambda: web3),
                item_exporter=token_transfers_item_exporter(token_transfers_file),
                max_workers=max_workers)
            job.run()
        # # # receipts_and_logs # # #
        # Intermediate column extractions live in a per-partition .tmp directory
        # that is deleted in the finish section.
        cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)
        transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Extracting hash column from transaction file {transactions_file}'.format(
            transactions_file=transactions_file,
        ))
        # Unique transaction hashes feed the receipts export.
        extract_csv_column_unique(transactions_file, transaction_hashes_file, 'hash')
        receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)
        logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)
        receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
            receipts_output_dir=receipts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
            logs_output_dir=logs_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'.format(
            block_range=block_range,
            receipts_file=receipts_file,
            logs_file=logs_file,
        ))
        with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
            job = ExportReceiptsJob(
                transaction_hashes_iterable=(transaction_hash.strip() for transaction_hash in transaction_hashes),
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
                max_workers=max_workers,
                item_exporter=receipts_and_logs_item_exporter(receipts_file, logs_file),
                export_receipts=receipts_file is not None,
                export_logs=logs_file is not None)
            job.run()
        # # # contracts # # #
        contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Extracting contract_address from receipt file {receipts_file}'.format(
            receipts_file=receipts_file
        ))
        extract_csv_column_unique(receipts_file, contract_addresses_file, 'contract_address')
        contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)
        contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
            contracts_output_dir=contracts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting contracts from blocks {block_range} to {contracts_file}'.format(
            block_range=block_range,
            contracts_file=contracts_file,
        ))
        # NOTE(review): the `as` target shadows the contract_addresses_file path
        # variable with the open file object inside this block.
        with smart_open(contract_addresses_file, 'r') as contract_addresses_file:
            contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
                                  if contract_address.strip())
            job = ExportContractsJob(
                contract_addresses_iterable=contract_addresses,
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
                item_exporter=contracts_item_exporter(contracts_file),
                max_workers=max_workers)
            job.run()
        # # # tokens # # #
        if token_transfers_file is not None:
            # NOTE(review): unlike the other cache files, this path has no '.csv'
            # extension — possibly intentional, but worth confirming.
            token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
                cache_output_dir=cache_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info('Extracting token_address from token_transfers file {token_transfers_file}'.format(
                token_transfers_file=token_transfers_file,
            ))
            extract_csv_column_unique(token_transfers_file, token_addresses_file, 'token_address')
            tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)
            tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
                tokens_output_dir=tokens_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info('Exporting tokens from blocks {block_range} to {tokens_file}'.format(
                block_range=block_range,
                tokens_file=tokens_file,
            ))
            web3 = Web3(get_provider_from_uri(provider_uri))
            web3.middleware_stack.inject(geth_poa_middleware, layer=0)
            with smart_open(token_addresses_file, 'r') as token_addresses:
                job = ExportTokensJob(
                    token_addresses_iterable=(token_address.strip() for token_address in token_addresses),
                    web3=ThreadLocalProxy(lambda: web3),
                    item_exporter=tokens_item_exporter(tokens_file),
                    max_workers=max_workers)
                job.run()
        # # # finish # # #
        # Drop the intermediate cache; dirname() mirrors the makedirs calls above.
        shutil.rmtree(os.path.dirname(cache_output_dir))
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info('Exporting blocks {block_range} took {time_diff} seconds'.format(
            block_range=block_range,
            time_diff=time_diff,
        ))