def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers,
                                   blocks_output, transactions_output, chain='ethereum'):
    """Exports blocks and transactions."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    if blocks_output is None and transactions_output is None:
        raise ValueError('Either --blocks-output or --transactions-output options must be provided')

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=blocks_and_transactions_item_exporter(blocks_output, transactions_output),
        export_blocks=blocks_output is not None,
        export_transactions=transactions_output is not None)
    job.run()
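
# Usage sketch (not part of the module; the endpoint URI and file names below
# are placeholders): export a small block range to CSV files.
#
#   export_blocks_and_transactions(
#       start_block=0,
#       end_block=99,
#       batch_size=10,
#       provider_uri='https://example-node:8545',
#       max_workers=5,
#       blocks_output='blocks.csv',
#       transactions_output='transactions.csv')
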
def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
    """Outputs start and end blocks for given date."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    provider = get_provider_from_uri(provider_uri)
    web3 = Web3(provider)
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))
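
# Usage sketch (placeholder endpoint; assumes the date argument is a
# datetime.date, as consumed by EthService.get_block_range_for_date):
#
#   from datetime import date
#   get_block_range_for_date('https://example-node:8545', date(2021, 1, 1), 'block_range.txt')
#
# The output file then contains a single 'start_block,end_block' line.
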
def get_partitions(start, end, partition_batch_size, provider_uri):
    """Yield partitions based on input data type."""
    if is_date_range(start, end) or is_unix_time_range(start, end):
        if is_date_range(start, end):
            start_date = datetime.strptime(start, '%Y-%m-%d').date()
            end_date = datetime.strptime(end, '%Y-%m-%d').date()
        elif is_unix_time_range(start, end):
            # 10-digit Unix timestamps are seconds; 13-digit ones are milliseconds.
            if len(start) == 10 and len(end) == 10:
                start_date = datetime.utcfromtimestamp(int(start)).date()
                end_date = datetime.utcfromtimestamp(int(end)).date()
            elif len(start) == 13 and len(end) == 13:
                start_date = datetime.utcfromtimestamp(int(start) / 1e3).date()
                end_date = datetime.utcfromtimestamp(int(end) / 1e3).date()

        day = timedelta(days=1)

        provider = get_provider_from_uri(provider_uri)
        web3 = Web3(provider)
        web3.middleware_stack.inject(geth_poa_middleware, layer=0)
        eth_service = EthService(web3)

        # One partition per calendar day, with block bounds resolved on-chain.
        while start_date <= end_date:
            batch_start_block, batch_end_block = eth_service.get_block_range_for_date(start_date)
            partition_dir = '/date={start_date!s}/'.format(start_date=start_date)
            yield batch_start_block, batch_end_block, partition_dir
            start_date += day

    elif is_block_range(start, end):
        start_block = int(start)
        end_block = int(end)

        # One partition per fixed-size block batch, zero-padded for lexical sorting.
        for batch_start_block in range(start_block, end_block + 1, partition_batch_size):
            batch_end_block = batch_start_block + partition_batch_size - 1
            if batch_end_block > end_block:
                batch_end_block = end_block

            padded_batch_start_block = str(batch_start_block).zfill(8)
            padded_batch_end_block = str(batch_end_block).zfill(8)
            partition_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
                padded_batch_start_block=padded_batch_start_block,
                padded_batch_end_block=padded_batch_end_block,
            )
            yield batch_start_block, batch_end_block, partition_dir

    else:
        raise ValueError('start and end must be either block numbers or ISO dates or Unix times')
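
# Sketch of the two partitioning modes (the date-mode block numbers are
# illustrative, not real chain data; the endpoint is a placeholder):
#
#   # Date range: one partition per day, bounds resolved via EthService, e.g.
#   #   (11565019, 11571590, '/date=2021-01-01/')
#   for start, end, part_dir in get_partitions('2021-01-01', '2021-01-02', 10000, 'https://example-node:8545'):
#       print(start, end, part_dir)
#
#   # Block range: fixed-size batches with zero-padded directory names, e.g.
#   #   (0, 9999, '/start_block=00000000/end_block=00009999')
#   for start, end, part_dir in get_partitions('0', '19999', 10000, 'https://example-node:8545'):
#       print(start, end, part_dir)
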
def export_token_transfers(start_block, end_block, batch_size, output, max_workers, provider_uri, tokens):
    """Exports ERC20/ERC721 transfers."""
    web3 = Web3(get_provider_from_uri(provider_uri))
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)

    job = ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: web3),
        item_exporter=token_transfers_item_exporter(output),
        max_workers=max_workers,
        tokens=tokens)
    job.run()
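
# Usage sketch (placeholder endpoint and paths): the tokens argument is
# forwarded to ExportTokenTransfersJob as a filter; passing None is assumed to
# export transfers for all tokens.
#
#   export_token_transfers(
#       start_block=0,
#       end_block=99,
#       batch_size=10,
#       output='token_transfers.csv',
#       max_workers=5,
#       provider_uri='https://example-node:8545',
#       tokens=None)
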
def extract_tokens(contracts, provider_uri, output, max_workers):
    """Extracts tokens from contracts file."""
    # Raise the csv module's field size limit so rows with large bytecode fields parse.
    set_max_field_size_limit()

    web3 = Web3(get_provider_from_uri(provider_uri))
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)

        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(lambda: web3),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output))
        job.run()
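
# Usage sketch (placeholder paths and endpoint): the input format is inferred
# from the file extension, so both JSON-lines and CSV contract files work.
#
#   extract_tokens('contracts.json', 'https://example-node:8545', 'tokens.csv', 5)
#   extract_tokens('contracts.csv', 'https://example-node:8545', 'tokens.csv', 5)
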
def stream(last_synced_block_file, lag, provider_uri, domain, output, start_block, entity_types,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None,
           pid_file=None):
    """Streams all data types to console or Google Pub/Sub."""
    configure_logging(log_file)
    configure_signals()
    entity_types = parse_entity_types(entity_types)

    # Imported at call time so the streaming dependencies are only required by this command.
    from blockchainetl.streaming.streaming_utils import get_item_exporter
    from klaytnetl.streaming.eth_streamer_adapter import EthStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer

    streamer_adapter = EthStreamerAdapter(
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        item_exporter=get_item_exporter(domain, output),
        batch_size=batch_size,
        max_workers=max_workers,
        entity_types=entity_types)
    streamer = Streamer(
        blockchain_streamer_adapter=streamer_adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file)
    streamer.stream()
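
# Usage sketch (illustrative values; whether output=None selects the console
# exporter depends on get_item_exporter, and entity_types is assumed to be a
# comma-separated string accepted by parse_entity_types): stream while staying
# 4 blocks behind the chain head to reduce exposure to reorgs.
#
#   stream(
#       last_synced_block_file='last_synced_block.txt',
#       lag=4,
#       provider_uri='https://example-node:8545',
#       domain=None,
#       output=None,
#       start_block=None,
#       entity_types='block,transaction')
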
def export_tokens(contracts, output, max_workers, provider_uri, chain='ethereum'):
    """Exports ERC20/ERC721 tokens."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    web3 = Web3(get_provider_from_uri(provider_uri))
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)

    with smart_open(contracts, 'r') as contracts_file:
        tokens_iterable = (
            {
                "contract_address": contract["address"].strip(),
                "block_number": contract["block_number"],
            }
            for contract in (json.loads(line) for line in contracts_file)
            if contract["is_erc20"] or contract["is_erc721"]
        )

        job = ExportTokensJob(
            tokens_iterable=tokens_iterable,
            web3=ThreadLocalProxy(lambda: web3),
            item_exporter=tokens_item_exporter(output),
            max_workers=max_workers)
        job.run()
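
# Usage sketch (placeholder paths and endpoint): contracts must be a JSON-lines
# file whose objects carry 'address', 'block_number', 'is_erc20' and 'is_erc721'
# fields, such as the output of a contracts export.
#
#   export_tokens('contracts.json', 'tokens.csv', 5, 'https://example-node:8545')
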