def export_tokens(token_addresses, output, max_workers, provider_uri, chain='ethereum'):
    """Exports ERC20/ERC721 tokens."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(token_addresses, 'r') as token_addresses_file:
        job = ExportTokensJob(
            token_addresses_iterable=(token_address.strip() for token_address in token_addresses_file),
            web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
            item_exporter=tokens_item_exporter(output),
            max_workers=max_workers)
        job.run()

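# The ThreadLocalProxy(lambda: ...) pattern used throughout these jobs gives
# each worker thread its own Web3/provider instance, since provider
# connections are not thread-safe. A minimal sketch of the idea (the real
# class ships with this codebase; attribute names below are illustrative):
import threading

class ThreadLocalProxy:
    def __init__(self, delegate_factory):
        self._delegate_factory = delegate_factory
        self._local = threading.local()

    def __getattr__(self, name):
        # Lazily create one delegate per thread, then forward attribute access.
        if not hasattr(self._local, 'delegate'):
            self._local.delegate = self._delegate_factory()
        return getattr(self._local.delegate, name)
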
def _extract_tokens(self, contracts):
    exporter = InMemoryItemExporter(item_types=['token'])
    # middleware_stack.inject() returns None, so the Web3 instance must be
    # configured before the factory returns it; build_web3 (as used in
    # extract_tokens below) injects geth_poa_middleware and returns the
    # configured instance.
    job = ExtractTokensJob(
        contracts_iterable=contracts,
        web3=ThreadLocalProxy(lambda: build_web3(self.batch_web3_provider)),
        max_workers=self.max_workers,
        item_exporter=exporter)
    job.run()
    tokens = exporter.get_items('token')
    return tokens

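# build_web3 is assumed to follow the usual pattern in this codebase:
# construct the Web3 instance, inject the PoA middleware (needed for chains
# whose extraData field fails default validation), and return it. A minimal
# sketch against the web3.py v4 API:
from web3 import Web3
from web3.middleware import geth_poa_middleware

def build_web3(provider):
    w3 = Web3(provider)
    w3.middleware_stack.inject(geth_poa_middleware, layer=0)
    return w3
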
def _export_traces(self, start_block, end_block):
    exporter = InMemoryItemExporter(item_types=['trace'])
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=self.batch_size,
        web3=ThreadLocalProxy(lambda: Web3(self.batch_web3_provider)),
        max_workers=self.max_workers,
        item_exporter=exporter)
    job.run()
    traces = exporter.get_items('trace')
    return traces

def export_token_transfers(start_block, end_block, batch_size, output, max_workers, provider_uri, tokens):
    """Exports ERC20/ERC721 transfers."""
    job = ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
        item_exporter=token_transfers_item_exporter(output),
        max_workers=max_workers,
        tokens=tokens)
    job.run()

def export_geth_traces(start_block, end_block, batch_size, output, max_workers, provider_uri):
    """Exports traces from a geth node."""
    job = ExportGethTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=geth_traces_item_exporter(output))
    job.run()

def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri):
    """Exports contract bytecode and sighashes."""
    with smart_open(contract_addresses, 'r') as contract_addresses_file:
        contract_addresses = (
            contract_address.strip()
            for contract_address in contract_addresses_file
            if contract_address.strip())
        job = ExportContractsJob(
            contract_addresses_iterable=contract_addresses,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            item_exporter=contracts_item_exporter(output),
            max_workers=max_workers)
        job.run()

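# smart_open, used by the file-reading commands here, is assumed to treat '-'
# as stdin/stdout (as the CLI help texts further below suggest) and to fall
# back to a plain open() otherwise. A minimal sketch:
import sys
from contextlib import contextmanager

@contextmanager
def smart_open(filename, mode='r'):
    if filename == '-':
        # Do not close the standard streams.
        yield sys.stdout if 'w' in mode else sys.stdin
    else:
        with open(filename, mode) as file_handle:
            yield file_handle
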
def test_export_erc20_tokens_job(tmpdir, token_addresses, resource_group):
    output_file = tmpdir.join('erc20_tokens.csv')
    job = ExportErc20TokensJob(
        token_addresses_iterable=token_addresses,
        web3=ThreadLocalProxy(lambda: Web3(MockWeb3Provider(resource_group))),
        item_exporter=erc20_tokens_item_exporter(output_file),
        max_workers=5)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_erc20_tokens.csv'),
        read_file(output_file))

def test_export_erc20_transfers_job(tmpdir, start_block, end_block, batch_size, resource_group):
    output_file = tmpdir.join('erc20_transfers.csv')
    job = ExportErc20TransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: Web3(MockWeb3Provider(resource_group))),
        item_exporter=erc20_transfers_item_exporter(output_file),
        max_workers=5)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_erc20_transfers.csv'),
        read_file(output_file))

def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers, receipts_output,
                             logs_output, chain='ethereum'):
    """Exports receipts and logs."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(transaction_hashes, 'r') as transaction_hashes_file:
        job = ExportReceiptsJob(
            transaction_hashes_iterable=(transaction_hash.strip() for transaction_hash in transaction_hashes_file),
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=receipts_and_logs_item_exporter(receipts_output, logs_output),
            export_receipts=receipts_output is not None,
            export_logs=logs_output is not None)
        job.run()

def export_traces(start_block, end_block, batch_size, output, max_workers, provider_uri,
                  genesis_traces, daofork_traces):
    """Exports traces from a parity node."""
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
        item_exporter=traces_item_exporter(output),
        max_workers=max_workers,
        include_genesis_traces=genesis_traces,
        include_daofork_traces=daofork_traces)
    job.run()

def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers,
                                   blocks_output, transactions_output):
    """Exports blocks and transactions."""
    if blocks_output is None and transactions_output is None:
        raise ValueError('Either --blocks-output or --transactions-output options must be provided')

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=blocks_and_transactions_item_exporter(blocks_output, transactions_output),
        export_blocks=blocks_output is not None,
        export_transactions=transactions_output is not None)
    job.run()

def export_origin(start_block, end_block, batch_size, marketplace_output, shop_output, max_workers, provider_uri):
    """Exports Origin Protocol data."""
    job = ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
        ipfs_client=get_origin_ipfs_client(),
        marketplace_listing_exporter=origin_marketplace_listing_item_exporter(marketplace_output),
        shop_product_exporter=origin_shop_product_item_exporter(shop_output),
        max_workers=max_workers)
    job.run()

def test_export_contracts_job(tmpdir, batch_size, contract_addresses, output_format, resource_group):
    contracts_output_file = tmpdir.join('actual_contracts.' + output_format)
    job = ExportContractsJob(
        contract_addresses_iterable=CONTRACT_ADDRESSES_UNDER_TEST,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: MockBatchWeb3Provider(lambda file: read_resource(resource_group, file))),
        max_workers=5,
        item_exporter=contracts_item_exporter(contracts_output_file))
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_contracts.' + output_format),
        read_file(contracts_output_file))

def test_export_tokens_job(tmpdir, token_addresses, resource_group, web3_provider_type):
    output_file = str(tmpdir.join('tokens.csv'))
    job = ExportTokensJob(
        token_addresses_iterable=token_addresses,
        web3=ThreadLocalProxy(lambda: Web3(
            get_web3_provider(web3_provider_type, lambda file: read_resource(resource_group, file)))),
        item_exporter=tokens_item_exporter(output_file),
        max_workers=5)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_tokens.csv'),
        read_file(output_file))

def test_export_token_transfers_job(tmpdir, start_block, end_block, batch_size, resource_group,
                                    web3_provider_type):
    output_file = tmpdir.join('token_transfers.csv')
    job = ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: Web3(
            get_web3_provider(web3_provider_type, lambda file: read_resource(resource_group, file)))),
        item_exporter=token_transfers_item_exporter(output_file),
        max_workers=5)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_token_transfers.csv'),
        read_file(output_file))

def extract_tokens(contracts, provider_uri, output, max_workers):
    """Extracts tokens from a contracts file."""
    set_max_field_size_limit()

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)

        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output))
        job.run()

def _extract(self, start_block, end_block):
    logging.info("Running extraction job for block range {}-{}".format(start_block, end_block))
    job = ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=JOB_BLOCK_BATCH_SIZE,
        web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(self.provider_url))),
        ipfs_client=get_origin_ipfs_client(),
        marketplace_listing_exporter=origin_marketplace_listing_item_exporter(JOB_MARKETPLACE_OUTPUT_FILENAME),
        shop_product_exporter=origin_shop_product_item_exporter(JOB_DSHOP_OUTPUT_FILENAME),
        max_workers=JOB_MAX_WORKERS)
    job.run()
    logging.info("Extraction done.")

def extract_tokens(contracts, provider_uri, output, max_workers, values_as_strings=False):
    """Extracts tokens from a contracts file."""
    set_max_field_size_limit()

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)

        converters = [IntToStringItemConverter(keys=['decimals', 'total_supply'])] if values_as_strings else []

        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output, converters))
        job.run()

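# IntToStringItemConverter is assumed to stringify the selected integer
# fields so that large values such as total_supply survive JSON/CSV
# round-trips without precision loss. A minimal sketch:
class IntToStringItemConverter:
    def __init__(self, keys=None):
        self.keys = set(keys or [])

    def convert_item(self, item):
        # Return a copy with the selected integer fields rendered as strings.
        return {
            key: str(value) if key in self.keys and isinstance(value, int) else value
            for key, value in item.items()
        }
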
def test_export_traces_job(tmpdir, start_block, end_block, resource_group, web3_provider_type):
    traces_output_file = str(tmpdir.join('actual_traces.csv'))
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=1,
        web3=ThreadLocalProxy(lambda: Web3(
            get_web3_provider(web3_provider_type, lambda file: read_resource(resource_group, file)))),
        max_workers=5,
        item_exporter=traces_item_exporter(traces_output_file),
    )
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_traces.csv'),
        read_file(traces_output_file))

def test_export_blocks_job(tmpdir, start_block, end_block, batch_size, resource_group):
    blocks_output_file = tmpdir.join('actual_blocks.csv')
    transactions_output_file = tmpdir.join('actual_transactions.csv')
    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        ipc_wrapper=ThreadLocalProxy(lambda: MockIPCWrapper(resource_group)),
        blocks_output=blocks_output_file,
        transactions_output=transactions_output_file)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_blocks.csv'),
        read_file(blocks_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_transactions.csv'),
        read_file(transactions_output_file))

def test_export_contracts_job(tmpdir, batch_size, contract_addresses, output_format, resource_group,
                              web3_provider_type):
    contracts_output_file = tmpdir.join('actual_contracts.' + output_format)
    job = ExportContractsJob(
        contract_addresses_iterable=contract_addresses,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_web3_provider(
            web3_provider_type, lambda file: read_resource(resource_group, file), batch=True)),
        max_workers=5,
        item_exporter=contracts_item_exporter(contracts_output_file))
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_contracts.' + output_format),
        read_file(contracts_output_file))

def test_export_geth_traces_job(tmpdir, start_block, end_block, resource_group, web3_provider_type):
    traces_output_file = tmpdir.join('actual_geth_traces.json')
    job = ExportGethTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=1,
        batch_web3_provider=ThreadLocalProxy(lambda: get_web3_provider(
            web3_provider_type, lambda file: read_resource(resource_group, file), batch=True)),
        max_workers=5,
        item_exporter=geth_traces_item_exporter(traces_output_file),
    )
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'geth_traces.json'),
        read_file(traces_output_file))

def stream(last_synced_block_file, lag, provider_uri, output, start_block, entity_types,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None):
    """Streams all data types to console or Google Pub/Sub."""
    configure_logging(log_file)
    configure_signals()
    entity_types = parse_entity_types(entity_types)
    validate_entity_types(entity_types, output)

    from ethereumetl.streaming.item_exporter_creator import create_item_exporter
    from ethereumetl.streaming.eth_streamer_adapter import EthStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer

    # TODO: Implement fallback mechanism for provider uris instead of picking randomly
    provider_uri = pick_random_provider_uri(provider_uri)
    logging.info('Using ' + provider_uri)

    streamer_adapter = EthStreamerAdapter(
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        item_exporter=create_item_exporter(output),
        batch_size=batch_size,
        max_workers=max_workers,
        entity_types=entity_types)
    streamer = Streamer(
        blockchain_streamer_adapter=streamer_adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file)
    streamer.stream()

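# pick_random_provider_uri is assumed to split a comma-separated list of URIs
# and choose one at random (the TODO above notes a fallback mechanism would be
# preferable). A minimal sketch:
import random

def pick_random_provider_uri(provider_uri):
    provider_uris = [uri.strip() for uri in provider_uri.split(',')]
    return random.choice(provider_uris)
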
def test_export_blocks_job(tmpdir, start_block, end_block, batch_size, resource_group):
    blocks_output_file = tmpdir.join('actual_blocks.csv')
    transactions_output_file = tmpdir.join('actual_transactions.csv')
    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: MockBatchWeb3Provider(lambda file: read_resource(resource_group, file))),
        max_workers=5,
        item_exporter=export_blocks_job_item_exporter(blocks_output_file, transactions_output_file),
        export_blocks=blocks_output_file is not None,
        export_transactions=transactions_output_file is not None)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_blocks.csv'),
        read_file(blocks_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_transactions.csv'),
        read_file(transactions_output_file))

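# The mock provider used in these tests is assumed to serve canned JSON-RPC
# responses from resource files instead of hitting a node. A simplified
# sketch (the file-naming scheme below is illustrative):
import json

class MockBatchWeb3Provider:
    def __init__(self, read_resource):
        self.read_resource = read_resource

    def make_batch_request(self, text):
        batch = json.loads(text)
        responses = []
        for req in batch:
            # Look up a canned response per request, e.g. by method and first param.
            first_param = req['params'][0] if req.get('params') else ''
            file_name = 'web3_response.{}_{}.json'.format(req['method'], first_param)
            responses.append(json.loads(self.read_resource(file_name)))
        return responses
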
def test_export_receipts_job(tmpdir, batch_size, tx_hashes, resource_group):
    receipts_output_file = tmpdir.join('actual_receipts.csv')
    logs_output_file = tmpdir.join('actual_logs.csv')
    job = ExportReceiptsJob(
        tx_hashes_iterable=tx_hashes,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: MockBatchWeb3Provider(lambda file: read_resource(resource_group, file))),
        max_workers=5,
        item_exporter=export_receipts_job_item_exporter(receipts_output_file, logs_output_file),
        export_receipts=receipts_output_file is not None,
        export_logs=logs_output_file is not None)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_receipts.csv'),
        read_file(receipts_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_logs.csv'),
        read_file(logs_output_file))

def export_traces(
    self,
    start_block: int,
    end_block: int,
    include_genesis_traces: bool = False,
    include_daofork_traces: bool = False,
) -> Iterable[Dict]:
    """Export traces for the specified block range."""
    exporter = InMemoryItemExporter(item_types=["trace"])
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=self.batch_size,
        web3=ThreadLocalProxy(lambda: Web3(self.batch_web3_provider)),
        max_workers=self.max_workers,
        item_exporter=exporter,
        include_genesis_traces=include_genesis_traces,
        include_daofork_traces=include_daofork_traces,
    )
    job.run()
    traces = exporter.get_items("trace")
    return traces

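# A usage sketch for the method above; eth_service stands in for whatever
# object carries batch_size, max_workers, and batch_web3_provider (the
# enclosing class is not shown here):
def print_block_traces(eth_service, start_block, end_block):
    traces = eth_service.export_traces(
        start_block=start_block,
        end_block=end_block,
        include_genesis_traces=False,
        include_daofork_traces=False,
    )
    for trace in traces:
        # Each item is a dict of trace fields produced by ExportTracesJob.
        print(trace)
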
def test_export_receipts_job(tmpdir, batch_size, transaction_hashes, output_format, resource_group,
                             web3_provider_type):
    receipts_output_file = str(tmpdir.join('actual_receipts.' + output_format))
    logs_output_file = str(tmpdir.join('actual_logs.' + output_format))
    job = ExportReceiptsJob(
        transaction_hashes_iterable=transaction_hashes,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_web3_provider(
            web3_provider_type, lambda file: read_resource(resource_group, file), batch=True)),
        max_workers=5,
        item_exporter=receipts_and_logs_item_exporter(receipts_output_file, logs_output_file),
        export_receipts=receipts_output_file is not None,
        export_logs=logs_output_file is not None)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_receipts.' + output_format),
        read_file(receipts_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_logs.' + output_format),
        read_file(logs_output_file))

parser.add_argument(
    '--receipts-output', default=None, type=str,
    help='The output file for receipts. If not provided, receipts will not be exported. '
         'Use "-" for stdout.')
parser.add_argument(
    '--logs-output', default=None, type=str,
    help='The output file for receipt logs. If not provided, receipt logs will not be exported. '
         'Use "-" for stdout.')

args = parser.parse_args()

with smart_open(args.tx_hashes, 'r') as tx_hashes_file:
    job = ExportReceiptsJob(
        tx_hashes_iterable=(tx_hash.strip() for tx_hash in tx_hashes_file),
        batch_size=args.batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(args.provider_uri, batch=True)),
        max_workers=args.max_workers,
        item_exporter=receipts_and_logs_item_exporter(args.receipts_output, args.logs_output),
        export_receipts=args.receipts_output is not None,
        export_logs=args.logs_output is not None)
    job.run()

parser.add_argument('--max-workers', default=5, type=int,
                    help='The maximum number of workers.')
parser.add_argument('-p', '--provider-uri', default=None, type=str,
                    help='The URI of the web3 provider, e.g. file://$HOME/Library/Ethereum/geth.ipc')
parser.add_argument('-t', '--tokens', default=None, type=str, nargs='+',
                    help='The list of token addresses to filter by.')

args = parser.parse_args()

job = ExportErc20TransfersJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(args.provider_uri))),
    item_exporter=export_erc20_transfers_job_item_exporter(args.output),
    max_workers=args.max_workers,
    tokens=args.tokens)
job.run()

parser.add_argument(
    '--blocks-output', default=None, type=str,
    help='The output file for blocks. If not provided, blocks will not be exported. '
         'Use "-" for stdout.')
parser.add_argument(
    '--transactions-output', default=None, type=str,
    help='The output file for transactions. If not provided, transactions will not be exported. '
         'Use "-" for stdout.')

args = parser.parse_args()

job = ExportBlocksJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    ipc_wrapper=ThreadLocalProxy(lambda: IPCWrapper(args.ipc_path, timeout=args.ipc_timeout)),
    max_workers=args.max_workers,
    item_exporter=export_blocks_job_item_exporter(args.blocks_output, args.transactions_output),
    export_blocks=args.blocks_output is not None,
    export_transactions=args.transactions_output is not None)
job.run()

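# Example invocation of a script built around the parser above (the script
# name and block range are illustrative; the flags mirror the args used here):
#
#   python export_blocks_job.py --start-block 0 --end-block 100000 \
#       --ipc-path ~/Library/Ethereum/geth.ipc \
#       --blocks-output blocks.csv --transactions-output transactions.csv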