def _start(self):
    # Using bounded executor prevents unlimited queue growth
    # and allows monitoring in-progress futures and failing fast in case of errors.
    self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
    self.output_file = get_file_handle(self.output, binary=True)
    self.exporter = CsvItemExporter(self.output_file, fields_to_export=self.fields_to_export)
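Neither BoundedExecutor nor FailSafeExecutor is defined in these snippets. A minimal sketch of what the comment above implies, assuming a ThreadPoolExecutor delegate and a semaphore-bounded submit queue; the class names match the call sites, but the internals are an assumption, not the project's actual implementation:

import threading
from concurrent.futures import ThreadPoolExecutor


class BoundedExecutor:
    # Wraps ThreadPoolExecutor with a semaphore so submit() blocks once
    # `bound` tasks are queued, keeping the work queue from growing without limit.
    def __init__(self, bound, max_workers):
        self._delegate = ThreadPoolExecutor(max_workers=max_workers)
        self._semaphore = threading.BoundedSemaphore(bound + max_workers)

    def submit(self, fn, *args, **kwargs):
        self._semaphore.acquire()
        try:
            future = self._delegate.submit(fn, *args, **kwargs)
        except Exception:
            self._semaphore.release()
            raise
        future.add_done_callback(lambda _: self._semaphore.release())
        return future

    def shutdown(self):
        self._delegate.shutdown(wait=True)


class FailSafeExecutor:
    # Remembers submitted futures and re-raises any worker exception on the
    # submitting thread, so a failing batch aborts the whole job quickly.
    def __init__(self, delegate):
        self._delegate = delegate
        self._futures = []

    def submit(self, fn, *args, **kwargs):
        self._check_completed_futures()
        future = self._delegate.submit(fn, *args, **kwargs)
        self._futures.append(future)
        return future

    def _check_completed_futures(self):
        for future in [f for f in self._futures if f.done()]:
            self._futures.remove(future)
            future.result()  # re-raises the worker's exception, if any

    def shutdown(self):
        self._delegate.shutdown()
        self._check_completed_futures()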
def _start(self):
    super()._start()
    self.output_file = get_file_handle(self.output, binary=True, create_parent_dirs=True)
    self.exporter = CsvItemExporter(self.output_file, fields_to_export=self.fields_to_export)
class ExportErc20TransfersJob(BatchExportJob):
    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 web3,
                 output,
                 max_workers=5,
                 tokens=None,
                 fields_to_export=FIELDS_TO_EXPORT):
        super().__init__(start_block, end_block, batch_size, max_workers)
        self.web3 = web3
        self.output = output
        self.tokens = tokens
        self.fields_to_export = fields_to_export

        self.receipt_log_mapper = EthReceiptLogMapper()
        self.erc20_transfer_mapper = EthErc20TransferMapper()
        self.erc20_processor = EthErc20Processor()

        self.output_file = None
        self.exporter = None

    def _start(self):
        super()._start()
        self.output_file = get_file_handle(self.output, binary=True, create_parent_dirs=True)
        self.exporter = CsvItemExporter(self.output_file, fields_to_export=self.fields_to_export)

    def _export_batch(self, batch_start, batch_end):
        # https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_getfilterlogs
        filter_params = {
            'fromBlock': batch_start,
            'toBlock': batch_end,
            'topics': [TRANSFER_EVENT_TOPIC]
        }

        if self.tokens is not None and len(self.tokens) > 0:
            filter_params['address'] = self.tokens

        event_filter = self.web3.eth.filter(filter_params)
        events = event_filter.get_all_entries()

        for event in events:
            log = self.receipt_log_mapper.web3_dict_to_receipt_log(event)
            erc20_transfer = self.erc20_processor.filter_transfer_from_log(log)
            if erc20_transfer is not None:
                self.exporter.export_item(
                    self.erc20_transfer_mapper.erc20_transfer_to_dict(erc20_transfer))

        self.web3.eth.uninstallFilter(event_filter.filter_id)

    def _end(self):
        super()._end()
        close_silently(self.output_file)
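A usage sketch for the job above, assuming the base class exposes a run() entry point that invokes _start, _export and _end in order; the run() name, IPC path and block range here are illustrative assumptions:

from web3 import Web3, IPCProvider

# Hypothetical wiring; run() and the argument values are assumptions.
web3 = Web3(IPCProvider('/home/user/.ethereum/geth.ipc', timeout=300))
job = ExportErc20TransfersJob(
    start_block=4000000,
    end_block=4001000,
    batch_size=100,
    web3=web3,
    output='erc20_transfers.csv',
    max_workers=5)
job.run()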
def _start(self):
    self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
    self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
    self.blocks_exporter = CsvItemExporter(self.blocks_output_file)
    self.transactions_exporter = CsvItemExporter(self.transactions_output_file)
def _start(self):
    super()._start()
    self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
    self.blocks_exporter = CsvItemExporter(
        self.blocks_output_file, fields_to_export=self.block_fields_to_export)
    self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
    self.transactions_exporter = CsvItemExporter(
        self.transactions_output_file, fields_to_export=self.transaction_fields_to_export)
def open(self):
    for item_type, filename in self.filename_mapping.items():
        self.file_mapping[item_type] = get_file_handle(filename, binary=True)
        self.exporter_mapping[item_type] = CsvItemExporter(
            self.file_mapping[item_type], fields_to_export=self.field_mapping[item_type])
def open(self):
    for item_type, filename in self.filename_mapping.items():
        file = get_file_handle(filename, binary=True)
        fields = self.field_mapping[item_type]
        self.file_mapping[item_type] = file
        if str(filename).endswith('.json'):
            item_exporter = JsonLinesItemExporter(file, fields_to_export=fields)
        else:
            item_exporter = CsvItemExporter(file, fields_to_export=fields)
        self.exporter_mapping[item_type] = item_exporter
        self.counter_mapping[item_type] = AtomicCounter()
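AtomicCounter is instantiated here but not shown anywhere in these snippets; a minimal lock-based sketch of what it plausibly provides (the increment signature is an assumption):

import threading


class AtomicCounter:
    # Thread-safe counter; exporters may be driven from multiple worker
    # threads, so increments must be serialized.
    def __init__(self, initial=0):
        self._value = initial
        self._lock = threading.Lock()

    def increment(self, amount=1):
        with self._lock:
            self._value += amount
            return self._value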
class ExportBlocksJob(BaseJob):
    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 ipc_wrapper,
                 max_workers=5,
                 blocks_output=None,
                 transactions_output=None,
                 block_fields_to_export=BLOCK_FIELDS_TO_EXPORT,
                 transaction_fields_to_export=TRANSACTION_FIELDS_TO_EXPORT):
        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.ipc_wrapper = ipc_wrapper
        self.max_workers = max_workers
        self.blocks_output = blocks_output
        self.transactions_output = transactions_output
        self.block_fields_to_export = block_fields_to_export
        self.transaction_fields_to_export = transaction_fields_to_export

        self.export_blocks = blocks_output is not None
        self.export_transactions = transactions_output is not None
        if not self.export_blocks and not self.export_transactions:
            raise ValueError('Either blocks_output or transactions_output must be provided')

        self.block_mapper = EthBlockMapper()
        self.transaction_mapper = EthTransactionMapper()

        self.blocks_output_file = None
        self.transactions_output_file = None
        self.blocks_exporter = None
        self.transactions_exporter = None
        self.executor: FailSafeExecutor = None

    def _start(self):
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
        self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
        self.blocks_exporter = CsvItemExporter(
            self.blocks_output_file, fields_to_export=self.block_fields_to_export)
        self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
        self.transactions_exporter = CsvItemExporter(
            self.transactions_output_file, fields_to_export=self.transaction_fields_to_export)

    def _export(self):
        for batch_start, batch_end in split_to_batches(self.start_block, self.end_block, self.batch_size):
            self.executor.submit(self._fail_safe_export_batch, batch_start, batch_end)

    def _fail_safe_export_batch(self, batch_start, batch_end):
        try:
            self._export_batch(batch_start, batch_end)
        except (Timeout, OSError):
            # try exporting blocks one by one
            for block_number in range(batch_start, batch_end + 1):
                self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        blocks_rpc = list(generate_get_block_by_number_json_rpc(batch_start, batch_end, self.export_transactions))
        response = self.ipc_wrapper.make_request(json.dumps(blocks_rpc))
        for response_item in response:
            result = response_item['result']
            block = self.block_mapper.json_dict_to_block(result)
            self._export_block(block)

    def _export_block(self, block):
        if self.export_blocks:
            self.blocks_exporter.export_item(self.block_mapper.block_to_dict(block))
        if self.export_transactions:
            for tx in block.transactions:
                self.transactions_exporter.export_item(self.transaction_mapper.transaction_to_dict(tx))

    def _end(self):
        self.executor.shutdown()
        close_silently(self.blocks_output_file)
        close_silently(self.transactions_output_file)
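split_to_batches is called by every job but never defined here. Judging from the equivalent inline loop in the filter script further down, it likely yields inclusive (batch_start, batch_end) pairs; a sketch under that assumption:

def split_to_batches(start_incl, end_incl, batch_size):
    # Yield inclusive (batch_start, batch_end) ranges covering
    # start_incl..end_incl, e.g. (0, 99), (100, 199), (200, 250).
    for batch_start in range(start_incl, end_incl + 1, batch_size):
        yield batch_start, min(batch_start + batch_size - 1, end_incl)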
class ExportBlocksJob(BaseJob):
    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 ipc_wrapper,
                 blocks_output=None,
                 transactions_output=None):
        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.ipc_wrapper = ipc_wrapper
        self.blocks_output = blocks_output
        self.transactions_output = transactions_output

        self.export_blocks = blocks_output is not None
        self.export_transactions = transactions_output is not None
        if not self.export_blocks and not self.export_transactions:
            raise ValueError('Either blocks_output or transactions_output must be provided')

        self.block_mapper = EthBlockMapper()
        self.transaction_mapper = EthTransactionMapper()

        self.blocks_output_file = None
        self.transactions_output_file = None
        self.blocks_exporter = None
        self.transactions_exporter = None

    def _start(self):
        self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
        self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
        self.blocks_exporter = CsvItemExporter(self.blocks_output_file)
        self.transactions_exporter = CsvItemExporter(self.transactions_output_file)

    def _export(self):
        for batch_start, batch_end in split_to_batches(self.start_block, self.end_block, self.batch_size):
            try:
                self._export_batch(batch_start, batch_end)
            except (Timeout, SocketTimeoutException):
                # try exporting blocks one by one
                for block_number in range(batch_start, batch_end + 1):
                    self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        blocks_rpc = list(generate_get_block_by_number_json_rpc(batch_start, batch_end, self.export_transactions))
        response = self.ipc_wrapper.make_request(json.dumps(blocks_rpc))
        for response_item in response:
            result = response_item['result']
            block = self.block_mapper.json_dict_to_block(result)
            self._export_block(block)

    def _export_block(self, block):
        if self.export_blocks:
            self.blocks_exporter.export_item(self.block_mapper.block_to_dict(block))
        if self.export_transactions:
            for tx in block.transactions:
                self.transactions_exporter.export_item(self.transaction_mapper.transaction_to_dict(tx))

    def _end(self):
        if self.blocks_output_file is not None:
            self.blocks_output_file.close()
        if self.transactions_output_file is not None:
            self.transactions_output_file.close()
def _start(self):
    self.output_file = get_file_handle(self.output, binary=True)
    self.exporter = CsvItemExporter(self.output_file)
class ExportErc20TransfersJob(BaseJob):
    def __init__(self, start_block, end_block, batch_size, web3, output, tokens=None):
        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.web3 = web3
        self.output = output
        self.tokens = tokens

        self.receipt_log_mapper = EthReceiptLogMapper()
        self.erc20_transfer_mapper = EthErc20TransferMapper()
        self.erc20_processor = EthErc20Processor()

        self.output_file = None
        self.exporter = None

    def _start(self):
        self.output_file = get_file_handle(self.output, binary=True)
        self.exporter = CsvItemExporter(self.output_file)

    def _export(self):
        for batch_start, batch_end in split_to_batches(self.start_block, self.end_block, self.batch_size):
            try:
                self._export_batch(batch_start, batch_end)
            except (Timeout, SocketTimeoutException):
                # try exporting one by one
                for block_number in range(batch_start, batch_end + 1):
                    self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        filter_params = {
            'fromBlock': batch_start,
            'toBlock': batch_end,
            'topics': [TRANSFER_EVENT_TOPIC]
        }

        if self.tokens is not None and len(self.tokens) > 0:
            filter_params['address'] = self.tokens

        event_filter = self.web3.eth.filter(filter_params)
        events = event_filter.get_all_entries()

        for event in events:
            log = self.receipt_log_mapper.web3_dict_to_transaction_receipt_log(event)
            erc20_transfer = self.erc20_processor.filter_transfer_from_log(log)
            if erc20_transfer is not None:
                self.exporter.export_item(
                    self.erc20_transfer_mapper.erc20_transfer_to_dict(erc20_transfer))

        self.web3.eth.uninstallFilter(event_filter.filter_id)

    def _end(self):
        if self.output_file is not None:
            self.output_file.close()
class ExportBlocksJob(BatchExportJob):
    def __init__(
            self,
            start_block,
            end_block,
            batch_size,
            ipc_wrapper,
            max_workers=5,
            blocks_output=None,
            transactions_output=None,
            block_fields_to_export=BLOCK_FIELDS_TO_EXPORT,
            transaction_fields_to_export=TRANSACTION_FIELDS_TO_EXPORT):
        super().__init__(start_block, end_block, batch_size, max_workers)
        self.ipc_wrapper = ipc_wrapper
        self.blocks_output = blocks_output
        self.transactions_output = transactions_output
        self.block_fields_to_export = block_fields_to_export
        self.transaction_fields_to_export = transaction_fields_to_export

        self.export_blocks = blocks_output is not None
        self.export_transactions = transactions_output is not None
        if not self.export_blocks and not self.export_transactions:
            raise ValueError('Either blocks_output or transactions_output must be provided')

        self.block_mapper = EthBlockMapper()
        self.transaction_mapper = EthTransactionMapper()

        self.blocks_output_file = None
        self.transactions_output_file = None
        self.blocks_exporter = None
        self.transactions_exporter = None

    def _start(self):
        super()._start()
        self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
        self.blocks_exporter = CsvItemExporter(
            self.blocks_output_file, fields_to_export=self.block_fields_to_export)
        self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
        self.transactions_exporter = CsvItemExporter(
            self.transactions_output_file, fields_to_export=self.transaction_fields_to_export)

    def _export_batch(self, batch_start, batch_end):
        blocks_rpc = list(generate_get_block_by_number_json_rpc(batch_start, batch_end, self.export_transactions))
        response = self.ipc_wrapper.make_request(json.dumps(blocks_rpc))
        for response_item in response:
            result = response_item['result']
            block = self.block_mapper.json_dict_to_block(result)
            self._export_block(block)

    def _export_block(self, block):
        if self.export_blocks:
            self.blocks_exporter.export_item(self.block_mapper.block_to_dict(block))
        if self.export_transactions:
            for tx in block.transactions:
                self.transactions_exporter.export_item(self.transaction_mapper.transaction_to_dict(tx))

    def _end(self):
        super()._end()
        close_silently(self.blocks_output_file)
        close_silently(self.transactions_output_file)
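BatchExportJob, which the refactored jobs derive from, is not included in these snippets. Comparing them with the pre-refactoring BaseJob versions above suggests it hoists the bounded executor setup, batch submission, and one-by-one retry into the base class; a sketch under that assumption, reusing FailSafeExecutor, BoundedExecutor, split_to_batches and Timeout as defined elsewhere in the project:

class BatchExportJob(BaseJob):
    # Sketch reconstructed from the pre-refactoring jobs; the real base
    # class may differ in names and details.
    def __init__(self, range_start, range_end, batch_size, max_workers=5):
        self.range_start = range_start
        self.range_end = range_end
        self.batch_size = batch_size
        self.max_workers = max_workers
        self.executor = None

    def _start(self):
        # Using bounded executor prevents unlimited queue growth
        # and allows failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))

    def _export(self):
        for batch_start, batch_end in split_to_batches(self.range_start, self.range_end, self.batch_size):
            self.executor.submit(self._fail_safe_export_batch, batch_start, batch_end)

    def _fail_safe_export_batch(self, batch_start, batch_end):
        try:
            self._export_batch(batch_start, batch_end)
        except (Timeout, OSError):
            # fall back to exporting items one by one
            for item_number in range(batch_start, batch_end + 1):
                self._export_batch(item_number, item_number)

    def _export_batch(self, batch_start, batch_end):
        raise NotImplementedError()

    def _end(self):
        self.executor.shutdown()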
import argparse
import json

from ethereumetl.utils import smart_open

parser = argparse.ArgumentParser(
    description='Extract blocks from eth_getBlockByNumber JSON RPC output')
parser.add_argument('--input', default=None, type=str,
                    help='The input file. If not specified stdin is used.')
parser.add_argument('--output', default=None, type=str,
                    help='The output file. If not specified stdout is used.')

args = parser.parse_args()

with smart_open(args.input, 'r') as input_file, smart_open(args.output, binary=True) as output_file:
    block_mapper = EthBlockMapper()
    exporter = CsvItemExporter(output_file)
    exporter.start_exporting()
    for line in input_file:
        json_line = json.loads(line)
        result = json_line.get('result', None)
        if result is None:
            continue
        block = block_mapper.json_dict_to_block(result)
        exporter.export_item(block_mapper.block_to_dict(block))
    exporter.finish_exporting()
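smart_open is imported from ethereumetl.utils but its body is not shown. The --input/--output help text ('If not specified stdin/stdout is used') suggests it falls back to the standard streams when the filename is None; a self-contained sketch with the signature inferred from the call sites:

import contextlib
import sys


@contextlib.contextmanager
def smart_open(filename, mode='w', binary=False):
    # Open the given file, or fall back to stdin/stdout when filename is
    # None, as the --input/--output help text describes.
    if filename is None:
        std = sys.stdin if 'r' in mode else sys.stdout
        yield std.buffer if binary else std
    else:
        fh = open(filename, mode + ('b' if binary else ''))
        try:
            yield fh
        finally:
            fh.close()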
parser = argparse.ArgumentParser(
    description='Extract blocks and transactions from eth_getBlockByNumber JSON RPC output')
parser.add_argument('--input', default=None, type=str,
                    help='The input file. If not specified stdin is used.')
parser.add_argument('--blocks-output', default=None, type=str,
                    help='The output file for blocks. If not specified stdout is used.')
parser.add_argument('--transactions-output', default=None, type=str,
                    help='The output file for transactions. If not specified stdout is used.')

args = parser.parse_args()

with smart_open(args.input, 'r') as input_file, \
        smart_open(args.blocks_output, binary=True) as blocks_output_file, \
        smart_open(args.transactions_output, binary=True) as tx_output_file:
    block_mapper = EthBlockMapper()
    tx_mapper = EthTransactionMapper()
    blocks_exporter = CsvItemExporter(blocks_output_file)
    tx_exporter = CsvItemExporter(tx_output_file)
    for line in input_file:
        json_line = json.loads(line)
        result = json_line.get('result', None)
        if result is None:
            continue
        block = block_mapper.json_dict_to_block(result)
        blocks_exporter.export_item(block_mapper.block_to_dict(block))
        if block.transactions is not None:
            for transaction in block.transactions:
                tx_exporter.export_item(tx_mapper.transaction_to_dict(transaction))
parser = argparse.ArgumentParser(
    description='Exports ERC20 transfers using eth_newFilter and eth_getFilterLogs JSON RPC APIs.')
parser.add_argument('--start-block', default=0, type=int, help='Start block')
parser.add_argument('--end-block', required=True, type=int, help='End block')
parser.add_argument('--output', default=None, type=str,
                    help='The output file. If not specified stdout is used.')
parser.add_argument('--ipc-path', required=True, type=str,
                    help='The full path to the ipc socket file.')
parser.add_argument('--ipc-timeout', default=300, type=int,
                    help='The timeout in seconds for ipc calls.')
parser.add_argument('--batch-size', default=100, type=int,
                    help='The number of blocks to filter at a time.')

args = parser.parse_args()

with smart_open(args.output, binary=True) as output_file:
    transaction_receipt_log_mapper = EthTransactionReceiptLogMapper()
    erc20_transfer_mapper = EthErc20TransferMapper()
    erc20_processor = EthErc20Processor()
    exporter = CsvItemExporter(output_file)
    web3 = Web3(IPCProvider(args.ipc_path, timeout=args.ipc_timeout))

    for batch_start_block in range(args.start_block, args.end_block + 1, args.batch_size):
        batch_end_block = min(batch_start_block + args.batch_size - 1, args.end_block)
        event_filter = web3.eth.filter({
            'fromBlock': batch_start_block,
            'toBlock': batch_end_block,
            'topics': [TRANSFER_EVENT_TOPIC]
        })
        events = event_filter.get_all_entries()
        for event in events:
            log = transaction_receipt_log_mapper.web3_dict_to_transaction_receipt_log(event)
            erc20_transfer = erc20_processor.filter_transfer_from_log(log)
            if erc20_transfer is not None:
                exporter.export_item(erc20_transfer_mapper.erc20_transfer_to_dict(erc20_transfer))
import contextlib

parser.add_argument('--extract-transactions',
                    help='Whether or not to extract transactions.')
parser.add_argument('--transactions-output', default=None, type=str,
                    help='The output file for transactions. If not specified stdout is used.')

args = parser.parse_args()

# contextlib.nullcontext() stands in for the original bare None, which is not
# a context manager and would crash the with statement when a flag is off.
with smart_open(args.input, 'r') as input_file, \
        (smart_open(args.blocks_output, binary=True)
         if args.extract_blocks else contextlib.nullcontext()) as blocks_output_file, \
        (smart_open(args.transactions_output, binary=True)
         if args.extract_transactions else contextlib.nullcontext()) as tx_output_file:
    block_mapper = EthBlockMapper()
    tx_mapper = EthTransactionMapper()
    blocks_exporter = CsvItemExporter(blocks_output_file) if blocks_output_file is not None else None
    tx_exporter = CsvItemExporter(tx_output_file) if tx_output_file is not None else None
    for line in input_file:
        json_line = json.loads(line)
        result = json_line.get('result', None)
        if result is None:
            continue
        block = block_mapper.json_dict_to_block(result)
        if blocks_exporter is not None:
            blocks_exporter.export_item(block_mapper.block_to_dict(block))
        if block.transactions is not None and tx_exporter is not None:
            for transaction in block.transactions:
                tx_exporter.export_item(tx_mapper.transaction_to_dict(transaction))
class ExportErc20TransfersJob(BaseJob):
    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 web3,
                 output,
                 max_workers=5,
                 tokens=None,
                 fields_to_export=FIELDS_TO_EXPORT):
        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.web3 = web3
        self.output = output
        self.max_workers = max_workers
        self.tokens = tokens
        self.fields_to_export = fields_to_export

        self.receipt_log_mapper = EthReceiptLogMapper()
        self.erc20_transfer_mapper = EthErc20TransferMapper()
        self.erc20_processor = EthErc20Processor()

        self.output_file = None
        self.exporter = None
        self.executor: FailSafeExecutor = None

    def _start(self):
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
        self.output_file = get_file_handle(self.output, binary=True)
        self.exporter = CsvItemExporter(self.output_file, fields_to_export=self.fields_to_export)

    def _export(self):
        for batch_start, batch_end in split_to_batches(self.start_block, self.end_block, self.batch_size):
            self.executor.submit(self._fail_safe_export_batch, batch_start, batch_end)

    def _fail_safe_export_batch(self, batch_start, batch_end):
        try:
            self._export_batch(batch_start, batch_end)
        except (Timeout, OSError):
            # try exporting one by one
            for block_number in range(batch_start, batch_end + 1):
                self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        filter_params = {
            'fromBlock': batch_start,
            'toBlock': batch_end,
            'topics': [TRANSFER_EVENT_TOPIC]
        }

        if self.tokens is not None and len(self.tokens) > 0:
            filter_params['address'] = self.tokens

        event_filter = self.web3.eth.filter(filter_params)
        events = event_filter.get_all_entries()

        for event in events:
            log = self.receipt_log_mapper.web3_dict_to_receipt_log(event)
            erc20_transfer = self.erc20_processor.filter_transfer_from_log(log)
            if erc20_transfer is not None:
                self.exporter.export_item(
                    self.erc20_transfer_mapper.erc20_transfer_to_dict(erc20_transfer))

        self.web3.eth.uninstallFilter(event_filter.filter_id)

    def _end(self):
        self.executor.shutdown()
        close_silently(self.output_file)