def _start(self):
        """Create the executor, then open the output handle and its CSV exporter."""
        # A bounded executor keeps the work queue from growing without limit; the
        # fail-safe wrapper allows monitoring in-progress futures and failing fast
        # in case of errors.
        bounded_executor = BoundedExecutor(1, self.max_workers)
        self.executor = FailSafeExecutor(bounded_executor)

        handle = get_file_handle(self.output, binary=True)
        self.output_file = handle
        self.exporter = CsvItemExporter(
            handle, fields_to_export=self.fields_to_export)
    def _start(self):
        """Run the base-class start-up, then open the output and build its CSV exporter."""
        super()._start()

        # The handle is requested in binary mode with parent-directory creation enabled.
        handle = get_file_handle(
            self.output, binary=True, create_parent_dirs=True)
        self.output_file = handle
        self.exporter = CsvItemExporter(
            handle, fields_to_export=self.fields_to_export)
class ExportErc20TransfersJob(BatchExportJob):
    """Export ERC20 transfers found in the logs of a block range to a CSV output.

    For every batch a log filter restricted to ``TRANSFER_EVENT_TOPIC`` is created
    through ``web3.eth.filter``; each returned log is mapped, filtered by
    ``EthErc20Processor`` and, when it yields a transfer, written through a
    ``CsvItemExporter``.
    """

    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 web3,
                 output,
                 max_workers=5,
                 tokens=None,
                 fields_to_export=FIELDS_TO_EXPORT):
        """Store the job configuration and create the mappers.

        Args:
            start_block, end_block, batch_size, max_workers: forwarded unchanged
                to ``BatchExportJob.__init__``.
            web3: client whose ``eth.filter`` / ``eth.uninstallFilter`` are used.
            output: passed to ``get_file_handle`` when the job starts.
            tokens: optional collection; when non-empty it is sent as the
                filter's ``address`` parameter.
            fields_to_export: forwarded to ``CsvItemExporter``.
        """
        super().__init__(start_block, end_block, batch_size, max_workers)
        self.web3 = web3
        self.output = output
        self.tokens = tokens
        self.fields_to_export = fields_to_export

        self.receipt_log_mapper = EthReceiptLogMapper()
        self.erc20_transfer_mapper = EthErc20TransferMapper()
        self.erc20_processor = EthErc20Processor()

        # Both are created in _start().
        self.output_file = None
        self.exporter = None

    def _start(self):
        """Run the base-class start-up, then open the output and build the exporter."""
        super()._start()

        self.output_file = get_file_handle(self.output,
                                           binary=True,
                                           create_parent_dirs=True)
        self.exporter = CsvItemExporter(self.output_file,
                                        fields_to_export=self.fields_to_export)

    def _export_batch(self, batch_start, batch_end):
        """Export the transfers logged between ``batch_start`` and ``batch_end``."""
        # https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_getfilterlogs
        filter_params = {
            'fromBlock': batch_start,
            'toBlock': batch_end,
            'topics': [TRANSFER_EVENT_TOPIC]
        }

        # Only restrict by address when at least one token was supplied.
        if self.tokens:
            filter_params['address'] = self.tokens

        event_filter = self.web3.eth.filter(filter_params)
        events = event_filter.get_all_entries()
        for event in events:
            log = self.receipt_log_mapper.web3_dict_to_receipt_log(event)
            erc20_transfer = self.erc20_processor.filter_transfer_from_log(log)
            # Logs that do not yield a transfer are skipped.
            if erc20_transfer is not None:
                self.exporter.export_item(
                    self.erc20_transfer_mapper.erc20_transfer_to_dict(
                        erc20_transfer))

        # NOTE(review): if get_all_entries() or the loop above raises, this
        # uninstall call is skipped.
        self.web3.eth.uninstallFilter(event_filter.filter_id)

    def _end(self):
        """Run the base-class tear-down and always close the output file."""
        try:
            super()._end()
        finally:
            # Close the handle even when the base-class tear-down raises.
            close_silently(self.output_file)
Example #4
0
    def _start(self):
        """Open both output handles, then create one CSV exporter per handle."""
        # Both handles are obtained before either exporter is constructed.
        blocks_handle = get_file_handle(self.blocks_output, binary=True)
        self.blocks_output_file = blocks_handle
        transactions_handle = get_file_handle(self.transactions_output,
                                              binary=True)
        self.transactions_output_file = transactions_handle

        self.blocks_exporter = CsvItemExporter(blocks_handle)
        self.transactions_exporter = CsvItemExporter(transactions_handle)
    def _start(self):
        """Run the base-class start-up, then set up the block and transaction outputs."""
        super()._start()

        # Blocks: obtain the handle, then bind an exporter to it.
        blocks_handle = get_file_handle(self.blocks_output, binary=True)
        self.blocks_output_file = blocks_handle
        self.blocks_exporter = CsvItemExporter(
            blocks_handle, fields_to_export=self.block_fields_to_export)

        # Transactions: same sequence with the transaction-specific settings.
        transactions_handle = get_file_handle(self.transactions_output, binary=True)
        self.transactions_output_file = transactions_handle
        self.transactions_exporter = CsvItemExporter(
            transactions_handle, fields_to_export=self.transaction_fields_to_export)
 def open(self):
     """Open a handle and create a CSV exporter for every configured item type."""
     for item_type, filename in self.filename_mapping.items():
         handle = get_file_handle(filename, binary=True)
         # Record the handle before the field lookup, as the exporter needs both.
         self.file_mapping[item_type] = handle
         fields = self.field_mapping[item_type]
         self.exporter_mapping[item_type] = CsvItemExporter(
             handle, fields_to_export=fields)
Example #7
0
    def open(self):
        """Open a handle, an exporter and a counter for every configured item type.

        File names ending in ``.json`` get a ``JsonLinesItemExporter``; every
        other name gets a ``CsvItemExporter``.
        """
        for item_type, filename in self.filename_mapping.items():
            handle = get_file_handle(filename, binary=True)
            fields = self.field_mapping[item_type]
            self.file_mapping[item_type] = handle

            # Pick the exporter class from the file name suffix.
            wants_json = str(filename).endswith('.json')
            exporter_cls = JsonLinesItemExporter if wants_json else CsvItemExporter
            self.exporter_mapping[item_type] = exporter_cls(
                handle, fields_to_export=fields)

            self.counter_mapping[item_type] = AtomicCounter()
Example #8
0
class ExportBlocksJob(BaseJob):
    """Export blocks and/or their transactions for a block range to CSV outputs.

    Batches are submitted to a bounded, fail-safe executor; each batch is
    fetched with one ``ipc_wrapper.make_request`` call carrying a JSON list of
    get-block-by-number requests.
    """

    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 ipc_wrapper,
                 max_workers=5,
                 blocks_output=None,
                 transactions_output=None,
                 block_fields_to_export=BLOCK_FIELDS_TO_EXPORT,
                 transaction_fields_to_export=TRANSACTION_FIELDS_TO_EXPORT):
        """Store the job configuration and create the mappers.

        Args:
            start_block, end_block, batch_size: passed to ``split_to_batches``.
            ipc_wrapper: object whose ``make_request`` performs the RPC call.
            max_workers: second argument given to ``BoundedExecutor``.
            blocks_output: blocks output target; ``None`` disables block export.
            transactions_output: transactions output target; ``None`` disables
                transaction export.
            block_fields_to_export: forwarded to the blocks ``CsvItemExporter``.
            transaction_fields_to_export: forwarded to the transactions
                ``CsvItemExporter``.

        Raises:
            ValueError: if both ``blocks_output`` and ``transactions_output``
                are ``None``.
        """
        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.ipc_wrapper = ipc_wrapper
        self.max_workers = max_workers
        self.blocks_output = blocks_output
        self.transactions_output = transactions_output
        self.block_fields_to_export = block_fields_to_export
        self.transaction_fields_to_export = transaction_fields_to_export

        self.export_blocks = blocks_output is not None
        self.export_transactions = transactions_output is not None
        if not self.export_blocks and not self.export_transactions:
            raise ValueError(
                'Either blocks_output or transactions_output must be provided')

        self.block_mapper = EthBlockMapper()
        self.transaction_mapper = EthTransactionMapper()

        # Handles, exporters and the executor are created in _start().
        self.blocks_output_file = None
        self.transactions_output_file = None

        self.blocks_exporter = None
        self.transactions_exporter = None

        self.executor: FailSafeExecutor = None

    def _start(self):
        """Create the executor, then open both outputs and their exporters."""
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))

        self.blocks_output_file = get_file_handle(self.blocks_output,
                                                  binary=True)
        self.blocks_exporter = CsvItemExporter(
            self.blocks_output_file,
            fields_to_export=self.block_fields_to_export)

        self.transactions_output_file = get_file_handle(
            self.transactions_output, binary=True)
        self.transactions_exporter = CsvItemExporter(
            self.transactions_output_file,
            fields_to_export=self.transaction_fields_to_export)

    def _export(self):
        """Submit one export task per batch of the configured block range."""
        for batch_start, batch_end in split_to_batches(self.start_block,
                                                       self.end_block,
                                                       self.batch_size):
            self.executor.submit(self._fail_safe_export_batch, batch_start,
                                 batch_end)

    def _fail_safe_export_batch(self, batch_start, batch_end):
        """Export a batch, retrying block by block on ``Timeout`` or ``OSError``."""
        try:
            self._export_batch(batch_start, batch_end)
        except (Timeout, OSError):
            # try exporting blocks one by one
            for block_number in range(batch_start, batch_end + 1):
                self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        """Fetch the blocks of one batch in a single request and export each one."""
        blocks_rpc = list(
            generate_get_block_by_number_json_rpc(batch_start, batch_end,
                                                  self.export_transactions))
        response = self.ipc_wrapper.make_request(json.dumps(blocks_rpc))
        for response_item in response:
            result = response_item['result']
            block = self.block_mapper.json_dict_to_block(result)
            self._export_block(block)

    def _export_block(self, block):
        """Write the block and/or its transactions, depending on the enabled outputs."""
        if self.export_blocks:
            self.blocks_exporter.export_item(
                self.block_mapper.block_to_dict(block))
        if self.export_transactions:
            for tx in block.transactions:
                self.transactions_exporter.export_item(
                    self.transaction_mapper.transaction_to_dict(tx))

    def _end(self):
        """Shut the executor down and always close both output files."""
        try:
            self.executor.shutdown()
        finally:
            # Close the handles even when shutting the executor down raises.
            close_silently(self.blocks_output_file)
            close_silently(self.transactions_output_file)
Example #9
0
class ExportBlocksJob(BaseJob):
    """Export blocks and/or their transactions for a block range to CSV outputs.

    Batches are processed sequentially; each batch is fetched with one
    ``ipc_wrapper.make_request`` call carrying a JSON list of
    get-block-by-number requests.
    """

    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 ipc_wrapper,
                 blocks_output=None,
                 transactions_output=None):
        """Store the job configuration and create the mappers.

        Args:
            start_block, end_block, batch_size: passed to ``split_to_batches``.
            ipc_wrapper: object whose ``make_request`` performs the RPC call.
            blocks_output: blocks output target; ``None`` disables block export.
            transactions_output: transactions output target; ``None`` disables
                transaction export.

        Raises:
            ValueError: if both ``blocks_output`` and ``transactions_output``
                are ``None``.
        """
        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.ipc_wrapper = ipc_wrapper
        self.blocks_output = blocks_output
        self.transactions_output = transactions_output

        self.export_blocks = blocks_output is not None
        self.export_transactions = transactions_output is not None
        if not self.export_blocks and not self.export_transactions:
            raise ValueError(
                'Either blocks_output or transactions_output must be provided')

        self.block_mapper = EthBlockMapper()
        self.transaction_mapper = EthTransactionMapper()

        # Handles and exporters are created in _start().
        self.blocks_output_file = None
        self.transactions_output_file = None

        self.blocks_exporter = None
        self.transactions_exporter = None

    def _start(self):
        """Open both output handles, then create one CSV exporter per handle."""
        self.blocks_output_file = get_file_handle(self.blocks_output,
                                                  binary=True)
        self.transactions_output_file = get_file_handle(
            self.transactions_output, binary=True)

        self.blocks_exporter = CsvItemExporter(self.blocks_output_file)
        self.transactions_exporter = CsvItemExporter(
            self.transactions_output_file)

    def _export(self):
        """Export every batch, retrying block by block when a batch times out."""
        for batch_start, batch_end in split_to_batches(self.start_block,
                                                       self.end_block,
                                                       self.batch_size):
            try:
                self._export_batch(batch_start, batch_end)
            except (Timeout, SocketTimeoutException):
                # try exporting blocks one by one
                for block_number in range(batch_start, batch_end + 1):
                    self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        """Fetch the blocks of one batch in a single request and export each one."""
        blocks_rpc = list(
            generate_get_block_by_number_json_rpc(batch_start, batch_end,
                                                  self.export_transactions))
        response = self.ipc_wrapper.make_request(json.dumps(blocks_rpc))
        for response_item in response:
            result = response_item['result']
            block = self.block_mapper.json_dict_to_block(result)
            self._export_block(block)

    def _export_block(self, block):
        """Write the block and/or its transactions, depending on the enabled outputs."""
        if self.export_blocks:
            self.blocks_exporter.export_item(
                self.block_mapper.block_to_dict(block))
        if self.export_transactions:
            for tx in block.transactions:
                self.transactions_exporter.export_item(
                    self.transaction_mapper.transaction_to_dict(tx))

    def _end(self):
        """Close whichever output files were opened.

        The transactions file is closed even if closing the blocks file raises.
        """
        try:
            if self.blocks_output_file is not None:
                self.blocks_output_file.close()
        finally:
            if self.transactions_output_file is not None:
                self.transactions_output_file.close()
Example #10
0
 def _start(self):
     """Open the output handle and create the CSV exporter that writes to it."""
     handle = get_file_handle(self.output, binary=True)
     self.output_file = handle
     self.exporter = CsvItemExporter(handle)
Example #11
0
class ExportErc20TransfersJob(BaseJob):
    """Sequentially export ERC20 transfers found in the logs of a block range.

    Every batch creates a log filter for ``TRANSFER_EVENT_TOPIC`` through
    ``web3.eth.filter`` and writes the transfers derived from the returned logs
    through a ``CsvItemExporter``.
    """

    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 web3,
                 output,
                 tokens=None):
        """Record the configuration, build the mappers and reset the output state."""
        # Output state is populated by _start().
        self.output_file = None
        self.exporter = None

        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.web3 = web3
        self.output = output
        self.tokens = tokens

        self.receipt_log_mapper = EthReceiptLogMapper()
        self.erc20_transfer_mapper = EthErc20TransferMapper()
        self.erc20_processor = EthErc20Processor()

    def _start(self):
        """Open the output handle and create the CSV exporter that writes to it."""
        handle = get_file_handle(self.output, binary=True)
        self.output_file = handle
        self.exporter = CsvItemExporter(handle)

    def _export(self):
        """Export every batch, retrying block by block when a batch times out."""
        batches = split_to_batches(self.start_block, self.end_block,
                                   self.batch_size)
        for first_block, last_block in batches:
            try:
                self._export_batch(first_block, last_block)
            except (Timeout, SocketTimeoutException):
                # Fall back to single-block batches for the failed range.
                for number in range(first_block, last_block + 1):
                    self._export_batch(number, number)

    def _export_batch(self, batch_start, batch_end):
        """Export the transfers logged between ``batch_start`` and ``batch_end``."""
        filter_params = dict(fromBlock=batch_start,
                             toBlock=batch_end,
                             topics=[TRANSFER_EVENT_TOPIC])

        # The address restriction is only added for a non-empty token collection.
        has_tokens = self.tokens is not None and len(self.tokens) > 0
        if has_tokens:
            filter_params["address"] = self.tokens

        event_filter = self.web3.eth.filter(filter_params)
        for event in event_filter.get_all_entries():
            log = self.receipt_log_mapper.web3_dict_to_transaction_receipt_log(
                event)
            transfer = self.erc20_processor.filter_transfer_from_log(log)
            if transfer is None:
                # This log did not yield a transfer.
                continue
            item = self.erc20_transfer_mapper.erc20_transfer_to_dict(transfer)
            self.exporter.export_item(item)

        self.web3.eth.uninstallFilter(event_filter.filter_id)

    def _end(self):
        """Close the output file if one was opened."""
        if self.output_file is None:
            return
        self.output_file.close()
class ExportBlocksJob(BatchExportJob):
    """Export blocks and/or their transactions for a block range to CSV outputs.

    Each batch is fetched with one ``ipc_wrapper.make_request`` call carrying a
    JSON list of get-block-by-number requests.
    """

    def __init__(
            self,
            start_block,
            end_block,
            batch_size,
            ipc_wrapper,
            max_workers=5,
            blocks_output=None,
            transactions_output=None,
            block_fields_to_export=BLOCK_FIELDS_TO_EXPORT,
            transaction_fields_to_export=TRANSACTION_FIELDS_TO_EXPORT):
        """Store the job configuration and create the mappers.

        Args:
            start_block, end_block, batch_size, max_workers: forwarded unchanged
                to ``BatchExportJob.__init__``.
            ipc_wrapper: object whose ``make_request`` performs the RPC call.
            blocks_output: blocks output target; ``None`` disables block export.
            transactions_output: transactions output target; ``None`` disables
                transaction export.
            block_fields_to_export: forwarded to the blocks ``CsvItemExporter``.
            transaction_fields_to_export: forwarded to the transactions
                ``CsvItemExporter``.

        Raises:
            ValueError: if both ``blocks_output`` and ``transactions_output``
                are ``None``.
        """
        super().__init__(start_block, end_block, batch_size, max_workers)
        self.ipc_wrapper = ipc_wrapper
        self.blocks_output = blocks_output
        self.transactions_output = transactions_output
        self.block_fields_to_export = block_fields_to_export
        self.transaction_fields_to_export = transaction_fields_to_export

        self.export_blocks = blocks_output is not None
        self.export_transactions = transactions_output is not None
        if not self.export_blocks and not self.export_transactions:
            raise ValueError('Either blocks_output or transactions_output must be provided')

        self.block_mapper = EthBlockMapper()
        self.transaction_mapper = EthTransactionMapper()

        # Handles and exporters are created in _start().
        self.blocks_output_file = None
        self.transactions_output_file = None

        self.blocks_exporter = None
        self.transactions_exporter = None

    def _start(self):
        """Run the base-class start-up, then open both outputs and their exporters."""
        super()._start()

        self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
        self.blocks_exporter = CsvItemExporter(
            self.blocks_output_file, fields_to_export=self.block_fields_to_export)

        self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
        self.transactions_exporter = CsvItemExporter(
            self.transactions_output_file, fields_to_export=self.transaction_fields_to_export)

    def _export_batch(self, batch_start, batch_end):
        """Fetch the blocks of one batch in a single request and export each one."""
        blocks_rpc = list(generate_get_block_by_number_json_rpc(batch_start, batch_end, self.export_transactions))
        response = self.ipc_wrapper.make_request(json.dumps(blocks_rpc))
        for response_item in response:
            result = response_item['result']
            block = self.block_mapper.json_dict_to_block(result)
            self._export_block(block)

    def _export_block(self, block):
        """Write the block and/or its transactions, depending on the enabled outputs."""
        if self.export_blocks:
            self.blocks_exporter.export_item(self.block_mapper.block_to_dict(block))
        if self.export_transactions:
            for tx in block.transactions:
                self.transactions_exporter.export_item(self.transaction_mapper.transaction_to_dict(tx))

    def _end(self):
        """Run the base-class tear-down and always close both output files."""
        try:
            super()._end()
        finally:
            # Close the handles even when the base-class tear-down raises.
            close_silently(self.blocks_output_file)
            close_silently(self.transactions_output_file)
Example #13
0
from ethereumetl.utils import smart_open

# Command-line script: reads JSON RPC responses line by line and writes the
# blocks they contain through a CSV exporter.
parser = argparse.ArgumentParser(
    description='Extract blocks from eth_getBlockByNumber JSON RPC output')
parser.add_argument(
    '--input', default=None, type=str,
    help='The input file. If not specified stdin is used.')
parser.add_argument(
    '--output', default=None, type=str,
    help='The output file. If not specified stdout is used.')

args = parser.parse_args()

with smart_open(args.input, 'r') as input_file:
    with smart_open(args.output, binary=True) as output_file:
        block_mapper = EthBlockMapper()

        exporter = CsvItemExporter(output_file)
        exporter.start_exporting()
        for raw_line in input_file:
            # Each line is parsed as one JSON document; lines whose 'result'
            # is missing or None are skipped.
            result = json.loads(raw_line).get('result')
            if result is not None:
                block = block_mapper.json_dict_to_block(result)
                exporter.export_item(block_mapper.block_to_dict(block))
        exporter.finish_exporting()
Example #14
0
# Command-line script: reads JSON RPC responses line by line and writes the
# blocks and their transactions through two CSV exporters.
parser = argparse.ArgumentParser(
    description='Extract blocks and transactions from eth_getBlockByNumber JSON RPC output')
parser.add_argument(
    '--input', default=None, type=str,
    help='The input file. If not specified stdin is used.')
parser.add_argument(
    '--blocks-output', default=None, type=str,
    help='The output file for blocks. If not specified stdout is used.')
parser.add_argument(
    '--transactions-output', default=None, type=str,
    help='The output file for transactions. If not specified stdout is used.')

args = parser.parse_args()

with smart_open(args.input, 'r') as input_file:
    with smart_open(args.blocks_output, binary=True) as blocks_output_file:
        with smart_open(args.transactions_output, binary=True) as tx_output_file:
            block_mapper = EthBlockMapper()
            tx_mapper = EthTransactionMapper()

            blocks_exporter = CsvItemExporter(blocks_output_file)
            tx_exporter = CsvItemExporter(tx_output_file)

            for raw_line in input_file:
                # Lines whose 'result' is missing or None are skipped.
                result = json.loads(raw_line).get('result')
                if result is not None:
                    block = block_mapper.json_dict_to_block(result)
                    blocks_exporter.export_item(block_mapper.block_to_dict(block))

                    # A block without a transaction list contributes no rows.
                    if block.transactions is not None:
                        for transaction in block.transactions:
                            tx_exporter.export_item(tx_mapper.transaction_to_dict(transaction))
    description='Exports ERC20 transfers using eth_newFilter and eth_getFilterLogs JSON RPC APIs.')
# Command-line options of the ERC20 transfer export script.
parser.add_argument('--start-block', default=0, type=int, help='Start block')
parser.add_argument('--end-block', required=True, type=int, help='End block')
parser.add_argument('--output', default=None, type=str, help='The output file. If not specified stdout is used.')
parser.add_argument('--ipc-path', required=True, type=str, help='The full path to the ipc socket file.')
parser.add_argument('--ipc-timeout', default=300, type=int, help='The timeout in seconds for ipc calls.')
parser.add_argument('--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')

args = parser.parse_args()


with smart_open(args.output, binary=True) as output_file:
    transaction_receipt_log_mapper = EthTransactionReceiptLogMapper()
    erc20_transfer_mapper = EthErc20TransferMapper()
    erc20_processor = EthErc20Processor()
    exporter = CsvItemExporter(output_file)

    # The IPC provider is built from the socket path and timeout given on the command line.
    web3 = Web3(IPCProvider(args.ipc_path, timeout=args.ipc_timeout))

    # Walk the range [start_block, end_block] in steps of batch_size blocks.
    for batch_start_block in range(args.start_block, args.end_block + 1, args.batch_size):
        # The last batch is clamped so that it never goes past end_block.
        batch_end_block = min(batch_start_block + args.batch_size - 1, args.end_block)

        # Create a log filter for this batch, restricted to the transfer event topic.
        event_filter = web3.eth.filter({
            "fromBlock": batch_start_block,
            "toBlock": batch_end_block,
            "topics": [TRANSFER_EVENT_TOPIC]
        })

        events = event_filter.get_all_entries()

        for event in events:
                    help='Whether or not to extract transactions.')
parser.add_argument(
    '--transactions-output',
    default=None,
    type=str,
    help='The output file for transactions. If not specified stdout is used.')

args = parser.parse_args()

# NOTE(review): when args.extract_blocks or args.extract_transactions is false,
# the corresponding with-item below evaluates to None. None does not implement
# the context-manager protocol, so the `with` statement raises in that case
# instead of binding None to the target. Confirm and consider
# contextlib.ExitStack or a no-op context manager.
with smart_open(args.input, 'r') as input_file, \
        smart_open(args.blocks_output, binary=True) if args.extract_blocks else None as blocks_output_file , \
        smart_open(args.transactions_output, binary=True) if args.extract_transactions else None as tx_output_file:
    block_mapper = EthBlockMapper()
    tx_mapper = EthTransactionMapper()

    # An exporter is only created for an output that was actually opened.
    blocks_exporter = CsvItemExporter(
        blocks_output_file) if blocks_output_file is not None else None
    tx_exporter = CsvItemExporter(
        tx_output_file) if tx_output_file is not None else None

    for line in input_file:
        # Each input line is parsed as one JSON document.
        json_line = json.loads(line)
        result = json_line.get('result', None)
        # Lines whose 'result' is missing or None are skipped.
        if result is None:
            continue
        block = block_mapper.json_dict_to_block(result)
        if blocks_exporter is not None:
            blocks_exporter.export_item(block_mapper.block_to_dict(block))

        if block.transactions is not None and tx_exporter is not None:
            for transaction in block.transactions:
                tx_exporter.export_item(
class ExportErc20TransfersJob(BaseJob):
    """Export ERC20 transfers found in the logs of a block range to a CSV output.

    Batches are submitted to a bounded, fail-safe executor. For every batch a
    log filter restricted to ``TRANSFER_EVENT_TOPIC`` is created through
    ``web3.eth.filter`` and the transfers derived from the returned logs are
    written through a ``CsvItemExporter``.
    """

    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 web3,
                 output,
                 max_workers=5,
                 tokens=None,
                 fields_to_export=FIELDS_TO_EXPORT):
        """Store the job configuration and create the mappers.

        Args:
            start_block, end_block, batch_size: passed to ``split_to_batches``.
            web3: client whose ``eth.filter`` / ``eth.uninstallFilter`` are used.
            output: passed to ``get_file_handle`` when the job starts.
            max_workers: second argument given to ``BoundedExecutor``.
            tokens: optional collection; when non-empty it is sent as the
                filter's ``address`` parameter.
            fields_to_export: forwarded to ``CsvItemExporter``.
        """
        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.web3 = web3
        self.output = output
        self.max_workers = max_workers
        self.tokens = tokens
        self.fields_to_export = fields_to_export

        self.receipt_log_mapper = EthReceiptLogMapper()
        self.erc20_transfer_mapper = EthErc20TransferMapper()
        self.erc20_processor = EthErc20Processor()

        # Handle, exporter and executor are created in _start().
        self.output_file = None
        self.exporter = None

        self.executor: FailSafeExecutor = None

    def _start(self):
        """Create the executor, then open the output and build the exporter."""
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))

        self.output_file = get_file_handle(self.output, binary=True)
        self.exporter = CsvItemExporter(self.output_file,
                                        fields_to_export=self.fields_to_export)

    def _export(self):
        """Submit one export task per batch of the configured block range."""
        for batch_start, batch_end in split_to_batches(self.start_block,
                                                       self.end_block,
                                                       self.batch_size):
            self.executor.submit(self._fail_safe_export_batch, batch_start,
                                 batch_end)

    def _fail_safe_export_batch(self, batch_start, batch_end):
        """Export a batch, retrying block by block on ``Timeout`` or ``OSError``."""
        try:
            self._export_batch(batch_start, batch_end)
        except (Timeout, OSError):
            # try exporting one by one
            for block_number in range(batch_start, batch_end + 1):
                self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        """Export the transfers logged between ``batch_start`` and ``batch_end``."""
        filter_params = {
            'fromBlock': batch_start,
            'toBlock': batch_end,
            'topics': [TRANSFER_EVENT_TOPIC]
        }

        # Only restrict by address when at least one token was supplied.
        if self.tokens:
            filter_params['address'] = self.tokens

        event_filter = self.web3.eth.filter(filter_params)
        events = event_filter.get_all_entries()
        for event in events:
            log = self.receipt_log_mapper.web3_dict_to_receipt_log(event)
            erc20_transfer = self.erc20_processor.filter_transfer_from_log(log)
            # Logs that do not yield a transfer are skipped.
            if erc20_transfer is not None:
                self.exporter.export_item(
                    self.erc20_transfer_mapper.erc20_transfer_to_dict(
                        erc20_transfer))

        # NOTE(review): if get_all_entries() or the loop above raises, this
        # uninstall call is skipped.
        self.web3.eth.uninstallFilter(event_filter.filter_id)

    def _end(self):
        """Shut the executor down and always close the output file."""
        try:
            self.executor.shutdown()
        finally:
            # Close the handle even when shutting the executor down raises.
            close_silently(self.output_file)