Example #1
class BatchWorkExecutor:
    def __init__(self,
                 starting_batch_size,
                 max_workers,
                 retry_exceptions=RETRY_EXCEPTIONS):
        self.batch_size = starting_batch_size
        self.max_workers = max_workers
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
        self.retry_exceptions = retry_exceptions

    def execute(self, work_iterable, work_handler):
        for batch in dynamic_batch_iterator(work_iterable,
                                            lambda: self.batch_size):
            self.executor.submit(self._fail_safe_execute, work_handler, batch)

    # Some acceptable race conditions are possible when adjusting batch_size
    def _fail_safe_execute(self, work_handler, batch):
        try:
            work_handler(batch)
        except self.retry_exceptions:
            batch_size = self.batch_size
            # Reduce the batch size; subsequent batches will be 2 times smaller
            if batch_size == len(batch) and batch_size > 1:
                self.batch_size = max(1, int(batch_size / 2))
            # For the failed batch, try handling items one by one
            for item in batch:
                work_handler([item])

    def shutdown(self):
        self.executor.shutdown()
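A minimal usage sketch for the class above, assuming FailSafeExecutor, BoundedExecutor, dynamic_batch_iterator, and RETRY_EXCEPTIONS come from the surrounding project; the fetch_items handler here is hypothetical, for illustration only.

def fetch_items(items):
    # Hypothetical handler; real code would issue one batched request
    # covering all items in the list.
    print('processing {} items'.format(len(items)))

executor = BatchWorkExecutor(starting_batch_size=100, max_workers=5)
try:
    executor.execute(range(1000), fetch_items)
finally:
    executor.shutdown()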
Example #2
class BatchWorkExecutor:
    def __init__(self, starting_batch_size, max_workers, retry_exceptions=(Timeout, OSError)):
        self.batch_size = starting_batch_size
        self.max_workers = max_workers
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
        self.retry_exceptions = retry_exceptions

    def execute(self, work_iterable, work_handler):
        for batch in dynamic_batch_iterator(work_iterable, lambda: self.batch_size):
            self.executor.submit(self._fail_safe_execute, work_handler, batch)

    # Some acceptable race conditions are possible when adjusting batch_size
    def _fail_safe_execute(self, work_handler, batch):
        try:
            work_handler(batch)
        except Exception as ex:
            # isinstance also matches subclasses of the retry exception types
            if isinstance(ex, self.retry_exceptions):
                batch_size = self.batch_size
                # If the batch size can't be reduced any further, re-raise
                if batch_size == 1:
                    raise
                # Reduce the batch size; subsequent batches will be 2 times smaller
                if batch_size == len(batch):
                    self.batch_size = max(1, int(batch_size / 2))
                # For the failed batch, try handling items one by one
                for item in batch:
                    work_handler([item])
            else:
                raise

    def shutdown(self):
        self.executor.shutdown()
Example #3
def __init__(self, starting_batch_size, max_workers, retry_exceptions=RETRY_EXCEPTIONS):
    self.batch_size = starting_batch_size
    self.max_workers = max_workers
    # Using bounded executor prevents unlimited queue growth
    # and allows monitoring in-progress futures and failing fast in case of errors.
    self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
    self.retry_exceptions = retry_exceptions
Example #4
def __init__(self,
             starting_batch_size,
             max_workers,
             retry_exceptions=(Timeout, OSError)):
    self.batch_size = starting_batch_size
    self.max_workers = max_workers
    self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
    self.retry_exceptions = retry_exceptions
Example #5
def _start(self):
    # Using bounded executor prevents unlimited queue growth
    # and allows monitoring in-progress futures and failing fast in case of errors.
    self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))

    self.output_file = get_file_handle(self.output, binary=True)
    self.exporter = CsvItemExporter(self.output_file,
                                    fields_to_export=self.fields_to_export)
Example #6
def __init__(self,
             starting_batch_size,
             max_workers,
             retry_exceptions=RETRY_EXCEPTIONS,
             max_retries=5):
    self.batch_size = starting_batch_size
    self.max_batch_size = starting_batch_size
    self.latest_batch_size_change_time = None
    self.max_workers = max_workers
    # Using bounded executor prevents unlimited queue growth
    # and allows monitoring in-progress futures and failing fast in case of errors.
    self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
    self.retry_exceptions = retry_exceptions
    self.max_retries = max_retries
    self.progress_logger = ProgressLogger()
    self.logger = logging.getLogger('BatchWorkExecutor')
Example #7
class BatchExportJob(BaseJob):
    def __init__(self, range_start, range_end, batch_size, max_workers=5):
        if range_start < 0 or range_end < 0:
            raise ValueError(
                'range_start and range_end must be greater than or equal to 0')

        if range_end < range_start:
            raise ValueError(
                'range_end must be greater than or equal to range_start')

        if batch_size <= 0:
            raise ValueError('batch_size must be greater than 0')

        if max_workers <= 0:
            raise ValueError('max_workers must be greater than 0')

        self.start = range_start
        self.end = range_end
        self.batch_size = batch_size
        self.max_workers = max_workers

        self.executor = None

    def _start(self):
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))

    def _export(self):
        for batch_start, batch_end in split_to_batches(self.start, self.end,
                                                       self.batch_size):
            self.executor.submit(self._fail_safe_export_batch, batch_start,
                                 batch_end)

    def _fail_safe_export_batch(self, batch_start, batch_end):
        try:
            self._export_batch(batch_start, batch_end)
        except (Timeout, OSError):
            # try exporting one by one
            for block_number in range(batch_start, batch_end + 1):
                self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        pass

    def _end(self):
        self.executor.shutdown()
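BatchExportJob is a template: subclasses override _export_batch and inherit the batching, bounded concurrency, and item-by-item fallback. A minimal sketch of a subclass, assuming BaseJob exposes a run() method that calls _start, _export, and _end in order (an assumption based on the structure above); PrintingExportJob is hypothetical.

class PrintingExportJob(BatchExportJob):
    def _export_batch(self, batch_start, batch_end):
        # A real subclass would fetch and persist this block range.
        print('exporting blocks {}-{}'.format(batch_start, batch_end))

job = PrintingExportJob(range_start=0, range_end=99, batch_size=10)
job.run()  # assumed BaseJob entry point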
Example #8
class BatchWorkExecutor:
    def __init__(self,
                 starting_batch_size,
                 max_workers,
                 retry_exceptions=RETRY_EXCEPTIONS,
                 max_retries=5):
        self.batch_size = starting_batch_size
        self.max_batch_size = starting_batch_size
        self.latest_batch_size_change_time = None
        self.max_workers = max_workers
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
        self.retry_exceptions = retry_exceptions
        self.max_retries = max_retries
        self.progress_logger = ProgressLogger()
        self.logger = logging.getLogger('BatchWorkExecutor')

    def execute(self, work_iterable, work_handler, total_items=None):
        self.progress_logger.start(total_items=total_items)
        for batch in dynamic_batch_iterator(work_iterable,
                                            lambda: self.batch_size):
            self.executor.submit(self._fail_safe_execute, work_handler, batch)

    def _fail_safe_execute(self, work_handler, batch):
        try:
            work_handler(batch)
            self._try_increase_batch_size(len(batch))
        except self.retry_exceptions:
            self.logger.exception(
                'An exception occurred while executing work_handler.')
            self._try_decrease_batch_size(len(batch))
            self.logger.info(
                'The batch of size {} will be retried one item at a time.'
                .format(len(batch)))
            for item in batch:
                execute_with_retries(work_handler, [item],
                                     max_retries=self.max_retries,
                                     retry_exceptions=self.retry_exceptions)

        self.progress_logger.track(len(batch))

    # Some acceptable race conditions are possible
    def _try_decrease_batch_size(self, current_batch_size):
        batch_size = self.batch_size
        if batch_size == current_batch_size and batch_size > 1:
            new_batch_size = int(current_batch_size / 2)
            self.logger.info(
                'Reducing batch size to {}.'.format(new_batch_size))
            self.batch_size = new_batch_size
            self.latest_batch_size_change_time = time.time()

    def _try_increase_batch_size(self, current_batch_size):
        if current_batch_size * 2 <= self.max_batch_size:
            current_time = time.time()
            latest_batch_size_change_time = self.latest_batch_size_change_time
            seconds_since_last_change = current_time - latest_batch_size_change_time \
                if latest_batch_size_change_time is not None else 0
            if seconds_since_last_change > BATCH_CHANGE_COOLDOWN_PERIOD_SECONDS:
                new_batch_size = current_batch_size * 2
                self.logger.info(
                    'Increasing batch size to {}.'.format(new_batch_size))
                self.batch_size = new_batch_size
                self.latest_batch_size_change_time = current_time

    def shutdown(self):
        self.executor.shutdown()
        self.progress_logger.finish()
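execute_with_retries is referenced above but not defined in this snippet. A minimal sketch of what such a helper might look like, assuming it simply retries the call on the given exception types with a fixed delay; the signature matches the call site above, but the body is illustrative, not the project's actual implementation.

import time

def execute_with_retries(func, *args, max_retries=5, retry_exceptions=(OSError,)):
    for attempt in range(max_retries):
        try:
            return func(*args)
        except retry_exceptions:
            if attempt == max_retries - 1:
                raise  # out of retries, propagate the last exception
            time.sleep(1)  # fixed pause; real code might use exponential backoff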
Example #9
class ExportErc20TransfersJob(BaseJob):
    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 web3,
                 output,
                 max_workers=5,
                 tokens=None,
                 fields_to_export=FIELDS_TO_EXPORT):
        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.web3 = web3
        self.output = output
        self.max_workers = max_workers
        self.tokens = tokens
        self.fields_to_export = fields_to_export

        self.receipt_log_mapper = EthReceiptLogMapper()
        self.erc20_transfer_mapper = EthErc20TransferMapper()
        self.erc20_processor = EthErc20Processor()

        self.output_file = None
        self.exporter = None

        self.executor: FailSafeExecutor = None

    def _start(self):
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))

        self.output_file = get_file_handle(self.output, binary=True)
        self.exporter = CsvItemExporter(self.output_file,
                                        fields_to_export=self.fields_to_export)

    def _export(self):
        for batch_start, batch_end in split_to_batches(self.start_block,
                                                       self.end_block,
                                                       self.batch_size):
            self.executor.submit(self._fail_safe_export_batch, batch_start,
                                 batch_end)

    def _fail_safe_export_batch(self, batch_start, batch_end):
        try:
            self._export_batch(batch_start, batch_end)
        except (Timeout, OSError):
            # try exporting one by one
            for block_number in range(batch_start, batch_end + 1):
                self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        filter_params = {
            'fromBlock': batch_start,
            'toBlock': batch_end,
            'topics': [TRANSFER_EVENT_TOPIC]
        }

        if self.tokens is not None and len(self.tokens) > 0:
            filter_params['address'] = self.tokens

        event_filter = self.web3.eth.filter(filter_params)
        events = event_filter.get_all_entries()
        for event in events:
            log = self.receipt_log_mapper.web3_dict_to_receipt_log(event)
            erc20_transfer = self.erc20_processor.filter_transfer_from_log(log)
            if erc20_transfer is not None:
                self.exporter.export_item(
                    self.erc20_transfer_mapper.erc20_transfer_to_dict(
                        erc20_transfer))

        self.web3.eth.uninstallFilter(event_filter.filter_id)

    def _end(self):
        self.executor.shutdown()
        close_silently(self.output_file)
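A usage sketch for this job, assuming a configured Web3 instance and that BaseJob provides a run() method invoking _start, _export, and _end (both assumptions; the endpoint and block range are illustrative).

from web3 import Web3

web3 = Web3(Web3.HTTPProvider('http://localhost:8545'))
job = ExportErc20TransfersJob(
    start_block=1000000,
    end_block=1001000,
    batch_size=100,
    web3=web3,
    output='erc20_transfers.csv')
job.run()  # assumed BaseJob entry point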
Example #10
def _start(self):
    # Using bounded executor prevents unlimited queue growth
    # and allows monitoring in-progress futures and failing fast in case of errors.
    self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
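BoundedExecutor itself is not shown in these examples. The bounded queue growth the comment describes is commonly achieved by guarding ThreadPoolExecutor.submit with a semaphore that is released when each future completes; a minimal sketch of that pattern follows (an illustration of the technique, not necessarily this project's implementation).

from concurrent.futures import ThreadPoolExecutor
from threading import BoundedSemaphore

class BoundedExecutor:
    def __init__(self, bound, max_workers):
        self._executor = ThreadPoolExecutor(max_workers=max_workers)
        # At most bound + max_workers tasks may be queued or running at once.
        self._semaphore = BoundedSemaphore(bound + max_workers)

    def submit(self, fn, *args, **kwargs):
        self._semaphore.acquire()  # blocks the producer when the queue is full
        try:
            future = self._executor.submit(fn, *args, **kwargs)
        except Exception:
            self._semaphore.release()
            raise
        future.add_done_callback(lambda _: self._semaphore.release())
        return future

    def shutdown(self, wait=True):
        self._executor.shutdown(wait)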
Example #11
class ExportBlocksJob(BaseJob):
    def __init__(self,
                 start_block,
                 end_block,
                 batch_size,
                 ipc_wrapper,
                 max_workers=5,
                 blocks_output=None,
                 transactions_output=None,
                 block_fields_to_export=BLOCK_FIELDS_TO_EXPORT,
                 transaction_fields_to_export=TRANSACTION_FIELDS_TO_EXPORT):
        self.start_block = start_block
        self.end_block = end_block
        self.batch_size = batch_size
        self.ipc_wrapper = ipc_wrapper
        self.max_workers = max_workers
        self.blocks_output = blocks_output
        self.transactions_output = transactions_output
        self.block_fields_to_export = block_fields_to_export
        self.transaction_fields_to_export = transaction_fields_to_export

        self.export_blocks = blocks_output is not None
        self.export_transactions = transactions_output is not None
        if not self.export_blocks and not self.export_transactions:
            raise ValueError(
                'Either blocks_output or transactions_output must be provided')

        self.block_mapper = EthBlockMapper()
        self.transaction_mapper = EthTransactionMapper()

        self.blocks_output_file = None
        self.transactions_output_file = None

        self.blocks_exporter = None
        self.transactions_exporter = None

        self.executor: FailSafeExecutor = None

    def _start(self):
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))

        self.blocks_output_file = get_file_handle(self.blocks_output,
                                                  binary=True)
        self.blocks_exporter = CsvItemExporter(
            self.blocks_output_file,
            fields_to_export=self.block_fields_to_export)

        self.transactions_output_file = get_file_handle(
            self.transactions_output, binary=True)
        self.transactions_exporter = CsvItemExporter(
            self.transactions_output_file,
            fields_to_export=self.transaction_fields_to_export)

    def _export(self):
        for batch_start, batch_end in split_to_batches(self.start_block,
                                                       self.end_block,
                                                       self.batch_size):
            self.executor.submit(self._fail_safe_export_batch, batch_start,
                                 batch_end)

    def _fail_safe_export_batch(self, batch_start, batch_end):
        try:
            self._export_batch(batch_start, batch_end)
        except (Timeout, OSError):
            # try exporting blocks one by one
            for block_number in range(batch_start, batch_end + 1):
                self._export_batch(block_number, block_number)

    def _export_batch(self, batch_start, batch_end):
        blocks_rpc = list(
            generate_get_block_by_number_json_rpc(batch_start, batch_end,
                                                  self.export_transactions))
        response = self.ipc_wrapper.make_request(json.dumps(blocks_rpc))
        for response_item in response:
            result = response_item['result']
            block = self.block_mapper.json_dict_to_block(result)
            self._export_block(block)

    def _export_block(self, block):
        if self.export_blocks:
            self.blocks_exporter.export_item(
                self.block_mapper.block_to_dict(block))
        if self.export_transactions:
            for tx in block.transactions:
                self.transactions_exporter.export_item(
                    self.transaction_mapper.transaction_to_dict(tx))

    def _end(self):
        self.executor.shutdown()
        close_silently(self.blocks_output_file)
        close_silently(self.transactions_output_file)
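Usage mirrors the other jobs; a sketch assuming BaseJob provides run() and that ipc_wrapper exposes make_request(text) returning parsed JSON-RPC results. StubIpcWrapper is a hypothetical stand-in for a real node connection.

class StubIpcWrapper:
    def make_request(self, text):
        # A real wrapper would send the JSON-RPC payload to a node
        # and return the parsed response list.
        return []

job = ExportBlocksJob(
    start_block=0,
    end_block=99,
    batch_size=10,
    ipc_wrapper=StubIpcWrapper(),
    blocks_output='blocks.csv',
    transactions_output='transactions.csv')
job.run()  # assumed BaseJob entry point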