Esempio n. 1
0
 def __init__(self, starting_batch_size, max_workers, retry_exceptions=RETRY_EXCEPTIONS):
     """Store the batching settings and build the executor and progress logger.

     starting_batch_size: stored as the initial ``batch_size``.
     max_workers: stored and passed as the second argument to BoundedExecutor.
     retry_exceptions: stored unchanged on the instance.
     """
     self.batch_size = starting_batch_size
     self.max_workers = max_workers
     self.retry_exceptions = retry_exceptions
     # A bounded executor keeps the work queue from growing without limit;
     # it also allows monitoring in-progress futures and failing fast on errors.
     bounded_pool = BoundedExecutor(1, self.max_workers)
     self.executor = FailSafeExecutor(bounded_pool)
     self.progress_logger = ProgressLogger()
Esempio n. 2
0
class BatchWorkExecutor:
    """Submit batches of work to a bounded executor, shrinking batches on failure.

    The batch size starts at ``starting_batch_size`` and is halved whenever a
    batch of exactly the current size fails with one of ``retry_exceptions``.
    """

    def __init__(self, starting_batch_size, max_workers, retry_exceptions=RETRY_EXCEPTIONS):
        """Store the batching settings and build the executor and progress logger."""
        self.batch_size = starting_batch_size
        self.max_workers = max_workers
        self.retry_exceptions = retry_exceptions
        # A bounded executor keeps the work queue from growing without limit;
        # it also allows monitoring in-progress futures and failing fast on errors.
        bounded_pool = BoundedExecutor(1, self.max_workers)
        self.executor = FailSafeExecutor(bounded_pool)
        self.progress_logger = ProgressLogger()

    def execute(self, work_iterable, work_handler, total_items=None):
        """Split ``work_iterable`` into batches and submit each one to the executor.

        ``work_handler`` is called with a batch (a sequence of items).
        ``total_items`` is forwarded to the progress logger's ``start``.
        """
        def current_batch_size():
            # Looked up on every call so a changed self.batch_size is visible
            # to the batch iterator (presumably queried once per batch).
            return self.batch_size

        self.progress_logger.start(total_items=total_items)
        batches = dynamic_batch_iterator(work_iterable, current_batch_size)
        for batch in batches:
            self.executor.submit(self._fail_safe_execute, work_handler, batch)

    # NOTE(review): the original author flagged this method for a race-condition
    # check; self.batch_size is read and then written here without any locking.
    def _fail_safe_execute(self, work_handler, batch):
        """Handle one batch, falling back to item-by-item handling on failure.

        Exceptions outside ``self.retry_exceptions``, and any exception raised
        during the item-by-item fallback, propagate to the caller.
        """
        try:
            work_handler(batch)
        except self.retry_exceptions:
            size_at_failure = self.batch_size
            # Halve the batch size so that subsequent batches are 2 times
            # smaller, but only if this batch had the current size.
            failed_at_current_size = size_at_failure == len(batch)
            if failed_at_current_size and size_at_failure > 1:
                self.batch_size = int(size_at_failure / 2)
            # The failed batch itself is handled one item at a time.
            for single_item in batch:
                work_handler([single_item])
        self.progress_logger.track(len(batch))

    def shutdown(self):
        """Shut down the executor, then finish the progress logger."""
        self.executor.shutdown()
        self.progress_logger.finish()
def test_progress_logger():
    """Track 100 x 100 items with a 1000-item log step and check the log lines.

    Expects 12 log lines in total: the start line first, an
    'N items processed.' line at index 1, and the finish line last.
    """
    sink = LoggerMock()
    tracker = ProgressLogger(logger=sink, log_item_step=1000)

    tracker.start()
    for _ in range(100):
        tracker.track(100)
    tracker.finish()

    recorded = sink.logs
    assert len(recorded) == 12
    assert recorded[0] == 'Started work.'
    assert recorded[1] == '1000 items processed.'
    assert recorded[11].startswith('Finished work. Total items processed: 10000. Took ')
def test_progress_logger_with_total_items():
    """Track more items than the declared total and check the percentage lines.

    100 calls of 99 items (9900) are tracked against a declared total of 1234,
    so the last progress line is expected to report 802% with a '!!!' suffix.
    """
    sink = LoggerMock()
    tracker = ProgressLogger(logger=sink, log_percentage_step=5)

    tracker.start(total_items=1234)
    for _ in range(100):
        tracker.track(99)
    tracker.finish()

    recorded = sink.logs
    assert len(recorded) == 102
    assert recorded[0] == 'Started work. Items to process: 1234.'
    assert recorded[1] == '99 items processed. Progress is 8%.'
    assert recorded[100] == '9900 items processed. Progress is 802%!!!'
    assert recorded[101].startswith('Finished work. Total items processed: 9900. Took ')
class BatchWorkExecutor:
    """Submit batches of work to a bounded executor, adapting the batch size.

    The batch size starts at ``starting_batch_size``, which is also kept as
    the upper bound ``max_batch_size``. It is halved when a batch of the
    current size fails with one of ``retry_exceptions``. It is doubled again
    (never beyond ``max_batch_size``) when a batch of the current size
    succeeds and more than ``BATCH_CHANGE_COOLDOWN_PERIOD_SECONDS`` seconds
    have passed since the last batch size change.
    """

    def __init__(self,
                 starting_batch_size,
                 max_workers,
                 retry_exceptions=RETRY_EXCEPTIONS,
                 max_retries=5):
        """Store the batching settings and build the executor and loggers.

        starting_batch_size: initial batch size and upper bound for growth.
        max_workers: passed as the second argument to BoundedExecutor.
        retry_exceptions: exception class(es) that trigger the item-by-item
            fallback; also forwarded to ``execute_with_retries``.
        max_retries: forwarded to ``execute_with_retries`` for each item.
        """
        self.batch_size = starting_batch_size
        self.max_batch_size = starting_batch_size
        # time.time() of the last batch size change; None until one happens.
        self.latest_batch_size_change_time = None
        self.max_workers = max_workers
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
        self.retry_exceptions = retry_exceptions
        self.max_retries = max_retries
        self.progress_logger = ProgressLogger()
        self.logger = logging.getLogger('BatchWorkExecutor')

    def execute(self, work_iterable, work_handler, total_items=None):
        """Split ``work_iterable`` into batches and submit each one to the executor.

        ``work_handler`` is called with a batch (a sequence of items).
        ``total_items`` is forwarded to the progress logger's ``start``.
        """
        self.progress_logger.start(total_items=total_items)
        # The batch size is passed as a callable so the iterator can see the
        # value of self.batch_size at the time it asks for it.
        for batch in dynamic_batch_iterator(work_iterable,
                                            lambda: self.batch_size):
            self.executor.submit(self._fail_safe_execute, work_handler, batch)

    def _fail_safe_execute(self, work_handler, batch):
        """Handle one batch, retrying item by item if the batch call fails.

        Exceptions outside ``self.retry_exceptions`` propagate to the caller,
        as does anything raised by ``execute_with_retries`` in the fallback.
        """
        try:
            work_handler(batch)
        except self.retry_exceptions:
            self.logger.exception(
                'An exception occurred while executing work_handler.')
            self._try_decrease_batch_size(len(batch))
            self.logger.info(
                'The batch of size {} will be retried one item at a time.'.
                format(len(batch)))
            for item in batch:
                execute_with_retries(work_handler, [item],
                                     max_retries=self.max_retries,
                                     retry_exceptions=self.retry_exceptions)
        else:
            # Kept outside the try body: a failure while adjusting the batch
            # size must never cause an already handled batch to be re-run.
            self._try_increase_batch_size(len(batch))

        self.progress_logger.track(len(batch))

    # Some acceptable race conditions are possible
    def _try_decrease_batch_size(self, current_batch_size):
        """Halve the batch size if the failed batch had the current size.

        Batches of a different size (e.g. created before an earlier change)
        leave the batch size untouched. The size never drops below 1.
        """
        batch_size = self.batch_size
        if batch_size == current_batch_size and batch_size > 1:
            new_batch_size = current_batch_size // 2
            self.logger.info(
                'Reducing batch size to {}.'.format(new_batch_size))
            self.batch_size = new_batch_size
            self.latest_batch_size_change_time = time.time()

    # Some acceptable race conditions are possible
    def _try_increase_batch_size(self, current_batch_size):
        """Double the batch size after a successful batch of the current size.

        Nothing happens when the doubled size would exceed
        ``max_batch_size``, when the batch size has never been changed, or
        while the cooldown period since the last change has not elapsed.
        """
        # Only a batch of the live size may grow it. Without this guard a
        # short final batch (or a stale smaller batch) would set the size to
        # twice its own length, which can be lower than the current size.
        if current_batch_size != self.batch_size:
            return

        new_batch_size = current_batch_size * 2
        if new_batch_size > self.max_batch_size:
            return

        current_time = time.time()
        latest_batch_size_change_time = self.latest_batch_size_change_time
        # No recorded change counts as zero elapsed seconds.
        seconds_since_last_change = (
            current_time - latest_batch_size_change_time
            if latest_batch_size_change_time is not None else 0)
        if seconds_since_last_change > BATCH_CHANGE_COOLDOWN_PERIOD_SECONDS:
            self.logger.info(
                'Increasing batch size to {}.'.format(new_batch_size))
            self.batch_size = new_batch_size
            self.latest_batch_size_change_time = current_time

    def shutdown(self):
        """Shut down the executor, then finish the progress logger."""
        self.executor.shutdown()
        self.progress_logger.finish()