Exemple #1
0
class Distributer(object):
    def __init__(self, kind, chunkSize=20):
        self.kind = kind
        self.chunk_size = chunkSize

    def distribute(self, function, function_kwargs, iterable, starmap=False, skip_if_error=True, max_attempts=3):
        if self.kind == 'thread':
            self.pool = ThreadPool(self.chunk_size)
        else:
            raise NotImplemented

        if isinstance(self.pool, ThreadPool):
            for chunk in chunks(iterable, self.chunk_size):
                attempts = 0
                individually = False

                while attempts < max_attempts:
                    try:
                        if not individually:
                            if starmap:
                                ret = self.pool.starmap(function, chunk)
                            else:
                                ret = self.pool.map(partial(function, **function_kwargs), chunk)
                            attempts = max_attempts
                        else:
                            ret = []
                            for item in chunk:
                                try:
                                    if starmap:
                                        val = function(*item)
                                    else:
                                        val = partial(function, **function_kwargs)(item)
                                    ret.append(val)

                                except Exception as e:
                                    log.critical(e)
                                    log.critical(''.join(traceback.format_exception(None, e, e.__traceback__)))
                                    attempts += 1
                                    if attempts >= max_attempts:
                                        if skip_if_error:
                                            ret.append(pd.DataFrame())
                                        else:
                                            raise e

                    except Exception as e:
                        log.error(e)
                        attempts += 1
                        if attempts >= max_attempts:
                            individually = True
                            attempts = 0

                for i, item in enumerate(ret):
                    yield (chunk[i], item)

        else:
            raise NotImplemented

        self.pool.stop()

    def stop(self):
        self.pool.stop()

    @staticmethod
    def default():
        return Distributer('thread')