def __init__(self, source, batchsize=500, queuesize=20):
    """Store the configuration and start the worker process.

    Arguments:
    - source: the source to iterate
    - batchsize: how many rows the worker process hands over each time it
      passes on a batch of rows. Must be a positive integer. Default: 500
    - queuesize: upper bound on the number of batches that may wait in the
      queue between the processes. 0 means unlimited. Default: 20

    Raises ValueError if batchsize is not a positive integer.
    """
    is_positive_int = isinstance(batchsize, int) and batchsize >= 1
    if not is_positive_int:
        raise ValueError('batchsize must be a positive integer')

    # Everything the worker reads must be in place before it is started.
    self.__queue = Queue(queuesize)
    self.__batchsize = batchsize
    self.__source = source

    worker = Process(target=self.__worker, name="Process for ProcessSource")
    worker.start()
def __init__(self, seq, callee):
    """Set up a source that iterates one created source per element of seq.

    Arguments:
    - seq: a sequence with the elements for each of which a unique source
      must be created. The elements are given (one by one) to callee.
    - callee: a function f(e) that must accept elements as those in the
      seq argument. The function should return a source which then will
      be iterated by this source. The function is called once for every
      element in seq.

    Raises TypeError if callee is not callable.
    """
    # Validate before allocating anything: a bad callee should fail fast
    # without creating a queue or leaving a half-initialised object behind.
    if not callable(callee):
        raise TypeError('callee must be callable')
    self.__callee = callee

    self.__queue = Queue()  # a multiprocessing.Queue
    # The elements are kept in a process-safe queue such that this object
    # can be used from different fork'ed processes.
    for e in seq:
        self.__queue.put(e)