# Example no. 1
class DynamicForEachSource(object):
    """A source that for each given argument creates a new source that
    will be iterated by this source.

    For example, useful for directories where a CSVSource should be created
    for each file.

    The user must provide a function that when called with a single argument,
    returns a new source to iterate. A DynamicForEachSource instance can be
    given to several ProcessSource instances.
    """

    def __init__(self, seq, callee):
        """Arguments:
        - seq: A sequence with the elements for each of which a unique source
          must be created. The elements are given (one by one) to callee.
        - callee: A function f(e) that must accept elements as those in the seq
          argument. The function should return a source which then will be
          iterated by this source. The function is called once for every
          element in seq.

        Raises TypeError if callee is not callable.
        """
        # Validate before allocating the queue so a bad argument costs nothing.
        if not callable(callee):
            raise TypeError('callee must be callable')
        self.__callee = callee
        self.__queue = Queue()  # a multiprocessing.Queue
        for e in seq:
            # put them in a safe queue such that this object can be used from
            # different fork'ed processes
            self.__queue.put(e)

    def __iter__(self):
        """Yield the rows of every source created from the queued elements.

        Each element is taken from the shared queue exactly once, so when
        several consumers iterate the same instance, each element's source is
        iterated by only one of them.
        """
        while True:
            try:
                # Non-blocking get: an empty queue is the end-of-data signal.
                # NOTE: a multiprocessing.Queue hands items to a feeder
                # thread, so an element put immediately before this call may
                # not be visible yet.
                arg = self.__queue.get(False)
                src = self.__callee(arg)
                for row in src:
                    yield row
            except Empty:
                # A plain return ends the generator; raising StopIteration
                # here is turned into RuntimeError by PEP 479.
                return
# Example no. 2
class ProcessSource(object):
    """A class for iterating another source in a separate process"""

    def __init__(self, source, batchsize=500, queuesize=20):
           - source: the source to iterate
           - batchsize: the number of rows passed from the worker process each
             time it passes on a batch of rows. Must be positive. Default: 1000
           - queuesize: the maximum number of batches that can wait in a queue
             between the processes. 0 means unlimited. Default: 100
        if type(batchsize) != int or batchsize < 1:
            raise ValueError, 'batchsize must be a positive integer'
        self.__source = source
        self.__batchsize = batchsize
        self.__queue = Queue(queuesize)
        p = Process(target=self.__worker) = "Process for ProcessSource"

    def __worker(self):
        batch = []
            for row in self.__source:
                if len(batch) == self.__batchsize:
                    batch = []
            # We're done. Send the batch if it has any data and a signal
            if batch:
        except Exception, e:
            if batch: