Example #1
0
    def test_memory_consumption_stream_processing(self):
        """Stream 40 MB of random data through a pool under a memory check.

        The stream is created and consumed entirely inside the
        ``assert_memory_use`` context, so that helper is responsible for
        judging the memory used while streaming.
        """
        workers = vimap.pool.fork_identical(string_length_worker)
        # 40 * 1024 KB, matching the "40-mb" label passed to the checker.
        stream_size_kb = 40 * 1024

        with assert_memory_use("test-stream-40-mb", 0, 3):
            workers.imap(
                urandom_stream(size_kb=stream_size_kb)
            ).block_ignore_output()
Example #2
0
    def test_memory_consumption_stream_processing(self):
        """Process a large random stream while memory use is being asserted.

        A pool of identical ``string_length_worker`` processes is forked
        first; the 40 MB stream is then generated and drained inside the
        ``assert_memory_use`` block.
        """
        size_kb = 40 * 1024  # 40 MB expressed in kilobytes
        pool = vimap.pool.fork_identical(string_length_worker)

        with assert_memory_use("test-stream-40-mb", 0, 3):
            # Output is discarded; only the memory behaviour matters here.
            pool.imap(urandom_stream(size_kb=size_kb)).block_ignore_output()
Example #3
0
    def test(self):
        """Check that no input object stays referenced after processing.

        A weak reference to each input is recorded just before the object is
        handed to the pool. Once the pool has finished, every recorded weak
        reference must be dead.
        """
        tracked_refs = []
        fancy_objects = (MyFancyClass(n) for n in xrange(100))

        def tracking_inputs():
            # Note each object via a weak reference, then pass it along.
            for obj in fancy_objects:
                tracked_refs.append(weakref.ref(obj))
                yield obj

        single_worker_pool = vimap.pool.fork_identical(
            do_nothing_worker, num_workers=1)
        single_worker_pool.imap(tracking_inputs()).block_ignore_output()
        assert all(ref() is None for ref in tracked_refs)
Example #4
0
    def test_all_fds_cleaned_up(self):
        """Verify that file descriptors opened by a pool are closed again.

        Open descriptors are snapshotted three times: before forking, right
        after forking, and after all output has been consumed. The snapshots
        are then compared pairwise with ``difference_open_fds``.
        """
        fds_at_start = get_open_fds()
        pool = vimap.pool.fork_identical(basic_worker, num_workers=1)
        fds_after_fork = get_open_fds()
        list(pool.imap([1, 2, 3]).zip_in_out())
        fds_at_end = get_open_fds()

        # Forking must have opened at least two new descriptors.
        opened_by_fork = difference_open_fds(
            fds_at_start, fds_after_fork)['opened']
        T.assert_gte(len(opened_by_fork), 2)

        # Every descriptor opened by the fork has to be a FIFO.
        only_fifos = all(
            info.modes == ['fifo'] for info in opened_by_fork.values())
        if not only_fifos:
            print("Infos: {0}".format(opened_by_fork))
            T.assert_not_reached("Some infos are not FIFOs")

        # Finishing the work must have closed at least two descriptors.
        closed_by_cleanup = difference_open_fds(
            fds_after_fork, fds_at_end)['closed']
        T.assert_gte(len(closed_by_cleanup), 2)

        # Nothing that was not open at the start may be open now.
        leaked = difference_open_fds(fds_at_start, fds_at_end)['opened']
        if len(leaked) != 0:
            # Report which of the known queue descriptors are among the leaks.
            leaked_queue_fds = dict(
                (fd, desc) for fd, desc in self.queue_fds.items()
                if fd in leaked)
            print("Queue FDs left around: {0}".format(leaked_queue_fds))
        T.assert_equal(len(leaked), 0)
Example #5
0
    def test_all_fds_cleaned_up(self):
        """Ensure a pool leaves no extra file descriptors open when done.

        Compares snapshots of the open descriptors taken before the fork,
        after the fork, and after the pool's output has been fully read.
        """
        before_fork = get_open_fds()
        pool = vimap.pool.fork_identical(basic_worker, num_workers=1)
        after_fork = get_open_fds()
        list(pool.imap([1, 2, 3]).zip_in_out())
        after_finish = get_open_fds()

        fork_diff = difference_open_fds(before_fork, after_fork)
        cleanup_diff = difference_open_fds(after_fork, after_finish)
        overall_diff = difference_open_fds(before_fork, after_finish)

        # The fork is expected to open two or more descriptors, all FIFOs.
        T.assert_gte(len(fork_diff['opened']), 2)
        all_are_fifos = all(
            fd_info.modes == ['fifo']
            for fd_info in fork_diff['opened'].values())
        if not all_are_fifos:
            print("Infos: {0}".format(fork_diff['opened']))
            T.assert_not_reached("Some infos are not FIFOs")

        # Consuming the output is expected to close two or more descriptors.
        T.assert_gte(len(cleanup_diff['closed']), 2)

        # Compared with the initial state, no descriptor may remain open.
        still_open = overall_diff['opened']
        if len(still_open) != 0:
            # Narrow the known queue descriptors down to the leaked ones.
            queue_fds_left_around = {}
            for fd, description in self.queue_fds.items():
                if fd in still_open:
                    queue_fds_left_around[fd] = description
            print(
                "Queue FDs left around: {0}".format(queue_fds_left_around))
        T.assert_equal(len(still_open), 0)
 def test_basic(self):
     """Fork three workers from one instance and check the in/out pairs.

     Each of the three inputs must come back paired with the expected
     output; the order of the pairs is not significant.
     """
     instance = TestInstance(unpickleable)
     worker_specs = (
         instance.worker.init_args(init_arg=4) for _ in xrange(3))
     pool = vimap.pool.fork(worker_specs)
     in_out_pairs = list(pool.imap([2100, 2200, 2300]).zip_in_out())
     expected_pairs = set([(2100, 2107), (2200, 2207), (2300, 2307)])
     # Compare as sets because pairs may arrive in any order.
     T.assert_equal(set(in_out_pairs), expected_pairs)
Example #7
0
    def run_function_over_input(self):
        """Wrapper for fork_identical and zip_in_out()

        Takes care of the standard "fork a bunch of workers to execute a
        function and then feed them all the file names"

        yields: input, output tuples from the zip_in_out() call. You should
                use this function as follows:

                for input, output in runner.run_function_over_input():
                    # do something with input/output
        """
        start = datetime.datetime.now()

        self.progress = self.options.progress

        files = glob.glob(self.options.datadir + self.options.dataglob)
        pool = vimap.pool.fork_identical(
            self.worker_fcn,
            num_workers=self.options.num_workers,
        )

        for input, output in pool.imap(files).zip_in_out():
            if self.progress:
                print "Processing", input

            yield input, output

        end = datetime.datetime.now()

        if self.progress:
            print "\n Started at: ", start.isoformat()
            print "Finished at: ", end.isoformat()
            print "Processed {0} files in {1} seconds".format(
                len(files), (end - start).seconds)
Example #8
0
    def test_streaming(self):
        """Check that the pool pulls its input lazily rather than all at once.

        The input generator first emits the integers 0..99 and afterwards
        keeps emitting ``None`` until every one of those integers has come
        back out of ``zip_in_out()``. The number of ``None`` values consumed
        is therefore a measure of how far ahead the pool reads its input.
        """
        required_inputs = frozenset(xrange(100))
        seen_inputs = set()

        def padded_inputs():
            for number in sorted(required_inputs):
                yield number
            # Pad with None until each required input has been seen as output.
            while not required_inputs.issubset(seen_inputs):
                yield None

        pool = self.fork_pool()
        total_elements = 0
        for fed_input, _ in pool.imap(padded_inputs()).zip_in_out():
            seen_inputs.add(fed_input)
            total_elements += 1

        # Everything beyond the required inputs was a None pad element, i.e.
        # input that was enqueued before the required inputs had all been
        # returned. A non-zero value means the main thread read ahead to
        # keep the workers busy.
        lookahead = total_elements - len(required_inputs)
        T.assert_gte(lookahead, 0, "Sanity check failed.")

        # This bound can very occasionally flake: a batch of inputs may be
        # fed to the input queue and a batch of outputs pulled into the
        # queue manager's temporary buffer while zip_in_out() has yielded
        # only a single element.
        #
        # The streaming guarantees may be tightened to rule that out, but
        # vimap generally assumes the input may be an infinite stream that
        # still tolerates a limited amount of non-blocking read-ahead.
        T.assert_lte(
            lookahead,
            pool.qm.max_total_in_flight,
            "max_total_in_flight is a hard upper bound, but was violated.")
Example #9
0
def imap_unordered(fcn, iterable, **kwargs):
    """Lazily apply ``fcn`` to each element of ``iterable`` using a pool.

    Intended as an alternative to
    multiprocessing.Pool().imap_unordered(fcn, iterable). Results are
    yielded in whatever order the pool produces them.

    Keyword arguments:
        All keyword arguments are forwarded to vimap.pool.fork_identical.
        Presumably that consumes ``num_workers`` and hands the remainder to
        each worker, which passes them on to ``fcn`` for every element --
        confirm against fork_identical.
    """
    @vimap.worker_process.worker
    def worker(inputs, **call_kwargs):
        # Apply fcn to each element this worker receives.
        for element in inputs:
            yield fcn(element, **call_kwargs)

    pool = vimap.pool.fork_identical(worker, **kwargs)
    in_out_pairs = pool.imap(iterable).zip_in_out()
    # Callers only want the outputs, so the input half is dropped.
    for _unused_input, result in in_out_pairs:
        yield result
Example #10
0
def imap_unordered(fcn, iterable, **kwargs):
    """Map ``fcn`` over ``iterable`` with forked vimap workers.

    A generator counterpart to
    multiprocessing.Pool().imap_unordered(fcn, iterable): each yielded value
    is the result of calling ``fcn`` on one element of ``iterable``.

    Keyword arguments:
        Passed unchanged to vimap.pool.fork_identical. ``num_workers`` is
        presumably interpreted there, and the other keyword arguments
        presumably reach ``fcn`` on every call -- verify against
        fork_identical.
    """
    @vimap.worker_process.worker
    def worker(inputs, **fcn_kwargs):
        for item in inputs:
            yield fcn(item, **fcn_kwargs)

    worker_pool = vimap.pool.fork_identical(worker, **kwargs)
    # zip_in_out() yields (input, output) pairs; only the output is exposed.
    for _, mapped in worker_pool.imap(iterable).zip_in_out():
        yield mapped
Example #11
0
    def test_all_fds_cleaned_up(self):
        """Check that descriptors opened for a pool are closed afterwards.

        Three snapshots of the open descriptors (before the fork, after the
        fork, after consuming all output) are compared with
        ``difference_open_fds``.
        """
        fds_initial = get_open_fds()
        pool = vimap.pool.fork_identical(basic_worker, num_workers=1)
        fds_forked = get_open_fds()
        list(pool.imap([1, 2, 3]).zip_in_out())
        fds_finished = get_open_fds()

        # The fork must open at least two descriptors, every one a FIFO.
        opened_by_fork = difference_open_fds(
            fds_initial, fds_forked)['opened']
        T.assert_gte(len(opened_by_fork), 2)
        every_fd_is_fifo = all(
            kind == ['fifo'] for kind in opened_by_fork.values())
        T.assert_equal(every_fd_is_fifo, True)

        # Finishing must close at least two of them again.
        closed_on_finish = difference_open_fds(
            fds_forked, fds_finished)['closed']
        T.assert_gte(len(closed_on_finish), 2)

        # Relative to the starting point, nothing new may remain open.
        leftover = difference_open_fds(fds_initial, fds_finished)['opened']
        T.assert_equal(len(leftover), 0)
Example #12
0
 def sleep_in_parallel():
     """Feed ``num_inputs`` copies of ``time_sleep_s`` to the pool and wait.

     Output is discarded. ``close_if_done=False`` is passed, presumably so
     the enclosing scope can keep using the pool -- confirm against vimap.
     """
     sleep_durations = (time_sleep_s for _ in xrange(num_inputs))
     pool.imap(sleep_durations)
     pool.block_ignore_output(close_if_done=False)
Example #13
0
 def factor_parallel():
     """Run ``inputs`` through the pool and block until it has finished.

     Output is discarded. ``close_if_done=False`` is passed, presumably so
     the pool stays usable afterwards -- confirm against vimap.
     """
     fed_pool = pool.imap(inputs)
     fed_pool.block_ignore_output(close_if_done=False)
 def test_basic(self):
     """Run three inputs through three forked workers and check the pairs.

     The (input, output) pairs are compared as a set, so the order in which
     the pool returns them does not matter.
     """
     test_instance = TestInstance(unpickleable)
     pool = vimap.pool.fork(
         test_instance.worker.init_args(init_arg=4) for _ in xrange(3))
     observed = set(pool.imap([2100, 2200, 2300]).zip_in_out())
     expected = set([(2300, 2307), (2100, 2107), (2200, 2207)])
     T.assert_equal(observed, expected)