def test_memory_consumption_stream_processing(self):
    """Check that memory stays flat while a large stream is processed.

    A 40 MB urandom stream is pushed through workers forked from
    string_length_worker and the output is discarded, all inside the
    assert_memory_use(...) context.
    """
    stream_size_kb = 40 * 1024
    worker_pool = vimap.pool.fork_identical(string_length_worker)
    # NOTE(review): the (0, 3) arguments are presumably the allowed memory
    # bounds -- confirm against assert_memory_use.
    with assert_memory_use("test-stream-40-mb", 0, 3):
        # Build the stream inside the context so that creating and
        # consuming it are both covered by the memory check.
        in_progress = worker_pool.imap(urandom_stream(size_kb=stream_size_kb))
        in_progress.block_ignore_output()
def test_memory_consumption_stream_processing(self):
    """Large-stream processing should not grow memory usage.

    Forks identical string_length_worker processes, feeds them a 40 MB
    urandom stream and throws the output away while assert_memory_use(...)
    is active.
    """
    size_kb = 40 * 1024
    pool = vimap.pool.fork_identical(string_length_worker)
    # NOTE(review): meaning of the trailing (0, 3) arguments is not visible
    # here; presumably memory bounds -- confirm.
    with assert_memory_use("test-stream-40-mb", 0, 3):
        # The stream is created within the context on purpose: both its
        # construction and its consumption are part of what is measured.
        streaming = pool.imap(urandom_stream(size_kb=size_kb))
        streaming.block_ignore_output()
def test(self):
    """Check that no input object is still alive once the pool is done.

    Each input is recorded through a weak reference as it is handed to the
    pool; after processing, every weak reference must be dead.
    """
    refs = []
    inputs = (MyFancyClass(index) for index in xrange(100))

    def inputs_with_weakrefs():
        # Only weak references are recorded, so this test itself never
        # keeps an input object alive.
        for obj in inputs:
            refs.append(weakref.ref(obj))
            yield obj

    pool = vimap.pool.fork_identical(do_nothing_worker, num_workers=1)
    pool.imap(inputs_with_weakrefs()).block_ignore_output()

    # A weak reference returns None once its referent has been collected.
    assert not any(ref() is not None for ref in refs)
def test_all_fds_cleaned_up(self):
    """Check that file descriptors opened by a pool are closed afterwards.

    Snapshots the open descriptors before forking, after forking and after
    the work has finished, then compares the snapshots pairwise.
    """
    fds_at_start = get_open_fds()
    pool = vimap.pool.fork_identical(basic_worker, num_workers=1)
    fds_after_fork = get_open_fds()
    list(pool.imap([1, 2, 3]).zip_in_out())
    fds_at_end = get_open_fds()

    # Forking has to open at least two new descriptors.
    fork_delta = difference_open_fds(fds_at_start, fds_after_fork)
    opened_by_fork = fork_delta['opened']
    T.assert_gte(len(opened_by_fork), 2)

    # Each of the newly opened descriptors must be a FIFO.
    only_fifos = all(
        info.modes == ['fifo'] for info in opened_by_fork.values())
    if not only_fifos:
        print("Infos: {0}".format(opened_by_fork))
        T.assert_not_reached("Some infos are not FIFOs")

    # Finishing the work has to close at least two descriptors again.
    cleanup_delta = difference_open_fds(fds_after_fork, fds_at_end)
    T.assert_gte(len(cleanup_delta['closed']), 2)

    # Nothing that was not open at the start may remain open at the end.
    leaked = difference_open_fds(fds_at_start, fds_at_end)
    num_leaked = len(leaked['opened'])
    if num_leaked != 0:
        # Print the queue descriptors among the leaked ones for debugging.
        queue_fds_left_around = dict(
            (fd, info)
            for fd, info in self.queue_fds.items()
            if fd in leaked['opened'])
        print("Queue FDs left around: {0}".format(queue_fds_left_around))
    T.assert_equal(num_leaked, 0)
def test_all_fds_cleaned_up(self):
    """Forking a pool opens FIFOs; finishing the work must close them all.

    Three snapshots of the open file descriptors are taken (before the
    fork, after the fork, after the work is done) and their pairwise
    differences are checked.
    """
    before_fork = get_open_fds()
    pool = vimap.pool.fork_identical(basic_worker, num_workers=1)
    after_fork = get_open_fds()
    list(pool.imap([1, 2, 3]).zip_in_out())
    after_finish = get_open_fds()

    # The fork must have created at least two descriptors ...
    new_since_fork = difference_open_fds(before_fork, after_fork)['opened']
    T.assert_gte(len(new_since_fork), 2)

    # ... and all of them must be FIFOs.
    every_fd_is_fifo = all(
        info.modes == ['fifo'] for info in new_since_fork.values())
    if not every_fd_is_fifo:
        print("Infos: {0}".format(new_since_fork))
        T.assert_not_reached("Some infos are not FIFOs")

    # Completing the work must close at least two descriptors.
    closed_by_cleanup = difference_open_fds(
        after_fork, after_finish)['closed']
    T.assert_gte(len(closed_by_cleanup), 2)

    # Compared with the initial snapshot nothing extra may stay open.
    still_open = difference_open_fds(before_fork, after_finish)['opened']
    leak_count = len(still_open)
    if leak_count != 0:
        # Report which queue descriptors are among the leftovers.
        queue_fds_left_around = dict(
            entry for entry in self.queue_fds.items()
            if entry[0] in still_open)
        print("Queue FDs left around: {0}".format(queue_fds_left_around))
    T.assert_equal(leak_count, 0)
def test_basic(self):
    """Run three workers built from a TestInstance and check their output.

    Every worker is initialised with init_arg=4; the inputs 2100, 2200 and
    2300 are expected to come back paired with 2107, 2207 and 2307.
    """
    instance = TestInstance(unpickleable)
    # Kept as a lazy generator so init_args() is only called when the pool
    # asks for the next worker.
    worker_specs = (
        instance.worker.init_args(init_arg=4) for _ in xrange(3))
    pool = vimap.pool.fork(worker_specs)

    in_out_pairs = pool.imap([2100, 2200, 2300]).zip_in_out()
    # Compared as sets: the order in which results arrive is not checked.
    expected = set([(2100, 2107), (2200, 2207), (2300, 2307)])
    T.assert_equal(set(in_out_pairs), expected)
def run_function_over_input(self): """Wrapper for fork_identical and zip_in_out() Takes care of the standard "fork a bunch of workers to execute a function and then feed them all the file names" yields: input, output tuples from the zip_in_out() call. You should use this function as follows: for input, output in runner.run_function_over_input(): # do something with input/output """ start = datetime.datetime.now() self.progress = self.options.progress files = glob.glob(self.options.datadir + self.options.dataglob) pool = vimap.pool.fork_identical( self.worker_fcn, num_workers=self.options.num_workers, ) for input, output in pool.imap(files).zip_in_out(): if self.progress: print "Processing", input yield input, output end = datetime.datetime.now() if self.progress: print "\n Started at: ", start.isoformat() print "Finished at: ", end.isoformat() print "Processed {0} files in {1} seconds".format( len(files), (end - start).seconds)
def test_streaming(self):
    """Check that vimap really streams its input instead of draining it.

    The input generator first emits the numbers 0..99 and after that keeps
    emitting None until every one of those numbers has been fed through
    the worker and retrieved as output. The number of None values consumed
    therefore shows how far ahead of its output the pool reads its input.
    """
    required_inputs = frozenset(xrange(100))
    seen_inputs = set()
    total_consumed = 0

    def padded_inputs():
        for number in sorted(required_inputs):
            yield number
        # Keep padding with None until the main loop below has seen every
        # required input come back out of the pool.
        while not seen_inputs.issuperset(required_inputs):
            yield None

    pool = self.fork_pool()
    for consumed, _ in pool.imap(padded_inputs()).zip_in_out():
        seen_inputs.add(consumed)
        total_consumed += 1

    # Everything beyond the 100 numerical inputs is a None emitted by
    # padded_inputs(). The count can be positive: while the workers are
    # still busy with the numerical inputs, the main thread already
    # enqueues more input to keep them fed.
    lookahead = total_consumed - len(required_inputs)
    T.assert_gte(lookahead, 0, "Sanity check failed.")

    # NOTE: this can *very* occasionally flake. A batch of items can be fed
    # to the input queue and a batch pulled into the queue manager's
    # temporary output buffer while zip_in_out() has yielded only a single
    # element.
    #
    # The streaming guarantees may be tightened to rule this out, but in
    # general vimap assumes that the input may be an infinite stream that
    # still tolerates a limited amount of non-blocking read-ahead.
    T.assert_lte(
        lookahead,
        pool.qm.max_total_in_flight,
        "max_total_in_flight is a hard upper bound, but was violated.")
def imap_unordered(fcn, iterable, **kwargs):
    """Lazily apply `fcn` to each element of `iterable` via a vimap pool.

    Intended as a stand-in for
    multiprocessing.Pool().imap_unordered(fcn, iterable), which can be a
    nice alternative since vimap fixes bugs in multiprocessing.

    All keyword arguments are handed to vimap.pool.fork_identical:
        num_workers -- number of workers (consumed by fork_identical)
        anything else -- passed to `fcn` on each call

    Yields the outputs only; the matching inputs are dropped.
    """
    @vimap.worker_process.worker
    def worker(inputs, **call_kwargs):
        for item in inputs:
            yield fcn(item, **call_kwargs)

    mapped = vimap.pool.fork_identical(worker, **kwargs).imap(iterable)
    for _input, result in mapped.zip_in_out():
        yield result
def test_all_fds_cleaned_up(self):
    """Check that file descriptors opened by a pool are closed afterwards.

    Compares snapshots of the open descriptors taken before forking, after
    forking and after the work has finished.
    """
    fds_initial = get_open_fds()
    pool = vimap.pool.fork_identical(basic_worker, num_workers=1)
    fds_forked = get_open_fds()
    list(pool.imap([1, 2, 3]).zip_in_out())
    fds_finished = get_open_fds()

    # The fork must have opened at least two descriptors.
    opened_by_fork = difference_open_fds(fds_initial, fds_forked)['opened']
    T.assert_gte(len(opened_by_fork), 2)

    # Every descriptor the fork opened must be a FIFO.
    only_fifos = all(kind == ['fifo'] for kind in opened_by_fork.values())
    T.assert_equal(only_fifos, True)

    # Finishing the work must close at least two descriptors.
    closed_by_cleanup = difference_open_fds(
        fds_forked, fds_finished)['closed']
    T.assert_gte(len(closed_by_cleanup), 2)

    # Nothing may remain open that was not open at the very start.
    leftovers = difference_open_fds(fds_initial, fds_finished)['opened']
    T.assert_equal(len(leftovers), 0)
def sleep_in_parallel():
    """Feed `num_inputs` copies of `time_sleep_s` to the pool and wait.

    `pool`, `time_sleep_s` and `num_inputs` come from the enclosing scope.
    """
    sleep_inputs = (time_sleep_s for _ in xrange(num_inputs))
    pool.imap(sleep_inputs)
    # NOTE(review): close_if_done=False presumably leaves the pool usable
    # for further calls -- confirm against vimap.
    pool.block_ignore_output(close_if_done=False)
def factor_parallel():
    """Run `inputs` through `pool` and block until done, ignoring output.

    `pool` and `inputs` come from the enclosing scope.
    """
    in_progress = pool.imap(inputs)
    # NOTE(review): close_if_done=False presumably leaves the pool usable
    # for further calls -- confirm against vimap.
    in_progress.block_ignore_output(close_if_done=False)
def test_basic(self):
    """Fork three workers from one TestInstance and verify the results.

    With init_arg=4 the inputs 2100, 2200 and 2300 are expected to be
    paired with the outputs 2107, 2207 and 2307.
    """
    test_instance = TestInstance(unpickleable)
    # Generator on purpose: init_args() runs lazily, as the pool consumes
    # the worker specifications.
    specs = (test_instance.worker.init_args(init_arg=4) for _ in xrange(3))
    pool = vimap.pool.fork(specs)

    observed = set(pool.imap([2100, 2200, 2300]).zip_in_out())
    # Set comparison, because the order of the results is not checked.
    T.assert_equal(
        observed,
        set([(2300, 2307), (2100, 2107), (2200, 2207)]))