def test_deadlock_kill(self):
    """Test deadlock recovery for reusable_executor"""
    executor = get_reusable_executor(max_workers=1, timeout=None)
    # trigger the spawning of the worker process
    executor.submit(sleep, 0.1)
    worker = next(iter(executor._processes.values()))
    with pytest.warns(UserWarning) as recorded_warnings:
        executor = get_reusable_executor(max_workers=2, timeout=None)
    assert len(recorded_warnings) == 1
    expected_msg = ("Trying to resize an executor with running jobs:"
                    " waiting for jobs completion before resizing.")
    assert recorded_warnings[0].message.args[0] == expected_msg
    os.kill(worker.pid, SIGKILL)
    wait_dead(worker)

    # wait for the executor to be able to detect the issue and set itself
    # in broken state:
    sleep(.5)
    with pytest.raises(TerminatedWorkerError,
                       match=filter_match(r"SIGKILL")):
        executor.submit(id_sleep, 42, 0.1).result()

    # the get_reusable_executor factory should be able to create a new
    # working instance
    executor = get_reusable_executor(max_workers=2, timeout=None)
    assert executor.submit(id_sleep, 42, 0.).result() == 42
def test_invalid_process_number():
    """Raise error on invalid process number"""
    with pytest.raises(ValueError):
        get_reusable_executor(max_workers=0)

    with pytest.raises(ValueError):
        get_reusable_executor(max_workers=-1)
def test_compat_with_concurrent_futures_exception():
    # It should be possible to use a loky process pool executor as a
    # drop-in replacement for a ProcessPoolExecutor, including when
    # catching exceptions:
    pytest.importorskip('concurrent.futures')
    from concurrent.futures.process import BrokenProcessPool as BPPExc

    with pytest.raises(BPPExc):
        get_reusable_executor(max_workers=2).submit(crash).result()
def test_resize_after_timeout(self):
    executor = get_reusable_executor(max_workers=2, timeout=.001)
    assert executor.submit(id_sleep, 42, 0.).result() == 42
    sleep(.1)
    executor = get_reusable_executor(max_workers=8, timeout=.001)
    assert executor.submit(id_sleep, 42, 0.).result() == 42
    sleep(.1)
    executor = get_reusable_executor(max_workers=2, timeout=.001)
    assert executor.submit(id_sleep, 42, 0.).result() == 42
def test_kill_workers_on_new_options(self):
    # submit a long running job with no timeout
    executor = get_reusable_executor(max_workers=2, timeout=None)
    f = executor.submit(sleep, 10000)

    # change the constructor parameters while requesting not to wait
    # for the long running task to complete (the workers will be shut
    # down forcibly)
    executor = get_reusable_executor(max_workers=2, timeout=5,
                                     kill_workers=True)
    with pytest.raises(ShutdownExecutorError):
        f.result()
    f2 = executor.submit(id_sleep, 42, 0)
    assert f2.result() == 42
def test_resize_after_timeout(self):
    with warnings.catch_warnings(record=True) as recorded_warnings:
        warnings.simplefilter("always")
        executor = get_reusable_executor(max_workers=2, timeout=.001)
        assert executor.submit(id_sleep, 42, 0.).result() == 42
        sleep(.1)
        executor = get_reusable_executor(max_workers=8, timeout=.001)
        assert executor.submit(id_sleep, 42, 0.).result() == 42
        sleep(.1)
        executor = get_reusable_executor(max_workers=2, timeout=.001)
        assert executor.submit(id_sleep, 42, 0.).result() == 42

    if len(recorded_warnings) > 1:
        expected_msg = 'A worker stopped'
        assert expected_msg in recorded_warnings[0].message.args[0]
def test_kill_after_resize_call(self):
    """Test recovery if killed after resize call"""
    # Test an executor resize that is requested before the kill signal
    # arrives
    executor = get_reusable_executor(max_workers=2, timeout=None)
    executor.map(id, range(2))  # trigger the creation of worker processes
    pid = next(iter(executor._processes.keys()))
    executor.submit(kill_friend, (pid, .1))

    with pytest.warns(UserWarning) as recorded_warnings:
        executor = get_reusable_executor(max_workers=1, timeout=None)
    assert len(recorded_warnings) == 1
    expected_msg = ("Trying to resize an executor with running jobs:"
                    " waiting for jobs completion before resizing.")
    assert recorded_warnings[0].message.args[0] == expected_msg
    assert executor.submit(id_sleep, 42, 0.).result() == 42
def test_osx_accelerate_freeze():
    """Test no freeze on OSX with Accelerate"""
    a = np.random.randn(1000, 1000)
    np.dot(a, a)
    executor = get_reusable_executor(max_workers=2)
    executor.submit(np.dot, a, a)
    executor.shutdown(wait=True)
def run_models_gpu(configurations, models_path, data_path,
                   gpus_available=None):
    if gpus_available is None:
        # GPU ids available on the ESA server
        gpus_available = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    n_configs = len(configurations)
    n_gpus = len(gpus_available)
    n_procs = n_configs if n_configs < n_gpus else n_gpus
    gpus_use = gpus_available[:n_procs]
    configs_partition = partition_configs(configurations, n_procs)

    def mycallback(res):
        gpu_id = res.result(timeout=1)
        print('GPU #{:d} complete'.format(gpu_id))

    # Using the external package loky (basically a wrapper around
    # multiprocessing) which is able to gracefully terminate worker
    # processes and therefore free allocated resources (e.g. GPU memory)
    # on job completion.
    with get_reusable_executor(max_workers=n_procs, timeout=1) as executor:
        for idx, config_lst in enumerate(configs_partition):
            res = executor.submit(run_on_gpu, gpus_use[idx], config_lst,
                                  models_path, data_path)
            res.add_done_callback(mycallback)
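# A minimal, hypothetical usage sketch for run_models_gpu: it assumes
# partition_configs and run_on_gpu are defined as in the surrounding
# project; the configuration dicts and paths below are placeholders only.
if __name__ == '__main__':
    example_configurations = [
        {'model': 'cnn', 'lr': 1e-3},
        {'model': 'cnn', 'lr': 1e-4},
    ]
    run_models_gpu(example_configurations,
                   models_path='/tmp/models',
                   data_path='/tmp/data',
                   gpus_available=[0, 1])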
def test_memory_leak_protection():
    def leak_some_memory(size=int(1e6), delay=0.01):
        if getattr(os, '__loky_leak', None) is None:
            os.__loky_leak = []
        os.__loky_leak.append(b"\x00" * size)

        # Leave enough time for the memory leak detector to kick in:
        # by default the process does not check its memory usage
        # more than once per second.
        sleep(delay)

        leaked_size = sum(len(buffer) for buffer in os.__loky_leak)
        return os.getpid(), leaked_size

    executor = get_reusable_executor(max_workers=1)

    with pytest.warns(UserWarning, match='memory leak'):
        futures = []
        for i in range(300):
            # Total run time should be 3s which is way over the 1s cooldown
            # period between two consecutive memory checks in the worker.
            futures.append(executor.submit(leak_some_memory))

        results = [f.result() for f in futures]

    # The pid of the worker has changed when restarting the worker
    first_pid, last_pid = results[0][0], results[-1][0]
    assert first_pid != last_pid

    # The restart happened after 100 MB of leak over the default process
    # size + what has leaked since the last memory check.
    for _, leak_size in results:
        assert leak_size / 1e6 < 250
def test_crash_races(self, n_proc):
    """Test the race conditions in reusable_executor crash handling"""
    if (sys.platform == 'win32' and sys.version_info >= (3, 8)
            and n_proc > 5):
        pytest.skip(
            "On win32, the paging size can be too small to import numpy "
            "multiple times in the sub-processes (imported when loading "
            "this file). Skipping while no better solution is found. See "
            "https://github.com/joblib/loky/issues/279 for more details.")

    # Test an external crash signal coming from a neighbor worker, with
    # various race setups
    executor = get_reusable_executor(max_workers=n_proc, timeout=None)
    executor.map(id, range(n_proc))  # trigger the creation of the workers
    pids = list(executor._processes.keys())
    assert len(pids) == n_proc
    assert None not in pids
    res = executor.map(sleep_then_check_pids_exist,
                       [(.0001 * (j // 2), pids)
                        for j in range(2 * n_proc)])
    assert all(list(res))
    with pytest.raises(TerminatedWorkerError,
                       match=filter_match(r"SIGKILL")):
        res = executor.map(kill_friend, pids[::-1])
        list(res)
def __init__(self, max_workers=1):
    self.max_workers = max_workers
    self.pool = get_reusable_executor(max_workers=max_workers,
                                      timeout=None)
    # self.pool = ProcessPoolExecutor(max_workers=max_workers)
    # self.pool = MPIPoolExecutor(max_workers, main=False)
    self.tasks = OrderedDict()  # a dict of {tags: futures}
def in_callback_submit(future):
    future2 = get_reusable_executor(
        max_workers=2, timeout=12).submit(func, *args)
    # Store the future of the job submitted in the callback to make it
    # easy to introspect.
    future.callback_future = future2
    future.callback_done.set()
def test_in_callback_submit_with_crash(self, func, args, expected_exc,
                                       match):
    """Test the recovery from callback crash"""
    executor = get_reusable_executor(max_workers=2, timeout=12)

    def in_callback_submit(future):
        future2 = get_reusable_executor(
            max_workers=2, timeout=12).submit(func, *args)
        # Store the future of the job submitted in the callback to make
        # it easy to introspect.
        future.callback_future = future2
        future.callback_done.set()

    # Make sure the first submitted job lasts a bit to make sure that
    # the callback will be called in the queue manager thread and not
    # immediately in the main thread.
    delay = 0.1
    f = executor.submit(id_sleep, 42, delay)
    f.callback_done = threading.Event()
    f.add_done_callback(in_callback_submit)

    assert f.result() == 42
    if not f.callback_done.wait(timeout=3):
        raise AssertionError('callback not done before timeout')

    with pytest.raises(expected_exc) as exc_info:
        f.callback_future.result()

    # For remote tracebacks, ensure that the cause contains the original
    # error
    if match is not None:
        with pytest.raises(_RemoteTraceback, match=match):
            raise exc_info.value.__cause__
def setup_method(self, method):
    default_start_method = get_context().get_start_method()
    assert default_start_method == "loky", default_start_method
    executor = get_reusable_executor(max_workers=2)
    _check_executor_started(executor)
    # There can be fewer than 2 workers because of the worker timeout
    _check_subprocesses_number(executor, expected_max_process_number=2)
def test_compat_with_concurrent_futures_exception(self):
    # It should be possible to use a loky process pool executor as a
    # drop-in replacement for a ProcessPoolExecutor, including when
    # catching exceptions:
    concurrent = pytest.importorskip('concurrent')
    from concurrent.futures.process import BrokenProcessPool as BPPExc

    with pytest.raises(BPPExc):
        get_reusable_executor(max_workers=2).submit(crash).result()

    e = get_reusable_executor(max_workers=2)
    f = e.submit(id, 42)

    # Ensure that loky.Future is compatible with concurrent.futures
    # (see #155)
    assert isinstance(f, concurrent.futures.Future)
    (done, running) = concurrent.futures.wait([f], timeout=15)
    assert len(running) == 0
def parallel_map(func, args, num_proc):
    """Run function for all arguments using multiple processes."""
    num_proc = min(num_proc, len(args))

    if num_proc <= 1:
        return list(map(func, args))
    else:
        with get_reusable_executor(max_workers=num_proc,
                                   timeout=None) as e:
            return list(e.map(func, args))
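# A minimal usage sketch for parallel_map. The `square` helper is
# hypothetical; any callable that loky can serialize (loky relies on
# cloudpickle, so even interactively defined functions usually work)
# can be mapped the same way.
def square(x):
    return x * x

if __name__ == '__main__':
    assert parallel_map(square, [1, 2, 3, 4], num_proc=2) == [1, 4, 9, 16]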
def test_worker_timeout_shutdown_deadlock(self):
    """Check that worker timeouts don't cause deadlocks, even when
    shutting down.
    """
    with pytest.warns(UserWarning,
                      match=r'^A worker timeout while some jobs'):
        with get_reusable_executor(max_workers=2, timeout=.001) as e:
            f = e.submit(id, SlowlyPickling(1))

        f.result()
def test_reusable_executor_thread_safety(self, workers, executor_state):
    if executor_state == 'clean_start':
        # Create a new shared executor and ensure that its workers are
        # ready:
        get_reusable_executor(reuse=False).submit(id, 42).result()
    else:
        # Break the shared executor before launching the threads:
        with pytest.raises(TerminatedWorkerError,
                           match=filter_match(r"SIGSEGV")):
            executor = get_reusable_executor(reuse=False)
            executor.submit(return_instance, CrashAtPickle).result()

    def helper_func(output_collector, max_workers=2, n_outer_steps=5,
                    n_inner_steps=10):
        with warnings.catch_warnings():
            # ignore resize warnings
            warnings.simplefilter("always")
            executor = get_reusable_executor(max_workers=max_workers)
            for i in range(n_outer_steps):
                results = executor.map(lambda x: x ** 2,
                                       range(n_inner_steps))
                expected_result = [x ** 2 for x in range(n_inner_steps)]
                assert list(results) == expected_result
        output_collector.append('ok')

    if workers == 'constant':
        max_workers = [2] * 10
    else:
        max_workers = [(i % 4) + 1 for i in range(10)]

    # Use the same executor with the same number of workers concurrently
    # in different threads:
    output_collector = []
    threads = [
        threading.Thread(target=helper_func, args=(output_collector, w),
                         name='test_thread_%02d_max_workers_%d' % (i, w))
        for i, w in enumerate(max_workers)
    ]

    with warnings.catch_warnings(record=True):
        for t in threads:
            t.start()
        for t in threads:
            t.join()
    assert output_collector == ['ok'] * len(threads)
def map_reduce_multicore(
        f: tp.Callable[..., ResultType],
        reduction: tp.Callable[[ResultType, ResultType], ResultType],
        initial_value: tp.Optional[ResultType] = None,
        args_list: tp.Optional[tp.Sequence[tp.Sequence]] = None,
        kwargs_list: tp.Optional[tp.Sequence[tp.Dict[str, tp.Any]]] = None,
        number_of_batches: tp.Optional[int] = None,
        multiprocessing_pool_type: MultiprocessingPoolType =
        MultiprocessingPoolType.default()) -> ResultType:

    if number_of_batches is None:
        if args_list is not None:
            number_of_batches = len(args_list)
        elif kwargs_list is not None:
            number_of_batches = len(kwargs_list)
        else:
            raise ValueError('number_of_batches must be defined if '
                             'both args_list and kwargs_list are empty')

    if args_list is None:
        args_list = number_of_batches * [list()]
    if kwargs_list is None:
        kwargs_list = number_of_batches * [dict()]

    result = initial_value

    if multiprocessing_pool_type == MultiprocessingPoolType.LOKY:
        from loky import get_reusable_executor

        executor = get_reusable_executor(timeout=None, context='loky')

        futures = [
            executor.submit(f, *args, **kwargs)
            for args, kwargs in zip(args_list, kwargs_list)
        ]

        result_from_future = lambda x: x.result()
    elif multiprocessing_pool_type == MultiprocessingPoolType.PATHOS:
        from pathos.pools import ProcessPool

        pool = ProcessPool()
        futures = [
            pool.apipe(f, *args, **kwargs)
            for args, kwargs in zip(args_list, kwargs_list)
        ]

        result_from_future = lambda x: x.get()
    else:
        raise ValueError(
            f'Multiprocessing pool type {multiprocessing_pool_type} '
            f'not supported')

    for future in futures:
        result = reduce_with_none(result, result_from_future(future),
                                  reduction)

    return result
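# A hypothetical usage sketch for map_reduce_multicore: summing the squares
# of 0..9 across worker processes. It assumes MultiprocessingPoolType and
# reduce_with_none come from the surrounding module, and that
# reduce_with_none treats a None accumulator as the identity.
import operator

if __name__ == '__main__':
    total = map_reduce_multicore(
        f=lambda x: x * x,
        reduction=operator.add,
        args_list=[(i,) for i in range(10)],
        multiprocessing_pool_type=MultiprocessingPoolType.LOKY)
    assert total == 285  # 0^2 + 1^2 + ... + 9^2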
def executor(max_workers, timeout, func, feeds):
    assert type(feeds) is list, (
        "[ERROR] feeds should be a Python list; here it is {}".format(
            str(type(feeds))))
    executor = get_reusable_executor(max_workers=int(max_workers),
                                     timeout=int(timeout))
    results = executor.map(func, feeds)
    return results
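# A minimal usage sketch for the executor helper above; `word_length` is a
# hypothetical worker function and the feeds are placeholder strings. Note
# that the return value is a lazy map iterator, so results appear as it is
# consumed.
def word_length(word):
    return len(word)

if __name__ == '__main__':
    for length in executor(2, 60, word_length, ['alpha', 'beta']):
        print(length)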
async def run_in_executor(func, *args, **kwargs):
    exc = get_reusable_executor(max_workers=WORKERS)
    # run the (partially applied) function on the pool of processes via
    # the event loop's run_in_executor machinery.
    # Inefficient, but a plain ProcessPoolExecutor will not restart
    # processes in case of a memory leak.
    res = await asyncio.get_event_loop().run_in_executor(
        exc, partial(func, *args, **kwargs))
    return res
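# A hypothetical usage sketch for run_in_executor, assuming WORKERS is
# defined at module level; `cpu_bound` is a placeholder for a real
# CPU-heavy function.
import asyncio

def cpu_bound(n, power=2):
    return n ** power

async def main():
    print(await run_in_executor(cpu_bound, 10, power=3))

if __name__ == '__main__':
    asyncio.run(main())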
def test_shutdown_deadlock(self):
    """Test that the executor.shutdown call does not cause a deadlock"""
    executor = get_reusable_executor(max_workers=2, timeout=None)
    executor.map(id, range(2))  # start the worker processes
    executor.submit(kill_friend,
                    (next(iter(executor._processes.keys())), .0))
    sleep(.01)
    executor.shutdown(wait=True)
def test_numpy_dot_parent_and_child_no_freeze(self):
    """Test that no freeze happens in the child process when numpy's
    thread pool is started in the parent.
    """
    a = np.random.randn(1000, 1000)
    np.dot(a, a)  # trigger the thread pool init in the parent process
    executor = get_reusable_executor(max_workers=2)
    executor.submit(np.dot, a, a).result()
    executor.shutdown(wait=True)
def test_queue_full_deadlock(self):
    executor = get_reusable_executor(max_workers=1)
    fs_fail = [executor.submit(do_nothing, ErrorAtPickle(True))
               for i in range(100)]
    fs = [executor.submit(do_nothing, ErrorAtPickle(False))
          for i in range(100)]
    with pytest.raises(PicklingError):
        fs_fail[99].result()
    assert fs[99].result()
def test_imap_handle_iterable_exception(self):
    # How errors raised while generating the imap inputs are caught
    # depends on the Python version
    executor = get_reusable_executor(max_workers=2)
    with pytest.raises(SayWhenError):
        executor.map(id_sleep, exception_throwing_generator(10, 3),
                     chunksize=1)

    # SayWhenError seen at start of problematic chunk's results
    executor = get_reusable_executor(max_workers=2)
    with pytest.raises(SayWhenError):
        executor.map(id_sleep, exception_throwing_generator(20, 7),
                     chunksize=2)

    executor = get_reusable_executor(max_workers=2)
    with pytest.raises(SayWhenError):
        executor.map(id_sleep, exception_throwing_generator(20, 7),
                     chunksize=4)
def test_informative_error_when_fail_at_unpickle(self):
    executor = get_reusable_executor(max_workers=2)
    obj = ErrorAtUnpickle(RuntimeError, 'message raised in child')
    f = executor.submit(id, obj)

    with pytest.raises(BrokenProcessPool) as exc_info:
        f.result()
    assert 'RuntimeError' in str(exc_info.value.__cause__)
    assert 'message raised in child' in str(exc_info.value.__cause__)
def _getExecutor():
    global parallel, _executor, _prefetch
    if _executor is not None:
        return _executor
    _executor = get_reusable_executor(
        max_workers=parallel * _prefetch,
        # initializer=_init,
        # initargs=(db_pool_size, db_host, db_port, db_pwd),
        timeout=4500)
    return _executor
def _getExecutor(workers=multiprocessing.cpu_count()):
    global _executor
    if _executor is not None:
        return _executor
    _executor = get_reusable_executor(
        max_workers=workers,
        initializer=_init,
        # initargs=(db_pool_size, db_host, db_port, db_pwd),
        timeout=1800)
    return _executor
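# A self-contained sketch of the initializer pattern used above, with
# hypothetical names (_example_init, _example_conn): the initializer runs
# once in each worker process and stashes per-process state (e.g. a DB
# connection) in a module-level variable that later tasks can read.
import os

_example_conn = None

def _example_init(conn_str):
    global _example_conn
    _example_conn = conn_str  # a real version would open a connection here

def _example_task():
    return os.getpid(), _example_conn

if __name__ == '__main__':
    ex = get_reusable_executor(max_workers=2, timeout=60,
                               initializer=_example_init,
                               initargs=('db://example',))
    print(ex.submit(_example_task).result())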
def test_reusable_executor_reuse_true(self):
    executor = get_reusable_executor(max_workers=3, timeout=42)
    executor.submit(id, 42).result()
    assert len(executor._processes) == 3
    assert executor._timeout == 42

    executor2 = get_reusable_executor(reuse=True)
    executor2.submit(id, 42).result()
    assert len(executor2._processes) == 3
    assert executor2._timeout == 42
    assert executor2 is executor

    executor3 = get_reusable_executor()
    executor3.submit(id, 42).result()
    assert len(executor3._processes) == cpu_count()
    assert executor3._timeout == 10
    assert executor3 is not executor

    executor4 = get_reusable_executor()
    assert executor4 is executor3