def test_nested_parallel_warnings():
    # The warnings happen in child processes, so warnings.catch_warnings
    # cannot be used for these tests; that's why we use
    # check_subprocess_call instead.
    if posix is None:
        # This test passes only when fork is the process start method
        raise SkipTest('Not a POSIX platform')
    template_code = """
import sys
from joblib import Parallel, delayed

def func():
    return 42

def parallel_func():
    res = Parallel(n_jobs={inner_n_jobs})(delayed(func)() for _ in range(3))
    return res

Parallel(n_jobs={outer_n_jobs})(delayed(parallel_func)() for _ in range(5))
"""
    # no warnings if inner_n_jobs=1
    code = template_code.format(inner_n_jobs=1, outer_n_jobs=2)
    check_subprocess_call([sys.executable, '-c', code], stderr_regex='^$')

    # warnings if inner_n_jobs != 1
    regex = 'Multiprocessing-backed parallel loops cannot be nested'
    code = template_code.format(inner_n_jobs=2, outer_n_jobs=2)
    check_subprocess_call([sys.executable, '-c', code], stderr_regex=regex)
def parallel_search(k_pot, pots, lambdas, n_jobs=4):
    """Compute the L-curve points for each lambda, in parallel if possible.

    Parameters
    ----------
    k_pot : np.ndarray
    pots : list
    lambdas : list

    Returns
    -------
    modelnormseq : list
    residualseq : list
    """
    if PARALLEL_AVAILABLE:
        jobs = (delayed(L_model_fast)(k_pot, pots, lamb, i)
                for i, lamb in enumerate(lambdas))
        modelvsres = Parallel(n_jobs=n_jobs, backend='threading')(jobs)
    else:
        # Sequential fallback when joblib is not available
        modelvsres = [L_model_fast(k_pot, pots, lamb, i)
                      for i, lamb in enumerate(lambdas)]
    modelnormseq, residualseq = zip(*modelvsres)
    return modelnormseq, residualseq
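# Hedged usage sketch for parallel_search above. PARALLEL_AVAILABLE and
# L_model_fast are stand-ins (assumed names) for the module globals the
# function expects; the threading backend needs no pickling, so a toy
# model function suffices for illustration.
import numpy as np

PARALLEL_AVAILABLE = True

def L_model_fast(k_pot, pots, lamb, i):
    # Toy stand-in: pretend the model norm shrinks and the residual
    # grows with lambda.
    return 1.0 / (1.0 + lamb), lamb

def _demo_parallel_search():
    norms, resids = parallel_search(np.eye(3), pots=[1, 2, 3],
                                    lambdas=[0.1, 1.0, 10.0], n_jobs=2)
    assert len(norms) == len(resids) == 3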
def concurrent_get_filename(array, temp_dirs):
    with Parallel(backend='loky', n_jobs=2, max_nbytes=10) as p:
        for i in range(10):
            [filename] = p(delayed(getattr)(array, 'filename')
                           for _ in range(1))
            temp_dirs.add(os.path.dirname(filename))
def pmap(f, arr, n_jobs=-1, prefer='threads', verbose=10):
    return Parallel(n_jobs=n_jobs, prefer=prefer, verbose=verbose)(
        delayed(f)(i) for i in arr)
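# Minimal usage sketch of the pmap helper above (assumes joblib is
# installed); math.sqrt is cheap, so the thread-preferring default fits.
import math

def _demo_pmap():
    roots = pmap(math.sqrt, range(10), n_jobs=2, verbose=0)
    assert roots[9] == 3.0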
def check_same_results(params):
    n_tasks = params.pop('n_tasks')
    expected = [square(i) for i in range(n_tasks)]
    results = Parallel(**params)(delayed(square)(i) for i in range(n_tasks))
    assert_equal(results, expected)
def test_parallel_timeout_fail():
    # Check that timeout properly fails when function is too slow
    for backend in ['multiprocessing', 'threading']:
        nose.tools.assert_raises(
            TimeoutError,
            Parallel(n_jobs=2, backend=backend, timeout=0.01),
            (delayed(sleep)(10) for x in range(10)))
def check_nested_loop(parent_backend, child_backend):
    Parallel(n_jobs=2, backend=parent_backend)(
        delayed(nested_loop)(child_backend) for _ in range(2))
param_names.append("seed") param_names.append("result") true_param_len = len(param_names) # Important to record values to calculate global minimas efficiently for m in metrics.keys(): for split in splits: split_key = "{}_scores".format(split) param_names.append("{}_{}".format(m, split_key)) # df = pd.DataFrame([], columns=param_names) count = 0 missing = [] with parallel_backend(backend="multiprocessing", n_jobs=args.n_jobs): dfs = Parallel()( delayed(joblib_fn)( count, entry, param_names ) for count, entry in enumerate(itertools.product(*param_list), start=1) ) missing = [_df for _df in dfs if isinstance(_df, int)] dfs = [_df for _df in dfs if isinstance(_df, pd.DataFrame)] df = pd.concat(dfs).sort_index() try: _global_mins = extract_global_minimums( df, z_discrete.get_hyperparameter_names(), true_param_len ) except Exception as e: print(repr(e)) _global_mins = None global_mins = dict(val=dict(), test=dict())
def test_simple_parallel(backend, n_jobs, verbose):
    assert ([square(x) for x in range(5)] ==
            Parallel(n_jobs=n_jobs, backend=backend, verbose=verbose)(
                delayed(square)(x) for x in range(5)))
def parallel_func(inner_n_jobs, backend):
    return Parallel(n_jobs=inner_n_jobs, backend=backend)(
        delayed(square)(i) for i in range(3))
def test_func_code_consistency():
    from joblib.parallel import Parallel, delayed
    codes = Parallel(n_jobs=2)(delayed(_get_code)() for _ in range(5))
    assert len(set(codes)) == 1
    quartiles = list(range(0, 100, 25))
    feat_quartiles = np.array(
        [[np.nanpercentile(data[start:end], q) for start, end in blocks]
         for q in quartiles])
    feat_quartiles = flatten(feat_quartiles)
    feats += feat_mean + feat_std + feat_quartiles
    mods = ['mean', 'std'] + [f'quart{q}' for q in quartiles]
    for start, end in blocks:
        _names = [f'{feat_name}_{mod}_block{start}:{end}' for mod in mods]
        for _name in _names:
            feature_names[_name] = {'start': start, 'type': feat_name}
    return feats, feature_names


res = Parallel(16)(delayed(extract)(p)
                   for p in tqdm(ss, desc='extracting features'))
feature_names = res[0][1]
feature_types = list(set(x['type'] for x in feature_names.values()))
data_x = np.array([x for x, _ in res])
data_y = np.array([p.group == 'nt1' for p in ss])

if len(feature_names) != len(data_x.T):
    print(f'Names do not match length of feature vector: '
          f'{len(data_x.T)}!={len(feature_names)}')

# Replace NaN values with the mean of this feature over all participants,
# then zero out any remaining infinities.
for i in range(data_x.shape[-1]):
    data_x[np.isnan(data_x[:, i]), i] = np.nanmean(data_x[:, i])
data_x = np.nan_to_num(data_x, posinf=0, neginf=0)
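# Tiny self-check of the NaN-imputation step above on toy data, making the
# column-wise semantics explicit (illustrative only).
def _demo_nan_imputation():
    import numpy as np
    demo = np.array([[1.0, np.nan], [3.0, 4.0]])
    for i in range(demo.shape[-1]):
        demo[np.isnan(demo[:, i]), i] = np.nanmean(demo[:, i])
    assert demo[0, 1] == 4.0  # the NaN was replaced by the column mean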
def nested_call(n):
    return Parallel(n_jobs=2)(delayed(id)(i) for i in range(n))
def get_nested_pids():
    assert _active_backend_type() == ThreadingBackend
    return Parallel(n_jobs=2)(delayed(os.getpid)() for _ in range(2))
def nested_function_outer(i):
    Parallel(n_jobs=2)(
        delayed(nested_function_inner)(j) for j in range(30))
def nested_function_inner(i):
    Parallel(n_jobs=2)(
        delayed(exception_raiser)(j) for j in range(30))
def test_parallel_with_exhausted_iterator():
    exhausted_iterator = iter([])
    assert Parallel(n_jobs=2)(exhausted_iterator) == []
def test_mutate_input_with_threads():
    """Input is mutable when using the threading backend"""
    q = Queue(maxsize=5)
    Parallel(n_jobs=2, backend="threading")(
        delayed(q.put)(1) for _ in range(5))
    assert q.full()
def nested_loop(backend):
    Parallel(n_jobs=2, backend=backend)(
        delayed(square)(.01) for _ in range(2))
def test_parallel_kwargs(n_jobs):
    """Check the keyword argument processing of pmap."""
    lst = range(10)
    assert ([f(x, y=1) for x in lst] ==
            Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst))
def test_parallel_kwargs():
    """Check the keyword argument processing of pmap."""
    lst = range(10)
    for n_jobs in (1, 4):
        yield (assert_equal,
               [f(x, y=1) for x in lst],
               Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst))
def test_parallel_timeout_success(backend):
    # Check that timeout isn't thrown when function is fast enough
    assert len(Parallel(n_jobs=2, backend=backend, timeout=10)(
        delayed(sleep)(0.001) for x in range(10))) == 10
def test_exception_dispatch():
    "Make sure that exceptions raised during dispatch are indeed captured"
    assert_raises(
        ValueError,
        Parallel(n_jobs=2, pre_dispatch=16, verbose=0),
        (delayed(exception_raiser)(i) for i in range(30)))
def test_parallel_timeout_fail(backend):
    # Check that timeout properly fails when function is too slow
    with raises(TimeoutError):
        Parallel(n_jobs=2, backend=backend, timeout=0.01)(
            delayed(sleep)(10) for x in range(10))
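# Companion sketch for application code: as the timeout tests above assume,
# joblib raises multiprocessing.TimeoutError when the timeout elapses, so
# callers can catch it and degrade gracefully (sleep is a slow stand-in).
from time import sleep
from multiprocessing import TimeoutError
from joblib import Parallel, delayed

def run_or_none():
    try:
        return Parallel(n_jobs=2, backend='threading', timeout=0.05)(
            delayed(sleep)(10) for _ in range(4))
    except TimeoutError:
        return None  # the caller decides how to handle the lost batch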
def test_parallel_with_exhausted_iterator():
    exhausted_iterator = iter([])
    assert_equal(Parallel(n_jobs=2)(exhausted_iterator), [])
def test_error_capture(backend):
    # Check that errors are captured, and that the correct exceptions
    # are raised.
    if mp is not None:
        # A JoblibException will be raised only if there is indeed
        # multiprocessing
        with raises(JoblibException):
            Parallel(n_jobs=2, backend=backend)(
                [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])
        with raises(WorkerInterrupt):
            Parallel(n_jobs=2, backend=backend)(
                [delayed(interrupt_raiser)(x) for x in (1, 0)])

        # Try again with the context manager API
        with Parallel(n_jobs=2, backend=backend) as parallel:
            assert get_workers(parallel._backend) is not None
            original_workers = get_workers(parallel._backend)

            with raises(JoblibException):
                parallel(
                    [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])

            # The managed pool should still be available and be in a working
            # state despite the previously raised (and caught) exception
            assert get_workers(parallel._backend) is not None

            # The pool should have been interrupted and restarted:
            assert get_workers(parallel._backend) is not original_workers

            assert ([f(x, y=1) for x in range(10)] ==
                    parallel(delayed(f)(x, y=1) for x in range(10)))

            original_workers = get_workers(parallel._backend)
            with raises(WorkerInterrupt):
                parallel([delayed(interrupt_raiser)(x) for x in (1, 0)])

            # The pool should still be available despite the exception
            assert get_workers(parallel._backend) is not None

            # The pool should have been interrupted and restarted:
            assert get_workers(parallel._backend) is not original_workers

            assert ([f(x, y=1) for x in range(10)] ==
                    parallel(delayed(f)(x, y=1) for x in range(10)))

        # Check that the inner pool has been terminated when exiting the
        # context manager
        assert get_workers(parallel._backend) is None
    else:
        with raises(KeyboardInterrupt):
            Parallel(n_jobs=2)(
                [delayed(interrupt_raiser)(x) for x in (1, 0)])

    # Wrapped exceptions should inherit from the class of the original
    # exception to make it easy to catch them
    with raises(ZeroDivisionError):
        Parallel(n_jobs=2)(
            [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])

    with raises(MyExceptionWithFinickyInit):
        Parallel(n_jobs=2, verbose=0)(
            (delayed(exception_raiser)(i, custom_exception=True)
             for i in range(30)))

    try:
        # JoblibException wrapping is disabled in sequential mode:
        ex = JoblibException()
        Parallel(n_jobs=1)(
            delayed(division)(x, y) for x, y in zip((0, 1), (1, 0)))
    except Exception as ex:
        assert not isinstance(ex, JoblibException)
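# Sketch of the user-facing consequence of test_error_capture: exceptions
# raised inside workers surface in the parent with their original type
# preserved, so ordinary except clauses work across process boundaries.
from joblib import Parallel, delayed

def _div(a, b):
    return a / b

def _demo_exception_propagation():
    try:
        Parallel(n_jobs=2)(delayed(_div)(1, d) for d in (1, 0))
    except ZeroDivisionError:
        print('caught the worker-side ZeroDivisionError in the parent')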
def test_func_code_consistency():
    from joblib.parallel import Parallel, delayed
    codes = Parallel(n_jobs=2)(delayed(_get_code)() for _ in range(5))
    nose.tools.assert_equal(len(set(codes)), 1)
def test_invalid_backend():
    with raises(ValueError):
        Parallel(backend='unit-testing')
def test_parallel_call_cached_function_defined_in_jupyter(
        tmpdir, call_before_reducing):
    # Calling an interactively defined memory.cache()'d function inside a
    # Parallel call used to clear the existing cache related to the said
    # function (https://github.com/joblib/joblib/issues/1035)
    # This test checks that this is no longer the case.

    # TODO: test that the cache related to the function persists across
    # ipython sessions (provided that no code changes were made to the
    # function's source)?

    # The first part of the test makes the necessary low-level calls to
    # emulate the definition of a function in a jupyter notebook cell.
    # Joblib has some custom code to treat functions defined specifically
    # in jupyter notebooks/ipython sessions -- we want to test this code,
    # which requires the emulation to be rigorous.
    for session_no in [0, 1]:
        ipython_cell_source = '''
        def f(x):
            return x
        '''
        ipython_cell_id = '<ipython-input-{}-000000000000>'.format(session_no)

        exec(compile(textwrap.dedent(ipython_cell_source),
                     filename=ipython_cell_id,
                     mode='exec'))
        # f is now accessible in the locals mapping - but for some unknown
        # reason, f = locals()['f'] throws a KeyError at runtime, so we need
        # to bind locals()['f'] to a different name in the local namespace
        aliased_f = locals()['f']
        aliased_f.__module__ = "__main__"

        # Preliminary sanity checks, and tests checking that joblib properly
        # identified f as an interactive function defined in a jupyter
        # notebook
        assert aliased_f(1) == 1
        assert aliased_f.__code__.co_filename == ipython_cell_id

        memory = Memory(location=tmpdir.strpath, verbose=0)
        cached_f = memory.cache(aliased_f)

        assert len(os.listdir(tmpdir / 'joblib')) == 1
        f_cache_relative_directory = os.listdir(tmpdir / 'joblib')[0]
        assert 'ipython-input' in f_cache_relative_directory

        f_cache_directory = tmpdir / 'joblib' / f_cache_relative_directory

        if session_no == 0:
            # The cache should be empty as cached_f has not been called yet.
            assert os.listdir(f_cache_directory) == ['f']
            assert os.listdir(f_cache_directory / 'f') == []

            if call_before_reducing:
                cached_f(3)
                # Two files were just created: func_code.py, and a folder
                # containing the information (input hash/output) of
                # cached_f(3)
                assert len(os.listdir(f_cache_directory / 'f')) == 2

                # Now, testing #1035: when calling a cached function, joblib
                # used to dynamically inspect the underlying function to
                # extract its source code (to verify it matches the source
                # code of the function as last inspected by joblib) --
                # however, source code introspection fails for dynamic
                # functions sent to child processes - which would eventually
                # make joblib clear the cache associated to f
                res = Parallel(n_jobs=2)(delayed(cached_f)(i) for i in [1, 2])
            else:
                # Submit the function to the joblib child processes, although
                # the function has never been called in the parent yet. This
                # triggers a specific code branch inside
                # MemorizedFunc.__reduce__.
                res = Parallel(n_jobs=2)(delayed(cached_f)(i) for i in [1, 2])
                assert len(os.listdir(f_cache_directory / 'f')) == 3

                cached_f(3)

            # Make sure f's cache does not get cleared after the parallel
            # calls, and contains ALL cached function calls (f(1), f(2),
            # f(3)) plus func_code.py
            assert len(os.listdir(f_cache_directory / 'f')) == 4
        else:
            # For the second session, there should be an already existing
            # cache
            assert len(os.listdir(f_cache_directory / 'f')) == 4

            cached_f(3)

            # The previous cache should not be invalidated after calling the
            # function in a new session
            assert len(os.listdir(f_cache_directory / 'f')) == 4
def test_invalid_njobs(backend):
    with raises(ValueError) as excinfo:
        Parallel(n_jobs=0, backend=backend)._initialize_backend()
    assert "n_jobs == 0 in Parallel has no meaning" in str(excinfo.value)
def test_invalid_batch_size(batch_size):
    with raises(ValueError):
        Parallel(batch_size=batch_size)
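# Companion sketch: as the two validation tests above assume, Parallel
# checks its arguments eagerly, so an unknown backend or a non-positive
# batch_size fails at construction time, before any task is dispatched.
from joblib import Parallel

def _demo_eager_validation():
    for bad_kwargs in ({'backend': 'unit-testing'}, {'batch_size': 0}):
        try:
            Parallel(**bad_kwargs)
        except ValueError as e:
            print('rejected', bad_kwargs, '->', e)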
def _recursive_parallel(nesting_limit=None):
    """A horrible function that does recursive parallel calls"""
    return Parallel()(delayed(_recursive_parallel)() for i in range(2))
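# Usage note (sketch): calling _recursive_parallel directly would recurse
# without bound, so it is normally exercised under a backend whose nesting
# mitigation cuts the recursion off; the guard below only illustrates the
# expected failure mode and makes no claim about the exact exception type.
def _probe_recursion():
    try:
        _recursive_parallel()
    except Exception as e:
        print('recursion stopped:', type(e).__name__)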