def test_backend_hinting_and_constraints_with_custom_backends(capsys):
    # Custom backends can declare that they use threads and have shared
    # memory semantics:
    class MyCustomThreadingBackend(ParallelBackendBase):
        supports_sharedmem = True
        use_threads = True

        def apply_async(self):
            pass

        def effective_n_jobs(self, n_jobs):
            return n_jobs

    with parallel_backend(MyCustomThreadingBackend()):
        p = Parallel(n_jobs=2, prefer='processes')  # ignored
        assert type(p._backend) == MyCustomThreadingBackend

        p = Parallel(n_jobs=2, require='sharedmem')
        assert type(p._backend) == MyCustomThreadingBackend

    class MyCustomProcessingBackend(ParallelBackendBase):
        supports_sharedmem = False
        use_threads = False

        def apply_async(self):
            pass

        def effective_n_jobs(self, n_jobs):
            return n_jobs

    with parallel_backend(MyCustomProcessingBackend()):
        p = Parallel(n_jobs=2, prefer='processes')
        assert type(p._backend) == MyCustomProcessingBackend

        out, err = capsys.readouterr()
        assert out == ""
        assert err == ""

        p = Parallel(n_jobs=2, require='sharedmem', verbose=10)
        assert type(p._backend) == ThreadingBackend

        out, err = capsys.readouterr()
        expected = ("Using ThreadingBackend as joblib.Parallel backend "
                    "instead of MyCustomProcessingBackend as the latter "
                    "does not provide shared memory semantics.")
        assert out.strip() == expected
        assert err == ""

    with raises(ValueError):
        Parallel(backend=MyCustomProcessingBackend(), require='sharedmem')
def test_thread_bomb_mitigation(backend):
    # Test that recursive parallelism raises a RecursionError rather than
    # saturating the operating system resources by creating an unbounded
    # number of threads.
    with parallel_backend(backend, n_jobs=2):
        with raises(RecursionError):
            _recursive_parallel()
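# `_recursive_parallel` is defined elsewhere in the test module; a minimal
# sketch, assuming it keeps fanning out nested Parallel calls with no base
# case, so that only joblib's nesting mitigation (or the interpreter's
# recursion limit) can stop it:
from joblib import Parallel, delayed

def _recursive_parallel():
    # Each call dispatches two more copies of itself; without recursion
    # protection this would create threads without bound.
    return Parallel(n_jobs=2)(delayed(_recursive_parallel)()
                              for _ in range(2))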
def test_global_parallel_backend():
    default = Parallel()._backend

    pb = parallel_backend('threading')
    assert isinstance(Parallel()._backend, ThreadingBackend)

    pb.unregister()
    assert type(Parallel()._backend) is type(default)
def test_external_backends():
    def register_foo():
        BACKENDS['foo'] = ThreadingBackend

    EXTERNAL_BACKENDS['foo'] = register_foo

    with parallel_backend('foo'):
        assert isinstance(Parallel()._backend, ThreadingBackend)
def test_sklearn_cv():
    iris = datasets.load_iris()
    clf = svm.SVC(kernel='linear', C=1)

    with parallel_backend('spark', n_jobs=3):
        scores = cross_val_score(clf, iris.data, iris.target, cv=5)

    expected = [0.97, 1.0, 0.97, 0.97, 1.0]
    for i in range(5):
        assert pytest.approx(scores[i], 0.01) == expected[i]

    # test with default n_jobs=-1
    with parallel_backend('spark'):
        scores = cross_val_score(clf, iris.data, iris.target, cv=5)

    for i in range(5):
        assert pytest.approx(scores[i], 0.01) == expected[i]
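# Imports and setup assumed by the test above (a sketch: the 'spark' backend
# name is provided by the separate joblib-spark package and must be
# registered before parallel_backend('spark') can resolve it):
import pytest
from sklearn import datasets, svm
from sklearn.model_selection import cross_val_score
from joblib import parallel_backend
from joblibspark import register_spark

register_spark()  # make the 'spark' backend name known to joblib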
def test_simple():
    with parallel_backend('spark') as (ba, _):
        seq = Parallel(n_jobs=5)(delayed(inc)(i) for i in range(10))
        assert seq == [inc(i) for i in range(10)]

        with pytest.raises(BaseException):
            Parallel(n_jobs=5)(delayed(slow_raise_value_error)(i == 3)
                               for i in range(10))
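# The helpers `inc` and `slow_raise_value_error` are defined elsewhere; a
# minimal sketch, assuming they behave as their names suggest:
import time

def inc(x):
    return x + 1

def slow_raise_value_error(condition, duration=0.05):
    # Sleep briefly so tasks overlap across workers, then fail only on the
    # flagged item (i == 3 in the test above).
    time.sleep(duration)
    if condition:
        raise ValueError("condition evaluated to True")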
def test_backend_hinting_and_constraints():
    for n_jobs in [1, 2, -1]:
        assert type(Parallel(n_jobs=n_jobs)._backend) == LokyBackend

        p = Parallel(n_jobs=n_jobs, prefer='threads')
        assert type(p._backend) == ThreadingBackend

        p = Parallel(n_jobs=n_jobs, prefer='processes')
        assert type(p._backend) == LokyBackend

        p = Parallel(n_jobs=n_jobs, require='sharedmem')
        assert type(p._backend) == ThreadingBackend

    # Explicit backend selection can override backend hinting although it
    # is useless to pass a hint when selecting a backend.
    p = Parallel(n_jobs=2, backend='loky', prefer='threads')
    assert type(p._backend) == LokyBackend

    with parallel_backend('loky', n_jobs=2):
        # Explicit backend selection by the user with the context manager
        # should be respected when combined with backend hints only.
        p = Parallel(prefer='threads')
        assert type(p._backend) == LokyBackend
        assert p.n_jobs == 2

    with parallel_backend('loky', n_jobs=2):
        # Locally hard-coded n_jobs value is respected.
        p = Parallel(n_jobs=3, prefer='threads')
        assert type(p._backend) == LokyBackend
        assert p.n_jobs == 3

    with parallel_backend('loky', n_jobs=2):
        # Explicit backend selection by the user with the context manager
        # should be ignored when the Parallel call has hard constraints.
        # In this case, the default backend that supports shared memory is
        # used and the default number of processes is used.
        p = Parallel(require='sharedmem')
        assert type(p._backend) == ThreadingBackend
        assert p.n_jobs == 1

    with parallel_backend('loky', n_jobs=2):
        p = Parallel(n_jobs=3, require='sharedmem')
        assert type(p._backend) == ThreadingBackend
        assert p.n_jobs == 3
def test_nested_parallelism_with_dask():
    distributed = pytest.importorskip('distributed')
    client = distributed.Client(n_workers=2, threads_per_worker=2)  # noqa

    # 10 MB of data as argument to trigger implicit scattering
    data = np.ones(int(1e7), dtype=np.uint8)
    for i in range(2):
        with parallel_backend('dask'):
            backend_types_and_levels = _recursive_backend_info(data=data)
        assert len(backend_types_and_levels) == 4
        assert all(name == 'DaskDistributedBackend'
                   for name, _ in backend_types_and_levels)

    # No argument
    with parallel_backend('dask'):
        backend_types_and_levels = _recursive_backend_info()
    assert len(backend_types_and_levels) == 4
    assert all(name == 'DaskDistributedBackend'
               for name, _ in backend_types_and_levels)
def test_nested_parallel_limit(backend):
    with parallel_backend(backend, n_jobs=2):
        backend_types_and_levels = _recursive_backend_info()

    top_level_backend_type = backend.title() + 'Backend'
    expected_types_and_levels = [
        (top_level_backend_type, 0),
        ('ThreadingBackend', 1),
        ('SequentialBackend', 2),
        ('SequentialBackend', 3),
    ]
    assert backend_types_and_levels == expected_types_and_levels
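# `_recursive_backend_info` is defined elsewhere; a minimal sketch, assuming
# it records a (backend class name, nesting level) pair at each level while
# recursing through nested Parallel calls down to a fixed depth:
from joblib import Parallel, delayed

def _recursive_backend_info(limit=3, **kwargs):
    with Parallel(n_jobs=2) as p:
        this_level = [(type(p._backend).__name__, p._backend.nesting_level)]
        if limit == 0:
            return this_level
        results = p(delayed(_recursive_backend_info)(limit - 1, **kwargs)
                    for _ in range(1))
        return this_level + results[0]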
async def get_variables():
    global PROJECTS
    global WITH_UPVOTE
    global UPTIME
    UPTIME['MRBV_SECONDS_VARIABLES'] = time.time() + MRBV_SECONDS_VARIABLES
    this_PROJECTS = copy.copy(PROJECTS)
    # issue https://github.com/scikit-learn/scikit-learn/issues/8920
    with parallel_backend('threading'):
        data = Parallel(n_jobs=4)(delayed(check_project_upvote)(project)
                                  for project in this_PROJECTS)
    WITH_UPVOTE = [i for i in data if i is not None]
    print(f"found {len(WITH_UPVOTE)} MRBV_BOT_UPVOTE "
          f"in {len(PROJECTS)} projects")
def test_nested_backend_context_manager():
    # Check that by default, nested parallel calls will always use the
    # ThreadingBackend
    def get_nested_pids():
        assert _active_backend_type() == ThreadingBackend
        return Parallel(n_jobs=2)(delayed(os.getpid)() for _ in range(2))

    for backend in ['threading', 'loky', 'multiprocessing']:
        with parallel_backend(backend):
            pid_groups = Parallel(n_jobs=2)(delayed(get_nested_pids)()
                                            for _ in range(10))
            for pid_group in pid_groups:
                assert len(set(pid_group)) == 1
async def get_merges():
    global WITH_UPVOTE
    global MERGE_REQUESTS
    global PROJECTS
    global UPTIME
    UPTIME['MRBV_GET_SECONDS_MERGE'] = time.time() + MRBV_GET_SECONDS_MERGE
    this_WITH_UPVOTE = copy.copy(WITH_UPVOTE)
    with parallel_backend('threading'):
        data = Parallel(n_jobs=4)(delayed(check_merge_upvote)(item)
                                  for item in this_WITH_UPVOTE)
    buff = []
    for mergerequests in [i for i in data if i is not None]:
        buff = buff + mergerequests
    MERGE_REQUESTS = buff
    print(f"found {len(MERGE_REQUESTS)} merges with {len(WITH_UPVOTE)} "
          f"MRBV_BOT_UPVOTE in {len(PROJECTS)} projects")
def check_backend_context_manager(backend_name):
    with parallel_backend(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert_equal(active_n_jobs, 3)
        assert_equal(effective_n_jobs(3), 3)
        p = Parallel()
        assert_equal(p.n_jobs, 3)
        if backend_name == "multiprocessing":
            assert_equal(type(active_backend), MultiprocessingBackend)
            assert_equal(type(p._backend), MultiprocessingBackend)
        elif backend_name == "threading":
            assert_equal(type(active_backend), ThreadingBackend)
            assert_equal(type(p._backend), ThreadingBackend)
        elif backend_name.startswith("test_"):
            assert_equal(type(active_backend), FakeParallelBackend)
            assert_equal(type(p._backend), FakeParallelBackend)
def check_backend_context_manager(backend_name):
    with parallel_backend(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert active_n_jobs == 3
        assert effective_n_jobs(3) == 3
        p = Parallel()
        assert p.n_jobs == 3
        if backend_name == 'multiprocessing':
            assert type(active_backend) == MultiprocessingBackend
            assert type(p._backend) == MultiprocessingBackend
        elif backend_name == 'threading':
            assert type(active_backend) == ThreadingBackend
            assert type(p._backend) == ThreadingBackend
        elif backend_name.startswith('test_'):
            assert type(active_backend) == FakeParallelBackend
            assert type(p._backend) == FakeParallelBackend
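# `FakeParallelBackend` (matched by the 'test_' backend names above) is
# defined elsewhere; a minimal sketch, assuming it pretends to configure
# n_jobs workers while actually running everything sequentially:
from joblib import cpu_count
from joblib._parallel_backends import SequentialBackend

class FakeParallelBackend(SequentialBackend):
    def configure(self, n_jobs=1, parallel=None, **backend_args):
        # Record the requested parallelism so tests can inspect it.
        self.n_jobs = self.effective_n_jobs(n_jobs)
        self.parallel = parallel
        return n_jobs

    def effective_n_jobs(self, n_jobs=1):
        if n_jobs < 0:
            n_jobs = max(cpu_count() + 1 + n_jobs, 1)
        return n_jobs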
def __call__(self):
    # Exactly the same as in BatchedCalls, with the difference being that it
    # gets args and kwargs from the object store (which have been put there
    # by put_items_in_object_store).
    # Set the default nested backend to self._backend but do not change the
    # default number of processes to -1.
    with parallel_backend(self._backend, n_jobs=self._n_jobs):
        return [
            func(
                *[ray_get_if_needed(arg) for arg in args],
                **{k: ray_get_if_needed(v) for k, v in kwargs.items()},
            )
            for func, args, kwargs in self.items
        ]
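# `ray_get_if_needed` belongs to the Ray/joblib integration; a minimal
# sketch, assuming it resolves Ray object references fetched from the object
# store and passes plain values through untouched:
import ray

def ray_get_if_needed(obj):
    if isinstance(obj, ray.ObjectRef):
        return ray.get(obj)
    return obj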
def test_nested_parallelism_limit(backend):
    with parallel_backend(backend, n_jobs=2):
        backend_types_and_levels = _recursive_backend_info()

    if cpu_count() == 1:
        second_level_backend_type = 'SequentialBackend'
    else:
        second_level_backend_type = 'ThreadingBackend'

    top_level_backend_type = backend.title() + 'Backend'
    expected_types_and_levels = [
        (top_level_backend_type, 0),
        (second_level_backend_type, 1),
        ('SequentialBackend', 2),
        ('SequentialBackend', 3),
    ]
    assert backend_types_and_levels == expected_types_and_levels
def test_direct_parameterized_backend_context_manager():
    assert _active_backend_type() == MultiprocessingBackend

    # Check that it's possible to pass a backend instance directly, without
    # registration
    with parallel_backend(ParameterizedParallelBackend(param=43), n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) == ParameterizedParallelBackend
        assert active_backend.param == 43
        assert active_n_jobs == 5
        p = Parallel()
        assert p.n_jobs == 5
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))
        assert results == [sqrt(i) for i in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == MultiprocessingBackend
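# `ParameterizedParallelBackend` is a test fixture defined elsewhere; a
# minimal sketch, assuming it is a sequential pseudo-backend that simply
# records the parameter passed through parallel_backend(...):
from joblib._parallel_backends import SequentialBackend

class ParameterizedParallelBackend(SequentialBackend):
    def __init__(self, param=None):
        if param is None:
            raise ValueError("param should not be None")
        self.param = param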
def test_direct_parameterized_backend_context_manager():
    assert_equal(_active_backend_type(), MultiprocessingBackend)

    # Check that it's possible to pass a backend instance directly, without
    # registration
    with parallel_backend(ParameterizedParallelBackend(param=43), n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert_equal(type(active_backend), ParameterizedParallelBackend)
        assert_equal(active_backend.param, 43)
        assert_equal(active_n_jobs, 5)
        p = Parallel()
        assert_equal(p.n_jobs, 5)
        assert_true(p._backend is active_backend)
        results = p(delayed(sqrt)(i) for i in range(5))
        assert_equal(results, [sqrt(i) for i in range(5)])

    # The default backend is again restored
    assert_equal(_active_backend_type(), MultiprocessingBackend)
def test_direct_parameterized_backend_context_manager():
    assert _active_backend_type() == DefaultBackend

    # Check that it's possible to pass a backend instance directly, without
    # registration
    with parallel_backend(ParameterizedParallelBackend(param=43), n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) == ParameterizedParallelBackend
        assert active_backend.param == 43
        assert active_n_jobs == 5
        p = Parallel()
        assert p.n_jobs == 5
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))
        assert results == [sqrt(i) for i in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == DefaultBackend
def test_parameterized_backend_context_manager(monkeypatch):
    monkeypatch.setitem(BACKENDS, 'param_backend',
                        ParameterizedParallelBackend)
    assert _active_backend_type() == DefaultBackend

    with parallel_backend('param_backend', param=42, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) == ParameterizedParallelBackend
        assert active_backend.param == 42
        assert active_n_jobs == 3
        p = Parallel()
        assert p.n_jobs == 3
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))
        assert results == [sqrt(i) for i in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == DefaultBackend
def test_retrieval_context():
    import contextlib

    class MyBackend(ThreadingBackend):
        i = 0

        @contextlib.contextmanager
        def retrieval_context(self):
            self.i += 1
            yield

    register_parallel_backend("retrieval", MyBackend)

    def nested_call(n):
        return Parallel(n_jobs=2)(delayed(id)(i) for i in range(n))

    with parallel_backend("retrieval") as (ba, _):
        Parallel(n_jobs=2)(delayed(nested_call, check_pickle=False)(i)
                           for i in range(5))
        assert ba.i == 1
def test_parameterized_backend_context_manager():
    register_parallel_backend('param_backend', ParameterizedParallelBackend)
    try:
        assert _active_backend_type() == MultiprocessingBackend

        with parallel_backend('param_backend', param=42, n_jobs=3):
            active_backend, active_n_jobs = parallel.get_active_backend()
            assert type(active_backend) == ParameterizedParallelBackend
            assert active_backend.param == 42
            assert active_n_jobs == 3
            p = Parallel()
            assert p.n_jobs == 3
            assert p._backend is active_backend
            results = p(delayed(sqrt)(i) for i in range(5))
            assert results == [sqrt(i) for i in range(5)]

        # The default backend is again restored
        assert _active_backend_type() == MultiprocessingBackend
    finally:
        del BACKENDS['param_backend']
def test_parameterized_backend_context_manager():
    register_parallel_backend('param_backend', ParameterizedParallelBackend)
    try:
        assert_equal(_active_backend_type(), MultiprocessingBackend)

        with parallel_backend('param_backend', param=42, n_jobs=3):
            active_backend, active_n_jobs = parallel.get_active_backend()
            assert_equal(type(active_backend), ParameterizedParallelBackend)
            assert_equal(active_backend.param, 42)
            assert_equal(active_n_jobs, 3)
            p = Parallel()
            assert_equal(p.n_jobs, 3)
            assert_true(p._backend is active_backend)
            results = p(delayed(sqrt)(i) for i in range(5))
            assert_equal(results, [sqrt(i) for i in range(5)])

        # The default backend is again restored
        assert_equal(_active_backend_type(), MultiprocessingBackend)
    finally:
        del BACKENDS['param_backend']
def run(allpool, test):
    # return [runOptLearn(allpool[0])]
    if test:
        result = [runOptLearn(p, test) for p in allpool]
    else:
        with parallel_backend('threading'):
            result = Parallel()(delayed(runOptLearn)(p) for p in allpool)

    success = []
    fail = []
    for i in range(len(result)):
        if result[i].success:
            success.append(result[i])
        else:
            logger.error("!!!FAILED: %d %s", i, result[i].shortname)
            fail.append(result[i])

    # for pool in allpool:
    #     runOptLearn(pool)
    #     break
    logger.debug('finish')
    return success, fail
help="The path where all models directories are") parser.add_argument("--output_path", default=None, type=str, help="The path to dump yaml file") parser.add_argument("--n_jobs", default=4, type=int, help="number of cores") args = parser.parse_args() return args if __name__ == "__main__": args = input_arguments() benches = load_benchmark_settings() benches = {k: v for k, v in benches.items() if check_key(k)} with parallel_backend(backend="multiprocessing", n_jobs=args.n_jobs): dicts = Parallel()(delayed(update_dict_entry)(key, args.path) for key, value in benches.items()) tabular_dict = dict() for entry in dicts: tabular_dict.update(entry) print("Collected {} keys...".format(len(tabular_dict))) with open(os.path.join(args.output_path, "tabular_plot_config.yaml"), "w") as f: f.writelines(yaml.dump(tabular_dict)) print("Done!")
            # each task-config-fidelity would have been evaluated on a
            # different seed
            obj = {config_hash: {fidelity_hash: {seed: v}}}
            task_datas[task_id] = update_table_with_new_entry(
                task_datas[task_id], obj
            )
            file_count += 1
            try:
                # deleting data file that was processed
                os.remove(os.path.join(dump_path, filename))
            except FileNotFoundError:
                continue

        logger.info("\tFinished batch processing in {:.3f} seconds".format(
            time.time() - start))
        logger.info("\tUpdating benchmark data files...")
        with parallel_backend(backend="loky", n_jobs=args.n_jobs):
            Parallel()(
                delayed(save_task_file)(task_id, obj, output_path)
                for task_id, obj in task_datas.items()
            )
        logger.info("\tContinuing to next batch")
        logger.info("\t{}".format("-" * 25))

    logger.info("Done!")
    logger.info("Total files processed: {}".format(file_count))
def check_backend_context_manager(backend_name):
    with parallel_backend(backend_name):
        assert_equal(parallel.get_default_backend(), backend_name)