def check_parallel_context_manager(backend):
    """Exercise ``Parallel`` through the context manager protocol.

    The same ``Parallel`` instance is called twice inside the managed block
    and once more after leaving it; every call must return the expected
    results.
    """
    items = range(10)
    expected = [f(x, y=1) for x in items]

    def run(parallel):
        # One full parallel evaluation of ``f`` over ``items``.
        return parallel(delayed(f)(x, y=1) for x in items)

    with Parallel(n_jobs=4, backend=backend) as p:
        # A pool instance has been eagerly created on entering the block and
        # is managed via the context manager protocol.
        managed_pool = p._pool
        if mp is not None:
            assert_true(managed_pool is not None)

        # Call the managed parallel object several times inside the block.
        for _ in range(2):
            assert_equal(expected, run(p))

        # All those calls must have reused the very same pool instance.
        if mp is not None:
            assert_true(managed_pool is p._pool)

    # Leaving the managed block terminates the pool and drops the reference
    # held by the parallel object.
    assert_true(p._pool is None)

    # The instance can still be used in non-managed mode afterwards.
    assert_equal(expected, run(p))
    assert_true(p._pool is None)
def check_simple_parallel(backend):
    """Check basic ``Parallel`` results and smoke-test the verbose output.

    First verifies that squaring ``range(5)`` gives the sequential result
    for several ``n_jobs`` values, then runs a set of ``Parallel`` calls at
    different verbosity levels with stdout and stderr captured. If one of
    those calls raises, the captured output is printed on the real streams
    before the exception propagates.
    """
    X = range(5)
    for n_jobs in (1, 2, -1, -2):
        nose.tools.assert_equal(
            [square(x) for x in X],
            Parallel(n_jobs=n_jobs, backend=backend)(
                delayed(square)(x) for x in X))

    # Save the real streams *before* entering the try block so the cleanup
    # code can always restore them.
    orig_stdout = sys.stdout
    orig_stderr = sys.stderr
    # Text buffers on Python 3, byte buffers on Python 2.
    buffer_type = io.StringIO if sys.version_info[0] >= 3 else io.BytesIO
    captured_stdout = buffer_type()
    captured_stderr = buffer_type()
    sys.stdout = captured_stdout
    sys.stderr = captured_stderr
    try:
        for verbose in (2, 11, 100):
            Parallel(n_jobs=-1, verbose=verbose, backend=backend)(
                delayed(square)(x) for x in X)
            Parallel(n_jobs=1, verbose=verbose, backend=backend)(
                delayed(square)(x) for x in X)
            Parallel(n_jobs=2, verbose=verbose, pre_dispatch=2,
                     backend=backend)(delayed(square)(x) for x in X)
            Parallel(n_jobs=2, verbose=verbose, backend=backend)(
                delayed(square)(x) for x in X)
    except Exception:
        # Restore the real streams first so the captured output is visible.
        sys.stdout = orig_stdout
        sys.stderr = orig_stderr
        print(captured_stdout.getvalue())
        print(captured_stderr.getvalue())
        raise
    finally:
        sys.stdout = orig_stdout
        sys.stderr = orig_stderr
def test_error_capture():
    """Check that errors are captured and the correct exceptions raised."""

    def division_jobs():
        # Second task divides by zero.
        return [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))]

    def interrupt_jobs():
        return [delayed(interrupt_raiser)(x) for x in (1, 0)]

    check_raises = nose.tools.assert_raises

    if mp is None:
        check_raises(KeyboardInterrupt, Parallel(n_jobs=2), interrupt_jobs())
    else:
        # A JoblibException will be raised only if there is indeed
        # multiprocessing.
        check_raises(JoblibException, Parallel(n_jobs=2), division_jobs())
        check_raises(WorkerInterrupt, Parallel(n_jobs=2), interrupt_jobs())

    check_raises(ZeroDivisionError, Parallel(n_jobs=2), division_jobs())

    # With a single job, whatever exception comes out must not be a
    # JoblibException.
    try:
        ex = JoblibException()
        Parallel(n_jobs=1)(
            delayed(division)(x, y) for x, y in zip((0, 1), (1, 0)))
    except Exception as ex:
        nose.tools.assert_false(isinstance(ex, JoblibException))
def test_error_capture():
    """Check that errors are captured and the correct exceptions raised.

    Covers one-shot ``Parallel`` calls, the context manager API (where the
    pool must survive a raised exception) and sequential mode.
    """

    def division_jobs():
        # Second task divides by zero.
        return [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))]

    def interrupt_jobs():
        return [delayed(interrupt_raiser)(x) for x in (1, 0)]

    def assert_pool_still_works(parallel):
        # The managed pool must still exist and produce correct results.
        assert_true(parallel._pool is not None)
        assert_equal([f(x, y=1) for x in range(10)],
                     parallel(delayed(f)(x, y=1) for x in range(10)))

    if mp is None:
        assert_raises(KeyboardInterrupt, Parallel(n_jobs=2), interrupt_jobs())
    else:
        # A JoblibException will be raised only if there is indeed
        # multiprocessing.
        assert_raises(JoblibException, Parallel(n_jobs=2), division_jobs())
        assert_raises(WorkerInterrupt, Parallel(n_jobs=2), interrupt_jobs())

        # Try again with the context manager API.
        with Parallel(n_jobs=2) as parallel:
            assert_true(parallel._pool is not None)

            assert_raises(JoblibException, parallel, division_jobs())
            # Available and working despite the raised (and caught) error.
            assert_pool_still_works(parallel)

            assert_raises(WorkerInterrupt, parallel, interrupt_jobs())
            # Still available despite the interruption.
            assert_pool_still_works(parallel)

        # The inner pool is terminated when leaving the context manager.
        assert_true(parallel._pool is None)

    # Wrapped exceptions should inherit from the class of the original
    # exception to make it easy to catch them.
    assert_raises(ZeroDivisionError, Parallel(n_jobs=2), division_jobs())

    assert_raises(
        MyExceptionWithFinickyInit,
        Parallel(n_jobs=2, verbose=0),
        (delayed(exception_raiser)(i, custom_exception=True)
         for i in range(30)))

    try:
        # JoblibException wrapping is disabled in sequential mode.
        ex = JoblibException()
        Parallel(n_jobs=1)(
            delayed(division)(x, y) for x, y in zip((0, 1), (1, 0)))
    except Exception as ex:
        assert_false(isinstance(ex, JoblibException))
def check_dispatch_one_job(backend):
    """Test that with only one job, Parallel does act as an iterator.

    A shared list records "Produced"/"Consumed" events so the interleaving
    of task generation and task execution can be checked, first without
    batching and then with batches of four tasks.
    """
    queue = list()

    def producer():
        for i in range(6):
            queue.append("Produced %i" % i)
            yield i

    # Without batching every task is consumed right after being produced.
    Parallel(n_jobs=1, batch_size=1, backend=backend)(
        delayed(consumer)(queue, x) for x in producer())
    unbatched_events = [
        template % i
        for i in range(6)
        for template in ("Produced %i", "Consumed %i")
    ]
    nose.tools.assert_equal(queue, unbatched_events)
    nose.tools.assert_equal(len(queue), 12)

    # Empty the queue (in place) for the next check.
    del queue[:]

    # With batching a whole batch is produced before it is consumed.
    Parallel(n_jobs=1, batch_size=4, backend=backend)(
        delayed(consumer)(queue, x) for x in producer())
    batched_events = []
    for batch in (range(0, 4), range(4, 6)):
        batched_events.extend("Produced %i" % i for i in batch)
        batched_events.extend("Consumed %i" % i for i in batch)
    nose.tools.assert_equal(queue, batched_events)
    nose.tools.assert_equal(len(queue), 12)
def check_dispatch_multiprocessing(backend):
    """Check that using pre_dispatch Parallel does dispatch items lazily.

    Skipped when multiprocessing is not available.
    """
    if mp is None:
        raise nose.SkipTest()

    # Manager-backed list so that worker processes can record events too.
    queue = mp.Manager().list()

    def producer():
        for i in range(6):
            queue.append("Produced %i" % i)
            yield i

    parallel = Parallel(n_jobs=2, batch_size=1, pre_dispatch=3,
                        backend=backend)
    parallel(delayed(consumer)(queue, "any") for _ in producer())

    # Only 3 tasks are dispatched out of 6. The 4th task is dispatched only
    # after any of the first 3 jobs have completed.
    head = list(queue)[:4]
    # The first consumption event can sometimes happen before the end of the
    # dispatching, hence drop it before looking at the "Produced" events.
    head.remove("Consumed any")
    nose.tools.assert_equal(
        head, ["Produced %i" % i for i in range(3)])
    nose.tools.assert_equal(len(queue), 12)
def test_batching_auto_threading():
    """Check that batch_size='auto' keeps a batch size of 1 with threads.

    With the threading backend the effective batch size is left to 1 (no
    batching) as batching has been found to never be beneficial with this
    low-overhead backend.
    """
    parallel = Parallel(n_jobs=2, batch_size="auto", backend="threading")
    many_fast_tasks = (delayed(id)(i) for i in range(5000))
    parallel(many_fast_tasks)
    assert_equal(parallel._effective_batch_size, 1)
def test_backend_context_manager():
    """Check backend switching, restoring the default backend afterwards.

    Three fake backends are registered for the duration of the test and
    removed from ``BACKENDS`` again whatever the outcome.
    """
    fake_names = ['test_backend_%d' % idx for idx in range(3)]
    for name in fake_names:
        register_parallel_backend(name, FakeParallelBackend)
    all_backends = ['multiprocessing', 'threading'] + fake_names

    try:
        assert _active_backend_type() == MultiprocessingBackend

        # Check that it is possible to switch parallel backends sequentially.
        for name in all_backends:
            # TODO: parametrize this block later
            # yield check_backend_context_manager, test_backend
            check_backend_context_manager(name)

        # The default backend is restored.
        assert _active_backend_type() == MultiprocessingBackend

        # Check that context manager switching is thread safe.
        # NOTE(review): every backend name is a non-empty string, so the
        # ``if not b`` filter appears to leave no task to run -- confirm
        # whether the filter is intended.
        threaded_checks = (
            delayed(check_backend_context_manager)(b)
            for b in all_backends if not b)
        Parallel(n_jobs=2, backend='threading')(threaded_checks)

        # The default backend is again restored.
        assert _active_backend_type() == MultiprocessingBackend
    finally:
        leftovers = [key for key in list(BACKENDS.keys())
                     if key.startswith('test_')]
        for key in leftovers:
            del BACKENDS[key]
def test_parallel_timeout_success():
    """Check that no timeout is raised when the function is fast enough."""
    for backend in ['multiprocessing', 'threading']:
        parallel = Parallel(n_jobs=2, backend=backend, timeout=10)
        results = parallel(delayed(sleep)(0.001) for x in range(10))
        nose.tools.assert_equal(10, len(results))
def parallel_search(k_pot, pots, lambdas, n_jobs=4):
    """Method for Parallel L-curve computation

    Evaluates ``L_model_fast`` once per regularisation value, using joblib
    threads when ``PARALLEL_AVAILABLE`` is true and a plain loop otherwise.

    Parameters
    ----------
    k_pot : np.array
    pots : list
    lambdas : list
        Values passed one by one (with their index) to ``L_model_fast``.
    n_jobs : int, optional
        Number of joblib jobs used in the parallel case (default 4).

    Returns
    -------
    modelnormseq : tuple
        First element of every ``L_model_fast`` result, in ``lambdas`` order.
    residualseq : tuple
        Second element of every ``L_model_fast`` result, same order.
        Both tuples are empty when ``lambdas`` is empty.
    """
    if PARALLEL_AVAILABLE:
        jobs = (delayed(L_model_fast)(k_pot, pots, lamb, i)
                for i, lamb in enumerate(lambdas))
        modelvsres = Parallel(n_jobs=n_jobs, backend='threading')(jobs)
    else:
        # Please verify this!
        modelvsres = [L_model_fast(k_pot, pots, lamb, i)
                      for i, lamb in enumerate(lambdas)]
    if not modelvsres:
        # zip(*[]) yields nothing, which would make the unpacking below fail.
        return (), ()
    modelnormseq, residualseq = zip(*modelvsres)
    return modelnormseq, residualseq
def test_backend_context_manager():
    """Yield backend-switching checks, restoring the default afterwards.

    Three fake backends are registered for the duration of the test and
    removed from ``BACKENDS`` again whatever the outcome.
    """
    fake_names = ["test_backend_%d" % idx for idx in range(3)]
    for name in fake_names:
        register_parallel_backend(name, FakeParallelBackend)
    all_backends = ["multiprocessing", "threading"] + fake_names

    try:
        assert_equal(_active_backend_type(), MultiprocessingBackend)

        # Check that it is possible to switch parallel backends sequentially.
        for name in all_backends:
            yield check_backend_context_manager, name

        # The default backend is restored.
        assert_equal(_active_backend_type(), MultiprocessingBackend)

        # Check that context manager switching is thread safe.
        # NOTE(review): every backend name is a non-empty string, so the
        # ``if not b`` filter appears to leave no task to run -- confirm
        # whether the filter is intended.
        threaded_checks = (
            delayed(check_backend_context_manager)(b)
            for b in all_backends if not b)
        Parallel(n_jobs=2, backend="threading")(threaded_checks)

        # The default backend is again restored.
        assert_equal(_active_backend_type(), MultiprocessingBackend)
    finally:
        leftovers = [key for key in list(BACKENDS.keys())
                     if key.startswith("test_")]
        for key in leftovers:
            del BACKENDS[key]
def test_parallel_kwargs():
    """Check the keyword argument processing of pmap."""
    values = range(10)
    for n_jobs in (1, 4):
        expected = [f(x, y=1) for x in values]
        actual = Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in values)
        yield assert_equal, expected, actual
def test_nested_exception_dispatch():
    """Ensure TransportableException objects from nested joblib cases get
    propagated."""
    parallel = Parallel(n_jobs=2, pre_dispatch=16, verbose=0)
    failing_tasks = (delayed(SafeFunction(exception_raiser))(i)
                     for i in range(30))
    assert_raises(JoblibException, parallel, failing_tasks)
def test_exception_dispatch():
    "Make sure that exception raised during dispatch are indeed captured"
    parallel = Parallel(n_jobs=2, pre_dispatch=16, verbose=0)
    failing_tasks = (delayed(exception_raiser)(i) for i in range(30))
    nose.tools.assert_raises(ValueError, parallel, failing_tasks)
def test_multiple_spawning():
    """Check that relaunching Python from spawned subprocesses is an error.

    Attempting to launch a new Python after spawned subprocesses must raise
    an error, to avoid infinite loops on systems that do not support fork.
    Skipped when the JOBLIB_MULTIPROCESSING environment variable is 0.
    """
    multiprocessing_enabled = int(os.environ.get("JOBLIB_MULTIPROCESSING", 1))
    if not multiprocessing_enabled:
        raise nose.SkipTest()
    parallel = Parallel(n_jobs=2, pre_dispatch="all")
    reload_tasks = [delayed(_reload_joblib)() for i in range(10)]
    assert_raises(ImportError, parallel, reload_tasks)
def test_batching_auto_multiprocessing():
    """Check that batch_size='auto' yields a positive batch size with
    the multiprocessing backend."""
    managed = Parallel(n_jobs=2, batch_size="auto", backend="multiprocessing")
    with managed as p:
        many_fast_tasks = (delayed(id)(i) for i in range(5000))
        p(many_fast_tasks)

        # It should be strictly larger than 1 but, to avoid heisen failures
        # on clogged CI workers, only check that it is strictly positive.
        batch_size = p._backend.compute_batch_size()
        assert_true(batch_size > 0)
def run_trials(self, per_doc):
    """Run the bootstrap trials in parallel and merge their outputs.

    One ``bootstrap_trials`` job is started per share returned by
    ``_job_shares(self.n_jobs, self.trials)``. Returns a mapping from each
    metric in ``self.metrics`` to the concatenation of the per-job values.
    """
    parallel = Parallel(n_jobs=self.n_jobs)
    job_results = parallel(
        delayed(bootstrap_trials)(per_doc, share, self.metrics)
        for share in _job_shares(self.n_jobs, self.trials))

    history = defaultdict(list)
    for job_result in job_results:
        for metric in self.metrics:
            history[metric] += job_result[metric]
    return history
def test_parallel_pickling():
    """Check that pmap captures the errors when it is passed an object
    that cannot be pickled.
    """
    def unpicklable(x):
        return x ** 2

    tasks = (delayed(unpicklable)(x) for x in range(10))
    assert_raises(PickleError, Parallel(), tasks)
def test_pre_dispatch_race_condition():
    """Check that pre-dispatch does not race on the task generator.

    The iterable generator is not thread-safe natively. This is a
    non-regression test for the "Pool seems closed" class of error.
    """
    configurations = [(n_tasks, n_jobs)
                      for n_tasks in [2, 10, 20]
                      for n_jobs in [2, 4]]
    for n_tasks, n_jobs in configurations:
        parallel = Parallel(n_jobs=n_jobs, pre_dispatch="2 * n_jobs")
        parallel(delayed(square)(i) for i in range(n_tasks))
def test_parallel_timeout_fail():
    """Check that the timeout properly fails when the function is too slow."""
    for backend in ['multiprocessing', 'threading']:
        parallel = Parallel(n_jobs=2, backend=backend, timeout=0.01)
        slow_tasks = (delayed(sleep)(10) for x in range(10))
        nose.tools.assert_raises(TimeoutError, parallel, slow_tasks)
def test_batching_auto_threading():
    """Check that batch_size='auto' keeps a batch size of 1 with threads.

    With the threading backend the effective batch size is left to 1 (no
    batching) as batching has been found to never be beneficial with this
    low-overhead backend.
    """
    managed = Parallel(n_jobs=2, batch_size='auto', backend='threading')
    with managed as p:
        many_fast_tasks = (delayed(id)(i) for i in range(5000))
        p(many_fast_tasks)
        batch_size = p._backend.compute_batch_size()
        assert batch_size == 1
def compute_geodesic_distance_matrix(verts, tris):
    """Compute geodesic distances from every vertex, in parallel.

    The vertex indices ``0 .. len(verts) - 1`` are split into consecutive
    chunks (one chunk per CPU reported by ``cpu_count()``), each chunk is
    handed to ``compute_geodesic_distances`` as its source set, and the
    per-chunk results are stacked vertically in chunk order.

    Parameters
    ----------
    verts, tris
        Passed unchanged to ``compute_geodesic_distances``; only
        ``len(verts)`` is used here.

    Returns
    -------
    The ``np.vstack`` of the per-chunk results.
    """
    # Single-argument call form: valid on both Python 2 and Python 3.
    print("precomputing geodesic distance...")
    n_chunks = cpu_count()
    # Ceil so that at most n_chunks chunks cover all the vertices.
    chunk_size = int(np.ceil(len(verts) / float(n_chunks)))
    sources = np.arange(len(verts))
    D = Parallel(n_chunks)(
        delayed(compute_geodesic_distances)(
            verts, tris, sources[start: start + chunk_size])
        for start in range(0, len(verts), chunk_size))
    return np.vstack(D)
def check_main_thread_renamed_no_warning(backend):
    """Check that a parallel run on ``backend`` emits no warning at all."""
    recorder = warnings.catch_warnings(record=True)
    with recorder as caught_warnings:
        warnings.simplefilter("always")
        parallel = Parallel(n_jobs=2, backend=backend)
        results = parallel(delayed(square)(x) for x in range(3))
        assert results == [0, 1, 4]

    # The multiprocessing backend will raise a warning when detecting that
    # it is started from the non-main thread. Check that there is no false
    # positive because of the name change.
    assert caught_warnings == []
def test_batching_auto_multiprocessing():
    """Check that batch_size='auto' sets a positive effective batch size
    with the multiprocessing backend."""
    parallel = Parallel(n_jobs=2, batch_size="auto",
                        backend="multiprocessing")
    many_fast_tasks = (delayed(id)(i) for i in range(5000))
    parallel(many_fast_tasks)

    # When the auto-tuning of the batch size kicks in, the following
    # attribute gets updated.
    assert_true(hasattr(parallel, "_effective_batch_size"))

    # It should be strictly larger than 1 but, to avoid heisen failures on
    # clogged CI workers, only check that it is strictly positive.
    assert_true(parallel._effective_batch_size > 0)
def test_numpy_arrays_use_different_memory(mmap_mode):
    """Check that each returned array holds the value written by its own
    task."""
    def fill(arr, value):
        arr[:] = value
        return arr

    inputs = [np.zeros((10, 10), dtype='float64') for i in range(10)]
    parallel = Parallel(mmap_mode=mmap_mode, max_nbytes=0, n_jobs=2)
    outputs = parallel(
        delayed(fill)(arr, index) for index, arr in enumerate(inputs))

    for index, filled in enumerate(outputs):
        np.testing.assert_array_equal(filled, index)
def check_dispatch_one_job(backend):
    """Test that with only one job, Parallel does act as an iterator.

    A shared list records "Produced"/"Consumed" events so the interleaving
    of task generation and task execution can be checked, first without
    batching and then with batches of four tasks.
    """
    queue = list()

    def producer():
        for i in range(6):
            queue.append('Produced %i' % i)
            yield i

    # Without batching every task is consumed right after being produced.
    Parallel(n_jobs=1, batch_size=1, backend=backend)(
        delayed(consumer)(queue, x) for x in producer())
    unbatched_events = [
        template % i
        for i in range(6)
        for template in ('Produced %i', 'Consumed %i')
    ]
    assert_equal(queue, unbatched_events)
    assert_equal(len(queue), 12)

    # Empty the queue (in place) for the next check.
    del queue[:]

    # With batching a whole batch is produced before it is consumed.
    Parallel(n_jobs=1, batch_size=4, backend=backend)(
        delayed(consumer)(queue, x) for x in producer())
    batched_events = []
    for batch in (range(0, 4), range(4, 6)):
        batched_events.extend('Produced %i' % i for i in batch)
        batched_events.extend('Consumed %i' % i for i in batch)
    assert_equal(queue, batched_events)
    assert_equal(len(queue), 12)
def test_auto_memmap_on_arrays_from_generator():
    """Check auto-memmapping of arrays created by a generator.

    Non-regression test for a problem with a bad interaction between the
    GC collecting arrays recently created during iteration inside the
    parallel dispatch loop and the auto-memmap feature of Parallel.
    See: https://github.com/joblib/joblib/pull/294
    """
    def generate_arrays(n):
        for i in range(n):
            yield np.ones(10, dtype=np.float32) * i

    # max_nbytes=1 forces the use of memory-mapping even for small arrays.
    parallel = Parallel(n_jobs=2, max_nbytes=1)
    results = parallel(
        delayed(check_memmap)(a) for a in generate_arrays(100))

    expected_arrays = generate_arrays(len(results))
    for result, expected in zip(results, expected_arrays):
        np.testing.assert_array_equal(expected, result)
def run_in_parallel(common_classes, number_of_activations, function_to_run,
                    parallel, number_of_parallel):
    """Run ``function_to_run`` in parallel and log the averaged result.

    ``function_to_run`` is scheduled ``number_of_parallel`` times through
    ``parallel``, each call receiving ``common_classes`` and an equal share
    of ``number_of_activations``. The mean of the returned values is
    appended, together with the simulation variables read from
    ``common_classes``, as one line to ``results.txt``.
    """
    # NOTE(review): `/` is integer division on Python 2 but true division
    # on Python 3 -- confirm which one is wanted here and below.
    activations_per_job = number_of_activations / number_of_parallel
    generations = parallel(
        delayed(function_to_run)(common_classes, activations_per_job)
        for _ in range(number_of_parallel))
    avg_number_of_generations = sum(generations) / number_of_parallel

    HGT_factor, mutation_factor, population_factor = \
        common_classes.get_simulation_variables()
    report_line = ("results for the following parameters: HGT_factor="
                   + str(HGT_factor)
                   + " mutation factor=" + str(mutation_factor)
                   + " population_factor=" + str(population_factor)
                   + " is: " + str(avg_number_of_generations) + "\n")
    # The context manager closes the file even when the write fails.
    with open('results.txt', 'a') as f:
        f.write(report_line)
def test_parallel_pickling():
    """Check that pmap captures the errors when it is passed an object
    that cannot be pickled.
    """
    def unpicklable(x):
        return x ** 2

    try:
        # Pickling a local function always fails, but the exception raised
        # is a PickleError for python <= 3.4 and an AttributeError for
        # python >= 3.5: record whichever class this interpreter raises.
        pickle.dumps(unpicklable)
    except Exception as exc:
        exception_class = type(exc)

    tasks = (delayed(unpicklable)(x) for x in range(10))
    assert_raises(exception_class, Parallel(), tasks)
def test_cached_function_race_condition_when_persisting_output(tmpdir, capfd):
    """Test the race condition where multiple processes write into the
    same output.pkl.

    See https://github.com/joblib/joblib/issues/490 for more details.
    """
    cache = Memory(location=tmpdir.strpath)
    func_cached = cache.cache(fast_func_with_complex_output)

    Parallel(n_jobs=2)(delayed(func_cached)() for i in range(3))

    # Check both stdout and stderr (ongoing PR #434 may change the logging
    # destination) to make sure there is no exception while loading the
    # results.
    exception_msg = 'Exception while loading results'
    for captured_stream in capfd.readouterr():
        assert exception_msg not in captured_stream
def test_main_thread_renamed_no_warning(backend, monkeypatch):
    """Check that no default backend relies on the main thread's name.

    See https://github.com/joblib/joblib/issues/180#issuecomment-253266247
    Some programs use a different name for the main thread. This is the
    case for uWSGI apps for instance.
    """
    current = threading.current_thread()
    monkeypatch.setattr(target=current, name='name',
                        value='some_new_name_for_the_main_thread')

    with warns(None) as warninfo:
        parallel = Parallel(n_jobs=2, backend=backend)
        results = parallel(delayed(square)(x) for x in range(3))
        assert results == [0, 1, 4]

    # Due to the default parameters of LokyBackend, there is a chance that
    # warninfo catches warnings from worker timeouts: drop those.
    remaining = [w for w in warninfo
                 if "worker timeout" not in str(w.message)]

    # The multiprocessing backend will raise a warning when detecting that
    # it is started from the non-main thread. Check that there is no false
    # positive because of the name change.
    assert len(remaining) == 0
def test_cached_function_race_condition_when_persisting_output_2(
        tmpdir, capfd):
    """Test the race condition in the first attempt at solving
    https://github.com/joblib/joblib/issues/490.

    The race condition was due to the delay between seeing the cache
    directory created (interpreted as the result being cached) and the
    output.pkl being pickled.
    """
    cache = Memory(location=tmpdir.strpath)
    func_cached = cache.cache(fast_func_with_conditional_complex_output)

    # Alternate the boolean argument: True for even indices.
    Parallel(n_jobs=2)(
        delayed(func_cached)(i % 2 == 0) for i in range(3))

    # Check both stdout and stderr (ongoing PR #434 may change the logging
    # destination) to make sure there is no exception while loading the
    # results.
    exception_msg = 'Exception while loading results'
    for captured_stream in capfd.readouterr():
        assert exception_msg not in captured_stream
def run(allpool, test):
    """Run ``runOptLearn`` on every pool and split results by outcome.

    With a truthy ``test`` the pools are processed sequentially and
    ``test`` is forwarded to ``runOptLearn``; otherwise they are processed
    through joblib with the threading backend. Returns ``(success, fail)``,
    two lists of results split on their ``success`` attribute; each failure
    is logged as an error.
    """
    if test:
        result = [runOptLearn(p, test) for p in allpool]
    else:
        with parallel_backend('threading'):
            result = Parallel()(delayed(runOptLearn)(p) for p in allpool)

    success, fail = [], []
    for index, outcome in enumerate(result):
        if outcome.success:
            success.append(outcome)
            continue
        logger.error("!!!FAILED: %d %s", index, outcome.shortname)
        fail.append(outcome)

    logger.debug('finish')
    return success, fail
def test_retrieval_context():
    """Check how often a custom backend's retrieval context is entered
    when running nested parallel calls."""
    import contextlib

    class MyBackend(ThreadingBackend):
        # Number of times retrieval_context has been entered.
        i = 0

        @contextlib.contextmanager
        def retrieval_context(self):
            self.i += 1
            yield

    register_parallel_backend("retrieval", MyBackend)

    def nested_call(n):
        inner = Parallel(n_jobs=2)
        return inner(delayed(id)(i) for i in range(n))

    with parallel_backend("retrieval") as (ba, _):
        outer = Parallel(n_jobs=2)
        outer(delayed(nested_call, check_pickle=False)(i) for i in range(5))
        assert ba.i == 1
def test_no_blas_crash_or_freeze_with_multiprocessing():
    """Check that BLAS calls in spawned workers neither crash nor freeze.

    On recent Python versions the 'spawn' start method should make it
    possible to use multiprocessing together with whatever BLAS
    implementation numpy happens to use.
    """
    if sys.version_info < (3, 4):
        raise SkipTest('multiprocessing can cause BLAS freeze on old Python')

    # The spawn start method is both robust and available on all platforms.
    spawn_backend = mp.get_context('spawn')

    # Call BLAS DGEMM to force the initialization of the internal
    # thread-pool in the main process.
    rng = np.random.RandomState(42)
    a = rng.randn(1000, 1000)
    np.dot(a, a.T)

    # Check that the internal BLAS thread-pool is not in an inconsistent
    # state in the worker processes managed by multiprocessing.
    parallel = Parallel(n_jobs=2, backend=spawn_backend)
    parallel(delayed(np.dot)(a, a.T) for i in range(2))
def junction_search(directory, junction_folder, input_data_folder,
                    blast_results_folder, junction_sequence,
                    exclusion_sequence, threads):
    """Search every .sam file for the junction sequences, in parallel.

    Exits the process with status 1 when ``get_sam_filelist`` returns no
    file. Otherwise echoes the sequences being searched, runs ``jsearch``
    on each file with ``threads`` joblib jobs and finally calls
    ``multi_convert`` on the junction and blast result folders.
    """
    unmap_files = get_sam_filelist(directory, input_data_folder)
    if len(unmap_files) == 0:
        error_text = (
            "\n>>> ERROR: No .sam files found in directory %s." % directory)
        click.echo(red_fg(error_text))
        sys.exit(1)

    junction_seqs = make_search_junctions(junction_sequence)
    click.echo(cyan_fg(
        "\n>>> The primary, secondary, and tertiary sequences searched are:"))
    for sequence in junction_seqs:
        click.echo(yellow_fg(" %s" % sequence))

    click.echo(cyan_fg('\n>>> Starting junction search on %s cores.' % threads))
    search = parallel.delayed(jsearch)
    parallel.Parallel(n_jobs=threads)(
        search(directory, sam_file, input_data_folder, junction_folder,
               junction_seqs, exclusion_sequence)
        for sam_file in unmap_files)

    multi_convert(directory, junction_folder, blast_results_folder)
def significance(self, pair1, pair2):
    """Compute per-metric differences and p-values between two results.

    Each pair is ``(per_doc, overall)``. The randomisation method selected
    by ``self.method`` is run in parallel, one job per share returned by
    ``_job_shares(self.n_jobs, self.trials)``. Returns a dict mapping each
    metric to ``{'diff': ..., 'p': ...}``, where ``p`` is
    ``(summed counts + 1) / (self.trials + 1)``.
    """
    per_doc1, overall1 = pair1
    per_doc2, overall2 = pair2
    # TODO: limit to metrics
    base_diff = _result_diff(overall1, overall2)
    method = self.METHODS[self.method]
    randomized_diffs = functools.partial(method, per_doc1, per_doc2,
                                         base_diff)
    parallel = Parallel(n_jobs=self.n_jobs)
    results = parallel(
        delayed(randomized_diffs)(share)
        for share in _job_shares(self.n_jobs, self.trials))

    # One tuple of counts per job; the metric order is taken from the last
    # job's result.
    all_counts = []
    for result in results:
        metrics, counts = zip(*result.items())
        all_counts.append(counts)

    summary = {}
    for metric, metric_counts in zip(metrics, zip(*all_counts)):
        summary[metric] = {
            'diff': base_diff[metric],
            'p': (sum(metric_counts) + 1) / (self.trials + 1),
        }
    return summary
def test_dispatch_multiprocessing(backend):
    """Check that using pre_dispatch Parallel does dispatch items lazily."""
    # Manager-backed list so that worker processes can record events too.
    queue = mp.Manager().list()

    def producer():
        for i in range(6):
            queue.append('Produced %i' % i)
            yield i

    parallel = Parallel(n_jobs=2, batch_size=1, pre_dispatch=3,
                        backend=backend)
    parallel(delayed(consumer)(queue, 'any') for _ in producer())

    # Only 3 tasks are dispatched out of 6. The 4th task is dispatched only
    # after any of the first 3 jobs have completed.
    head = list(queue)[:4]
    # The first consumption event can sometimes happen before the end of the
    # dispatching, hence drop it before looking at the "Produced" events.
    head.remove('Consumed any')
    assert head == ['Produced %i' % i for i in range(3)]
    assert len(queue) == 12
def test_simple_parallel(backend, n_jobs, verbose):
    """Check that Parallel returns the same squares as a sequential loop."""
    expected = [square(x) for x in range(5)]
    parallel = Parallel(n_jobs=n_jobs, backend=backend, verbose=verbose)
    assert expected == parallel(delayed(square)(x) for x in range(5))
def __call__(self):
    """Evaluate every model directory found under ``self.models``.

    Loads the ground truth from ``self.gt`` once, then for each model
    directory reads its predictions and runs the overall experiment
    followed by one attribute-split experiment per entry of ``ATTR_EXP``.
    Models whose pickles all exist already are skipped unless
    ``self.extra['overwrite']`` is set.

    Raises:
        ValueError: if ``self.models``, ``self.gt`` or a model's
            ``Predictions``/``Binarised`` sub-directory is not a directory.
    """
    for required_dir in (self.models, self.gt):
        if not required_dir.is_dir():
            raise ValueError(f"{required_dir} is not a directory.")

    # 'interp' takes precedence over 'interp_points'; 1000 by default.
    n_points = self.extra.get('interp',
                              self.extra.get('interp_points', 1000))
    self.threshold = np.linspace(0, 1, n_points)

    # Dataset classes are imported lazily, only the selected one.
    dataset_name = self.dataset.lower()
    if dataset_name == 'mobius':
        from datasets import MOBIUS as dataset_cls
    elif dataset_name == 'sbvpi':
        from datasets import SBVPI as dataset_cls
    else:
        from datasets import Dataset as dataset_cls
    dataset = dataset_cls.from_dir(self.gt, mask_dir=None)
    dataset.shuffle()

    with tqdm_joblib(tqdm(desc="Reading GT", total=len(dataset))):
        gt_pairs = Parallel(n_jobs=-1)(
            delayed(self._load_gt)(gt_sample) for gt_sample in dataset)
        gt = dict(gt_pairs)

    for self._model in self.models.iterdir():
        self._predictions = self._model / 'Predictions'
        self._binarised = self._model / 'Binarised'
        for required_dir in (self._predictions, self._binarised):
            if not required_dir.is_dir():
                raise ValueError(f"{required_dir} is not a directory.")

        # Check if all pickles already exist.
        # Flatten ATTR_EXP: entries that can be concatenated to a tuple are
        # merged in, anything else is appended as a single element.
        flat_attrs = tuple()
        for attr in ATTR_EXP:
            try:
                flat_attrs += attr
            except TypeError:
                flat_attrs += attr,
        unique_attr_values = {
            attr: {getattr(sample, attr) for sample in dataset}
            for attr in set(flat_attrs)
        }
        exp_attr_values = [
            {attr: unique_attr_values[attr]
             for attr in ensure_iterable(attrs, True)}
            for attrs in ATTR_EXP
        ]
        # One named experiment per combination of attribute values.
        attr_experiments = {
            ', '.join(f'{attr.title()}={val.name.title()}'
                      for attr, val in current_values.items()): current_values
            for current_exp in exp_attr_values
            for current_values in dict_product(current_exp)
        }
        all_names = ['Overall'] + list(attr_experiments)
        if not self.extra.get('overwrite', False) and all(
                (self._model / f'Pickles/{name}.pkl').is_file()
                for name in all_names):
            print(f"All pickles already exist, skipping {self._model.name}")
            continue

        # TODO: Move folds here and only load one fold's predictions at a
        # time. We can't do this because experiment2 needs to have different
        # splits. If we absolutely need this, we'll have to reread the
        # images for each sub-experiment anew.
        # We can cache the images for each split until the end of the split
        # - that way we'll only need to read some of the images anew.
        print(f"Evaluating model {self._model.name}")
        with tqdm_joblib(
                tqdm(desc="Reading predictions", total=len(dataset))):
            prediction_pairs = Parallel(n_jobs=-1)(
                delayed(self._process_image)(gt_sample)
                for gt_sample in dataset)
            pred_bin = dict(prediction_pairs)

        # This will filter out non-existing predictions, so the code will
        # still work, but missing predictions should be addressed
        # (otherwise evaluation is unfair).
        pred_bin_gt = {
            gt_sample: (*pred_bin[gt_sample], gt[gt_sample])
            for gt_sample in dataset
            if pred_bin[gt_sample] is not None
        }

        # Overall
        self._experiment1(pred_bin_gt)

        # Split by lighting, phones, and gaze
        for attrs in ATTR_EXP:
            self._experiment2(pred_bin_gt, attrs)
def createMISPEventFromHash(self, _hash, filename, additional_hash=False):
    """Create a MISP event for a malware hash and attach its attributes.

    Returns False when an event for the hash already exists or when the
    malware repository has no information about it; True once the event
    has been created and populated. When ``main._DOWNLOAD_MALWARE_`` is
    set, the sample is downloaded (if not already on disk) and its header
    information is added to the event.

    ``additional_hash`` is accepted but not used by this method.
    """
    LibIoC_DK.debugging("Creating the MISP hash event: %s" % (_hash),
                        main._DEBUG_, main._LOGGING_, main.hFile)
    _hash = _hash.lower()

    # If the MISP event for the hash value already exists, stop here.
    if self.checkMISPHashEventExist(_hash):
        LibIoC_DK.debugging("The MISP hash event ALREADY exists",
                            main._DEBUG_, main._LOGGING_, main.hFile)
        return False

    result = self.malware_repo_connector.getMalwareInfo(_hash)
    if not result:
        return False

    event = self.misp_connection.new_event(0, 1, 2, _hash)
    # This comment attribute works as the ground truth of IoCs.
    self.misp_connection.add_named_attribute(
        event, category='Other', type_value='comment',
        value=LibIoC_DK.getFileName(filename))

    # The first three attributes in the result are the md5, sha1 and sha256
    # representations of the malware hash, so store them manually.
    md5 = result.pop(0)[2]
    sha1 = result.pop(0)[2]
    sha256 = result.pop(0)[2]
    for hash_kwargs in ({'md5': md5}, {'sha1': sha1}, {'sha256': sha256}):
        self.misp_connection.add_hashes(
            event, category='Payload installation', **hash_kwargs)
        if main._STAT_:
            self.ioc_stat.addCategory2('hash')

    if main._DOWNLOAD_MALWARE_:
        # NOTE(review): the nesting of this section was reconstructed from
        # a whitespace-collapsed source -- confirm against the original.
        sample_path = (self.config['SampleRoot'] + '/'
                       + LibIoC_DK.getReportPublicationYear(filename))
        if not os.path.exists(sample_path + '/' + _hash):
            malware_buffer = self.malware_repo_connector.downloadMalware(
                sha256, False)
            if malware_buffer:
                if not os.path.exists(sample_path):
                    os.makedirs(sample_path)
                # The context manager closes the file even if write fails.
                with open(sample_path + '/' + _hash, 'wb') as f:
                    f.write(malware_buffer)
                extracted = self.malware_repo_connector.unzipMalware(
                    sample_path + '/' + _hash, sample_path)
                if extracted.lower() != _hash.lower():
                    # The archive held a differently named sample: drop the
                    # downloaded file and continue with the extracted one.
                    os.remove(sample_path + '/' + _hash)
                    _hash = extracted
        header_info = LibIoC_DK.getMalwareHeaderInfo(
            sample_path + '/' + _hash)
        if header_info is not None:
            self.addMalwareHeaderInfo(header_info, event)

    if main._STAT_ and self.ioc_stat.report_name != filename:
        self.ioc_stat.setReportBuffer(filename)

    if main._PARALLELIZE_ATTRIB_ADDITION_:
        # Parallelized version. ``filename`` is forwarded so this matches
        # the sequential branch below.
        print('[Hash] Parallelized attribute storing...')
        joblib.Parallel(joblib.cpu_count())(
            delayed(addAttribute)(self, event, attr, filename)
            for attr in result)
    else:
        # Sequential version.
        for attr in result:
            self.addAttribute(event, attr, filename)

    LibIoC_DK.debugging("The MISP hash event created", main._DEBUG_,
                        main._LOGGING_, main.hFile)
    return True
def createMISPEventFromReport(self, ioc, filename, _date):
    """Create a MISP event for a report and attach its IoC attributes.

    ``_date`` may be None or a "/"-separated date: when the first field is
    greater than 12 it is read as year/month/day, otherwise as
    month/day/year, and it is converted to ISO format. A date that cannot
    be parsed is passed to MISP unchanged.

    Returns False when an event for the report already exists or when no
    attribute could be added (the freshly created event is then deleted);
    True otherwise.
    """
    LibIoC_DK.debugging(
        "Creating the MISP report event: %s" % (LibIoC_DK.getFileName(filename)),
        main._DEBUG_, main._LOGGING_, main.hFile)

    # Disabled alternative, kept for reference:
    # # if the MISP event for the filename already exists, add ioc in the event
    # eid = self.getMISPEventID(LibIoC_DK.getFileName(filename))
    # if eid:
    #     event = self.misp_connection.get_event(eid)
    #     for attr in ioc:
    #         self.addAttribute(event, attr)
    #     return False

    if self.checkMISPReportEventExist(filename):
        LibIoC_DK.debugging("The MISP report event ALREADY exists",
                            main._DEBUG_, main._LOGGING_, main.hFile)
        return False

    if _date is not None:
        try:
            # Parse into a local so that a failure leaves _date untouched
            # instead of half-converted into a list of strings.
            parts = _date.split("/")
            if int(parts[0]) > 12:
                year, month, day = parts[0], parts[1], parts[2]
            else:
                month, day, year = parts[0], parts[1], parts[2]
            _date = datetime.date(int(year), int(month), int(day)).isoformat()
        except Exception as e:
            # Best effort: report the problem and keep the original value.
            print(e)

    event = self.misp_connection.new_event(
        0, 1, 2, LibIoC_DK.getFileName(filename), date=_date)
    # This comment attribute works as the ground truth of IoC.
    self.misp_connection.add_named_attribute(
        event, category='Other', type_value='comment',
        value=LibIoC_DK.getFileName(filename))

    if main._STAT_ and self.ioc_stat.report_name != filename:
        self.ioc_stat.setReportBuffer(filename)

    if main._PARALLELIZE_ATTRIB_ADDITION_:
        # Parallelized version: one outcome per attribute.
        print('[Report] Parallelized attribute storing...')
        outcomes = joblib.Parallel(joblib.cpu_count())(
            delayed(addAttribute)(self, event, attr, filename)
            for attr in ioc)
        attr_added = True in outcomes
    else:
        # Sequential version: remember whether any addition succeeded.
        attr_added = False
        for attr in ioc:
            attr_added = self.addAttribute(event, attr, filename) or attr_added

    if not attr_added:
        # Nothing could be attached: do not leave an empty event behind.
        self.misp_connection.delete_event(event['Event']['id'])
        LibIoC_DK.debugging(
            "NO attribute added for the report: %s" % (LibIoC_DK.getFileName(filename)),
            main._DEBUG_, main._LOGGING_, main.hFile)
        return False

    LibIoC_DK.debugging("The MISP report event created", main._DEBUG_,
                        main._LOGGING_, main.hFile)
    return True
def nested_call(n):
    """Return ``id(i)`` for every ``i`` in ``range(n)``, computed with 2 jobs."""
    runner = Parallel(n_jobs=2)
    tasks = (delayed(id)(value) for value in range(n))
    return runner(tasks)
def get_nested_pids():
    """Return the pids reported by two ``os.getpid`` tasks run with 2 jobs.

    Asserts first that the currently active backend type is
    ``ThreadingBackend``.
    """
    active_type = _active_backend_type()
    assert active_type == ThreadingBackend
    runner = Parallel(n_jobs=2)
    pid_tasks = (delayed(os.getpid)() for _ in range(2))
    return runner(pid_tasks)
def test_exception_dispatch():
    """Make sure that exceptions raised during dispatch are indeed captured"""
    with raises(ValueError):
        runner = Parallel(n_jobs=2, pre_dispatch=16, verbose=0)
        failing_tasks = (delayed(exception_raiser)(i) for i in range(30))
        runner(failing_tasks)
def test_error_capture(backend):
    """Check that errors are captured and that the correct exceptions are raised."""

    def failing_divisions():
        # The second pair, (1, 0), is a division by zero.
        return [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))]

    def interrupting_tasks():
        return [delayed(interrupt_raiser)(x) for x in (1, 0)]

    def check_pool_restarted(parallel, previous_workers):
        # The managed pool should still be available and be in a working
        # state despite the previously raised (and caught) exception
        assert get_workers(parallel._backend) is not None

        # The pool should have been interrupted and restarted:
        assert get_workers(parallel._backend) is not previous_workers

        assert ([f(x, y=1) for x in range(10)]
                == parallel(delayed(f)(x, y=1) for x in range(10)))

    if mp is not None:
        # A JoblibException will be raised only if there is indeed
        # multiprocessing
        with raises(JoblibException):
            Parallel(n_jobs=2, backend=backend)(failing_divisions())

        with raises(WorkerInterrupt):
            Parallel(n_jobs=2, backend=backend)(interrupting_tasks())

        # Try again with the context manager API
        with Parallel(n_jobs=2, backend=backend) as parallel:
            assert get_workers(parallel._backend) is not None
            original_workers = get_workers(parallel._backend)

            with raises(JoblibException):
                parallel(failing_divisions())
            check_pool_restarted(parallel, original_workers)

            original_workers = get_workers(parallel._backend)
            with raises(WorkerInterrupt):
                parallel(interrupting_tasks())
            check_pool_restarted(parallel, original_workers)

        # Check that the inner pool has been terminated when exiting the
        # context manager
        assert get_workers(parallel._backend) is None
    else:
        with raises(KeyboardInterrupt):
            Parallel(n_jobs=2)(interrupting_tasks())

    # wrapped exceptions should inherit from the class of the original
    # exception to make it easy to catch them
    with raises(ZeroDivisionError):
        Parallel(n_jobs=2)(failing_divisions())

    with raises(MyExceptionWithFinickyInit):
        finicky_tasks = (delayed(exception_raiser)(i, custom_exception=True)
                         for i in range(30))
        Parallel(n_jobs=2, verbose=0)(finicky_tasks)

    try:
        # JoblibException wrapping is disabled in sequential mode:
        caught = JoblibException()
        sequential = Parallel(n_jobs=1)
        sequential(delayed(division)(x, y) for x, y in zip((0, 1), (1, 0)))
    except Exception as caught:
        assert not isinstance(caught, JoblibException)
def test_parallel_timeout_fail(backend):
    """Check that the timeout properly fails when the function is too slow."""
    with raises(TimeoutError):
        runner = Parallel(n_jobs=2, backend=backend, timeout=0.01)
        slow_tasks = (delayed(sleep)(10) for _ in range(10))
        runner(slow_tasks)
def test_parallel_timeout_success(backend):
    """Check that no timeout is raised when the function is fast enough."""
    runner = Parallel(n_jobs=2, backend=backend, timeout=10)
    outcomes = runner(delayed(sleep)(0.001) for _ in range(10))
    assert len(outcomes) == 10
def get_timestamps(blocks):
    """Fetch the timestamp of every block and return them as pandas datetimes.

    ``get_block_timestamp`` is called once per block through a 10-job
    threading-backed ``Parallel``. Each result is multiplied by 1e9 before
    being handed to ``pd.to_datetime`` (presumably converting epoch seconds
    to nanoseconds -- confirm against ``get_block_timestamp``).
    """
    fetcher = Parallel(10, 'threading')
    raw_values = fetcher(
        delayed(get_block_timestamp)(block) for block in blocks)
    scaled_values = [value * 1e9 for value in raw_values]
    return pd.to_datetime(scaled_values)
def test_parallel_call_cached_function_defined_in_jupyter(
        tmpdir, call_before_reducing):
    """Check that Parallel calls do not clear the cache of an interactive function.

    Two consecutive "sessions" are emulated. In each one a function ``f`` is
    compiled under an ``<ipython-input-...>`` filename, wrapped with
    ``Memory.cache`` and its cache directory under ``tmpdir`` is inspected.
    ``call_before_reducing`` selects whether the cached function is called in
    the parent before being sent to the ``Parallel`` workers.
    """
    # Calling an interactively defined memory.cache()'d function inside a
    # Parallel call used to clear the existing cache related to the said
    # function (https://github.com/joblib/joblib/issues/1035)

    # This test checks that this is no longer the case.

    # TODO: test that the cache related to the function cache persists across
    # ipython sessions (provided that no code change were made to the
    # function's source)?

    # The first part of the test makes the necessary low-level calls to emulate
    # the definition of a function in a jupyter notebook cell. Joblib has
    # some custom code to treat functions defined specifically in jupyter
    # notebooks/ipython session -- we want to test this code, which requires
    # the emulation to be rigorous.
    for session_no in [0, 1]:
        ipython_cell_source = ''' def f(x): return x '''

        # The compiled code carries the fake cell id as its co_filename.
        ipython_cell_id = '<ipython-input-{}-000000000000>'.format(session_no)

        exec(
            compile(textwrap.dedent(ipython_cell_source),
                    filename=ipython_cell_id, mode='exec'))
        # f is now accessible in the locals mapping - but for some unknown
        # reason, f = locals()['f'] throws a KeyError at runtime, we need to
        # bind locals()['f'] to a different name in the local namespace
        aliased_f = locals()['f']
        aliased_f.__module__ = "__main__"

        # Preliminary sanity checks, and tests checking that joblib properly
        # identified f as an interactive function defined in a jupyter notebook
        assert aliased_f(1) == 1
        assert aliased_f.__code__.co_filename == ipython_cell_id

        memory = Memory(location=tmpdir.strpath, verbose=0)
        cached_f = memory.cache(aliased_f)

        # A single cache directory is expected, whatever the session number.
        assert len(os.listdir(tmpdir / 'joblib')) == 1
        f_cache_relative_directory = os.listdir(tmpdir / 'joblib')[0]
        assert 'ipython-input' in f_cache_relative_directory

        f_cache_directory = tmpdir / 'joblib' / f_cache_relative_directory

        if session_no == 0:
            # The cache should be empty as cached_f has not been called yet.
            assert os.listdir(f_cache_directory) == ['f']
            assert os.listdir(f_cache_directory / 'f') == []

            if call_before_reducing:
                cached_f(3)
                # Two entries were just created: func_code.py, and a folder
                # containing the information (input hash/output) of
                # cached_f(3)
                assert len(os.listdir(f_cache_directory / 'f')) == 2

                # Now, testing #1035: when calling a cached function, joblib
                # used to dynamically inspect the underlying function to
                # extract its source code (to verify it matches the source code
                # of the function as last inspected by joblib) -- however,
                # source code introspection fails for dynamic functions sent to
                # child processes - which would eventually make joblib clear
                # the cache associated to f
                res = Parallel(n_jobs=2)(delayed(cached_f)(i) for i in [1, 2])
            else:
                # Submit the function to the joblib child processes, although
                # the function has never been called in the parent yet. This
                # triggers a specific code branch inside
                # MemorizedFunc.__reduce__.
                res = Parallel(n_jobs=2)(delayed(cached_f)(i) for i in [1, 2])
                assert len(os.listdir(f_cache_directory / 'f')) == 3

                cached_f(3)

            # Making sure f's cache does not get cleared after the parallel
            # calls, and contains ALL cached functions calls (f(1), f(2), f(3))
            # and 'func_code.py'
            assert len(os.listdir(f_cache_directory / 'f')) == 4
        else:
            # For the second session, there should be an already existing cache
            assert len(os.listdir(f_cache_directory / 'f')) == 4

            cached_f(3)

            # The previous cache should not be invalidated after calling the
            # function in a new session
            assert len(os.listdir(f_cache_directory / 'f')) == 4
def _cross_validate_with_warm_start(
    estimators,
    X,
    y=None,
    *,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=None,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
    return_train_score=False,
    return_estimator=False,
    error_score=np.nan,
):
    """Cross-validate with one estimator per split, recording fit/score times.

    Parameters
    ----------
    estimators : sequence of estimator objects implementing 'fit'
        ``estimators[i]`` is fitted and scored on the ``i``-th split produced
        by ``cv``. ``estimators[0]`` alone is used to decide whether the
        problem is a classification one and to resolve ``scoring``.

    X : array-like of shape (n_samples, n_features)
        The data to fit.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
            default=None
        The target variable to try to predict in the case of supervised
        learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels forwarded to ``cv.split``.

    scoring : str, callable, list/tuple, or dict, default=None
        A callable is used as the single scorer under the name ``"score"``.
        ``None`` or a str is resolved through ``check_scoring``. Anything
        else is handed to ``_check_multimetric_scoring``.

    cv : int, cross-validation generator or an iterable, default=None
        Splitting strategy, normalised through ``check_cv``.

    n_jobs : int, default=None
        Number of jobs given to ``joblib.Parallel``.

    verbose : int, default=0
        The verbosity level, used for both ``Parallel`` and the fits.

    fit_params : dict or list of dict, default=None
        Parameters to pass to the fit method. When a list is given,
        ``fit_params[i]`` is used for the ``i``-th split; any other value is
        shared by all splits.

    pre_dispatch : int or str, default='2*n_jobs'
        Forwarded to ``joblib.Parallel`` to bound the number of dispatched
        jobs.

    return_train_score : bool, default=False
        Whether to include train scores in the result.

    return_estimator : bool, default=False
        Whether to return the estimators fitted on each split.

    error_score : 'raise' or numeric, default=np.nan
        Forwarded to ``_fit_and_score``.

    Returns
    -------
    ret : dict
        Holds ``fit_time`` and ``score_time``, one ``test_<name>`` entry per
        scorer, one ``train_<name>`` entry per scorer when
        ``return_train_score`` is true, and ``estimator`` when
        ``return_estimator`` is true.
    results_org : list
        The raw per-split results returned by the ``Parallel`` call, before
        aggregation.
    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimators[0]))

    if callable(scoring):
        scorers = {"score": scoring}
    elif scoring is None or isinstance(scoring, str):
        scorers = {"score": check_scoring(estimators[0], scoring=scoring)}
    else:
        try:
            scorers = _check_multimetric_scoring(estimators[0], scoring=scoring)
            # sklearn < 0.24.0 compatibility
            if isinstance(scorers, tuple):
                scorers = scorers[0]
        except KeyError:
            # NOTE(review): a swallowed KeyError leaves ``scorers`` unbound,
            # so the failure only surfaces later when the jobs are built.
            pass

    # Estimators are handed to the workers as given; no clone() call is made
    # in this function.
    runner = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
    jobs = (
        delayed(_fit_and_score)(
            estimators[split_no],
            X,
            y,
            scorers,
            split[0],
            split[1],
            verbose,
            None,
            fit_params[split_no] if isinstance(fit_params, list) else fit_params,
            return_train_score=return_train_score,
            return_times=True,
            return_n_test_samples=True,
            return_estimator=return_estimator,
            error_score=error_score,
        )
        for split_no, split in enumerate(cv.split(X, y, groups))
    )
    results_org = runner(jobs)

    results = _aggregate_score_dicts(results_org)

    ret = {
        "fit_time": results["fit_time"],
        "score_time": results["score_time"],
    }
    if return_estimator:
        ret["estimator"] = results["estimator"]

    test_scores_dict = _normalize_score_results(results["test_scores"])
    if return_train_score:
        train_scores_dict = _normalize_score_results(results["train_scores"])

    for name in test_scores_dict:
        ret["test_%s" % name] = test_scores_dict[name]
        if not return_train_score:
            continue
        ret["train_%s" % name] = train_scores_dict[name]

    return (ret, results_org)
def parallel_func(inner_n_jobs, backend):
    """Apply ``square`` to 0, 1 and 2 with the given job count and backend."""
    runner = Parallel(n_jobs=inner_n_jobs, backend=backend)
    tasks = (delayed(square)(value) for value in range(3))
    return runner(tasks)
# each task-config-fidelity would have been evaluated on a different seed obj = { config_hash: { fidelity_hash: { seed: v } } } task_datas[task_id] = update_table_with_new_entry(task_datas[task_id], obj) file_count += 1 try: # deleting data file that was processed os.remove(os.path.join(dump_path, filename)) # os.remove(os.path.join(dump_path, filename)) except FileNotFoundError: continue logger.info("\tFinished batch processing in {:.3f} seconds".format(time.time() - start)) logger.info("\tUpdating benchmark data files...") with parallel_backend(backend="loky", n_jobs=args.n_jobs): Parallel()( delayed(save_task_file)(task_id, obj, output_path) for task_id, obj in task_datas.items() ) logger.info("\tContinuing to next batch") logger.info("\t{}".format("-" * 25)) logger.info("Done!") logger.info("Total files processed: {}".format(file_count))
def nested_loop(backend):
    """Run two ``square(.01)`` tasks with 2 jobs on ``backend``; return nothing."""
    runner = Parallel(n_jobs=2, backend=backend)
    tasks = (delayed(square)(.01) for _ in range(2))
    runner(tasks)
def concurrent_get_filename(array, temp_dirs):
    """Record the directory of ``array.filename`` as seen by loky workers.

    Ten times in a row, a single ``getattr(array, 'filename')`` task is run
    in a shared 2-job loky ``Parallel`` (``max_nbytes=10``) and the directory
    part of the returned filename is added to ``temp_dirs``.
    """
    with Parallel(backend='loky', n_jobs=2, max_nbytes=10) as pool:
        for _round in range(10):
            single_task = (delayed(getattr)(array, 'filename')
                           for _ in range(1))
            (filename,) = pool(single_task)
            directory = os.path.dirname(filename)
            temp_dirs.add(directory)
def test_nested_loop(parent_backend, child_backend):
    """Run ``nested_loop(child_backend)`` twice from a 2-job parent Parallel."""
    outer = Parallel(n_jobs=2, backend=parent_backend)
    inner_tasks = (delayed(nested_loop)(child_backend) for _ in range(2))
    outer(inner_tasks)
help="The path where all models directories are") parser.add_argument("--output_path", default=None, type=str, help="The path to dump yaml file") parser.add_argument("--n_jobs", default=4, type=int, help="number of cores") args = parser.parse_args() return args if __name__ == "__main__": args = input_arguments() benches = load_benchmark_settings() benches = {k: v for k, v in benches.items() if check_key(k)} with parallel_backend(backend="multiprocessing", n_jobs=args.n_jobs): dicts = Parallel()(delayed(update_dict_entry)(key, args.path) for key, value in benches.items()) tabular_dict = dict() for entry in dicts: tabular_dict.update(entry) print("Collected {} keys...".format(len(tabular_dict))) with open(os.path.join(args.output_path, "tabular_plot_config.yaml"), "w") as f: f.writelines(yaml.dump(tabular_dict)) print("Done!")
def test_mutate_input_with_threads():
    """Input is mutable when using the threading backend"""
    shared_queue = Queue(maxsize=5)
    workers = Parallel(n_jobs=2, backend="threading")
    # Five puts from the worker threads must fill the 5-slot queue.
    put_tasks = (delayed(shared_queue.put)(1) for _ in range(5))
    workers(put_tasks)
    assert shared_queue.full()
def vault_prices(self, blocks):
    """Return ``magic.get_price(self.vault, block=block)`` for each block.

    The lookups run through a 10-job threading-backed ``Parallel``.
    """
    pool = Parallel(10, 'threading')
    lookups = (delayed(magic.get_price)(self.vault, block=block)
               for block in blocks)
    return pool(lookups)
def test_parallel_kwargs(n_jobs):
    """Check the keyword argument processing of pmap."""
    inputs = range(10)
    expected = [f(x, y=1) for x in inputs]
    runner = Parallel(n_jobs=n_jobs)
    observed = runner(delayed(f)(x, y=1) for x in inputs)
    assert expected == observed