Example #1
0
def test_backend_hinting_and_constraints():
    """Check how ``prefer``/``require`` interact with backend selection.

    Covers the default choice, soft hints (``prefer``), hard constraints
    (``require``), an explicit ``backend`` argument and the
    ``parallel_backend`` context manager.
    """
    hinted_cases = (
        ({'prefer': 'threads'}, ThreadingBackend),
        ({'prefer': 'processes'}, LokyBackend),
        ({'require': 'sharedmem'}, ThreadingBackend),
    )
    for n_jobs in (1, 2, -1):
        # Without any hint, the loky backend is expected.
        assert type(Parallel(n_jobs=n_jobs)._backend) is LokyBackend

        for hint, expected_backend in hinted_cases:
            hinted = Parallel(n_jobs=n_jobs, **hint)
            assert type(hinted._backend) is expected_backend

    # An explicitly selected backend wins over a hint (passing a hint in
    # that situation is pointless, but it must not break anything).
    explicit = Parallel(n_jobs=2, backend='loky', prefer='threads')
    assert type(explicit._backend) is LokyBackend

    with parallel_backend('loky', n_jobs=2):
        # A backend chosen through the context manager is kept when the
        # Parallel call only carries a soft hint.
        soft = Parallel(prefer='threads')
        assert type(soft._backend) is LokyBackend
        assert soft.n_jobs == 2

    with parallel_backend('loky', n_jobs=2):
        # An n_jobs value hard-coded at the call site takes precedence.
        soft_local = Parallel(n_jobs=3, prefer='threads')
        assert type(soft_local._backend) is LokyBackend
        assert soft_local.n_jobs == 3

    with parallel_backend('loky', n_jobs=2):
        # A hard constraint on the Parallel call overrides the context
        # manager: the default shared-memory capable backend is used,
        # together with the default number of jobs.
        hard = Parallel(require='sharedmem')
        assert type(hard._backend) is ThreadingBackend
        assert hard.n_jobs == 1

    with parallel_backend('loky', n_jobs=2):
        # Same hard constraint, but with n_jobs fixed at the call site.
        hard_local = Parallel(n_jobs=3, require='sharedmem')
        assert type(hard_local._backend) is ThreadingBackend
        assert hard_local.n_jobs == 3
Example #2
0
def test_nested_parallel_warnings(parent_backend, child_backend, expected):
    """Run ``_assert_warning_nested`` in workers of ``parent_backend``.

    The helper is first run with ``inner_n_jobs=1`` (no warning expected),
    then with ``inner_n_jobs=2`` and the parametrized ``expected`` value.
    """

    def run_nested(inner_n_jobs, expect_warning):
        # Five identical tasks dispatched on two workers of the parent.
        outer = Parallel(n_jobs=2, backend=parent_backend)
        return outer(
            delayed(_assert_warning_nested)(
                backend=child_backend, inner_n_jobs=inner_n_jobs,
                expected=expect_warning)
            for _ in range(5))

    # No warning when the nested call is sequential.
    run_nested(1, False)

    # Warning if the nested call is parallel and one is expected.
    res = run_nested(2, expected)

    # Warning handling is not thread safe: a thread may see several
    # warnings or none at all, so only require one positive result there.
    combine = any if parent_backend == "threading" else all
    assert combine(res)
Example #3
0
def test_numpy_arrays_use_different_memory(mmap_mode):
    """Fill ten arrays with distinct values in parallel and check each one.

    ``max_nbytes=0`` is passed together with the parametrized ``mmap_mode``;
    every returned array must contain only the value written for its index.
    """

    def fill(arr, value):
        arr[:] = value
        return arr

    arrays = [np.zeros((10, 10), dtype='float64') for _ in range(10)]

    executor = Parallel(mmap_mode=mmap_mode, max_nbytes=0, n_jobs=2)
    results = executor(
        delayed(fill)(arr, i) for i, arr in enumerate(arrays))

    for expected_value, filled in enumerate(results):
        np.testing.assert_array_equal(filled, expected_value)
Example #4
0
def test_auto_memmap_on_arrays_from_generator(backend):
    """Non-regression test: auto-memmap with arrays coming from a generator.

    Guards against a bad interaction between the GC collecting arrays
    freshly created during iteration inside the parallel dispatch loop and
    the auto-memmap feature of Parallel.
    See: https://github.com/joblib/joblib/pull/294
    """

    def generate_arrays(n):
        for i in range(n):
            yield np.ones(10, dtype=np.float32) * i

    # The first pass uses 2 workers.  The second pass asks for 4 so that
    # loky has to adapt the executor by growing the number of worker
    # processes (non-regression test for
    # https://github.com/joblib/joblib/issues/629).
    for n_jobs in (2, 4):
        # max_nbytes=1 forces memory-mapping even for small arrays.
        results = Parallel(n_jobs=n_jobs, max_nbytes=1, backend=backend)(
            delayed(check_memmap)(a) for a in generate_arrays(100))
        for result, expected in zip(results, generate_arrays(len(results))):
            np.testing.assert_array_equal(expected, result)
def check_dispatch_one_job(backend):
    """Check that with a single job, Parallel consumes its input lazily.

    A producer logs each item it yields into a shared list; ``consumer``
    is called with that list and the item.  The interleaving of the log
    entries is checked without batching and with batches of four.
    """
    queue = []

    def producer():
        for i in range(6):
            queue.append('Produced %i' % i)
            yield i

    # batch_size=1 disables batching: strict produce/consume alternation.
    unbatched_log = [
        'Produced 0', 'Consumed 0',
        'Produced 1', 'Consumed 1',
        'Produced 2', 'Consumed 2',
        'Produced 3', 'Consumed 3',
        'Produced 4', 'Consumed 4',
        'Produced 5', 'Consumed 5',
    ]
    # batch_size=4: a full batch is produced before it is consumed.
    batched_log = [
        # First batch
        'Produced 0', 'Produced 1', 'Produced 2', 'Produced 3',
        'Consumed 0', 'Consumed 1', 'Consumed 2', 'Consumed 3',

        # Second batch
        'Produced 4', 'Produced 5', 'Consumed 4', 'Consumed 5',
    ]

    for batch_size, expected_log in ((1, unbatched_log), (4, batched_log)):
        # Start every check from an empty log.
        del queue[:]
        Parallel(n_jobs=1, batch_size=batch_size, backend=backend)(
            delayed(consumer)(queue, x) for x in producer())
        nose.tools.assert_equal(queue, expected_log)
        nose.tools.assert_equal(len(queue), 12)
Example #6
0
def check_simple_parallel(backend):
    """Check basic Parallel results and smoke-test the verbose output.

    First compares ``Parallel`` against a plain list comprehension for
    several ``n_jobs`` values, then runs a few verbose configurations with
    stdout and stderr redirected to in-memory buffers.  If any of those
    runs fails, the captured output is printed on the real stdout before
    the exception is re-raised.
    """
    X = range(5)
    for n_jobs in (1, 2, -1, -2):
        assert_equal([square(x) for x in X],
                     Parallel(n_jobs=n_jobs,
                              backend=backend)(delayed(square)(x) for x in X))

    # Save the real streams *before* entering the try block so that the
    # finally clause can always restore them, and make sure each saved
    # stream really is its own original (stderr must not be restored to
    # stdout).
    orig_stdout = sys.stdout
    orig_stderr = sys.stderr
    try:
        # To smoke-test verbosity, we capture both stdout and stderr.
        if PY3_OR_LATER:
            sys.stdout = io.StringIO()
            sys.stderr = io.StringIO()
        else:
            sys.stdout = io.BytesIO()
            sys.stderr = io.BytesIO()
        for verbose in (2, 11, 100):
            Parallel(n_jobs=-1, verbose=verbose,
                     backend=backend)(delayed(square)(x) for x in X)
            Parallel(n_jobs=1, verbose=verbose,
                     backend=backend)(delayed(square)(x) for x in X)
            Parallel(n_jobs=2,
                     verbose=verbose,
                     pre_dispatch=2,
                     backend=backend)(delayed(square)(x) for x in X)
            Parallel(n_jobs=2, verbose=verbose,
                     backend=backend)(delayed(square)(x) for x in X)
    except Exception:
        # Restore the real streams first so the captured output becomes
        # visible to whoever is debugging the failure.
        my_stdout = sys.stdout
        my_stderr = sys.stderr
        sys.stdout = orig_stdout
        sys.stderr = orig_stderr
        print(unicode(my_stdout.getvalue()))
        print(unicode(my_stderr.getvalue()))
        # Bare raise keeps the original traceback on every Python version.
        raise
    finally:
        sys.stdout = orig_stdout
        sys.stderr = orig_stderr
Example #7
0
def test_memmapping_leaks(backend, tmpdir):
    """Non-regression test: memmapped data must not outlive its Parallel.

    The temporary folder must contain files while a Parallel context using
    it is open, and must be empty once the context is closed.
    """
    folder = tmpdir.strpath
    tmpdir = folder

    def submit(executor):
        # Two references to one small random array.
        return executor(
            delayed(check_memmap)(a) for a in [np.random.random(10)] * 2)

    # max_nbytes=1 forces memory-mapping even for small arrays.
    with Parallel(n_jobs=2, max_nbytes=1, backend=backend,
                  temp_folder=folder) as p:
        submit(p)

        # Inside the context scope the memmap folder is still populated.
        assert len(os.listdir(folder)) > 0

    # Leaving the context must have cleaned the shared memory.
    assert not os.listdir(folder)

    # The shared memory must also be cleaned at the end of a plain call.
    # NOTE(review): this Parallel is created without temp_folder, so the
    # listing below inspects a folder this call was not asked to use --
    # confirm whether temp_folder was meant to be passed here.
    p = Parallel(n_jobs=2, max_nbytes=1, backend=backend)
    submit(p)

    assert not os.listdir(folder)
Example #8
0
def test_dispatch_one_job(backend, batch_size, expected_queue):
    """Check that with a single job, Parallel consumes its input lazily.

    The producer logs every item it yields into ``queue`` and ``consumer``
    receives the same list; the resulting log must equal the parametrized
    ``expected_queue`` and hold exactly 12 entries.
    """
    queue = []

    def producer():
        for i in range(6):
            queue.append('Produced %i' % i)
            yield i

    sequential = Parallel(n_jobs=1, batch_size=batch_size, backend=backend)
    sequential(delayed(consumer)(queue, x) for x in producer())

    assert queue == expected_queue
    assert len(queue) == 12
Example #9
0
def test_nested_backend_context_manager():
    """Check that nested parallel calls default to the ThreadingBackend.

    Each outer task asserts that the active backend is the threading one,
    then collects the pids seen by two nested tasks; being thread based,
    both nested tasks must report the same pid.
    """

    def get_nested_pids():
        assert _active_backend_type() == ThreadingBackend
        return Parallel(n_jobs=2)(delayed(os.getpid)() for _ in range(2))

    for outer_backend in ('threading', 'loky', 'multiprocessing'):
        with parallel_backend(outer_backend):
            pid_groups = Parallel(n_jobs=2)(
                delayed(get_nested_pids)() for _ in range(10))
            for nested_pids in pid_groups:
                assert len(set(nested_pids)) == 1
Example #10
0
def test_abort_backend(n_jobs, backend):
    """Check that a failing first task aborts the whole run quickly.

    The first delay is the string "a", which makes ``time.sleep`` raise a
    TypeError; the remaining hundred tasks would each sleep 10 seconds.
    The call must fail with TypeError in under 3 seconds.
    """
    delays = ["a"]
    delays.extend([10] * 100)

    if n_jobs < 0 and "TRAVIS_OS_NAME" in os.environ:
        # Use only up to 8 cpu in travis as cpu_count return 32 whereas we
        # only access 2 cores.
        n_jobs += 8

    with raises(TypeError):
        t_start = time.time()
        executor = Parallel(n_jobs=n_jobs, backend=backend)
        executor(delayed(time.sleep)(i) for i in delays)
    elapsed = time.time() - t_start
    assert elapsed < 3
def compute_information_grids(scan_results, max_spikes, n_jobs, **kwargs):
    """Apply ``res_to_info`` to every scan result in parallel.

    Parameters
    ----------
    scan_results : mapping
        Maps a string key, which must evaluate to a pair of values, to a
        scan result.
    max_spikes
        Forwarded to ``res_to_info`` as ``max_spikes``.
    n_jobs : int
        Number of joblib workers.
    **kwargs
        Extra keyword arguments forwarded to ``res_to_info``.

    Returns
    -------
    dict
        Maps each evaluated ``(alpha1, alpha2)`` pair to the matching
        ``res_to_info`` output.
    """
    runner = Parallel(n_jobs=n_jobs, temp_folder='/tmp')
    info_results = runner(
        delayed(res_to_info)(
            res=res,
            method='slsqp',
            max_spikes=max_spikes,
            **kwargs)
        for res in scan_results.values())

    # NOTE(review): eval() executes arbitrary code.  This is only safe if
    # the keys of scan_results are generated by trusted code -- confirm,
    # or consider ast.literal_eval if the keys are plain tuple literals.
    param_list = []
    for param in scan_results.keys():
        alpha1, alpha2 = eval(param)
        param_list.append((alpha1, alpha2))

    # keys() and values() of one unmodified dict iterate in the same order,
    # so the pairs line up with the results.
    return dict(zip(param_list, info_results))
Example #12
0
def compare_with_mutation_factors(common_classes):
    """Run the mutation-factor experiment, logging headers to results.txt.

    Three phases are run one after the other.  For each phase a header is
    appended to ``results.txt`` and ``run_in_parallel`` is called once per
    mutation factor produced by ``frange(0.1, 1, 0.1)``.

    Parameters
    ----------
    common_classes
        Project object providing ``get_number_of_activations``,
        ``get_common_classes``, ``get_population_size`` and
        ``set_simulation_variables``.
    """
    number_of_activations = common_classes.get_number_of_activations()
    number_of_parallel = 3
    parallel = Parallel(n_jobs=number_of_parallel)
    # Only length and epsilon are used below; the unpacking still checks
    # that four values are returned.
    length, epsilon, mutation_neighborhood, tolerance = (
        common_classes.get_common_classes())

    def append_to_results(*chunks):
        # The context manager closes (and therefore flushes) the file even
        # when a write fails, and always before run_in_parallel starts.
        with open('results.txt', 'a') as results_file:
            for chunk in chunks:
                results_file.write(chunk)

    append_to_results(
        "\n~~~mutation factor experiment~~~\n",
        "\npopulation size is " + str(common_classes.get_population_size()),
        "\nlenghth is {0} and epsilon is {1}\n".format(length, epsilon))

    # (header, first argument of set_simulation_variables, learner)
    phases = (
        ("\nHGT 1:\n", 1, learn_HGT),
        ("\nHGT 0.1:\n", 0.1, learn_HGT),
        ("\nrecombination:\n", 1, learn_recombination),
    )
    for header, first_setting, learner in phases:
        append_to_results(header)
        for mutation_factor in frange(0.1, 1, 0.1):
            common_classes.set_simulation_variables(
                first_setting, mutation_factor, 1)
            run_in_parallel(common_classes,
                            number_of_activations,
                            function_to_run=learner,
                            parallel=parallel,
                            number_of_parallel=number_of_parallel)
Example #13
0
def test_auto_memmap_on_arrays_from_generator():
    """Non-regression test: auto-memmap with arrays coming from a generator.

    Guards against a bad interaction between the GC collecting arrays
    freshly created during iteration inside the parallel dispatch loop and
    the auto-memmap feature of Parallel.
    See: https://github.com/joblib/joblib/pull/294
    """

    def generate_arrays(n):
        for i in range(n):
            yield np.ones(10, dtype=np.float32) * i

    # max_nbytes=1 forces memory-mapping even for small arrays.
    executor = Parallel(n_jobs=2, max_nbytes=1)
    results = executor(
        delayed(check_memmap)(a) for a in generate_arrays(100))

    expected_arrays = generate_arrays(len(results))
    for result, expected in zip(results, expected_arrays):
        np.testing.assert_array_equal(expected, result)
def test_default_mp_context():
    """Check the start method of the default multiprocessing context.

    Before Python 3.4 no context is expected.  Otherwise the start method
    must be the one named by ``JOBLIB_START_METHOD`` when that variable is
    set to a non-blank value, else ``'spawn'`` on Windows and ``'fork'``
    elsewhere.
    """
    p = Parallel(n_jobs=2, backend='multiprocessing')
    if sys.version_info < (3, 4):
        assert_equal(p._mp_context, None)
        return

    # Under Python 3.4+ the multiprocessing context can be configured
    # by an environment variable.
    env_method = os.environ.get('JOBLIB_START_METHOD', '').strip() or None
    if env_method is not None:
        expected_method = env_method
    elif sys.platform == 'win32':
        # Default behavior on Windows.
        expected_method = 'spawn'
    else:
        # Default behavior everywhere else.
        expected_method = 'fork'
    assert_equal(p._mp_context.get_start_method(), expected_method)
def check_backend_context_manager(backend_name):
    """Check the state seen inside ``parallel_backend(backend_name, 3)``.

    The active backend, the effective number of jobs and a default
    ``Parallel()`` instance must all reflect the context manager settings.
    """
    with parallel_backend(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert_equal(active_n_jobs, 3)
        assert_equal(effective_n_jobs(3), 3)
        p = Parallel()
        assert_equal(p.n_jobs, 3)

        # Backend names outside these three families get no type check.
        expected_type = None
        if backend_name == 'multiprocessing':
            expected_type = MultiprocessingBackend
        elif backend_name == 'threading':
            expected_type = ThreadingBackend
        elif backend_name.startswith('test_'):
            expected_type = FakeParallelBackend

        if expected_type is not None:
            assert_equal(type(active_backend), expected_type)
            assert_equal(type(p._backend), expected_type)
Example #16
0
def check_backend_context_manager(backend_name):
    """Check the state seen inside ``parallel_backend(backend_name, 3)``.

    The active backend, the effective number of jobs and a default
    ``Parallel()`` instance must all reflect the context manager settings.
    """
    with parallel_backend(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert active_n_jobs == 3
        assert effective_n_jobs(3) == 3
        p = Parallel()
        assert p.n_jobs == 3

        # Backend names outside these three families get no type check.
        expected_type = None
        if backend_name == 'multiprocessing':
            expected_type = MultiprocessingBackend
        elif backend_name == 'threading':
            expected_type = ThreadingBackend
        elif backend_name.startswith('test_'):
            expected_type = FakeParallelBackend

        if expected_type is not None:
            assert type(active_backend) is expected_type
            assert type(p._backend) is expected_type
Example #17
0
    def __init__(self,
                 vad_config,
                 key_list,
                 file_path,
                 meta_data,
                 max_timestep=None,
                 n_jobs=12):
        """Collect the wav files long enough to be kept, per dataset root.

        For every key in ``key_list`` (paired by index with ``file_path``)
        the per-file lengths are read from a cache file next to this
        module, or computed in parallel and cached when it is missing.

        Parameters
        ----------
        vad_config : mapping
            Must provide ``'min_sec'``, the threshold a length has to
            exceed for the file to be kept.
        key_list : sequence
            Names used to build the cache file names.
        file_path : sequence
            Root directories, indexed like ``key_list``.
        meta_data
            Not used by this constructor.
        max_timestep : optional
            Stored as ``self.max_timestep``; not used here.
        n_jobs : int
            Number of joblib workers used when lengths must be computed.
        """
        self.roots = file_path
        self.root_key = key_list
        self.max_timestep = max_timestep
        self.vad_c = vad_config
        self.dataset = []
        self.all_speakers = []

        def trimmed_length(path):
            # Length of the first axis left after dropping axis 0 of the
            # processed waveform.
            # NOTE(review): presumably a sample count, yet it is compared
            # against a key named 'min_sec' below -- confirm the units.
            wav_sample, _ = apply_effects_file(path, EFFECTS)
            wav_sample = wav_sample.squeeze(0)
            return wav_sample.shape[0]

        for index, key in enumerate(self.root_key):
            cache_dir = Path(os.path.dirname(__file__)) / '.wav_lengths'
            cache_path = cache_dir / f'{key}_length.pt'
            cache_path.parent.mkdir(exist_ok=True)
            root = Path(self.roots[index])

            if cache_path.is_file():
                # Cached: rebuild the paths from the stored trailing parts.
                wav_tags, wav_lengths = torch.load(str(cache_path))
                wav_paths = [root.joinpath(*tag) for tag in wav_tags]
            else:
                wav_paths = find_files(root)
                progress = tqdm.tqdm(wav_paths, desc="Preprocessing")
                wav_lengths = Parallel(n_jobs=n_jobs)(
                    delayed(trimmed_length)(path) for path in progress)
                # Only the last three path components are stored.
                wav_tags = [Path(path).parts[-3:] for path in wav_paths]
                torch.save([wav_tags, wav_lengths], str(cache_path))

            # Every sub-directory of the root counts as one speaker.
            speaker_dirs = [f.stem for f in root.iterdir() if f.is_dir()]
            self.all_speakers.extend(speaker_dirs)

            self.dataset.extend(
                path for path, length in zip(wav_paths, wav_lengths)
                if length > self.vad_c['min_sec'])

        self.all_speakers.sort()
        self.speaker_num = len(self.all_speakers)
Example #18
0
def test_cached_function_race_condition_when_persisting_output(tmpdir, capfd):
    """Check that concurrent writers of one cached output do not clash.

    Several workers call the same cached function and therefore write the
    same output.pkl.  See https://github.com/joblib/joblib/issues/490 for
    more details.
    """
    memory = Memory(location=tmpdir.strpath)
    func_cached = memory.cache(fast_func_with_complex_output)

    Parallel(n_jobs=2)(delayed(func_cached)() for _ in range(3))

    captured_out, captured_err = capfd.readouterr()

    # Both streams are checked (ongoing PR #434 may change the logging
    # destination) to make sure no exception happened while loading the
    # results.
    exception_msg = 'Exception while loading results'
    for captured in (captured_out, captured_err):
        assert exception_msg not in captured
Example #19
0
def test_parallel_pickling():
    """Check that Parallel surfaces the error raised for unpicklable input.

    The exception class to expect is discovered by pickling a local
    function directly, then the same class must be raised by Parallel.
    """

    def g(x):
        return x**2

    try:
        # Pickling a local function always fails, but the exception type
        # depends on the interpreter: PickleError for python <= 3.4 and
        # AttributeError for python >= 3.5.
        pickle.dumps(g)
    except Exception as exc:
        exception_class = type(exc)

    tasks = (delayed(g)(x) for x in range(10))
    assert_raises(exception_class, Parallel(), tasks)
def test_main_thread_renamed_no_warning(backend, monkeypatch):
    """Check that no backend warns just because the main thread is renamed.

    No default backend may rely on the name of the main thread:
    https://github.com/joblib/joblib/issues/180#issuecomment-253266247
    Some programs use a different name for the main thread.  This is the
    case for uWSGI apps for instance.
    """
    # Function-scope import keeps this block self-contained.
    import warnings

    monkeypatch.setattr(target=threading.current_thread(),
                        name='name',
                        value='some_new_name_for_the_main_thread')

    # The stdlib recorder is used rather than ``warns(None)``: if ``warns``
    # is ``pytest.warns``, passing None is deprecated since pytest 7 and
    # rejected by pytest 8.
    with warnings.catch_warnings(record=True) as warninfo:
        # Record every warning, whatever filters the session installed.
        warnings.simplefilter("always")
        results = Parallel(n_jobs=2, backend=backend)(delayed(square)(x)
                                                      for x in range(3))
        assert results == [0, 1, 4]
    # The multiprocessing backend will raise a warning when detecting that
    # it is started from a non-main thread.  Check that there is no false
    # positive because of the name change.
    assert len(warninfo) == 0
Example #21
0
    def significance(self, pair1, pair2):
        """Estimate, per metric, the significance of a score difference.

        Each argument is a ``(per_doc, overall)`` pair.  The configured
        method is run in parallel over the job shares, and the counts it
        returns are summed per metric into ``(sum + 1) / (trials + 1)``.

        Returns a dict mapping each metric to ``{'diff': ..., 'p': ...}``.
        """
        per_doc1, overall1 = pair1
        per_doc2, overall2 = pair2
        # TODO: limit to metrics
        base_diff = _result_diff(overall1, overall2)
        method = self.METHODS[self.method]
        randomized_diffs = functools.partial(method, per_doc1, per_doc2,
                                             base_diff)
        runner = Parallel(n_jobs=self.n_jobs)
        results = runner(
            delayed(randomized_diffs)(share)
            for share in _job_shares(self.n_jobs, self.trials))

        # One tuple of counts per job; the metric names of the last job
        # are the ones used below.
        per_job_counts = []
        for job_result in results:
            metrics, job_counts = zip(*job_result.items())
            per_job_counts.append(job_counts)

        # Transpose so that each metric gets its counts across all jobs.
        outcome = {}
        for metric, counts in zip(metrics, zip(*per_job_counts)):
            outcome[metric] = {
                'diff': base_diff[metric],
                'p': (sum(counts) + 1) / (self.trials + 1),
            }
        return outcome
Example #22
0
def test_direct_parameterized_backend_context_manager():
    """Check that a backend instance can be given to ``parallel_backend``.

    The instance is passed directly, without registration, and must become
    the active backend for the duration of the context only.
    """
    assert _active_backend_type() == DefaultBackend

    unregistered = ParameterizedParallelBackend(param=43)
    with parallel_backend(unregistered, n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) is ParameterizedParallelBackend
        assert active_backend.param == 43
        assert active_n_jobs == 5

        # A default Parallel picks up both the backend and n_jobs.
        p = Parallel()
        assert p.n_jobs == 5
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))

    expected_roots = [sqrt(i) for i in range(5)]
    assert results == expected_roots

    # The default backend is restored once the context is left.
    assert _active_backend_type() == DefaultBackend
Example #23
0
def test_parameterized_backend_context_manager(monkeypatch):
    """Check ``parallel_backend`` with a registered, parameterized backend.

    The backend is registered under ``'param_backend'`` for the duration
    of the test and selected by name, with an extra ``param`` argument.
    """
    monkeypatch.setitem(BACKENDS, 'param_backend',
                        ParameterizedParallelBackend)
    assert _active_backend_type() == DefaultBackend

    with parallel_backend('param_backend', param=42, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) is ParameterizedParallelBackend
        assert active_backend.param == 42
        assert active_n_jobs == 3

        # A default Parallel picks up both the backend and n_jobs.
        p = Parallel()
        assert p.n_jobs == 3
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))

    expected_roots = [sqrt(i) for i in range(5)]
    assert results == expected_roots

    # The default backend is restored once the context is left.
    assert _active_backend_type() == DefaultBackend
Example #24
0
def test_backend_batch_statistics_reset(backend):
    """Test that a parallel backend correctly resets its batch statistics."""
    n_jobs = 2
    n_inputs = 500
    task_time = 2. / n_inputs

    p = Parallel(verbose=10, n_jobs=n_jobs, backend=backend)

    # The same Parallel instance is run twice; after each run the backend
    # statistics must be back to their default values.
    for _ in range(2):
        p(delayed(time.sleep)(task_time) for i in range(n_inputs))
        used_backend = p._backend
        assert (used_backend._effective_batch_size ==
                used_backend._DEFAULT_EFFECTIVE_BATCH_SIZE)
        assert (used_backend._smoothed_batch_duration ==
                used_backend._DEFAULT_SMOOTHED_BATCH_DURATION)
Example #25
0
def test_backend_context_manager(monkeypatch, backend):
    """Check that ``parallel_backend`` switches and restores the backend.

    Unknown backend names are registered as ``FakeParallelBackend`` for
    the duration of the test.
    """
    if backend not in BACKENDS:
        monkeypatch.setitem(BACKENDS, backend, FakeParallelBackend)

    assert _active_backend_type() == DefaultBackend
    # Check that it is possible to switch parallel backends sequentially.
    check_backend_context_manager(backend)

    # The default backend is restored.
    assert _active_backend_type() == DefaultBackend

    # Check that context manager switching is thread safe.
    # NOTE(review): the `if not b` filter keeps only falsy entries, so no
    # task is dispatched when every backend name is a non-empty string --
    # confirm whether the filter is intentional.
    threaded = Parallel(n_jobs=2, backend='threading')
    threaded(
        delayed(check_backend_context_manager)(b)
        for b in all_backends_for_context_manager if not b)

    # The default backend is again restored.
    assert _active_backend_type() == DefaultBackend
Example #26
0
def test_error_capture():
    """Check that worker errors are captured and re-raised correctly.

    Exercises both the one-shot ``Parallel(...)`` call and the context
    manager API, and checks that the managed pool survives a failure.
    """

    def failing_divisions():
        # The second task divides 1 by 0.
        return [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))]

    def interrupting_tasks():
        return [delayed(interrupt_raiser)(x) for x in (1, 0)]

    if mp is not None:
        # A JoblibException will be raised only if there is indeed
        # multiprocessing.
        assert_raises(JoblibException, Parallel(n_jobs=2),
                      failing_divisions())
        assert_raises(WorkerInterrupt, Parallel(n_jobs=2),
                      interrupting_tasks())

        # Try again with the context manager API.
        with Parallel(n_jobs=2) as managed:
            assert_true(managed._pool is not None)

            assert_raises(JoblibException, managed, failing_divisions())

            # The managed pool should still be available and be in a
            # working state despite the previously raised (and caught)
            # exception.
            assert_true(managed._pool is not None)
            assert_equal([f(x, y=1) for x in range(10)],
                         managed(delayed(f)(x, y=1) for x in range(10)))

            assert_raises(WorkerInterrupt, managed, interrupting_tasks())

            # The pool should still be available despite the exception.
            assert_true(managed._pool is not None)
            assert_equal([f(x, y=1) for x in range(10)],
                         managed(delayed(f)(x, y=1) for x in range(10)))

        # The inner pool must have been terminated when exiting the
        # context manager.
        assert_true(managed._pool is None)
    else:
        assert_raises(KeyboardInterrupt, Parallel(n_jobs=2),
                      interrupting_tasks())

    # Wrapped exceptions should inherit from the class of the original
    # exception to make it easy to catch them.
    assert_raises(ZeroDivisionError, Parallel(n_jobs=2),
                  failing_divisions())
    try:
        # JoblibException wrapping is disabled in sequential mode:
        ex = JoblibException()
        sequential = Parallel(n_jobs=1)
        sequential(delayed(division)(x, y)
                   for x, y in zip((0, 1), (1, 0)))
    except Exception as ex:
        assert_false(isinstance(ex, JoblibException))
Example #27
0
def noise_eval():
    """
    Test: intrinsic eval on noise-corrupted vectors

    For every word-level dataset and every named vector file, runs
    ``_intrinsic_eval_words`` in parallel over a range of noise levels and
    writes one 'strict' and one 'relaxed' row per returned record to
    ``intrinsic_noise_word_level.csv``.
    """
    noise_levels = np.arange(0, 3.1, .2)
    noise_data = []
    for dname, df in word_level_datasets():
        for vname, path in zip(NAMES, PATHS):
            logging.info('starting %s %s', dname, vname)
            res = Parallel(n_jobs=-1)(
                delayed(_intrinsic_eval_words)(path, df, noise)
                for noise in noise_levels)
            # Each job returns an iterable of 7-field records; the sixth
            # field is not used.
            for record in chain.from_iterable(res):
                strict, relaxed, noise, rel_pval, str_pval, _, boot_i = record
                noise_data.extend([
                    (vname, dname, noise, 'strict', strict, str_pval, boot_i),
                    (vname, dname, noise, 'relaxed', relaxed, rel_pval,
                     boot_i),
                ])
    column_names = ['vect', 'test', 'noise', 'kind', 'corr', 'pval', 'folds']
    noise_df = pd.DataFrame(noise_data, columns=column_names)
    noise_df.to_csv('intrinsic_noise_word_level.csv')
def test_direct_parameterized_backend_context_manager():
    """Check that a backend instance can be given to ``parallel_backend``.

    The instance is passed directly, without registration, and must become
    the active backend for the duration of the context only.
    """
    assert_equal(_active_backend_type(), MultiprocessingBackend)

    unregistered = ParameterizedParallelBackend(param=43)
    with parallel_backend(unregistered, n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert_equal(type(active_backend), ParameterizedParallelBackend)
        assert_equal(active_backend.param, 43)
        assert_equal(active_n_jobs, 5)

        # A default Parallel picks up both the backend and n_jobs.
        p = Parallel()
        assert_equal(p.n_jobs, 5)
        assert_true(p._backend is active_backend)
        results = p(delayed(sqrt)(i) for i in range(5))

    expected_roots = [sqrt(i) for i in range(5)]
    assert_equal(results, expected_roots)

    # The default backend is restored once the context is left.
    assert_equal(_active_backend_type(), MultiprocessingBackend)
Example #29
0
def test_nested_exception_dispatch(backend):
    """Ensure errors raised in nested joblib calls reach the caller.

    For Python 2.7, the TransportableException wrapping and unwrapping
    should preserve the traceback information of the inner function calls.

    For Python 3, we rely on the built-in __cause__ system that already
    reports this kind of information to the user.
    """
    if PY27 and backend == 'multiprocessing':
        raise SkipTest("Nested parallel calls can deadlock with the python 2.7"
                       "multiprocessing backend.")

    # The two function names below are looked up in the error report, so
    # they must not be renamed.
    def nested_function_inner(i):
        Parallel(n_jobs=2)(
            delayed(exception_raiser)(j) for j in range(30))

    def nested_function_outer(i):
        Parallel(n_jobs=2)(
            delayed(nested_function_inner)(j) for j in range(30))

    with raises(ValueError) as excinfo:
        outermost = Parallel(n_jobs=2, backend=backend)
        outermost(delayed(nested_function_outer)(i) for i in range(30))

    # Important information such as function names must be visible in the
    # final error message reported to the user.
    report = "".join(
        format_exception(excinfo.type, excinfo.value, excinfo.tb))
    for function_name in ('nested_function_outer',
                          'nested_function_inner',
                          'exception_raiser'):
        assert function_name in report

    # Under Python 3, there is no need for exception wrapping as the
    # exception raised in a worker process is transportable by default and
    # preserves the necessary information via the `__cause__` attribute.
    # Under Python 2.7, the wrapping mechanism used to make exceptions
    # transportable does not create a JoblibJoblibJoblibValueError despite
    # the 3 nested parallel calls.
    expected_type = ValueError if PY3_OR_LATER else JoblibValueError
    assert type(excinfo.value) is expected_type
Example #30
0
def test_no_blas_crash_or_freeze_with_multiprocessing():
    """Check that BLAS calls work in multiprocessing workers.

    On recent Python versions, the forkserver start method can make it
    possible to use multiprocessing together with whatever BLAS
    implementation numpy happens to use, without causing a freeze or a
    crash.  The test is skipped before Python 3.4.
    """
    if sys.version_info < (3, 4):
        raise nose.SkipTest('multiprocessing can cause BLAS freeze on'
                            ' old Python')

    rng = np.random.RandomState(42)

    # Call BLAS DGEMM once in the main process to force the initialization
    # of the internal thread-pool.
    a = rng.randn(1000, 1000)
    np.dot(a, a.T)

    # The internal BLAS thread-pool must not be in an inconsistent state
    # in the worker processes managed by multiprocessing.
    workers = Parallel(n_jobs=2, backend='multiprocessing')
    workers(delayed(np.dot)(a, a.T) for _ in range(2))