Example #1
def test_nested_parallel_warnings():
    # The warnings happen in the child processes, so
    # warnings.catch_warnings cannot be used for this test; that's
    # why we use check_subprocess_call instead.
    if posix is None:
        # This test passes only when fork is the process start method
        raise SkipTest('Not a POSIX platform')

    template_code = """
import sys

from joblib import Parallel, delayed


def func():
    return 42


def parallel_func():
    res = Parallel(n_jobs={inner_n_jobs})(delayed(func)() for _ in range(3))
    return res

Parallel(n_jobs={outer_n_jobs})(delayed(parallel_func)() for _ in range(5))
    """
    # no warnings if inner_n_jobs=1
    code = template_code.format(inner_n_jobs=1, outer_n_jobs=2)
    check_subprocess_call([sys.executable, '-c', code],
                          stderr_regex='^$')

    # warnings if inner_n_jobs != 1
    regex = ('Multiprocessing-backed parallel loops cannot '
             'be nested')
    code = template_code.format(inner_n_jobs=2, outer_n_jobs=2)
    check_subprocess_call([sys.executable, '-c', code],
                          stderr_regex=regex)
Example #2
def parallel_search(k_pot, pots, lambdas, n_jobs=4):
    """Method for Parallel L-curve computation

    Parameters
    ----------
    k_pot : np.array
    pots : list
    lambdas : list
    Returns
    -------
    modelnormseq : list
    residualseq : list

    """
    if PARALLEL_AVAILABLE:
        jobs = (delayed(L_model_fast)(k_pot, pots, lamb, i)
                for i, lamb in enumerate(lambdas))
        modelvsres = Parallel(n_jobs=n_jobs, backend='threading')(jobs)
    else:
        # Sequential fallback when joblib is not available
        modelvsres = []
        for i, lamb in enumerate(lambdas):
            modelvsres.append(L_model_fast(k_pot, pots, lamb, i))
    modelnormseq, residualseq = zip(*modelvsres)
    return modelnormseq, residualseq
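The zip(*modelvsres) step above splits the list of (model_norm, residual) pairs returned by the workers into two separate sequences. A toy sketch of the same pattern, with a made-up worker function fit_one:

from joblib import Parallel, delayed

def fit_one(lamb):
    # Stand-in worker that returns a (model_norm, residual) pair.
    return lamb ** 2, 1.0 / (1.0 + lamb)

pairs = Parallel(n_jobs=2, backend='threading')(
    delayed(fit_one)(lamb) for lamb in [0.1, 1.0, 10.0])
model_norms, residuals = zip(*pairs)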
Example #3
def concurrent_get_filename(array, temp_dirs):
    with Parallel(backend='loky', n_jobs=2, max_nbytes=10) as p:
        for i in range(10):
            [filename] = p(delayed(getattr)(array, 'filename')
                           for _ in range(1))
            temp_dirs.add(os.path.dirname(filename))
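Setting max_nbytes=10 above forces joblib to memory-map even small array arguments, which is why the array seen by the worker exposes a .filename attribute. A minimal sketch of the same mechanism (array size and threshold chosen arbitrarily; with a process-based backend such as loky, arguments above the threshold arrive as np.memmap instances):

import numpy as np
from joblib import Parallel, delayed

def argument_type(arr):
    # Report how the argument shows up on the worker side.
    return type(arr).__name__

data = np.zeros(int(1e6))  # ~8 MB, above the '1M' memmapping threshold
kinds = Parallel(n_jobs=2, backend='loky', max_nbytes='1M')(
    delayed(argument_type)(data) for _ in range(2))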
Example #4
def pmap(f, arr, n_jobs=-1, prefer='threads', verbose=10):
    return Parallel(n_jobs=n_jobs, prefer=prefer,
                    verbose=verbose)(delayed(f)(i) for i in arr)
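A quick usage sketch for this pmap wrapper; the inputs are arbitrary, and since the default prefer='threads' keeps the work in the caller's process, the callable never needs to be pickled:

# Even a lambda works here because the threading preference avoids pickling.
squares = pmap(lambda x: x * x, range(8), n_jobs=2, verbose=0)
assert squares == [x * x for x in range(8)]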
Example #5
def check_same_results(params):
    n_tasks = params.pop('n_tasks')
    expected = [square(i) for i in range(n_tasks)]
    results = Parallel(**params)(delayed(square)(i) for i in range(n_tasks))
    assert_equal(results, expected)
Example #6
def test_parallel_timeout_fail():
    # Check that the timeout properly fails when the function is too slow
    for backend in ['multiprocessing', 'threading']:
        nose.tools.assert_raises(
            TimeoutError, Parallel(n_jobs=2, backend=backend, timeout=0.01),
            (delayed(sleep)(10) for x in range(10)))
Example #7
def check_nested_loop(parent_backend, child_backend):
    Parallel(n_jobs=2,
             backend=parent_backend)(delayed(nested_loop)(child_backend)
                                     for _ in range(2))
Example #8
    param_names.append("seed")
    param_names.append("result")
    true_param_len = len(param_names)
    # Important to record values to calculate global minima efficiently
    for m in metrics.keys():
        for split in splits:
            split_key = "{}_scores".format(split)
            param_names.append("{}_{}".format(m, split_key))
    # df = pd.DataFrame([], columns=param_names)
    count = 0
    missing = []

    with parallel_backend(backend="multiprocessing", n_jobs=args.n_jobs):
        dfs = Parallel()(
            delayed(joblib_fn)(
                count, entry, param_names
            ) for count, entry in enumerate(itertools.product(*param_list), start=1)
        )
    missing = [_df for _df in dfs if isinstance(_df, int)]
    dfs = [_df for _df in dfs if isinstance(_df, pd.DataFrame)]

    df = pd.concat(dfs).sort_index()
    try:
        _global_mins = extract_global_minimums(
            df, z_discrete.get_hyperparameter_names(), true_param_len
        )
    except Exception as e:
        print(repr(e))
        _global_mins = None

    global_mins = dict(val=dict(), test=dict())
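The parallel_backend context manager used above fixes the backend and worker count for every Parallel() created inside the block, which is why Parallel() can be instantiated without arguments there. A minimal self-contained sketch:

from joblib import Parallel, delayed, parallel_backend

def square(x):
    return x * x

with parallel_backend('threading', n_jobs=2):
    # No backend/n_jobs arguments needed; they come from the context.
    results = Parallel()(delayed(square)(i) for i in range(4))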
Example #9
def test_simple_parallel(backend, n_jobs, verbose):
    assert ([square(x) for x in range(5)
             ] == Parallel(n_jobs=n_jobs, backend=backend,
                           verbose=verbose)(delayed(square)(x)
                                            for x in range(5)))
Example #10
def parallel_func(inner_n_jobs, backend):
    return Parallel(n_jobs=inner_n_jobs,
                    backend=backend)(delayed(square)(i) for i in range(3))
Example #11
def test_func_code_consistency():
    from joblib.parallel import Parallel, delayed
    codes = Parallel(n_jobs=2)(delayed(_get_code)() for _ in range(5))
    assert len(set(codes)) == 1
Example #12
        quartiles = list(range(0, 100, 25))
        feat_quartiles = np.array(
            [[np.nanpercentile(data[start:end], q) for start, end in blocks]
             for q in quartiles])
        feat_quartiles = flatten(feat_quartiles)
        feats += feat_mean + feat_std + feat_quartiles

        mods = ['mean', 'std'] + [f'quart{q}' for q in quartiles]
        for start, end in blocks:
            _names = [f'{feat_name}_{mod}_block{start}:{end}' for mod in mods]
            for _name in _names:
                feature_names[_name] = {'start': start, 'type': feat_name}
    return feats, feature_names


res = Parallel(16)(delayed(extract)(p)
                   for p in tqdm(ss, desc='extracting features'))

feature_names = res[0][1]
feature_types = list(set([x["type"] for x in feature_names.values()]))
data_x = np.array([x for x, _ in res])
data_y = np.array([p.group == 'nt1' for p in ss])

if len(feature_names) != len(data_x.T):
    print(
        f'Names do not match length of feature vector '
        f'{len(data_x.T)}!={len(feature_names)}'
    )

# replace nan values with the mean of this value over all other participants
for i in range(data_x.shape[-1]):
    data_x[np.isnan(data_x[:, i]), i] = np.nanmean(data_x[:, i])
data_x = np.nan_to_num(data_x, posinf=0, neginf=0)
Example #13
def nested_call(n):
    return Parallel(n_jobs=2)(delayed(id)(i) for i in range(n))
Example #14
def get_nested_pids():
    assert _active_backend_type() == ThreadingBackend
    return Parallel(n_jobs=2)(delayed(os.getpid)() for _ in range(2))
Example #15
def nested_function_outer(i):
    Parallel(n_jobs=2)(
        delayed(nested_function_inner)(j) for j in range(30))
Example #16
def nested_function_inner(i):
    Parallel(n_jobs=2)(
        delayed(exception_raiser)(j) for j in range(30))
Example #17
def test_parallel_with_exhausted_iterator():
    exhausted_iterator = iter([])
    assert Parallel(n_jobs=2)(exhausted_iterator) == []
Example #18
def test_mutate_input_with_threads():
    """Input is mutable when using the threading backend"""
    q = Queue(maxsize=5)
    Parallel(n_jobs=2,
             backend="threading")(delayed(q.put)(1) for _ in range(5))
    assert q.full()
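The assertion q.full() holds because the threading backend runs the workers inside the caller's process, so in-place mutation of shared objects is visible after the parallel call. A small sketch of the same idea with an ordinary list (the names are illustrative):

from joblib import Parallel, delayed

shared = []
# Threads share the caller's memory, so the appends land in this very list.
Parallel(n_jobs=2, backend="threading")(delayed(shared.append)(i) for i in range(5))
assert sorted(shared) == [0, 1, 2, 3, 4]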
Example #19
def nested_loop(backend):
    Parallel(n_jobs=2, backend=backend)(delayed(square)(.01) for _ in range(2))
Example #20
def test_parallel_kwargs(n_jobs):
    """Check the keyword argument processing of pmap."""
    lst = range(10)
    assert ([f(x, y=1) for x in lst
             ] == Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst))
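delayed(f)(x, y=1) does not call f immediately; it captures the function together with its positional and keyword arguments so the call can be replayed inside a worker. A quick sketch of replaying such a captured call in the caller (a toy f is defined here for illustration):

from joblib import delayed

def f(x, y=0):
    return x + y

func, args, kwargs = delayed(f)(2, y=1)
# Replaying the captured call gives the same result as f(2, y=1).
assert func(*args, **kwargs) == 3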
Example #21
def test_parallel_kwargs():
    """Check the keyword argument processing of pmap."""
    lst = range(10)
    for n_jobs in (1, 4):
        yield (assert_equal, [f(x, y=1) for x in lst],
               Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst))
Example #22
def test_parallel_timeout_success(backend):
    # Check that no TimeoutError is raised when the function is fast enough
    assert len(
        Parallel(n_jobs=2, backend=backend,
                 timeout=10)(delayed(sleep)(0.001) for x in range(10))) == 10
Example #23
def test_exception_dispatch():
    "Make sure that exception raised during dispatch are indeed captured"
    assert_raises(ValueError, Parallel(n_jobs=2, pre_dispatch=16, verbose=0),
                  (delayed(exception_raiser)(i) for i in range(30)))
Example #24
def test_parallel_timeout_fail(backend):
    # Check that the timeout properly fails when the function is too slow
    with raises(TimeoutError):
        Parallel(n_jobs=2, backend=backend,
                 timeout=0.01)(delayed(sleep)(10) for x in range(10))
Example #25
def test_parallel_with_exhausted_iterator():
    exhausted_iterator = iter([])
    assert_equal(Parallel(n_jobs=2)(exhausted_iterator), [])
Example #26
def test_error_capture(backend):
    # Check that errors are captured, and that the correct exceptions
    # are raised.
    if mp is not None:
        # A JoblibException will be raised only if there is indeed
        # multiprocessing
        with raises(JoblibException):
            Parallel(n_jobs=2, backend=backend)(
                [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])
        with raises(WorkerInterrupt):
            Parallel(n_jobs=2, backend=backend)(
                [delayed(interrupt_raiser)(x) for x in (1, 0)])

        # Try again with the context manager API
        with Parallel(n_jobs=2, backend=backend) as parallel:
            assert get_workers(parallel._backend) is not None
            original_workers = get_workers(parallel._backend)

            with raises(JoblibException):
                parallel(
                    [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])

            # The managed pool should still be available and be in a working
            # state despite the previously raised (and caught) exception
            assert get_workers(parallel._backend) is not None

            # The pool should have been interrupted and restarted:
            assert get_workers(parallel._backend) is not original_workers

            assert ([f(x, y=1) for x in range(10)
                     ] == parallel(delayed(f)(x, y=1) for x in range(10)))

            original_workers = get_workers(parallel._backend)
            with raises(WorkerInterrupt):
                parallel([delayed(interrupt_raiser)(x) for x in (1, 0)])

            # The pool should still be available despite the exception
            assert get_workers(parallel._backend) is not None

            # The pool should have been interrupted and restarted:
            assert get_workers(parallel._backend) is not original_workers

            assert ([f(x, y=1) for x in range(10)
                     ] == parallel(delayed(f)(x, y=1) for x in range(10)))

        # Check that the inner pool has been terminated when exiting the
        # context manager
        assert get_workers(parallel._backend) is None
    else:
        with raises(KeyboardInterrupt):
            Parallel(n_jobs=2)([delayed(interrupt_raiser)(x) for x in (1, 0)])

    # wrapped exceptions should inherit from the class of the original
    # exception to make it easy to catch them
    with raises(ZeroDivisionError):
        Parallel(n_jobs=2)(
            [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])

    with raises(MyExceptionWithFinickyInit):
        Parallel(n_jobs=2,
                 verbose=0)((delayed(exception_raiser)(i,
                                                       custom_exception=True)
                             for i in range(30)))

    try:
        # JoblibException wrapping is disabled in sequential mode:
        ex = JoblibException()
        Parallel(n_jobs=1)(delayed(division)(x, y)
                           for x, y in zip((0, 1), (1, 0)))
    except Exception as ex:
        assert not isinstance(ex, JoblibException)
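Beyond exception handling, the with Parallel(...) as parallel form used in this test keeps a single worker pool alive across several parallel(...) calls, which avoids paying the pool start-up cost repeatedly. A stripped-down sketch of that pattern:

from joblib import Parallel, delayed

def square(x):
    return x * x

with Parallel(n_jobs=2) as parallel:
    # The same worker pool serves both calls below.
    first = parallel(delayed(square)(i) for i in range(5))
    second = parallel(delayed(square)(i) for i in range(5, 10))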
Example #27
def test_func_code_consistency():
    from joblib.parallel import Parallel, delayed
    codes = Parallel(n_jobs=2)(delayed(_get_code)() for _ in range(5))
    nose.tools.assert_equal(len(set(codes)), 1)
Example #28
def test_invalid_backend():
    with raises(ValueError):
        Parallel(backend='unit-testing')
Example #29
def test_parallel_call_cached_function_defined_in_jupyter(
        tmpdir, call_before_reducing):
    # Calling an interactively defined memory.cache()'d function inside a
    # Parallel call used to clear the existing cache related to that
    # function (https://github.com/joblib/joblib/issues/1035)

    # This test checks that this is no longer the case.

    # TODO: test that the cache related to the function persists across
    # ipython sessions (provided that no code changes were made to the
    # function's source)?

    # The first part of the test makes the necessary low-level calls to emulate
    # the definition of a function in a jupyter notebook cell. Joblib has
    # some custom code to treat functions defined specifically in jupyter
    # notebooks/ipython sessions -- we want to test this code, which requires
    # the emulation to be rigorous.
    for session_no in [0, 1]:
        ipython_cell_source = '''
        def f(x):
            return x
        '''

        ipython_cell_id = '<ipython-input-{}-000000000000>'.format(session_no)

        exec(
            compile(textwrap.dedent(ipython_cell_source),
                    filename=ipython_cell_id,
                    mode='exec'))
        # f is now accessible in the locals mapping - but for some unknown
        # reason, f = locals()['f'] throws a KeyError at runtime, so we need to
        # bind locals()['f'] to a different name in the local namespace
        aliased_f = locals()['f']
        aliased_f.__module__ = "__main__"

        # Preliminary sanity checks, and tests checking that joblib properly
        # identified f as an interactive function defined in a jupyter notebook
        assert aliased_f(1) == 1
        assert aliased_f.__code__.co_filename == ipython_cell_id

        memory = Memory(location=tmpdir.strpath, verbose=0)
        cached_f = memory.cache(aliased_f)

        assert len(os.listdir(tmpdir / 'joblib')) == 1
        f_cache_relative_directory = os.listdir(tmpdir / 'joblib')[0]
        assert 'ipython-input' in f_cache_relative_directory

        f_cache_directory = tmpdir / 'joblib' / f_cache_relative_directory

        if session_no == 0:
            # The cache should be empty as cached_f has not been called yet.
            assert os.listdir(f_cache_directory) == ['f']
            assert os.listdir(f_cache_directory / 'f') == []

            if call_before_reducing:
                cached_f(3)
                # Two entries were just created: func_code.py, and a folder
                # containing the information (input hash/output) of
                # cached_f(3)
                assert len(os.listdir(f_cache_directory / 'f')) == 2

                # Now, testing #1035: when calling a cached function, joblib
                # used to dynamically inspect the underlying function to
                # extract its source code (to verify it matches the source code
                # of the function as last inspected by joblib) -- however,
                # source code introspection fails for dynamic functions sent to
                # child processes, which would eventually make joblib clear
                # the cache associated with f
                res = Parallel(n_jobs=2)(delayed(cached_f)(i) for i in [1, 2])
            else:
                # Submit the function to the joblib child processes, although
                # the function has never been called in the parent yet. This
                # triggers a specific code branch inside
                # MemorizedFunc.__reduce__.
                res = Parallel(n_jobs=2)(delayed(cached_f)(i) for i in [1, 2])
                assert len(os.listdir(f_cache_directory / 'f')) == 3

                cached_f(3)

            # Making sure f's cache does not get cleared after the parallel
            # calls, and contains ALL cached function calls (f(1), f(2), f(3))
            # and 'func_code.py'
            assert len(os.listdir(f_cache_directory / 'f')) == 4
        else:
            # For the second session, there should be an already existing cache
            assert len(os.listdir(f_cache_directory / 'f')) == 4

            cached_f(3)

            # The previous cache should not be invalidated after calling the
            # function in a new session
            assert len(os.listdir(f_cache_directory / 'f')) == 4
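Outside of the notebook-emulation details, the pattern exercised here is simply a Memory-cached function being dispatched through Parallel. A minimal sketch under the assumption that the function is defined at module level (the cache directory is arbitrary):

import tempfile
from joblib import Memory, Parallel, delayed

memory = Memory(location=tempfile.mkdtemp(), verbose=0)

@memory.cache
def slow_identity(x):
    return x

# Results cached by the workers are reused on subsequent calls.
results = Parallel(n_jobs=2)(delayed(slow_identity)(i) for i in [1, 2])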
Example #30
def test_invalid_njobs(backend):
    with raises(ValueError) as excinfo:
        Parallel(n_jobs=0, backend=backend)._initialize_backend()
    assert "n_jobs == 0 in Parallel has no meaning" in str(excinfo.value)
Example #31
def test_invalid_batch_size(batch_size):
    with raises(ValueError):
        Parallel(batch_size=batch_size)
Example #32
def _recursive_parallel(nesting_limit=None):
    """A horrible function that does recursive parallel calls"""
    return Parallel()(delayed(_recursive_parallel)() for i in range(2))