Example #1
def test_backend_hinting_and_constraints_with_custom_backends(capsys):
    # Custom backends can declare that they use threads and have shared memory
    # semantics:
    class MyCustomThreadingBackend(ParallelBackendBase):
        supports_sharedmem = True
        use_threads = True

        def apply_async(self):
            pass

        def effective_n_jobs(self, n_jobs):
            return n_jobs

    with parallel_backend(MyCustomThreadingBackend()):
        p = Parallel(n_jobs=2, prefer='processes')  # ignored
        assert type(p._backend) == MyCustomThreadingBackend

        p = Parallel(n_jobs=2, require='sharedmem')
        assert type(p._backend) == MyCustomThreadingBackend

    class MyCustomProcessingBackend(ParallelBackendBase):
        supports_sharedmem = False
        use_threads = False

        def apply_async(self):
            pass

        def effective_n_jobs(self, n_jobs):
            return n_jobs

    with parallel_backend(MyCustomProcessingBackend()):
        p = Parallel(n_jobs=2, prefer='processes')
        assert type(p._backend) == MyCustomProcessingBackend

        out, err = capsys.readouterr()
        assert out == ""
        assert err == ""

        p = Parallel(n_jobs=2, require='sharedmem', verbose=10)
        assert type(p._backend) == ThreadingBackend

        out, err = capsys.readouterr()
        expected = ("Using ThreadingBackend as joblib.Parallel backend "
                    "instead of MyCustomProcessingBackend as the latter "
                    "does not provide shared memory semantics.")
        assert out.strip() == expected
        assert err == ""

    with raises(ValueError):
        Parallel(backend=MyCustomProcessingBackend(), require='sharedmem')
Example #2
def test_thread_bomb_mitigation(backend):
    # Test that recursive parallelism raises a RecursionError rather than
    # saturating operating system resources by creating an unbounded number
    # of threads.
    with parallel_backend(backend, n_jobs=2):
        with raises(RecursionError):
            _recursive_parallel()
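The _recursive_parallel helper is not shown in this listing. A minimal sketch consistent with how the test uses it (every call fans out into further Parallel calls, so only the interpreter's recursion limit stops it) could be:

from joblib import Parallel, delayed


def _recursive_parallel():
    # Hypothetical sketch of the missing helper: each invocation recurses
    # through Parallel, so it can only terminate with a RecursionError.
    return Parallel(n_jobs=2)(delayed(_recursive_parallel)()
                              for _ in range(2))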
Example #3
def test_global_parallel_backend():
    default = Parallel()._backend

    pb = parallel_backend('threading')
    assert isinstance(Parallel()._backend, ThreadingBackend)

    pb.unregister()
    assert type(Parallel()._backend) is type(default)
Example #4
def test_external_backends():
    def register_foo():
        BACKENDS['foo'] = ThreadingBackend

    EXTERNAL_BACKENDS['foo'] = register_foo

    with parallel_backend('foo'):
        assert isinstance(Parallel()._backend, ThreadingBackend)
Example #5
def test_sklearn_cv():
    iris = datasets.load_iris()
    clf = svm.SVC(kernel='linear', C=1)
    with parallel_backend('spark', n_jobs=3):
        scores = cross_val_score(clf, iris.data, iris.target, cv=5)

    expected = [0.97, 1.0, 0.97, 0.97, 1.0]

    for i in range(5):
        assert (pytest.approx(scores[i], 0.01) == expected[i])

    # test with default n_jobs=-1
    with parallel_backend('spark'):
        scores = cross_val_score(clf, iris.data, iris.target, cv=5)

    for i in range(5):
        assert (pytest.approx(scores[i], 0.01) == expected[i])
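Note that 'spark' is not one of joblib's built-in backends; it is provided by the joblib-spark package and must be registered before any parallel_backend('spark') block is entered, roughly like this:

# Register the Spark backend with joblib (provided by joblib-spark).
from joblibspark import register_spark

register_spark()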
Example #6
def test_simple():
    with parallel_backend('spark') as (ba, _):
        seq = Parallel(n_jobs=5)(delayed(inc)(i) for i in range(10))
        assert seq == [inc(i) for i in range(10)]

    with pytest.raises(BaseException):
        Parallel(n_jobs=5)(delayed(slow_raise_value_error)(i == 3)
                           for i in range(10))
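The inc and slow_raise_value_error helpers are assumed by this test; minimal sketches matching the way they are called above might be:

import time


def inc(x):
    return x + 1


def slow_raise_value_error(condition, duration=0.05):
    # Hypothetical sketch: sleep briefly so the parallel tasks overlap,
    # then fail only for the one task whose condition is True.
    time.sleep(duration)
    if condition:
        raise ValueError("condition was True")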
Example #7
def test_backend_hinting_and_constraints():
    for n_jobs in [1, 2, -1]:
        assert type(Parallel(n_jobs=n_jobs)._backend) == LokyBackend

        p = Parallel(n_jobs=n_jobs, prefer='threads')
        assert type(p._backend) == ThreadingBackend

        p = Parallel(n_jobs=n_jobs, prefer='processes')
        assert type(p._backend) == LokyBackend

        p = Parallel(n_jobs=n_jobs, require='sharedmem')
        assert type(p._backend) == ThreadingBackend

    # Explicit backend selection can override backend hinting although it
    # is useless to pass a hint when selecting a backend.
    p = Parallel(n_jobs=2, backend='loky', prefer='threads')
    assert type(p._backend) == LokyBackend

    with parallel_backend('loky', n_jobs=2):
        # Explicit backend selection by the user with the context manager
        # should be respected when combined with backend hints only.
        p = Parallel(prefer='threads')
        assert type(p._backend) == LokyBackend
        assert p.n_jobs == 2

    with parallel_backend('loky', n_jobs=2):
        # Locally hard-coded n_jobs value is respected.
        p = Parallel(n_jobs=3, prefer='threads')
        assert type(p._backend) == LokyBackend
        assert p.n_jobs == 3

    with parallel_backend('loky', n_jobs=2):
        # Explicit backend selection by the user with the context manager
        # should be ignored when the Parallel call has hard constraints.
        # In this case, the default backend that supports shared memory is
        # used, and the default number of processes is used.
        p = Parallel(require='sharedmem')
        assert type(p._backend) == ThreadingBackend
        assert p.n_jobs == 1

    with parallel_backend('loky', n_jobs=2):
        p = Parallel(n_jobs=3, require='sharedmem')
        assert type(p._backend) == ThreadingBackend
        assert p.n_jobs == 3
Example #8
def test_nested_parallelism_with_dask():
    distributed = pytest.importorskip('distributed')
    client = distributed.Client(n_workers=2, threads_per_worker=2)  # noqa

    # 10 MB of data as argument to trigger implicit scattering
    data = np.ones(int(1e7), dtype=np.uint8)
    for i in range(2):
        with parallel_backend('dask'):
            backend_types_and_levels = _recursive_backend_info(data=data)
        assert len(backend_types_and_levels) == 4
        assert all(name == 'DaskDistributedBackend'
                   for name, _ in backend_types_and_levels)

    # No argument
    with parallel_backend('dask'):
        backend_types_and_levels = _recursive_backend_info()
    assert len(backend_types_and_levels) == 4
    assert all(name == 'DaskDistributedBackend'
               for name, _ in backend_types_and_levels)
Example #9
def test_nested_parallel_limit(backend):
    with parallel_backend(backend, n_jobs=2):
        backend_types_and_levels = _recursive_backend_info()

    top_level_backend_type = backend.title() + 'Backend'
    expected_types_and_levels = [(top_level_backend_type, 0),
                                 ('ThreadingBackend', 1),
                                 ('SequentialBackend', 2),
                                 ('SequentialBackend', 3)]
    assert backend_types_and_levels == expected_types_and_levels
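The _recursive_backend_info helper used by the nesting tests is not part of this listing. A sketch consistent with the assertions above (it returns one (backend class name, nesting level) pair per level, four levels deep by default) could be:

from joblib import Parallel, delayed


def _recursive_backend_info(limit=3, **kwargs):
    # Hypothetical sketch: record the active backend's class name and
    # nesting level, then recurse through a nested Parallel call until
    # `limit` reaches zero, yielding limit + 1 entries in total.
    with Parallel(n_jobs=2) as p:
        this_level = [(type(p._backend).__name__, p._backend.nesting_level)]
        if limit == 0:
            return this_level
        results = p(delayed(_recursive_backend_info)(limit - 1, **kwargs)
                    for _ in range(1))
        return this_level + results[0]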
Example #10
async def get_variables():
    global PROJECTS
    global WITH_UPVOTE
    global UPTIME
    UPTIME['MRBV_SECONDS_VARIABLES'] = time.time() + MRBV_SECONDS_VARIABLES
    this_PROJECTS = copy.copy(PROJECTS)
    # issue https://github.com/scikit-learn/scikit-learn/issues/8920
    with parallel_backend('threading'):
        data = Parallel(n_jobs=4)(delayed(check_project_upvote)(project) for project in this_PROJECTS)
        WITH_UPVOTE = [i for i in data if i is not None]
    print(f"found {len(WITH_UPVOTE)} MRBV_BOT_UPVOTE in {len(PROJECTS)} projects")
Example #11
def test_nested_backend_context_manager():
    # Check that by default, nested parallel calls will always use the
    # ThreadingBackend

    def get_nested_pids():
        assert _active_backend_type() == ThreadingBackend
        return Parallel(n_jobs=2)(delayed(os.getpid)() for _ in range(2))

    for backend in ['threading', 'loky', 'multiprocessing']:
        with parallel_backend(backend):
            pid_groups = Parallel(n_jobs=2)(delayed(get_nested_pids)()
                                            for _ in range(10))
            for pid_group in pid_groups:
                assert len(set(pid_group)) == 1
Example #12
async def get_merges():
    global WITH_UPVOTE
    global MERGE_REQUESTS
    global PROJECTS
    global UPTIME
    UPTIME['MRBV_GET_SECONDS_MERGE'] = time.time() + MRBV_GET_SECONDS_MERGE
    this_WITH_UPVOTE = copy.copy(WITH_UPVOTE)
    with parallel_backend('threading'):
        data = Parallel(n_jobs=4)(delayed(check_merge_upvote)(data) for data in this_WITH_UPVOTE)
        buff = []
        for mergerequests in [i for i in data if i is not None]:
            buff = buff + mergerequests
        MERGE_REQUESTS = buff
    print(f"found {len(MERGE_REQUESTS)} merges with {len(WITH_UPVOTE)} MRBV_BOT_UPVOTE in {len(PROJECTS)} projects")
Example #13
def check_backend_context_manager(backend_name):
    with parallel_backend(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert_equal(active_n_jobs, 3)
        assert_equal(effective_n_jobs(3), 3)
        p = Parallel()
        assert_equal(p.n_jobs, 3)
        if backend_name == "multiprocessing":
            assert_equal(type(active_backend), MultiprocessingBackend)
            assert_equal(type(p._backend), MultiprocessingBackend)
        elif backend_name == "threading":
            assert_equal(type(active_backend), ThreadingBackend)
            assert_equal(type(p._backend), ThreadingBackend)
        elif backend_name.startswith("test_"):
            assert_equal(type(active_backend), FakeParallelBackend)
            assert_equal(type(p._backend), FakeParallelBackend)
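FakeParallelBackend is defined elsewhere in the test module; a plausible sketch, assuming it only needs to pretend to run concurrently while actually executing sequentially, is:

from joblib._parallel_backends import SequentialBackend


class FakeParallelBackend(SequentialBackend):
    # Hypothetical sketch: reports whatever n_jobs it was configured
    # with, while running everything sequentially under the hood.
    def configure(self, n_jobs=1, parallel=None, **backend_args):
        self.n_jobs = self.effective_n_jobs(n_jobs)
        self.parallel = parallel
        return self.n_jobs

    def effective_n_jobs(self, n_jobs=1):
        return max(n_jobs, 1)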
Example #14
def check_backend_context_manager(backend_name):
    with parallel_backend(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert active_n_jobs == 3
        assert effective_n_jobs(3) == 3
        p = Parallel()
        assert p.n_jobs == 3
        if backend_name == 'multiprocessing':
            assert type(active_backend) == MultiprocessingBackend
            assert type(p._backend) == MultiprocessingBackend
        elif backend_name == 'threading':
            assert type(active_backend) == ThreadingBackend
            assert type(p._backend) == ThreadingBackend
        elif backend_name.startswith('test_'):
            assert type(active_backend) == FakeParallelBackend
            assert type(p._backend) == FakeParallelBackend
Example #15
        def __call__(self):
            # Exactly the same as in BatchedCalls, with the
            # difference being that it gets args and kwargs from the
            # object store (where they have been put by
            # put_items_in_object_store).

            # Set the default nested backend to self._backend but do
            # not change the default number of processes to -1.
            with parallel_backend(self._backend, n_jobs=self._n_jobs):
                return [
                    func(
                        *[ray_get_if_needed(arg) for arg in args],
                        **{k: ray_get_if_needed(v) for k, v in kwargs.items()},
                    )
                    for func, args, kwargs in self.items
                ]
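ray_get_if_needed is assumed by this snippet; a minimal sketch that resolves Ray object references back into concrete values, as the comment above describes, might be:

import ray


def ray_get_if_needed(obj):
    # Hypothetical sketch: fetch the value from Ray's object store when
    # given an ObjectRef; pass plain arguments through unchanged.
    if isinstance(obj, ray.ObjectRef):
        return ray.get(obj)
    return obj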
Example #16
def check_backend_context_manager(backend_name):
    with parallel_backend(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert active_n_jobs == 3
        assert effective_n_jobs(3) == 3
        p = Parallel()
        assert p.n_jobs == 3
        if backend_name == 'multiprocessing':
            assert type(active_backend) == MultiprocessingBackend
            assert type(p._backend) == MultiprocessingBackend
        elif backend_name == 'threading':
            assert type(active_backend) == ThreadingBackend
            assert type(p._backend) == ThreadingBackend
        elif backend_name.startswith('test_'):
            assert type(active_backend) == FakeParallelBackend
            assert type(p._backend) == FakeParallelBackend
Example #17
def check_backend_context_manager(backend_name):
    with parallel_backend(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert_equal(active_n_jobs, 3)
        assert_equal(effective_n_jobs(3), 3)
        p = Parallel()
        assert_equal(p.n_jobs, 3)
        if backend_name == 'multiprocessing':
            assert_equal(type(active_backend), MultiprocessingBackend)
            assert_equal(type(p._backend), MultiprocessingBackend)
        elif backend_name == 'threading':
            assert_equal(type(active_backend), ThreadingBackend)
            assert_equal(type(p._backend), ThreadingBackend)
        elif backend_name.startswith('test_'):
            assert_equal(type(active_backend), FakeParallelBackend)
            assert_equal(type(p._backend), FakeParallelBackend)
Example #18
def test_nested_parallelism_limit(backend):
    with parallel_backend(backend, n_jobs=2):
        backend_types_and_levels = _recursive_backend_info()

    if cpu_count() == 1:
        second_level_backend_type = 'SequentialBackend'
    else:
        second_level_backend_type = 'ThreadingBackend'

    top_level_backend_type = backend.title() + 'Backend'
    expected_types_and_levels = [
        (top_level_backend_type, 0),
        (second_level_backend_type, 1),
        ('SequentialBackend', 2),
        ('SequentialBackend', 3)
    ]
    assert backend_types_and_levels == expected_types_and_levels
Example #19
def test_direct_parameterized_backend_context_manager():
    assert _active_backend_type() == MultiprocessingBackend

    # Check that it's possible to pass a backend instance directly,
    # without registration
    with parallel_backend(ParameterizedParallelBackend(param=43), n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) == ParameterizedParallelBackend
        assert active_backend.param == 43
        assert active_n_jobs == 5
        p = Parallel()
        assert p.n_jobs == 5
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))
    assert results == [sqrt(i) for i in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == MultiprocessingBackend
Example #20
def test_direct_parameterized_backend_context_manager():
    assert_equal(_active_backend_type(), MultiprocessingBackend)

    # Check that it's possible to pass a backend instance directly,
    # without registration
    with parallel_backend(ParameterizedParallelBackend(param=43), n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert_equal(type(active_backend), ParameterizedParallelBackend)
        assert_equal(active_backend.param, 43)
        assert_equal(active_n_jobs, 5)
        p = Parallel()
        assert_equal(p.n_jobs, 5)
        assert_true(p._backend is active_backend)
        results = p(delayed(sqrt)(i) for i in range(5))
    assert_equal(results, [sqrt(i) for i in range(5)])

    # The default backend is again restored
    assert_equal(_active_backend_type(), MultiprocessingBackend)
Example #21
def test_direct_parameterized_backend_context_manager():
    assert_equal(_active_backend_type(), MultiprocessingBackend)

    # Check that it's possible to pass a backend instance directly,
    # without registration
    with parallel_backend(ParameterizedParallelBackend(param=43), n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert_equal(type(active_backend), ParameterizedParallelBackend)
        assert_equal(active_backend.param, 43)
        assert_equal(active_n_jobs, 5)
        p = Parallel()
        assert_equal(p.n_jobs, 5)
        assert_true(p._backend is active_backend)
        results = p(delayed(sqrt)(i) for i in range(5))
    assert_equal(results, [sqrt(i) for i in range(5)])

    # The default backend is again restored
    assert_equal(_active_backend_type(), MultiprocessingBackend)
Example #22
def test_direct_parameterized_backend_context_manager():
    assert _active_backend_type() == DefaultBackend

    # Check that it's possible to pass a backend instance directly,
    # without registration
    with parallel_backend(ParameterizedParallelBackend(param=43), n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) == ParameterizedParallelBackend
        assert active_backend.param == 43
        assert active_n_jobs == 5
        p = Parallel()
        assert p.n_jobs == 5
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))
    assert results == [sqrt(i) for i in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == DefaultBackend
Example #23
def test_parameterized_backend_context_manager(monkeypatch):
    monkeypatch.setitem(BACKENDS, 'param_backend',
                        ParameterizedParallelBackend)
    assert _active_backend_type() == DefaultBackend

    with parallel_backend('param_backend', param=42, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) == ParameterizedParallelBackend
        assert active_backend.param == 42
        assert active_n_jobs == 3
        p = Parallel()
        assert p.n_jobs == 3
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))
    assert results == [sqrt(i) for i in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == DefaultBackend
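ParameterizedParallelBackend itself is not shown; a sketch matching its use here (a backend whose constructor stores a required param attribute) could be:

from joblib._parallel_backends import SequentialBackend


class ParameterizedParallelBackend(SequentialBackend):
    # Hypothetical sketch: a pseudo-backend that only records the extra
    # `param` keyword forwarded by parallel_backend(..., param=42).
    def __init__(self, param=None):
        if param is None:
            raise ValueError('param should not be None')
        self.param = param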
Example #24
def test_retrieval_context():
    import contextlib

    class MyBackend(ThreadingBackend):
        i = 0

        @contextlib.contextmanager
        def retrieval_context(self):
            self.i += 1
            yield

    register_parallel_backend("retrieval", MyBackend)

    def nested_call(n):
        return Parallel(n_jobs=2)(delayed(id)(i) for i in range(n))

    with parallel_backend("retrieval") as (ba, _):
        Parallel(n_jobs=2)(delayed(nested_call, check_pickle=False)(i)
                           for i in range(5))
        assert ba.i == 1
Example #25
def test_parameterized_backend_context_manager():
    register_parallel_backend('param_backend', ParameterizedParallelBackend)
    try:
        assert _active_backend_type() == MultiprocessingBackend

        with parallel_backend('param_backend', param=42, n_jobs=3):
            active_backend, active_n_jobs = parallel.get_active_backend()
            assert type(active_backend) == ParameterizedParallelBackend
            assert active_backend.param == 42
            assert active_n_jobs == 3
            p = Parallel()
            assert p.n_jobs == 3
            assert p._backend is active_backend
            results = p(delayed(sqrt)(i) for i in range(5))
        assert results == [sqrt(i) for i in range(5)]

        # The default backend is again restored
        assert _active_backend_type() == MultiprocessingBackend
    finally:
        del BACKENDS['param_backend']
Example #26
def test_parameterized_backend_context_manager():
    register_parallel_backend('param_backend', ParameterizedParallelBackend)
    try:
        assert_equal(_active_backend_type(), MultiprocessingBackend)

        with parallel_backend('param_backend', param=42, n_jobs=3):
            active_backend, active_n_jobs = parallel.get_active_backend()
            assert_equal(type(active_backend), ParameterizedParallelBackend)
            assert_equal(active_backend.param, 42)
            assert_equal(active_n_jobs, 3)
            p = Parallel()
            assert_equal(p.n_jobs, 3)
            assert_true(p._backend is active_backend)
            results = p(delayed(sqrt)(i) for i in range(5))
        assert_equal(results, [sqrt(i) for i in range(5)])

        # The default backend is again restored
        assert_equal(_active_backend_type(), MultiprocessingBackend)
    finally:
        del BACKENDS['param_backend']
Example #27
def test_parameterized_backend_context_manager():
    register_parallel_backend("param_backend", ParameterizedParallelBackend)
    try:
        assert_equal(_active_backend_type(), MultiprocessingBackend)

        with parallel_backend("param_backend", param=42, n_jobs=3):
            active_backend, active_n_jobs = parallel.get_active_backend()
            assert_equal(type(active_backend), ParameterizedParallelBackend)
            assert_equal(active_backend.param, 42)
            assert_equal(active_n_jobs, 3)
            p = Parallel()
            assert_equal(p.n_jobs, 3)
            assert_true(p._backend is active_backend)
            results = p(delayed(sqrt)(i) for i in range(5))
        assert_equal(results, [sqrt(i) for i in range(5)])

        # The default backend is again restored
        assert_equal(_active_backend_type(), MultiprocessingBackend)
    finally:
        del BACKENDS["param_backend"]
Example #28
def test_parameterized_backend_context_manager():
    register_parallel_backend('param_backend', ParameterizedParallelBackend)
    try:
        assert _active_backend_type() == MultiprocessingBackend

        with parallel_backend('param_backend', param=42, n_jobs=3):
            active_backend, active_n_jobs = parallel.get_active_backend()
            assert type(active_backend) == ParameterizedParallelBackend
            assert active_backend.param == 42
            assert active_n_jobs == 3
            p = Parallel()
            assert p.n_jobs == 3
            assert p._backend is active_backend
            results = p(delayed(sqrt)(i) for i in range(5))
        assert results == [sqrt(i) for i in range(5)]

        # The default backend is again restored
        assert _active_backend_type() == MultiprocessingBackend
    finally:
        del BACKENDS['param_backend']
Example #29
def run(allpool, test):
    # return [runOptLearn(allpool[0])]
    if test:
        result = [runOptLearn(p, test) for p in allpool]
    else:
        with parallel_backend('threading'):
            result = Parallel()(delayed(runOptLearn)(p) for p in allpool)

    success = []
    fail = []
    for i in range(len(result)):
        if result[i].success:
            success.append(result[i])
        else:
            logger.error("!!!FAILED: %d %s", i, result[i].shortname)
            fail.append(result[i])
    # for pool in allpool:
    #     runOptLearn(pool)
    #     break;
    logger.debug('finish')
    return success, fail
                        help="The path where all models directories are")
    parser.add_argument("--output_path",
                        default=None,
                        type=str,
                        help="The path to dump yaml file")
    parser.add_argument("--n_jobs",
                        default=4,
                        type=int,
                        help="number of cores")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = input_arguments()

    benches = load_benchmark_settings()
    benches = {k: v for k, v in benches.items() if check_key(k)}

    with parallel_backend(backend="multiprocessing", n_jobs=args.n_jobs):
        dicts = Parallel()(delayed(update_dict_entry)(key, args.path)
                           for key, value in benches.items())
    tabular_dict = dict()
    for entry in dicts:
        tabular_dict.update(entry)
    print("Collected {} keys...".format(len(tabular_dict)))
    with open(os.path.join(args.output_path, "tabular_plot_config.yaml"),
              "w") as f:
        f.writelines(yaml.dump(tabular_dict))
    print("Done!")
Example #31
                # each task-config-fidelity would have been evaluated on a different seed
                obj = {
                    config_hash: {
                        fidelity_hash: {
                            seed: v
                        }
                    }
                }
                task_datas[task_id] = update_table_with_new_entry(task_datas[task_id], obj)
                file_count += 1

            try:
                # deleting data file that was processed
                os.remove(os.path.join(dump_path, filename))
                # os.remove(os.path.join(dump_path, filename))
            except FileNotFoundError:
                continue

        logger.info("\tFinished batch processing in {:.3f} seconds".format(time.time() - start))
        logger.info("\tUpdating benchmark data files...")

        with parallel_backend(backend="loky", n_jobs=args.n_jobs):
            Parallel()(
                delayed(save_task_file)(task_id, obj, output_path) for task_id, obj in task_datas.items()
            )
        logger.info("\tContinuing to next batch")
        logger.info("\t{}".format("-" * 25))

    logger.info("Done!")
    logger.info("Total files processed: {}".format(file_count))
Example #32
def check_backend_context_manager(backend_name):
    with parallel_backend(backend_name):
        assert_equal(parallel.get_default_backend(), backend_name)