def run_this_twice():
    """Exercise the cached generator on the chromosomal and random example
    holdout schemes; a second invocation should be served from the cache."""
    chromo = pd.read_csv("test_dataset/x.csv", index_col=0)
    x = np.random.RandomState(seed=42).randint(10, size=(100))
    list(cached_holdouts_generator(chromo, holdouts=example_chromosomal_holdouts, cache_dir="holdouts")(
        results_directory="results"
    ))
    list(cached_holdouts_generator(x, holdouts=example_random_holdouts, cache_dir="holdouts")(
        results_directory="results"
    ))
    list(cached_holdouts_generator(x, holdouts=example_random_holdouts_2, cache_dir="holdouts")(
        results_directory="results"
    ))
    list(cached_holdouts_generator(x, holdouts=example_balanced_random_holdouts, cache_dir="holdouts")(
        results_directory="results"
    ))
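These snippets appear to be excerpts from a larger test module, so the imports and the `example_*` holdout fixtures are not shown. A minimal sketch of the assumed preamble follows; the module path and symbol list are assumptions inferred from the names used here, not taken from the original.

# Sketch only: the import below is an assumption inferred from the names used
# in these snippets. The example_* holdout schemes, skip, train, touch,
# job_wrapper and slow_random_holdouts are helpers defined elsewhere in the
# same test suite.
import os
import random
from glob import glob
from multiprocessing import Pool, cpu_count
from time import sleep

import numpy as np
import pandas as pd
import pytest

from holdouts_generator import holdouts_generator, cached_holdouts_generator, random_holdouts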
Example #2
def test_holdouts_tampered():
    """Tamper with the cached holdouts (truncate or delete a pickle) and check
    that the generator raises and that clear_invalid_cache prunes the bad entry."""
    clear_all_cache(results_directory="results", cache_dir="holdouts")
    np.random.seed(10)
    generator = cached_holdouts_generator(np.random.randint(100,
                                                            size=(100, 100)),
                                          holdouts=random_holdouts([0.1],
                                                                   [10]),
                                          cache_dir="holdouts")
    list(generator(results_directory="results"))
    paths = glob("holdouts/holdouts/*.pickle.gz")
    path = paths[0]
    os.remove(path)
    touch(path)
    with pytest.raises(ValueError):
        list(generator())
    clear_invalid_cache(cache_dir="holdouts")
    assert set(glob("holdouts/holdouts/*.pickle.gz")) == set(paths[1:])
    list(generator(results_directory="results"))
    paths = glob("holdouts/holdouts/*.pickle.gz")
    path = paths[0]
    os.remove(path)
    with pytest.raises(ValueError):
        list(generator())
    clear_invalid_cache(cache_dir="holdouts")
    assert set(glob("holdouts/holdouts/*.pickle.gz")) == set(paths[1:])
    list(generator(results_directory="results"))
    clear_all_cache(results_directory="results", cache_dir="holdouts")
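The `touch` helper used above is not shown in the snippet; a minimal sketch of what it presumably does (both the name and the behaviour are assumptions):

def touch(path: str):
    # Hypothetical helper: recreate the removed path as an empty file, so the
    # cache entry still exists on disk but no longer matches its recorded content.
    with open(path, "w"):
        pass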
def test_keras_cache():
    """Train a Keras model on each holdout, store the results through the cache,
    and verify that a second pass skips the data and reuses the stored results."""
    clear_all_cache(results_directory="results", cache_dir="holdouts")

    X, y = load_iris(return_X_y=True)
    # Keep only classes 0 and 1 so the task is binary.
    X = X[y != 2]
    y = y[y != 2]
    generator = cached_holdouts_generator(
        X, y, holdouts=example_random_holdouts, cache_dir="holdouts", skip=skip)
    hyper_parameters = {
        "epochs": 10
    }

    for _, _, inner in generator(results_directory="results", hyper_parameters=hyper_parameters):
        for _ in inner(results_directory="results", hyper_parameters=hyper_parameters):
            pass

    for (training, testing), outer_key, inner in generator(results_directory="results", hyper_parameters=hyper_parameters):
        with pytest.raises(ValueError):
            load_result("results", outer_key, hyper_parameters)
        add_work_in_progress("results", outer_key, hyper_parameters)
        for (inner_training, inner_testing), inner_key, _ in inner(results_directory="results", hyper_parameters=hyper_parameters):
            store_keras_result(inner_key, **train(inner_training, inner_testing, hyper_parameters))
            with pytest.raises(ValueError):
                store_keras_result(inner_key, **train(inner_training, inner_testing, hyper_parameters))
        store_keras_result(outer_key, **train(training, testing, hyper_parameters))

    for (training, testing), outer_key, inner in generator(results_directory="results", hyper_parameters=hyper_parameters):
        assert training is None
        assert testing is None
        assert not inner()
        load_result("results", outer_key, hyper_parameters)

    clear_all_cache(results_directory="results", cache_dir="holdouts")
def test_work_in_progress():
    """Check the work-in-progress markers: they cannot be removed before being
    added, and cannot be added again once a result has been stored for the key."""
    clear_all_cache(results_directory="results", cache_dir="holdouts")
    generator = cached_holdouts_generator(np.random.randint(
        100, size=(100, 100)), holdouts=random_holdouts([0.1], [3]), cache_dir="holdouts")
    for _, key, _ in generator(results_directory="results"):
        with pytest.raises(ValueError):
            remove_work_in_progress("results", key)
        add_work_in_progress("results", key)
        store_result(key, {"ciao": 1}, 0, results_directory="results", cache_dir="holdouts")
        with pytest.raises(ValueError):
            add_work_in_progress("results", key)
    clear_all_cache(results_directory="results", cache_dir="holdouts")
def test_multiprocessing():
    """Consume the cached generator from a multiprocessing Pool twice to make
    sure concurrent workers can share the same cache directory."""
    clear_all_cache(results_directory="results", cache_dir="holdouts")
    generator = cached_holdouts_generator(np.random.randint(100,
                                                            size=(100, 100)),
                                          holdouts=random_holdouts([0.1],
                                                                   [24]),
                                          cache_dir="holdouts",
                                          skip=skip)

    with Pool(cpu_count()) as p:
        p.map(job_wrapper, generator(results_directory="results"))
        p.map(job_wrapper, generator(results_directory="results"))
        p.close()
        p.join()
    regroup_results(results_directory="results")
    clear_all_cache(results_directory="results", cache_dir="holdouts")
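`job_wrapper` is not shown either; a plausible sketch, assuming each yielded holdout is the ((training, testing), key, inner) triple seen in the other tests:

def job_wrapper(holdout):
    # Hypothetical worker: unpack one cached holdout and exhaust its inner
    # generator (if any) so that every nested split ends up in the cache.
    (training, testing), key, inner = holdout
    if inner is not None:
        list(inner(results_directory="results"))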
Example #6
def run_this_twice():
    """Compare the plain and the cached generators on balanced random holdouts:
    every nested split must contain exactly the same data."""
    x = np.random.RandomState(seed=42).randint(10, size=(100))
    generator = holdouts_generator(x,
                                   x,
                                   holdouts=example_balanced_random_holdouts,
                                   verbose=False)
    cached_generator = cached_holdouts_generator(
        x, x, holdouts=example_balanced_random_holdouts, cache_dir="holdouts")
    for ((train, test),
         inner), ((cached_train, cached_test), _, cached_inner) in zip(
             generator(), cached_generator(results_directory="results")):
        assert all([
            t.shape == ct.shape and np.all(t == ct)
            for t, ct in zip(train, cached_train)
        ])
        assert all([
            t.shape == ct.shape and np.all(t == ct)
            for t, ct in zip(test, cached_test)
        ])
        for ((inner_train, inner_test),
             small), ((inner_cached_train, inner_cached_test), _,
                      cached_small) in zip(
                          inner(), cached_inner(results_directory="results")):
            assert all([
                t.shape == ct.shape and np.all(t == ct)
                for t, ct in zip(inner_train, inner_cached_train)
            ])
            assert all([
                t.shape == ct.shape and np.all(t == ct)
                for t, ct in zip(inner_test, inner_cached_test)
            ])
            for ((small_train, small_test),
                 _), ((small_cached_train, small_cached_test), _,
                      _) in zip(small(),
                                cached_small(results_directory="results")):
                assert all([
                    t.shape == ct.shape and np.all(t == ct)
                    for t, ct in zip(small_train, small_cached_train)
                ])
                assert all([
                    t.shape == ct.shape and np.all(t == ct)
                    for t, ct in zip(small_test, small_cached_test)
                ])
Example #7
def run_this_twice():
    """Compare the plain and the cached generators on chromosomal holdouts over
    a DataFrame: every nested split must contain exactly the same data."""
    x = pd.read_csv("test_dataset/x.csv", index_col=0)
    generator = holdouts_generator(x, x, holdouts=example_chromosomal_holdouts)
    cached_generator = cached_holdouts_generator(
        x, x, holdouts=example_chromosomal_holdouts, cache_dir="holdouts")
    for ((train, test),
         inner), ((cached_train, cached_test), _, cached_inner) in zip(
             generator(), cached_generator(results_directory="results")):
        assert all([np.all(t == ct) for t, ct in zip(train, cached_train)])
        assert all([np.all(t == ct) for t, ct in zip(test, cached_test)])
        for ((inner_train, inner_test),
             _), ((inner_cached_train, inner_cached_test),
                  _, _) in zip(inner(),
                               cached_inner(results_directory="results")):
            assert all([
                np.all(t == ct)
                for t, ct in zip(inner_train, inner_cached_train)
            ])
            assert all([
                np.all(t == ct) for t, ct in zip(inner_test, inner_cached_test)
            ])
def test_clear_invalid_results():
    """Store a result, verify clear_invalid_results keeps it while the cache is
    intact, then delete the cached holdout and check that the orphaned result is
    pruned and can no longer be stored."""
    clear_all_cache()
    np.random.seed(10)
    generator = cached_holdouts_generator(np.random.randint(100, size=(100, 100)),
                                          holdouts=random_holdouts([0.1], [1]),
                                          cache_dir="holdouts")
    with pytest.raises(ValueError):
        list(generator())
    with pytest.raises(ValueError):
        list(generator(results_directory={"test":4}))
    gen = generator(results_directory="results")
    (_, _), key, _ = next(gen)
    store_result(key, {"ping":"pong"}, 0, results_directory="results", cache_dir="holdouts", hyper_parameters={"keb":"ab"})
    assert len(glob("results/results/*.json")) == 1
    clear_invalid_results(results_directory="results", cache_dir="holdouts")
    assert len(glob("results/results/*.json")) == 1
    path = glob("holdouts/holdouts/*.pickle.gz")[0]
    os.remove(path)
    with pytest.raises(ValueError):
        list(generator(results_directory="results"))
    clear_invalid_results(results_directory="results", cache_dir="holdouts")
    with pytest.raises(ValueError):
        store_result(key, {"ping":"pong"}, 0, results_directory="results", cache_dir="holdouts")
    assert len(glob("results/results/*.json")) == 0
    clear_all_cache()
    
Example #9
def job(X):
    """Worker intended to be run by several processes at once: sleep a random
    amount, then consume the cached holdouts generator for the shared data X."""
    sleep(random.random())
    list(cached_holdouts_generator(X, skip=skip, holdouts=slow_random_holdouts([0.1, 0.1], [cpu_count(), 1]), cache_dir="holdouts")(
        results_directory="results"
    ))
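A sketch of how `job` might be driven, mirroring test_multiprocessing above; the driver function is hypothetical and not part of the original snippet.

def run_jobs_concurrently(X):
    # Hypothetical driver: several workers race on the same holdouts cache,
    # which is what the random sleep inside job() is meant to exercise.
    with Pool(cpu_count()) as p:
        p.map(job, [X] * cpu_count())
        p.close()
        p.join()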