def run_this_twice():
    """Exhaust the cached holdouts generator once per holdout schema.

    Designed to be executed twice: the first run populates the on-disk
    cache under ``holdouts/`` and ``results/``, the second run hits it.

    NOTE(review): this module defines ``run_this_twice`` more than once;
    later definitions shadow this one — consider renaming.
    """
    chromo = pd.read_csv("test_dataset/x.csv", index_col=0)
    x = np.random.RandomState(seed=42).randint(10, size=(100))
    # (data, holdouts) pairs, exercised in the same order as before.
    cases = (
        (chromo, example_chromosomal_holdouts),
        (x, example_random_holdouts),
        (x, example_random_holdouts_2),
        (x, example_balanced_random_holdouts),
    )
    for data, holdouts in cases:
        generator = cached_holdouts_generator(
            data, holdouts=holdouts, cache_dir="holdouts")
        # list() forces the lazy generator to produce every holdout.
        list(generator(results_directory="results"))
def test_holdouts_tempered():
    """Tampering with a cached holdout must raise ValueError on reuse, and
    clear_invalid_cache must drop exactly the invalid cache entry.

    NOTE(review): relies on the on-disk layout ``holdouts/holdouts/*.pickle.gz``
    and mutates shared directories — presumably must run serially.
    """
    clear_all_cache(results_directory="results", cache_dir="holdouts")
    np.random.seed(10)
    generator = cached_holdouts_generator(
        np.random.randint(100, size=(100, 100)),
        holdouts=random_holdouts([0.1], [10]),
        cache_dir="holdouts")
    # First pass populates the cache.
    list(generator(results_directory="results"))
    paths = glob("holdouts/holdouts/*.pickle.gz")
    path = paths[0]
    # Corrupt one entry: replace the pickle with an empty file.
    os.remove(path)
    touch(path)
    with pytest.raises(ValueError):
        list(generator())
    clear_invalid_cache(cache_dir="holdouts")
    # Only the corrupted entry should have been purged.
    assert set(glob("holdouts/holdouts/*.pickle.gz")) == set(paths[1:])
    # Regenerate, then repeat with a deleted (not merely emptied) entry.
    list(generator(results_directory="results"))
    paths = glob("holdouts/holdouts/*.pickle.gz")
    path = paths[0]
    os.remove(path)
    with pytest.raises(ValueError):
        list(generator())
    clear_invalid_cache(cache_dir="holdouts")
    assert set(glob("holdouts/holdouts/*.pickle.gz")) == set(paths[1:])
    list(generator(results_directory="results"))
    clear_all_cache(results_directory="results", cache_dir="holdouts")
def test_keras_cache():
    """Exercise the Keras result-caching protocol end to end.

    Phases: (1) dry-run the nested generators; (2) store results for every
    inner and outer holdout, checking double-stores raise ValueError;
    (3) re-iterate and verify completed holdouts yield None splits.
    """
    clear_all_cache(results_directory="results", cache_dir="holdouts")
    X, y = load_iris(return_X_y=True)
    # Keep only the two linearly separable classes (binary problem).
    X = X[y != 2]
    y = y[y != 2]
    generator = cached_holdouts_generator(
        X, y, holdouts=example_random_holdouts, cache_dir="holdouts", skip=skip)
    hyper_parameters = {"epochs": 10}
    # Phase 1: walk outer and inner holdouts without storing anything.
    for _, _, inner in generator(results_directory="results", hyper_parameters=hyper_parameters):
        for _ in inner(results_directory="results", hyper_parameters=hyper_parameters):
            pass
    # Phase 2: train and store a result for every holdout.
    for (training, testing), outer_key, inner in generator(results_directory="results", hyper_parameters=hyper_parameters):
        # No result stored yet for this key, so loading must fail.
        with pytest.raises(ValueError):
            load_result("results", outer_key, hyper_parameters)
        add_work_in_progress("results", outer_key, hyper_parameters)
        for (inner_training, inner_testing), inner_key, _ in inner(results_directory="results", hyper_parameters=hyper_parameters):
            store_keras_result(inner_key, **train(inner_training, inner_testing, hyper_parameters))
            # Storing the same key twice must be rejected.
            with pytest.raises(ValueError):
                store_keras_result(inner_key, **train(inner_training, inner_testing, hyper_parameters))
        store_keras_result(outer_key, **train(training, testing, hyper_parameters))
    # Phase 3: completed holdouts are skipped — splits come back as None.
    for (training, testing), outer_key, inner in generator(results_directory="results", hyper_parameters=hyper_parameters):
        assert training is None
        assert testing is None
        assert not inner()
        load_result("results", outer_key, hyper_parameters)
    clear_all_cache(results_directory="results", cache_dir="holdouts")
def test_work_in_progress():
    """Work-in-progress markers: removal before addition fails, and adding
    one again after the result is stored fails as well.

    NOTE(review): presumably a stored result makes the key "completed",
    which is why a fresh WIP marker is rejected — confirm against the API.
    """
    clear_all_cache(results_directory="results", cache_dir="holdouts")
    generator = cached_holdouts_generator(
        np.random.randint(100, size=(100, 100)),
        holdouts=random_holdouts([0.1], [3]),
        cache_dir="holdouts",
    )
    for _, key, _ in generator(results_directory="results"):
        # Removing a marker that was never added must raise.
        with pytest.raises(ValueError):
            remove_work_in_progress("results", key)
        add_work_in_progress("results", key)
        store_result(
            key, {"ciao": 1}, 0,
            results_directory="results", cache_dir="holdouts")
        # Re-adding a marker for an already stored result must raise.
        with pytest.raises(ValueError):
            add_work_in_progress("results", key)
    clear_all_cache(results_directory="results", cache_dir="holdouts")
def test_multiprocessing():
    """Consume the same cached generator concurrently from a process pool.

    The holdouts are mapped twice so the second pass exercises the cache
    populated (concurrently) by the first.
    """
    clear_all_cache(results_directory="results", cache_dir="holdouts")
    generator = cached_holdouts_generator(
        np.random.randint(100, size=(100, 100)),
        holdouts=random_holdouts([0.1], [24]),
        cache_dir="holdouts",
        skip=skip,
    )
    with Pool(cpu_count()) as workers:
        workers.map(job_wrapper, generator(results_directory="results"))
        workers.map(job_wrapper, generator(results_directory="results"))
        workers.close()
        workers.join()
    regroup_results(results_directory="results")
    clear_all_cache(results_directory="results", cache_dir="holdouts")
def run_this_twice():
    """Verify that cached and uncached generators yield identical balanced
    random splits at every nesting level (outer, inner, small).

    NOTE(review): this module defines ``run_this_twice`` more than once;
    later definitions shadow earlier ones — consider renaming.
    """
    def assert_same_split(expected, cached):
        # Same shapes and same element values, pairwise.
        assert all(
            e.shape == c.shape and np.all(e == c)
            for e, c in zip(expected, cached)
        )

    x = np.random.RandomState(seed=42).randint(10, size=(100))
    generator = holdouts_generator(
        x, x, holdouts=example_balanced_random_holdouts, verbose=False)
    cached_generator = cached_holdouts_generator(
        x, x, holdouts=example_balanced_random_holdouts, cache_dir="holdouts")
    outer_pairs = zip(generator(), cached_generator(results_directory="results"))
    for ((train, test), inner), ((ctrain, ctest), _, cinner) in outer_pairs:
        assert_same_split(train, ctrain)
        assert_same_split(test, ctest)
        inner_pairs = zip(inner(), cinner(results_directory="results"))
        for ((itrain, itest), small), ((ictrain, ictest), _, csmall) in inner_pairs:
            assert_same_split(itrain, ictrain)
            assert_same_split(itest, ictest)
            small_pairs = zip(small(), csmall(results_directory="results"))
            for ((strain, stest), _), ((sctrain, sctest), _, _) in small_pairs:
                assert_same_split(strain, sctrain)
                assert_same_split(stest, sctest)
def run_this_twice():
    """Verify that cached and uncached generators yield identical
    chromosomal splits for both outer and inner holdouts.

    NOTE(review): this module defines ``run_this_twice`` more than once;
    this (last) definition shadows the earlier ones — consider renaming.
    """
    def assert_same_values(expected, cached):
        # Pairwise element-value equality (shapes not compared here,
        # matching the original assertions).
        assert all(np.all(e == c) for e, c in zip(expected, cached))

    x = pd.read_csv("test_dataset/x.csv", index_col=0)
    generator = holdouts_generator(x, x, holdouts=example_chromosomal_holdouts)
    cached_generator = cached_holdouts_generator(
        x, x, holdouts=example_chromosomal_holdouts, cache_dir="holdouts")
    outer_pairs = zip(generator(), cached_generator(results_directory="results"))
    for ((train, test), inner), ((ctrain, ctest), _, cinner) in outer_pairs:
        assert_same_values(train, ctrain)
        assert_same_values(test, ctest)
        inner_pairs = zip(inner(), cinner(results_directory="results"))
        for ((itrain, itest), _), ((ictrain, ictest), _, _) in inner_pairs:
            assert_same_values(itrain, ictrain)
            assert_same_values(itest, ictest)
def test_clear_invalid_results():
    """clear_invalid_results keeps results whose holdout cache is intact and
    purges those whose backing holdout was deleted; storing against a
    missing holdout must raise ValueError.
    """
    clear_all_cache()
    np.random.seed(10)
    generator = cached_holdouts_generator(
        np.random.randint(100, size=(100, 100)),
        holdouts=random_holdouts([0.1], [1]),
        cache_dir="holdouts")
    # Calling without a results_directory must be rejected.
    with pytest.raises(ValueError):
        list(generator())
    # A non-string results_directory must also be rejected.
    with pytest.raises(ValueError):
        list(generator(results_directory={"test": 4}))
    gen = generator(results_directory="results")
    (_, _), key, _ = next(gen)
    store_result(
        key, {"ping": "pong"}, 0,
        results_directory="results", cache_dir="holdouts",
        hyper_parameters={"keb": "ab"})
    assert len(glob("results/results/*.json")) == 1
    # Holdout cache still valid, so the stored result survives.
    clear_invalid_results(results_directory="results", cache_dir="holdouts")
    assert len(glob("results/results/*.json")) == 1
    # Delete the backing holdout: the cached entry becomes invalid.
    path = glob("holdouts/holdouts/*.pickle.gz")[0]
    os.remove(path)
    with pytest.raises(ValueError):
        list(generator(results_directory="results"))
    clear_invalid_results(results_directory="results", cache_dir="holdouts")
    # Storing against the now-missing holdout must fail…
    with pytest.raises(ValueError):
        store_result(
            key, {"ping": "pong"}, 0,
            results_directory="results", cache_dir="holdouts")
    # …and the orphaned result was purged.
    assert len(glob("results/results/*.json")) == 0
    clear_all_cache()
def job(X):
    """Worker: wait a random sub-second interval, then exhaust a slow
    cached holdouts generator over ``X`` (used to provoke cache races).
    """
    sleep(random.random())
    generator = cached_holdouts_generator(
        X,
        skip=skip,
        holdouts=slow_random_holdouts([0.1, 0.1], [cpu_count(), 1]),
        cache_dir="holdouts",
    )
    list(generator(results_directory="results"))