def test_history_fit():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    history = cb.History()

    # training
    history = shac.fit(evaluation_simple, callbacks=[history])

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history
    assert 'generator_threads' in history.history
    assert 'device_ids' in history.history

    # Test fitting without passing any callbacks
    # training
    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    history = shac.fit(evaluation_simple)

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history
    assert 'generator_threads' in history.history
    assert 'device_ids' in history.history
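# NOTE: these tests rely on module-level fixtures (`get_hyperparameter_list`,
# `evaluation_simple`, etc.) that fall outside this excerpt. Below is a
# minimal sketch of their assumed shape; the parameter names and the scoring
# formula are illustrative guesses, not the original definitions.
def get_hyperparameter_list_sketch():
    # a small list of continuous hyperparameters, in the style this file
    # already uses for `hp.UniformHP` / `hp.NormalHP`
    return [hp.UniformHP('h1', -1.0, 1.0),
            hp.UniformHP('h2', 0.0, 1.0),
            hp.NormalHP('h3', 0.0, 1.0)]


def evaluation_simple_sketch(worker_id, params):
    # an evaluation callable receives the worker id and a dict mapping
    # hyperparameter names to sampled values, and returns a scalar score
    return params['h1'] + params['h2'] + params['h3']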
def test_shac_simple_multiparameter():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    # training
    shac.fit(evaluation_simple_multi)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)

    # Serialization
    shac.save_data()

    # Restore with a different batch size
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)
    shac2.restore_data()

    np.random.seed(0)

    # test that restoring when no saved files exist raises no error
    shutil.rmtree('shac/')
    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
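# NOTE: `get_multi_parameter_list` / `evaluation_simple_multi` are likewise
# fixtures defined elsewhere. A hedged sketch, assuming the multi-parameter
# variant yields one or more sampled values per hyperparameter name:
def evaluation_simple_multi_sketch(worker_id, params):
    # sum over every sampled value, whether scalar or list-valued
    return float(sum(np.sum(v) for v in params.values()))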
def test_shac_initialization():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    # direct params list submission
    shac = engine.SHAC(params, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # submission of HyperParameterList
    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # default number of parallel executors
    shac.set_num_parallel_generators(None)
    shac.set_num_parallel_evaluators(None)
    shac.concurrent_evaluators()
    shac.parallel_evaluators()

    assert shac.generator_backend == 'loky'
    assert shac.evaluator_backend == 'loky'

    shac.num_parallel_generators = 20
    assert shac.num_parallel_generators == 20

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    with pytest.raises(ValueError):
        shac.generator_backend = 'random'

    with pytest.raises(ValueError):
        shac.evaluator_backend = 'random'

    shac = engine.SHAC(None, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # No parameters
    with pytest.raises(RuntimeError):
        shac.predict()
def test_history_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.),
              hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # create the mock dataset
    create_mock_dataset()

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    history = cb.History()

    # training
    history = shac.fit_dataset('shac/mock.csv', callbacks=[history])

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history

    # Test fitting without passing any callbacks
    # training
    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    history = shac.fit_dataset('shac/mock.csv', presort=True)

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history
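# NOTE: `create_mock_dataset` is another fixture outside this excerpt. The
# dataset tests only require that 'shac/mock.csv' holds one column per
# hyperparameter ('x', 'y') plus an objective column; the exact column
# layout and the 'scores' column name below are assumptions.
def create_mock_dataset_sketch(num_samples=1000):
    import pandas as pd

    x = np.random.uniform(-1., 1., size=num_samples)
    y = np.random.normal(0., 5., size=num_samples)
    scores = x ** 2 + y ** 3  # mirrors `eval_fn` in the dataset tests below

    if not os.path.exists('shac'):
        os.makedirs('shac')

    pd.DataFrame({'x': x, 'y': y, 'scores': scores}).to_csv(
        'shac/mock.csv', index=False)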
def test_csvwriter_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.),
              hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # create the mock dataset
    create_mock_dataset()

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    callback = cb.CSVLogger('shac/logs.csv')

    # training
    shac.fit_dataset('shac/mock.csv', callbacks=[callback])

    assert os.path.exists('shac/logs.csv')
def test_csvwriter_fit():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    callback = cb.CSVLogger('shac/logs.csv', append=True)

    # training
    shac.fit(evaluation_simple, callbacks=[callback])

    assert os.path.exists('shac/logs.csv')
def test_shac_simple_early_stop():
    total_budget = 100
    batch_size = 20
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 5
    assert shac.num_workers == 5
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 1
    shac.num_parallel_evaluators = 1

    # training (early_stop aborts on the first classifier training failure)
    shac.fit(evaluation_simple, early_stop=True, skip_cv_checks=True)

    assert len(shac.classifiers) == 0
def run_shac_hartmann6():
    total_budget = 200
    num_batches = 20
    objective = 'min'

    params = get_hartmann6_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=num_batches, objective=objective)

    # do parallel work for fast processing
    shac.num_parallel_generators = 8
    shac.num_parallel_evaluators = 1

    print()

    # training
    if os.path.exists('shac/'):
        shac.restore_data()

    shac.fit(evaluation_hartmann6, skip_cv_checks=True)

    print()
    print("Evaluating after training")

    predictions = shac.predict(num_batches=1, num_workers_per_batch=1)
    pred_evals = [evaluation_hartmann6(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Predicted mean : ", pred_mean)
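# NOTE: `evaluation_hartmann6` is defined outside this excerpt. It presumably
# wraps the standard six-dimensional Hartmann benchmark, sketched below with
# the usual literature constants; the parameter names 'h0'..'h5' are an
# assumption. The global minimum is roughly -3.32237.
def evaluation_hartmann6_sketch(worker_id, params):
    alpha = np.array([1.0, 1.2, 3.0, 3.2])
    A = np.array([[10., 3., 17., 3.5, 1.7, 8.],
                  [0.05, 10., 17., 0.1, 8., 14.],
                  [3., 3.5, 1.7, 10., 17., 8.],
                  [17., 8., 0.05, 10., 0.1, 14.]])
    P = 1e-4 * np.array([[1312., 1696., 5569., 124., 8283., 5886.],
                         [2329., 4135., 8307., 3736., 1004., 9991.],
                         [2348., 1451., 3522., 2883., 3047., 6650.],
                         [4047., 8828., 8732., 5743., 1091., 381.]])

    # f(x) = -sum_i alpha_i * exp(-sum_j A_ij * (x_j - P_ij)^2)
    x = np.array([params['h%d' % i] for i in range(6)])
    inner = np.sum(A * (x - P) ** 2, axis=-1)
    return float(-np.sum(alpha * np.exp(-inner)))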
def test_shac_fit_dataset_presort():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.),
              hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # create the mock dataset
    create_mock_dataset()

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 200
    assert shac.num_workers == 200
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # training
    shac.fit_dataset('shac/mock.csv', presort=True,
                     skip_cv_checks=True, early_stop=True)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)

    predictions = shac.predict(num_batches=16, num_workers_per_batch=1)

    def eval_fn(id, pred):
        return pred['x'] ** 2 + pred['y'] ** 3

    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    random_x = np.random.uniform(-1., 1., size=1000)
    random_y = np.random.normal(0., 5., size=1000)
    random_eval = random_x ** 2 + random_y ** 3
    random_mean = np.mean(random_eval)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with a different batch size
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)
    shac2.restore_data()

    np.random.seed(0)
    predictions = shac.predict(num_batches=10, num_workers_per_batch=1)
    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # test that restoring when no saved files exist raises no error
    shutil.rmtree('shac/')
    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
def test_shac_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.),
              hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # create the mock dataset
    create_mock_dataset()

    # Test wrong path
    with pytest.raises(FileNotFoundError):
        shac.fit_dataset('random.csv')

    # Test wrong engine configurations
    shac3 = engine.SHAC(h, 50000, num_batches=5)

    # Number of samples required is more than the provided samples
    with pytest.raises(ValueError):
        shac3.fit_dataset('shac/mock.csv')

    # Test `None` parameters for the engine
    shac5 = engine.SHAC(None, total_budget, batch_size)

    with pytest.raises(ValueError):
        shac5.fit_dataset('shac/mock.csv')

    # Wrong number of set params
    shac4 = engine.SHAC([params[0]], total_budget, batch_size)

    with pytest.raises(ValueError):
        shac4.fit_dataset('shac/mock.csv')

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 200
    assert shac.num_workers == 200
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # training
    shac.fit_dataset('shac/mock.csv', presort=False)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)

    predictions = shac.predict(num_batches=16, num_workers_per_batch=1)

    def eval_fn(id, pred):
        return pred['x'] ** 2 + pred['y'] ** 3

    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    random_x = np.random.uniform(-1., 1., size=1000)
    random_y = np.random.normal(0., 5., size=1000)
    random_eval = random_x ** 2 + random_y ** 3
    random_mean = np.mean(random_eval)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with a different batch size
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)
    shac2.restore_data()

    np.random.seed(0)
    predictions = shac.predict(num_batches=10, num_workers_per_batch=1)
    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # test that restoring when no saved files exist raises no error
    shutil.rmtree('shac/')
    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
def test_shac_simple_relax_checks():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 1
    shac.num_parallel_evaluators = 1

    print("Evaluating before training")
    np.random.seed(0)

    random_samples = shac.predict(num_batches=16, num_workers_per_batch=1)

    # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    random_mean = np.mean(random_eval)

    print()

    # training
    shac.fit(evaluation_simple, relax_checks=True)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)

    predictions = shac.predict(num_batches=16, num_workers_per_batch=1)
    pred_evals = [evaluation_simple(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with a different batch size
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)
    shac2.restore_data()

    np.random.seed(0)
    predictions = shac.predict(num_batches=10, num_workers_per_batch=1,
                               relax_checks=True)
    pred_evals = [evaluation_simple(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # test that restoring when no saved files exist raises no error
    shutil.rmtree('shac/')
    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
def test_shac_simple_seeded_manually():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # set the seed manually
    shac.set_seed(0)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")

    # test prediction modes
    with pytest.raises(ValueError):
        shac.predict(max_classfiers=10)

    random_samples = shac.predict(num_samples=None, num_batches=None,
                                  num_workers_per_batch=1)

    # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    assert len(random_eval) == 1

    random_samples = shac.predict(num_samples=4, num_batches=None,
                                  num_workers_per_batch=1)

    # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    assert len(random_eval) == 4

    random_samples = shac.predict(num_samples=None, num_batches=1,
                                  num_workers_per_batch=1)

    # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    assert len(random_eval) == 5

    random_samples = shac.predict(num_samples=2, num_batches=1,
                                  num_workers_per_batch=1)

    # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    assert len(random_eval) == 7

    random_samples = shac.predict(num_batches=16, num_workers_per_batch=1)

    # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    random_mean = np.mean(random_eval)

    print()

    # training
    shac.fit(evaluation_simple)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")

    predictions = shac.predict(num_batches=20, num_workers_per_batch=1)
    print("Shac preds", predictions)
    pred_evals = [evaluation_simple(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with a different batch size
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)
    shac2.restore_data()

    with shac2.as_deterministic(1):
        predictions = shac2.predict(num_batches=20, num_workers_per_batch=1)

    pred_evals = [evaluation_simple(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Check that the random predictions are not all identical
    evals = {}
    for val in random_eval:
        if val in evals:
            evals[val] += 1
        else:
            evals[val] = 1

    assert len(evals) > 1

    # Two prediction runs under the same seed must produce identical results
    with shac2.as_deterministic(0):
        predictions = shac2.predict(num_batches=20, num_workers_per_batch=1)
    pred_evals1 = [evaluation_simple(0, pred) for pred in predictions]

    with shac2.as_deterministic(0):
        predictions = shac2.predict(num_batches=20, num_workers_per_batch=1)
    pred_evals2 = [evaluation_simple(0, pred) for pred in predictions]

    for p1, p2 in zip(pred_evals1, pred_evals2):
        assert p1 == p2

    # test that restoring when no saved files exist raises no error
    shutil.rmtree('shac/')
    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
print() print("Predicted mean : ", pred_mean) check_branin_impl() # print('Time for 1000 iterations = ', timeit.timeit("check_branin_impl()", # setup="from __main__ import check_branin_impl", # number=1000)) """ Train """ # start = time.time() # check_shac_branin() # end = time.time() # print("Time in seconds : ", end - start) """ Evaluation """ shac = engine.SHAC(None, total_budget=200, num_batches=5, objective='min') shac.restore_data() # takes about 10 mins on 8 cores start = time.time() predictions = shac.predict(5, num_workers_per_batch=5, max_classfiers=17) end = time.time() print("Time in seconds : ", end - start) pred_evals = [evaluation_branin(0, pred) for pred in predictions] pred_mean = float(np.mean(pred_evals)) pred_std = float(np.std(pred_evals)) print() print("Predicted results : %0.5f +- (%0.5f)" % (pred_mean, pred_std))