Example #1
def test_history_fit():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    history = cb.History()

    # training
    history = shac.fit(evaluation_simple, callbacks=[history])

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history
    assert 'generator_threads' in history.history
    assert 'device_ids' in history.history

    # Test passing in empty callback list

    # training
    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    history = shac.fit(evaluation_simple)

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history
    assert 'generator_threads' in history.history
    assert 'device_ids' in history.history
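All of these examples rely on helpers defined elsewhere in the test module (`get_hyperparameter_list`, `evaluation_simple`, and the usual imports). A minimal sketch of what those fixtures might look like, assuming pyshac's documented module layout for the imports; the particular hyperparameters and the scoring rule are placeholders, not the library's own test fixtures.

import os
import shutil

import numpy as np
import pytest

import pyshac.config.hyperparameters as hp
import pyshac.config.callbacks as cb
import pyshac.core.engine as engine


def get_hyperparameter_list():
    # Hypothetical search space; any mix of discrete / continuous parameters works.
    return [hp.DiscreteHyperParameter('h1', [0, 1, 2]),
            hp.DiscreteHyperParameter('h2', [3, 4, 5, 6]),
            hp.UniformHP('h3', 7.0, 10.0)]


def evaluation_simple(worker_id, params):
    # Toy objective: the engine only needs a scalar score for each sampled config.
    return float(np.sum(list(params.values())))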
Example #2
def test_shac_simple_multiparameter():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    # training
    shac.fit(evaluation_simple_multi)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)

    # Serialization
    shac.save_data()

    # Restore with different batchsize
    shac2 = engine.SHAC(None,
                        total_budget=total_budget,
                        num_batches=10,
                        objective=objective)

    shac2.restore_data()

    np.random.seed(0)
    # test no file found, yet no error
    shutil.rmtree('shac/')

    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
Example #3
def test_shac_initialization():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    # direct params list submission
    shac = engine.SHAC(params,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    # submission of HyperParameterList
    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    # default number of parallel executors
    shac.set_num_parallel_generators(None)
    shac.set_num_parallel_evaluators(None)

    shac.concurrent_evaluators()
    shac.parallel_evaluators()

    assert shac.generator_backend == 'loky'
    assert shac.evaluator_backend == 'loky'

    shac.num_parallel_generators = 20
    assert shac.num_parallel_generators == 20

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    with pytest.raises(ValueError):
        shac.generator_backend = 'random'

    with pytest.raises(ValueError):
        shac.evaluator_backend = 'random'

    shac = engine.SHAC(None,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    # No parameters
    with pytest.raises(RuntimeError):
        shac.predict()
Example #4
def test_history_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.), hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    # create the mock dataset
    create_mock_dataset()

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    history = cb.History()

    # training
    history = shac.fit_dataset('shac/mock.csv', callbacks=[history])

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history

    # Test passing in empty callback list

    # training
    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    history = shac.fit_dataset('shac/mock.csv', presort=True)

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history
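`fit_dataset` trains from a pre-collected CSV instead of calling an evaluation function, so these tests first materialize `shac/mock.csv`. A rough sketch of what `create_mock_dataset` could look like for the `x`/`y` search space used here; the column layout (one column per parameter plus a final objective column named 'scores') is an assumption to verify against the pyshac documentation.

import os

import numpy as np
import pandas as pd


def create_mock_dataset(num_samples=1000, path='shac/mock.csv'):
    # Hypothetical helper: draw samples from the same distributions as the
    # 'x' / 'y' hyperparameters above, score them with x**2 + y**3, and write
    # a CSV with the parameter columns followed by the objective column.
    x = np.random.uniform(-1.0, 1.0, size=num_samples)
    y = np.random.normal(0.0, 5.0, size=num_samples)
    scores = x ** 2 + y ** 3

    os.makedirs(os.path.dirname(path), exist_ok=True)
    pd.DataFrame({'x': x, 'y': y, 'scores': scores}).to_csv(path, index=False)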
Example #5
def test_csvwriter_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.), hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    # create the mock dataset
    create_mock_dataset()

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    callback = cb.CSVLogger('shac/logs.csv')

    # training
    shac.fit_dataset('shac/mock.csv', callbacks=[callback])

    assert os.path.exists('shac/logs.csv')
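Once the run finishes, the log written by `CSVLogger` is an ordinary CSV and can be inspected directly; a small check, assuming nothing about the exact columns the callback emits.

import pandas as pd

# Load whatever CSVLogger wrote and skim the per-evaluation rows.
logs = pd.read_csv('shac/logs.csv')
print(logs.head())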
Example #6
def test_csvwriter_fit():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    callback = cb.CSVLogger('shac/logs.csv', append=True)

    # training
    shac.fit(evaluation_simple, callbacks=[callback])

    assert os.path.exists('shac/logs.csv')
Example #7
def test_shac_simple_early_stop():
    total_budget = 100
    batch_size = 20
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 5
    assert shac.num_workers == 5
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 1
    shac.num_parallel_evaluators = 1

    # training (with failure)
    shac.fit(evaluation_simple, early_stop=True, skip_cv_checks=True)
    assert len(shac.classifiers) == 0
Example #8
def run_shac_hartmann6():
    total_budget = 200
    num_batches = 20
    objective = 'min'

    params = get_hartmann6_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=num_batches,
                       objective=objective)

    # do parallel work for fast processing
    shac.num_parallel_generators = 8
    shac.num_parallel_evaluators = 1

    print()

    # training
    if os.path.exists('shac/'):
        shac.restore_data()

    shac.fit(evaluation_hartmann6, skip_cv_checks=True)

    print()
    print("Evaluating after training")
    predictions = shac.predict(num_batches=1, num_workers_per_batch=1)
    pred_evals = [evaluation_hartmann6(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Predicted mean : ", pred_mean)
Example #9
def test_shac_fit_dataset_presort():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.),
              hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # create the mock dataset
    create_mock_dataset()

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 200
    assert shac.num_workers == 200
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # training
    shac.fit_dataset('shac/mock.csv', presort=True, skip_cv_checks=True, early_stop=True)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)
    predictions = shac.predict(num_batches=16, num_workers_per_batch=1)

    def eval_fn(id, pred):
        return pred['x'] ** 2 + pred['y'] ** 3

    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    random_x = np.random.uniform(-1., 1., size=1000)
    random_y = np.random.normal(0., 5., size=1000)
    random_eval = random_x ** 2 + random_y ** 3
    random_mean = np.mean(random_eval)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with different batchsize
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)

    shac2.restore_data()

    np.random.seed(0)
    predictions = shac.predict(num_batches=10, num_workers_per_batch=1)
    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # test no file found, yet no error
    shutil.rmtree('shac/')

    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
Example #10
def test_shac_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.),
              hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # create the mock dataset
    create_mock_dataset()

    # Test wrong path
    with pytest.raises(FileNotFoundError):
        shac.fit_dataset('random.csv')

    # Test wrong engine configurations
    shac3 = engine.SHAC(h, 50000, num_batches=5)

    # Number of samples required is more than provided samples
    with pytest.raises(ValueError):
        shac3.fit_dataset('shac/mock.csv')

    # Test `None` parameters for engine
    shac5 = engine.SHAC(None, total_budget, batch_size)

    with pytest.raises(ValueError):
        shac5.fit_dataset('shac/mock.csv')

    # Wrong number of set params
    shac4 = engine.SHAC([params[0]], total_budget, batch_size)

    with pytest.raises(ValueError):
        shac4.fit_dataset('shac/mock.csv')

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 200
    assert shac.num_workers == 200
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # training
    shac.fit_dataset('shac/mock.csv', presort=False)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)
    predictions = shac.predict(num_batches=16, num_workers_per_batch=1)

    def eval_fn(id, pred):
        return pred['x'] ** 2 + pred['y'] ** 3

    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    random_x = np.random.uniform(-1., 1., size=1000)
    random_y = np.random.normal(0., 5., size=1000)
    random_eval = random_x ** 2 + random_y ** 3
    random_mean = np.mean(random_eval)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with different batchsize
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)

    shac2.restore_data()

    np.random.seed(0)
    predictions = shac.predict(num_batches=10, num_workers_per_batch=1)
    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # test no file found, yet no error
    shutil.rmtree('shac/')

    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
Example #11
def test_shac_simple_relax_checks():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 1
    shac.num_parallel_evaluators = 1

    print("Evaluating before training")
    np.random.seed(0)
    random_samples = shac.predict(num_batches=16, num_workers_per_batch=1)  # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    random_mean = np.mean(random_eval)

    print()

    # training
    shac.fit(evaluation_simple, relax_checks=True)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)
    predictions = shac.predict(num_batches=16, num_workers_per_batch=1)
    pred_evals = [evaluation_simple(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with different batchsize
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)

    shac2.restore_data()

    np.random.seed(0)
    predictions = shac.predict(num_batches=10, num_workers_per_batch=1, relax_checks=True)
    pred_evals = [evaluation_simple(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # test no file found, yet no error
    shutil.rmtree('shac/')

    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
Example #12
def test_shac_simple_seeded_manually():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # set the seed manually
    shac.set_seed(0)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")

    # test prediction modes
    with pytest.raises(ValueError):
        shac.predict(max_classfiers=10)

    random_samples = shac.predict(num_samples=None, num_batches=None, num_workers_per_batch=1)  # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    assert len(random_eval) == 1

    random_samples = shac.predict(num_samples=4, num_batches=None, num_workers_per_batch=1)  # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    assert len(random_eval) == 4

    random_samples = shac.predict(num_samples=None, num_batches=1, num_workers_per_batch=1)  # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    assert len(random_eval) == 5

    random_samples = shac.predict(num_samples=2, num_batches=1, num_workers_per_batch=1)  # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    assert len(random_eval) == 7

    random_samples = shac.predict(num_batches=16, num_workers_per_batch=1)  # random sample predictions
    random_eval = [evaluation_simple(0, sample) for sample in random_samples]
    random_mean = np.mean(random_eval)

    print()

    # training
    shac.fit(evaluation_simple)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    predictions = shac.predict(num_batches=20, num_workers_per_batch=1)

    print("Shac preds", predictions)
    pred_evals = [evaluation_simple(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with different batchsize
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)

    shac2.restore_data()

    with shac2.as_deterministic(1):
        predictions = shac2.predict(num_batches=20, num_workers_per_batch=1)
    pred_evals = [evaluation_simple(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Check if predictions are unique
    evals = {}
    for val in random_eval:
        if val in evals:
            evals[val] += 1
        else:
            evals[val] = 1

    assert len(evals) > 1

    # Test that two prediction runs with the same seed produce identical results
    with shac2.as_deterministic(0):
        predictions = shac2.predict(num_batches=20, num_workers_per_batch=1)
        pred_evals1 = [evaluation_simple(0, pred) for pred in predictions]

    with shac2.as_deterministic(0):
        predictions = shac2.predict(num_batches=20, num_workers_per_batch=1)
        pred_evals2 = [evaluation_simple(0, pred) for pred in predictions]

    for p1, p2 in zip(pred_evals1, pred_evals2):
        assert p1 == p2

    # test no file found, yet no error
    shutil.rmtree('shac/')

    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
Example #13
    print()
    print("Predicted mean : ", pred_mean)


check_branin_impl()
# print('Time for 1000 iterations = ', timeit.timeit("check_branin_impl()",
#                                                    setup="from __main__ import check_branin_impl",
#                                                    number=1000))
""" Train """
# start = time.time()
# check_shac_branin()
# end = time.time()
# print("Time in seconds : ", end - start)
""" Evaluation """
shac = engine.SHAC(None, total_budget=200, num_batches=5, objective='min')

shac.restore_data()

# takes about 10 mins on 8 cores
start = time.time()
predictions = shac.predict(5, num_workers_per_batch=5, max_classfiers=17)
end = time.time()
print("Time in seconds : ", end - start)

pred_evals = [evaluation_branin(0, pred) for pred in predictions]
pred_mean = float(np.mean(pred_evals))
pred_std = float(np.std(pred_evals))

print()
print("Predicted results : %0.5f +- (%0.5f)" % (pred_mean, pred_std))