Example #1
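These snippets come from the pyshac test suite, so the shared imports are not shown. The preamble below is a minimal sketch, assuming pyshac's usual module layout (hyperparameters and callbacks under pyshac.config, the engine under pyshac.core); the create_mock_dataset helper is sketched after Example #1.

import os
import shutil

import numpy as np
import pytest

# Assumed module layout of the pyshac project.
from pyshac.config import hyperparameters as hp
from pyshac.config import callbacks as cb
from pyshac.core import engine
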
def test_csvwriter_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.), hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    # create the mock dataset
    create_mock_dataset()

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    callback = cb.CSVLogger('shac/logs.csv')

    # training
    shac.fit_dataset('shac/mock.csv', callbacks=[callback])

    assert os.path.exists('shac/logs.csv')
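The create_mock_dataset() helper called above is defined elsewhere in the test suite. A plausible minimal sketch, assuming fit_dataset expects a CSV whose columns are the parameter names followed by the objective column:

def create_mock_dataset(num_samples=1000):
    # Hypothetical stand-in for the test-suite helper: sample (x, y)
    # pairs, score them, and write the rows to 'shac/mock.csv'.
    if not os.path.exists('shac'):
        os.makedirs('shac')

    x = np.random.uniform(-1., 1., size=num_samples)
    y = np.random.normal(0., 5., size=num_samples)
    scores = x ** 2 + y ** 3  # same objective the tests evaluate below

    with open('shac/mock.csv', 'w') as f:
        f.write('x,y,scores\n')
        for xi, yi, si in zip(x, y, scores):
            f.write('%f,%f,%f\n' % (xi, yi, si))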
Example #2
def test_history_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.), hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    # create the mock dataset
    create_mock_dataset()

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    history = cb.History()

    # training
    history = shac.fit_dataset('shac/mock.csv', callbacks=[history])

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history

    # Test passing in empty callback list

    # training
    shac = engine.SHAC(h,
                       total_budget=total_budget,
                       num_batches=batch_size,
                       objective=objective)

    history = shac.fit_dataset('shac/mock.csv', presort=True)

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history
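History follows the Keras convention of accumulating recorded values in a history dict, so the quantities asserted above can be inspected directly after training. A quick check, assuming dict-style access:

# Dump everything the History callback recorded during fit_dataset.
for key, values in history.history.items():
    print(key, ':', values)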
Example #3
def test_shac_fit_dataset_presort():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.),
              hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # create the mock dataset
    create_mock_dataset()

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 200
    assert shac.num_workers == 200
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # limit parallelism for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # training
    shac.fit_dataset('shac/mock.csv', presort=True,
                     skip_cv_checks=True, early_stop=True)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)
    predictions = shac.predict(num_batches=16, num_workers_per_batch=1)

    def eval_fn(id, pred):
        return pred['x'] ** 2 + pred['y'] ** 3

    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    random_x = np.random.uniform(-1., 1., size=1000)
    random_y = np.random.normal(0., 5., size=1000)
    random_eval = random_x ** 2 + random_y ** 3
    random_mean = np.mean(random_eval)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with a different batch size
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)

    shac2.restore_data()

    np.random.seed(0)
    predictions = shac2.predict(num_batches=10, num_workers_per_batch=1)
    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # test restore when no files are found; should not raise an error
    shutil.rmtree('shac/')

    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
Example #4
def test_shac_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.),
              hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # create the mock dataset
    create_mock_dataset()

    # Test wrong path
    with pytest.raises(FileNotFoundError):
        shac.fit_dataset('random.csv')

    # Test wrong engine configurations
    shac3 = engine.SHAC(h, 50000, num_batches=5)

    # Number of samples required is more than provided samples
    with pytest.raises(ValueError):
        shac3.fit_dataset('shac/mock.csv')

    # Test `None` parameters for engine
    shac5 = engine.SHAC(None, total_budget, batch_size)

    with pytest.raises(ValueError):
        shac5.fit_dataset('shac/mock.csv')

    # Wrong number of set params
    shac4 = engine.SHAC([params[0]], total_budget, batch_size)

    with pytest.raises(ValueError):
        shac4.fit_dataset('shac/mock.csv')

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 200
    assert shac.num_workers == 200
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # limit parallelism for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # training
    shac.fit_dataset('shac/mock.csv', presort=False)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)
    predictions = shac.predict(num_batches=16, num_workers_per_batch=1)

    def eval_fn(id, pred):
        return pred['x'] ** 2 + pred['y'] ** 3

    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    random_x = np.random.uniform(-1., 1., size=1000)
    random_y = np.random.normal(0., 5., size=1000)
    random_eval = random_x ** 2 + random_y ** 3
    random_mean = np.mean(random_eval)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # Serialization
    shac.save_data()

    # Restore with a different batch size
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)

    shac2.restore_data()

    np.random.seed(0)
    predictions = shac2.predict(num_batches=10, num_workers_per_batch=1)
    pred_evals = [eval_fn(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Random mean : ", random_mean)
    print("Predicted mean : ", pred_mean)

    assert random_mean < pred_mean

    # test restore when no files are found; should not raise an error
    shutil.rmtree('shac/')

    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
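
Each test writes into a local 'shac/' working directory, and only the last two remove it themselves. A hypothetical autouse fixture that keeps runs isolated:

@pytest.fixture(autouse=True)
def clean_shac_dir():
    # Hypothetical cleanup: remove the 'shac/' working directory after
    # each test so one run's artifacts cannot leak into the next.
    yield
    if os.path.exists('shac'):
        shutil.rmtree('shac')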