Ejemplo n.º 1
0
def test_orchestration_run_one_step_missing_data(binh_korn_points):
    """Test that the model also works with missing observations.

    The second objective of the first ten design points is set to NaN
    before the training set is updated. One PAL step must then still
    propose a point that has not been sampled yet and must not discard
    any point.
    """
    gpr_0 = GaussianProcessRegressor(
        RBF(), normalize_y=True, n_restarts_optimizer=5, random_state=10
    )
    gpr_1 = GaussianProcessRegressor(
        RBF(), normalize_y=True, n_restarts_optimizer=5, random_state=10
    )

    X_binh_korn, y_binh_korn = binh_korn_points  # pylint:disable=invalid-name
    # Work on a private copy: the NaNs written below must not leak into the
    # fixture data, which other tests may share.
    y_binh_korn = np.array(y_binh_korn, copy=True)

    sample_idx = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50, 60, 70])

    palinstance = PALSklearn(X_binh_korn, [gpr_0, gpr_1], 2, beta_scale=1)
    palinstance.cross_val_points = 0
    # make some of the observations missing
    y_binh_korn[:10, 1] = np.nan

    palinstance.update_train_set(sample_idx, y_binh_korn[sample_idx])

    idx = palinstance.run_one_step()
    # The proposed point must be new, i.e. not part of the training set
    assert idx[0] not in sample_idx.tolist()
    assert palinstance.number_sampled_points > 0
    assert sum(palinstance.unclassified) > 0
    assert sum(palinstance.discarded) == 0
Ejemplo n.º 2
0
def test_orchestration_run_one_step(make_random_dataset, binh_korn_points):
    """Test that a single orchestration step works with sklearn models.

    Two scenarios are exercised: a three-objective random dataset and the
    two-objective Binh-Korn data. In both, `run_one_step` must return one
    index that is not in the training set and leave all models fitted.
    """
    X, y = make_random_dataset  # pylint:disable=invalid-name
    first_gpr, second_gpr, third_gpr = (
        GaussianProcessRegressor(RBF(), normalize_y=True, n_restarts_optimizer=5)
        for _ in range(3)
    )

    # --- three objectives on the random dataset ---
    initial_idx = np.arange(11)
    palinstance = PALSklearn(X, [first_gpr, second_gpr, third_gpr], 3, beta_scale=1)
    palinstance.cross_val_points = 0
    palinstance.update_train_set(initial_idx, y[initial_idx])
    idx = palinstance.run_one_step()
    assert len(idx) == 1
    assert idx[0] not in initial_idx.tolist()
    assert all(check_is_fitted(model) is None for model in palinstance.models)

    # --- two objectives on the Binh-Korn data (first two models are reused) ---
    X_binh_korn, y_binh_korn = binh_korn_points  # pylint:disable=invalid-name
    initial_idx = np.array([*range(11), 50, 60, 70])

    palinstance = PALSklearn(X_binh_korn, [first_gpr, second_gpr], 2, beta_scale=1)
    palinstance.cross_val_points = 0
    palinstance.update_train_set(initial_idx, y_binh_korn[initial_idx])
    idx = palinstance.run_one_step()
    assert len(idx) == 1
    assert idx[0] not in initial_idx.tolist()
    assert palinstance.number_sampled_points == len(initial_idx)
    assert sum(palinstance.unclassified) > 0
    assert sum(palinstance.discarded) == 0
    assert all(check_is_fitted(model) is None for model in palinstance.models)
Ejemplo n.º 3
0
def test_pal_sklearn(make_random_dataset):
    """Test that we can create an instance of the PAL sklearn class.

    The prior kernel length scale of the first model is 1 before training;
    after `_train` the fitted kernel must have a different length scale.
    """
    X, y = make_random_dataset  # pylint:disable=invalid-name
    train_idx = np.array([1, 2, 3, 4, 5])
    shared_gpr = GaussianProcessRegressor(
        RBF(), normalize_y=True, n_restarts_optimizer=5
    )
    # The very same regressor object is handed in for all three objectives
    pal_sklearn_instance = PALSklearn(X, [shared_gpr] * 3, 3)
    pal_sklearn_instance.update_train_set(train_idx, y[train_idx, :])

    assert pal_sklearn_instance.models[0].kernel.length_scale == 1
    pal_sklearn_instance._train()  # pylint:disable=protected-access
    assert pal_sklearn_instance.models[0].kernel_.length_scale != 1
Ejemplo n.º 4
0
def test_gridsearch_object(binh_korn_points):
    """Test the initialization of PALSklearn with a GridSearchCV object.

    Constructing PALSklearn with grid-search objects that have not been
    fitted must raise a ValueError. Once they are fitted, construction must
    succeed and one PAL step must return a single, not yet sampled index
    with all models fitted.
    """
    X_binh_korn, y_binh_korn = binh_korn_points  # pylint:disable=invalid-name
    sample_idx = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50, 60, 70])
    grid_search_0 = GridSearchCV(GaussianProcessRegressor(), {"kernel": [RBF(), Matern()]})
    grid_search_1 = GridSearchCV(GaussianProcessRegressor(), {"kernel": [RBF(), Matern()]})

    # Unfitted grid searches are rejected; the result is intentionally not bound
    with pytest.raises(ValueError):
        PALSklearn(X_binh_korn, [grid_search_0, grid_search_1], 2, beta_scale=1)

    grid_search_0.fit(X_binh_korn, y_binh_korn[:, 0])
    grid_search_1.fit(X_binh_korn, y_binh_korn[:, 1])

    palinstance = PALSklearn(X_binh_korn, [grid_search_0, grid_search_1], 2, beta_scale=1)
    palinstance.cross_val_points = 0
    palinstance.update_train_set(sample_idx, y_binh_korn[sample_idx])

    idx = palinstance.run_one_step()
    assert len(idx) == 1
    # Compare against the complete training set (including 50, 60 and 70)
    assert idx[0] not in sample_idx.tolist()
    for model in palinstance.models:
        assert check_is_fitted(model) is None
Ejemplo n.º 5
0
def test_orchestration_run_one_step_parallel(binh_korn_points):
    """Test one batch step of PALSklearn when it is created with `n_jobs=2`."""
    X_binh_korn, y_binh_korn = binh_korn_points  # pylint:disable=invalid-name
    seed_points = [1, 10, 20, 40, 70, 90]

    matern_models = [
        GaussianProcessRegressor(
            Matern(), normalize_y=True, n_restarts_optimizer=5, random_state=10
        )
        for _ in range(2)
    ]
    palinstance = PALSklearn(
        X_binh_korn, matern_models, 2, beta_scale=1 / 9, n_jobs=2
    )
    sample_idx = np.array(seed_points)
    palinstance.update_train_set(sample_idx, y_binh_korn[sample_idx])
    palinstance.cross_val_points = 0

    idx = palinstance.run_one_step(batch_size=10)

    # None of the proposed points may already be in the training set
    assert all(index not in seed_points for index in idx)
    assert palinstance.number_sampled_points > 0
    assert sum(palinstance.unclassified) > 0
    assert sum(palinstance.discarded) == 0

    assert all(check_is_fitted(model) is None for model in palinstance.models)
Ejemplo n.º 6
0
def test_crossvalidate(binh_korn_points):
    """Test the crossvalidation routine.

    `_crossvalidate` must leave the sampled indices and the sampled mask
    as they were before the call and return a non-zero float error.
    """
    gpr_0 = GaussianProcessRegressor(
        RBF(), normalize_y=True, n_restarts_optimizer=5, random_state=10
    )
    gpr_1 = GaussianProcessRegressor(
        RBF(), normalize_y=True, n_restarts_optimizer=5, random_state=10
    )

    X_binh_korn, y_binh_korn = binh_korn_points  # pylint:disable=invalid-name

    sample_idx = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50, 60, 70])

    palinstance = PALSklearn(X_binh_korn, [gpr_0, gpr_1], 2, beta_scale=1)
    palinstance.update_train_set(sample_idx, y_binh_korn[sample_idx])

    # Snapshot the mask: a plain reference would alias `palinstance.sampled`
    # and make the comparison below trivially true if the mask were mutated
    # in place during cross-validation.
    original_sample_mask = np.array(palinstance.sampled, copy=True)

    cross_val_error = palinstance._crossvalidate()  # pylint:disable=protected-access
    assert (palinstance.sampled_indices == sample_idx).all()
    assert (palinstance.sampled == original_sample_mask).all()

    assert isinstance(cross_val_error, float)
    assert np.abs(cross_val_error) > 0
Ejemplo n.º 7
0
def test_augment_design_space_bk(binh_korn_points, binh_korn_points_finer):
    """Test the augment function with a finer Binh-Korn sampling.

    After one PAL step and a training-set update, the design space is
    augmented with the finer grid. No point may be discarded and the
    number of Pareto-optimal points must grow.
    """
    X_binh_korn, y_binh_korn = binh_korn_points  # pylint:disable=invalid-name
    X_binh_korn_finer, _ = binh_korn_points_finer  # pylint:disable=invalid-name

    models = [
        GaussianProcessRegressor(
            RBF(), normalize_y=True, n_restarts_optimizer=5, random_state=10
        )
        for _ in range(2)
    ]
    start_idx = np.arange(11)

    palinstance = PALSklearn(X_binh_korn, models, 2, beta_scale=1)
    palinstance.cross_val_points = 0
    palinstance.update_train_set(start_idx, y_binh_korn[start_idx])

    # Measure the point proposed by the first step before augmenting
    new_idx = palinstance.run_one_step()
    palinstance.update_train_set(new_idx, y_binh_korn[new_idx])

    pareto_count_before = palinstance.number_pareto_optimal_points
    palinstance.augment_design_space(X_binh_korn_finer)

    assert palinstance.number_discarded_points == 0
    assert palinstance.number_pareto_optimal_points > pareto_count_before
Ejemplo n.º 8
0
def test_augment_design_space(make_random_dataset):
    """Test if the reclassification step in the design space
    augmentation method works.

    The design space is augmented once with `classify=True,
    clean_classify=False` and once with the default arguments. In both
    cases 200 design points are expected and the number of sampled points
    must stay equal to the size of the initial training set.
    """
    X, y = make_random_dataset  # pylint:disable=invalid-name
    start_idx = np.arange(11)

    # --- scenario 1: explicit classify=True, clean_classify=False ---
    models = [
        GaussianProcessRegressor(RBF(), normalize_y=True, n_restarts_optimizer=5)
        for _ in range(3)
    ]
    palinstance = PALSklearn(X, models, 3, beta_scale=1)
    palinstance.cross_val_points = 0
    palinstance.update_train_set(start_idx, y[start_idx])
    _ = palinstance.run_one_step()

    X_new = X + 1  # pylint:disable=invalid-name
    palinstance.augment_design_space(X_new, classify=True, clean_classify=False)
    assert palinstance.number_design_points == 200
    assert palinstance.number_sampled_points == len(start_idx)

    # Adding new design points should not mess up with the models
    assert all(check_is_fitted(model) is None for model in palinstance.models)

    # --- scenario 2: fresh models, default augmentation arguments ---
    models = [
        GaussianProcessRegressor(RBF(), normalize_y=True, n_restarts_optimizer=3)
        for _ in range(3)
    ]
    start_idx = np.arange(11)
    palinstance = PALSklearn(X, models, 3, beta_scale=1)
    palinstance.cross_val_points = 0
    palinstance.update_train_set(start_idx, y[start_idx])
    _ = palinstance.run_one_step()

    X_new = X + np.full((1, 10), 1)  # pylint:disable=invalid-name
    palinstance.augment_design_space(X_new)
    assert palinstance.number_design_points == 200
    assert palinstance.number_sampled_points == len(start_idx)
Ejemplo n.º 9
0
def _make_gpr_pair(kernel_cls, n_restarts_optimizer=5):
    """Return two fresh, identically configured GPRs, one per objective."""
    return [
        GaussianProcessRegressor(
            kernel_cls(),
            normalize_y=True,
            n_restarts_optimizer=n_restarts_optimizer,
            random_state=10,
        )
        for _ in range(2)
    ]


def _assert_valid_batch_step(palinstance, idx, sample_idx):
    """Check the invariants shared by every batch-sampling scenario."""
    already_sampled = sample_idx.tolist()
    # Only points that were actually part of the training set are forbidden
    for index in idx:
        assert index not in already_sampled
    assert palinstance.number_sampled_points > 0
    assert sum(palinstance.unclassified) > 0
    assert sum(palinstance.discarded) == 0
    for model in palinstance.models:
        assert check_is_fitted(model) is None


def test_orchestration_run_one_step_batch(binh_korn_points):
    """Test the batch sampling.

    One batch step is run for several combinations of kernel, `beta_scale`
    and initial training set, and finally with fixed epsilon (`ranges`
    given). Every scenario must propose only points outside its own
    training set, discard nothing and leave all models fitted.
    """
    X_binh_korn, y_binh_korn = binh_korn_points  # pylint:disable=invalid-name
    dense_idx = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    sparse_idx = np.array([1, 10, 20, 40, 70, 90])

    # baseline: additionally check the batch size and uniqueness of the batch
    palinstance = PALSklearn(X_binh_korn, _make_gpr_pair(RBF), 2, beta_scale=1)
    palinstance.cross_val_points = 0
    palinstance.update_train_set(dense_idx, y_binh_korn[dense_idx])
    idx = palinstance.run_one_step(batch_size=10)
    assert len(idx) == 10
    assert len(np.unique(idx)) == 10
    _assert_valid_batch_step(palinstance, idx, dense_idx)

    scenarios = [
        # (kernel class, beta_scale, initial training indices)
        (RBF, 1 / 3, dense_idx),  # different beta scale
        (RBF, 1 / 3, sparse_idx),  # smaller initial set
        (RBF, 1 / 9, sparse_idx),  # smaller initial set and beta scale
        (Matern, 1 / 9, sparse_idx),  # ... and different kernel
    ]
    for kernel_cls, beta_scale, sample_idx in scenarios:
        palinstance = PALSklearn(
            X_binh_korn, _make_gpr_pair(kernel_cls), 2, beta_scale=beta_scale
        )
        palinstance.cross_val_points = 0
        palinstance.update_train_set(sample_idx, y_binh_korn[sample_idx])
        idx = palinstance.run_one_step(batch_size=10)
        _assert_valid_batch_step(palinstance, idx, sample_idx)

    # test using the "fixed" epsilon
    palinstance = PALSklearn(
        X_binh_korn,
        _make_gpr_pair(RBF, n_restarts_optimizer=6),
        2,
        beta_scale=1 / 9,
        ranges=np.ptp(y_binh_korn, axis=0),
    )
    assert palinstance.uses_fixed_epsilon
    palinstance.cross_val_points = 0
    palinstance.update_train_set(sparse_idx, y_binh_korn[sparse_idx])
    idx = palinstance.run_one_step(batch_size=1)
    _assert_valid_batch_step(palinstance, idx, sparse_idx)