Exemple #1
0
def test_sklearn_real_data(tmp_dir):
    """Tune on the iris dataset and sanity-check held-out accuracy.

    Runs a 10-trial Bayesian search with accuracy scoring and 5-fold
    stratified CV on an 80% training split, then scores the first and
    tenth of the returned best models on the remaining 20%.
    """
    bayes_oracle = kt.oracles.BayesianOptimization(
        objective=kt.Objective("score", "max"),
        max_trials=10,
    )
    accuracy_scorer = metrics.make_scorer(metrics.accuracy_score)
    tuner = kt.SklearnTuner(
        oracle=bayes_oracle,
        hypermodel=build_model,
        scoring=accuracy_scorer,
        cv=model_selection.StratifiedKFold(5),
        directory=tmp_dir,
    )

    features, labels = datasets.load_iris(return_X_y=True)
    split = model_selection.train_test_split(features, labels, test_size=0.2)
    train_x, test_x, train_y, test_y = split

    tuner.search(train_x, train_y)

    # Ask for all ten models so the first and last can be compared.
    ranked_models = tuner.get_best_models(10)
    top_model, bottom_model = ranked_models[0], ranked_models[9]
    top_score = top_model.score(test_x, test_y)
    bottom_score = bottom_model.score(test_x, test_y)

    assert top_score > 0.8
    assert top_score >= bottom_score
Exemple #2
0
def test_sklearn_cv_with_groups(tmp_dir):
    """Check that a ``groups`` array passed to ``search`` works with GroupKFold.

    Runs a 10-trial Bayesian search over random binary-classification data
    using ``GroupKFold(5)``, then verifies the trial count, the bookkeeping
    on the best trial, and that the best model can be reloaded and scored.
    """
    tuner = kt.SklearnTuner(
        oracle=kt.oracles.BayesianOptimization(
            objective=kt.Objective("score", "max"),
            max_trials=10,
        ),
        hypermodel=build_model,
        cv=model_selection.GroupKFold(5),
        directory=tmp_dir,
    )

    x = np.random.uniform(size=(50, 10))
    y = np.random.randint(0, 2, size=(50, ))
    # Assign exactly ten samples to each of the five groups, in random order.
    # Drawing group ids independently could (rarely) leave a group empty, and
    # GroupKFold(5) raises when there are fewer distinct groups than folds.
    groups = np.random.permutation(np.arange(50) % 5)
    tuner.search(x, y, groups=groups)

    assert len(tuner.oracle.trials) == 10

    best_trial = tuner.oracle.get_best_trials()[0]
    assert best_trial.status == "COMPLETED"
    assert best_trial.score is not None
    assert best_trial.best_step == 0
    assert best_trial.metrics.exists("score")

    # Make sure best model can be reloaded.
    best_model = tuner.get_best_models()[0]
    best_model.score(x, y)
Exemple #3
0
def test_sklearn_pipeline(tmp_dir):
    """Search over ``build_pipeline`` while forwarding ``sample_weight``.

    Uses random binary-classification data with per-sample weights drawn
    from [0.1, 1), runs ten trials, and checks the best trial's bookkeeping
    before reloading and scoring the best pipeline.
    """
    n_samples = 50
    bayes_oracle = kt.oracles.BayesianOptimization(
        objective=kt.Objective("score", "max"),
        max_trials=10,
    )
    tuner = kt.SklearnTuner(
        oracle=bayes_oracle,
        hypermodel=build_pipeline,
        directory=tmp_dir,
    )

    features = np.random.uniform(size=(n_samples, 10))
    targets = np.random.randint(0, 2, size=(n_samples,))
    weights = np.random.uniform(0.1, 1, size=(n_samples,))
    tuner.search(features, targets, sample_weight=weights)

    assert len(tuner.oracle.trials) == 10

    top_trial = tuner.oracle.get_best_trials()[0]
    assert top_trial.status == "COMPLETED"
    assert top_trial.score is not None
    assert top_trial.best_step == 0
    assert top_trial.metrics.exists("score")

    # Reloading the best pipeline and scoring it must not raise.
    reloaded_pipeline = tuner.get_best_models()[0]
    reloaded_pipeline.score(features, targets)
Exemple #4
0
def test_sklearn_additional_metrics(tmp_dir):
    """Check that extra ``metrics`` callables are recorded on each trial.

    Passes balanced accuracy and recall as additional metrics, runs ten
    trials on random binary-classification data, and asserts that the best
    trial tracks ``score`` plus both additional metric names.
    """
    extra_metrics = [metrics.balanced_accuracy_score, metrics.recall_score]
    bayes_oracle = kt.oracles.BayesianOptimization(
        objective=kt.Objective("score", "max"),
        max_trials=10,
    )
    tuner = kt.SklearnTuner(
        oracle=bayes_oracle,
        hypermodel=build_model,
        metrics=extra_metrics,
        directory=tmp_dir,
    )

    features = np.random.uniform(size=(50, 10))
    targets = np.random.randint(0, 2, size=(50,))
    tuner.search(features, targets)

    assert len(tuner.oracle.trials) == 10

    top_trial = tuner.oracle.get_best_trials()[0]
    assert top_trial.status == "COMPLETED"
    assert top_trial.score is not None
    assert top_trial.best_step == 0
    # The objective and every additional metric must have been logged.
    for metric_name in ("score", "balanced_accuracy_score", "recall_score"):
        assert top_trial.metrics.exists(metric_name)

    # Reloading the best model and scoring it must not raise.
    reloaded_model = tuner.get_best_models()[0]
    reloaded_model.score(features, targets)
Exemple #5
0
def test_sklearn_custom_scoring_and_cv(tmp_dir):
    """Run a search with a user-supplied scorer and CV splitter.

    Uses a balanced-accuracy scorer with ``StratifiedKFold(5)`` on random
    binary-classification data, then checks the trial count, the best
    trial's bookkeeping, and that the best model can be reloaded.
    """
    bayes_oracle = kt.oracles.BayesianOptimization(
        objective=kt.Objective("score", "max"),
        max_trials=10,
    )
    balanced_scorer = metrics.make_scorer(metrics.balanced_accuracy_score)
    splitter = model_selection.StratifiedKFold(5)
    tuner = kt.SklearnTuner(
        oracle=bayes_oracle,
        hypermodel=build_model,
        scoring=balanced_scorer,
        cv=splitter,
        directory=tmp_dir,
    )

    features = np.random.uniform(size=(50, 10))
    targets = np.random.randint(0, 2, size=(50,))
    tuner.search(features, targets)

    assert len(tuner.oracle.trials) == 10

    top_trial = tuner.oracle.get_best_trials()[0]
    assert top_trial.status == "COMPLETED"
    assert top_trial.score is not None
    assert top_trial.best_step == 0
    assert top_trial.metrics.exists("score")

    # Reloading the best model and scoring it must not raise.
    reloaded_model = tuner.get_best_models()[0]
    reloaded_model.score(features, targets)
Exemple #6
0
def test_sklearn_tuner_with_df(tmp_dir):
    """Check that ``search`` accepts pandas DataFrames for both x and y.

    Wraps random features and random binary labels in DataFrames, runs the
    search, and asserts that all ten trials were created.
    """
    bayes_oracle = kt.oracles.BayesianOptimization(
        objective=kt.Objective("score", "max"),
        max_trials=10,
    )
    tuner = kt.SklearnTuner(
        oracle=bayes_oracle,
        hypermodel=build_model,
        directory=tmp_dir,
    )

    raw_features = np.random.uniform(size=(50, 10))
    feature_frame = pd.DataFrame(raw_features)
    raw_labels = np.random.randint(0, 2, size=(50,))
    label_frame = pd.DataFrame(raw_labels)
    tuner.search(feature_frame, label_frame)

    assert len(tuner.oracle.trials) == 10
Exemple #7
0
def test_sklearn_not_install_error(tmp_dir):
    """Check that ``SklearnTuner`` raises ``ImportError`` without sklearn.

    Temporarily replaces the ``sklearn`` attribute of
    ``kt.tuners.sklearn_tuner`` with ``None`` and expects construction of
    the tuner to raise an ``ImportError`` whose message matches
    "Please install sklearn".
    """
    sklearn_module = kt.tuners.sklearn_tuner.sklearn
    kt.tuners.sklearn_tuner.sklearn = None
    try:
        with pytest.raises(ImportError, match="Please install sklearn"):
            kt.SklearnTuner(
                oracle=kt.oracles.BayesianOptimization(
                    objective=kt.Objective("score", "max"),
                    max_trials=10,
                ),
                hypermodel=build_model,
                directory=tmp_dir,
            )
    finally:
        # Always restore the real module, even if the assertion above fails,
        # so the patched attribute cannot leak into tests that run later.
        kt.tuners.sklearn_tuner.sklearn = sklearn_module