def test_sklearn_real_data(tmp_dir):
    """Tune on the iris dataset and score the ranked models on a held-out split.

    Runs a 10-trial Bayesian search with accuracy scoring and 5-fold
    stratified CV, then checks that the first returned model scores above
    0.8 on the held-out 20% and at least as well as the tenth one.
    """
    objective = kt.Objective("score", "max")
    oracle = kt.oracles.BayesianOptimization(objective=objective, max_trials=10)
    scorer = metrics.make_scorer(metrics.accuracy_score)
    folds = model_selection.StratifiedKFold(5)
    tuner = kt.SklearnTuner(
        oracle=oracle,
        hypermodel=build_model,
        scoring=scorer,
        cv=folds,
        directory=tmp_dir,
    )

    features, labels = datasets.load_iris(return_X_y=True)
    split = model_selection.train_test_split(features, labels, test_size=0.2)
    x_train, x_test, y_train, y_test = split

    tuner.search(x_train, y_train)

    # Request all ten models so the first and the tenth can be compared.
    ranked_models = tuner.get_best_models(10)
    top_score = ranked_models[0].score(x_test, y_test)
    bottom_score = ranked_models[9].score(x_test, y_test)

    assert top_score > 0.8
    assert top_score >= bottom_score
def test_sklearn_cv_with_groups(tmp_dir):
    """Search with a `GroupKFold` splitter and `groups` forwarded to `search`.

    Uses random binary-labelled data with random group ids, then checks the
    trial count and the bookkeeping recorded on the best trial.
    """
    n_samples = 50
    objective = kt.Objective("score", "max")
    oracle = kt.oracles.BayesianOptimization(objective=objective, max_trials=10)
    tuner = kt.SklearnTuner(
        oracle=oracle,
        hypermodel=build_model,
        cv=model_selection.GroupKFold(5),
        directory=tmp_dir,
    )

    x = np.random.uniform(size=(n_samples, 10))
    y = np.random.randint(0, 2, size=(n_samples,))
    groups = np.random.randint(0, 5, size=(n_samples,))

    tuner.search(x, y, groups=groups)

    assert len(tuner.oracle.trials) == 10

    top_trial = tuner.oracle.get_best_trials()[0]
    assert top_trial.status == "COMPLETED"
    assert top_trial.score is not None
    assert top_trial.best_step == 0
    assert top_trial.metrics.exists("score")

    # Make sure the best model can be reloaded and used for scoring.
    reloaded = tuner.get_best_models()[0]
    reloaded.score(x, y)
def test_sklearn_pipeline(tmp_dir):
    """Search over `build_pipeline` with `sample_weight` forwarded to `search`.

    Uses random binary-labelled data and random per-sample weights, then
    checks the trial count and the bookkeeping recorded on the best trial.
    """
    n_samples = 50
    objective = kt.Objective("score", "max")
    oracle = kt.oracles.BayesianOptimization(objective=objective, max_trials=10)
    tuner = kt.SklearnTuner(
        oracle=oracle,
        hypermodel=build_pipeline,
        directory=tmp_dir,
    )

    x = np.random.uniform(size=(n_samples, 10))
    y = np.random.randint(0, 2, size=(n_samples,))
    weights = np.random.uniform(0.1, 1, size=(n_samples,))

    tuner.search(x, y, sample_weight=weights)

    assert len(tuner.oracle.trials) == 10

    top_trial = tuner.oracle.get_best_trials()[0]
    assert top_trial.status == "COMPLETED"
    assert top_trial.score is not None
    assert top_trial.best_step == 0
    assert top_trial.metrics.exists("score")

    # Make sure the best pipeline can be reloaded and used for scoring.
    reloaded = tuner.get_best_models()[0]
    reloaded.score(x, y)
def test_sklearn_additional_metrics(tmp_dir):
    """Search with extra `metrics` and check each is recorded on the best trial.

    Passes `balanced_accuracy_score` and `recall_score` as additional
    metrics and asserts that both, plus "score", exist on the best trial.
    """
    n_samples = 50
    objective = kt.Objective("score", "max")
    oracle = kt.oracles.BayesianOptimization(objective=objective, max_trials=10)
    extra_metrics = [metrics.balanced_accuracy_score, metrics.recall_score]
    tuner = kt.SklearnTuner(
        oracle=oracle,
        hypermodel=build_model,
        metrics=extra_metrics,
        directory=tmp_dir,
    )

    x = np.random.uniform(size=(n_samples, 10))
    y = np.random.randint(0, 2, size=(n_samples,))

    tuner.search(x, y)

    assert len(tuner.oracle.trials) == 10

    top_trial = tuner.oracle.get_best_trials()[0]
    assert top_trial.status == "COMPLETED"
    assert top_trial.score is not None
    assert top_trial.best_step == 0
    # The objective and both additional metrics must be present.
    for metric_name in ("score", "balanced_accuracy_score", "recall_score"):
        assert top_trial.metrics.exists(metric_name)

    # Make sure the best model can be reloaded and used for scoring.
    reloaded = tuner.get_best_models()[0]
    reloaded.score(x, y)
def test_sklearn_custom_scoring_and_cv(tmp_dir):
    """Search with a custom scorer and a custom CV splitter.

    Uses a scorer built from `balanced_accuracy_score` and a 5-fold
    `StratifiedKFold`, then checks the trial count and the bookkeeping
    recorded on the best trial.
    """
    n_samples = 50
    objective = kt.Objective("score", "max")
    oracle = kt.oracles.BayesianOptimization(objective=objective, max_trials=10)
    scorer = metrics.make_scorer(metrics.balanced_accuracy_score)
    folds = model_selection.StratifiedKFold(5)
    tuner = kt.SklearnTuner(
        oracle=oracle,
        hypermodel=build_model,
        scoring=scorer,
        cv=folds,
        directory=tmp_dir,
    )

    x = np.random.uniform(size=(n_samples, 10))
    y = np.random.randint(0, 2, size=(n_samples,))

    tuner.search(x, y)

    assert len(tuner.oracle.trials) == 10

    top_trial = tuner.oracle.get_best_trials()[0]
    assert top_trial.status == "COMPLETED"
    assert top_trial.score is not None
    assert top_trial.best_step == 0
    assert top_trial.metrics.exists("score")

    # Make sure the best model can be reloaded and used for scoring.
    reloaded = tuner.get_best_models()[0]
    reloaded.score(x, y)
def test_sklearn_tuner_with_df(tmp_dir):
    """Search with pandas DataFrames as inputs and check that 10 trials ran."""
    n_samples = 50
    objective = kt.Objective("score", "max")
    oracle = kt.oracles.BayesianOptimization(objective=objective, max_trials=10)
    tuner = kt.SklearnTuner(
        oracle=oracle,
        hypermodel=build_model,
        directory=tmp_dir,
    )

    # Both features and targets are wrapped in DataFrames.
    feature_values = np.random.uniform(size=(n_samples, 10))
    x = pd.DataFrame(feature_values)
    target_values = np.random.randint(0, 2, size=(n_samples,))
    y = pd.DataFrame(target_values)

    tuner.search(x, y)

    assert len(tuner.oracle.trials) == 10
def test_sklearn_not_install_error(tmp_dir):
    """Check that `SklearnTuner` raises `ImportError` when sklearn is missing.

    The missing dependency is simulated by setting the `sklearn` attribute
    of `kt.tuners.sklearn_tuner` to `None`. Constructing the tuner must then
    raise an `ImportError` whose message matches "Please install sklearn".
    """
    sklearn_module = kt.tuners.sklearn_tuner.sklearn
    kt.tuners.sklearn_tuner.sklearn = None
    try:
        with pytest.raises(ImportError, match="Please install sklearn"):
            kt.SklearnTuner(
                oracle=kt.oracles.BayesianOptimization(
                    objective=kt.Objective("score", "max"), max_trials=10
                ),
                hypermodel=build_model,
                directory=tmp_dir,
            )
    finally:
        # Always restore the module attribute, even if the assertion above
        # fails, so the patched `None` never leaks into other tests.
        kt.tuners.sklearn_tuner.sklearn = sklearn_module