def test_AUTO_stopping_metric_with_no_sorting_metric_regression():
    """Regression AutoML run without an explicit ``sort_metric``.

    Trains on the 'regression' dataset with DeepLearning and GLM excluded,
    runs ``check_leaderboard`` with "rmse" as the sort metric argument, then
    checks the ``stopping_metric`` property of the base models: ``None`` for
    the models whose name contains 'XGBoost_1', "deviance" for the rest.
    """
    print(
        "Check leaderboard with AUTO stopping metric and no sorting metric for regression"
    )
    dataset = import_dataset('regression', split=False)
    excluded = ["DeepLearning", "GLM"]
    automl_params = dict(
        project_name="py_aml_lb_test_auto_stopping_metric_no_sorting_regression",
        exclude_algos=excluded,
        max_models=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        seed=automl_seed,
    )
    automl = H2OAutoML(**automl_params)
    automl.train(y=dataset.target, training_frame=dataset.train)

    # NOTE(review): test_leaderboard_for_regression in this file passes
    # "mean_residual_deviance" (listed first) for the same default-sorting
    # regression case, while this test passes "rmse" -- confirm which
    # expectation matches the targeted H2O version.
    metric_names = ["rmse", "mse", "mae", "rmsle", "mean_residual_deviance"]
    check_leaderboard(automl, excluded, metric_names, "rmse")

    base_models = get_partitioned_model_names(automl.leaderboard).base
    first_xgb = [name for name in base_models if 'XGBoost_1' in name]
    remaining = [name for name in base_models if name not in first_xgb]
    # if stopping_rounds == 0, actual value of stopping_metric is set to None
    check_model_property(first_xgb, 'stopping_metric', True, None)
    check_model_property(remaining, 'stopping_metric', True, "deviance")
def test_AUTO_stopping_metric_with_no_sorting_metric_binary():
    """Binary AutoML run without an explicit ``sort_metric``.

    Trains on the 'binary' dataset with DeepLearning, GLM and StackedEnsemble
    excluded, runs ``check_leaderboard`` with "auc" as the sort metric
    argument, then checks the ``stopping_metric`` property of the base models:
    ``None`` for the models whose name contains 'XGBoost_1', "logloss" for
    the rest.
    """
    print(
        "Check leaderboard with AUTO stopping metric and no sorting metric for binary"
    )
    dataset = import_dataset('binary', split=False)
    excluded = ["DeepLearning", "GLM", "StackedEnsemble"]
    automl_params = dict(
        project_name="py_aml_lb_test_auto_stopping_metric_no_sorting_binary",
        seed=automl_seed,
        max_models=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl = H2OAutoML(**automl_params)
    automl.train(y=dataset.target, training_frame=dataset.train)

    metric_names = [
        "auc", "logloss", "aucpr", "mean_per_class_error", "rmse", "mse",
    ]
    check_leaderboard(automl, excluded, metric_names, "auc", True)

    base_models = get_partitioned_model_names(automl.leaderboard).base
    first_xgb = [name for name in base_models if 'XGBoost_1' in name]
    remaining = [name for name in base_models if name not in first_xgb]
    # if stopping_rounds == 0, actual value of stopping_metric is set to None
    check_model_property(first_xgb, 'stopping_metric', True, None)
    check_model_property(remaining, 'stopping_metric', True, "logloss")
def test_leaderboard_with_all_algos():
    """Multiclass AutoML run with no algorithm excluded.

    Trains up to 12 models on the 'multiclass' dataset and runs
    ``check_leaderboard`` with an empty exclusion list and
    "mean_per_class_error" as the sort metric argument.
    """
    print("Check leaderboard for all algorithms")
    dataset = import_dataset('multiclass', split=False)
    automl = H2OAutoML(
        project_name="py_aml_lb_test_all_algos",
        max_models=12,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        seed=automl_seed,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    metric_names = ["mean_per_class_error", "logloss", "rmse", "mse"]
    check_leaderboard(automl, [], metric_names, "mean_per_class_error")
def test_leaderboard_for_regression():
    """Regression AutoML run checked with default leaderboard sorting.

    Trains up to 8 models on the 'regression' dataset with GBM and
    DeepLearning excluded, then runs ``check_leaderboard`` with
    "mean_residual_deviance" as the sort metric argument.
    """
    print("Check leaderboard for Regression with default sorting")
    dataset = import_dataset('regression', split=False)
    excluded = ["GBM", "DeepLearning"]
    automl = H2OAutoML(
        project_name="py_aml_lb_test_default_regr_sort",
        exclude_algos=excluded,
        max_models=8,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        seed=automl_seed,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    # NOTE(review): test_AUTO_stopping_metric_with_no_sorting_metric_regression
    # in this file passes "rmse" (listed first) for a default-sorting
    # regression run, while this test passes "mean_residual_deviance" --
    # confirm which expectation matches the targeted H2O version.
    metric_names = ["mean_residual_deviance", "rmse", "mse", "mae", "rmsle"]
    check_leaderboard(automl, excluded, metric_names, "mean_residual_deviance")
def test_leaderboard_for_multiclass():
    """Multiclass AutoML run checked with default leaderboard sorting.

    Trains up to 8 models on the 'multiclass' dataset with GBM and
    DeepLearning excluded, then runs ``check_leaderboard`` with
    "mean_per_class_error" as the sort metric argument.
    """
    print("Check leaderboard for multiclass with default sorting")
    dataset = import_dataset('multiclass', split=False)
    excluded = ["GBM", "DeepLearning"]
    automl = H2OAutoML(
        project_name="py_aml_lb_test_default_multiclass_sort",
        seed=automl_seed,
        max_models=8,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    metric_names = ["mean_per_class_error", "logloss", "rmse", "mse"]
    check_leaderboard(automl, excluded, metric_names, "mean_per_class_error")
def test_leaderboard_with_no_algos():
    """Binary AutoML run with every algorithm in ``all_algos`` excluded.

    Asserts that the resulting leaderboard has zero rows, then runs
    ``check_leaderboard`` with an empty metric list and ``None`` for both
    trailing arguments.
    """
    print("Check leaderboard for excluding all algos (empty leaderboard)")
    dataset = import_dataset('binary', split=False)
    excluded = all_algos
    automl = H2OAutoML(
        project_name="py_aml_lb_test_no_algo",
        exclude_algos=excluded,
        max_runtime_secs=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        seed=automl_seed,
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    leaderboard = automl.leaderboard
    assert leaderboard.nrows == 0
    check_leaderboard(automl, excluded, [], None, None)
def test_AUTO_stopping_metric_with_custom_sorting_metric_regression():
    """Regression AutoML run with ``sort_metric="rmse"``.

    Trains on the 'regression' dataset with DeepLearning and GLM excluded,
    runs ``check_leaderboard`` with "rmse" as the sort metric argument, then
    checks that the ``stopping_metric`` property of all base models is "RMSE".
    """
    print("Check leaderboard with AUTO stopping metric and rmse sorting metric")
    dataset = import_dataset('regression', split=False)
    excluded = ["DeepLearning", "GLM"]
    automl_params = dict(
        project_name="py_aml_lb_test_auto_stopping_metric_custom_sorting",
        exclude_algos=excluded,
        max_models=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        seed=automl_seed,
        sort_metric="rmse",
    )
    automl = H2OAutoML(**automl_params)
    automl.train(y=dataset.target, training_frame=dataset.train)

    metric_names = ["rmse", "mean_residual_deviance", "mse", "mae", "rmsle"]
    check_leaderboard(automl, excluded, metric_names, "rmse")

    base_models = get_partitioned_model_names(automl.leaderboard).base
    check_model_property(base_models, 'stopping_metric', True, "RMSE")
def test_leaderboard_for_binary_with_custom_sorting():
    """Binary AutoML run with ``sort_metric="logloss"``.

    Trains up to 8 models on the 'binary' dataset with GLM, DeepLearning and
    DRF excluded, then runs ``check_leaderboard`` with "logloss" as the sort
    metric argument.
    """
    print("Check leaderboard for Binomial sort by logloss")
    dataset = import_dataset('binary', split=False)
    excluded = ["GLM", "DeepLearning", "DRF"]
    automl_params = dict(
        project_name="py_aml_lb_test_custom_binom_sort",
        seed=automl_seed,
        max_models=8,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
        sort_metric="logloss",
    )
    automl = H2OAutoML(**automl_params)
    automl.train(y=dataset.target, training_frame=dataset.train)

    metric_names = [
        "logloss", "auc", "aucpr", "mean_per_class_error", "rmse", "mse",
    ]
    check_leaderboard(automl, excluded, metric_names, "logloss")
def test_AUTO_stopping_metric_with_auc_sorting_metric():
    """Binary AutoML run with ``sort_metric='auc'``.

    Trains on the 'binary' dataset with DeepLearning, GLM and StackedEnsemble
    excluded, runs ``check_leaderboard`` with "auc" as the sort metric
    argument, then checks that the ``stopping_metric`` property of all base
    models is "logloss".
    """
    print("Check leaderboard with AUTO stopping metric and auc sorting metric")
    dataset = import_dataset('binary', split=False)
    excluded = ["DeepLearning", "GLM", "StackedEnsemble"]
    automl_params = dict(
        project_name="py_aml_lb_test_auto_stopping_metric_auc_sorting",
        seed=automl_seed,
        max_models=10,
        nfolds=2,
        stopping_rounds=1,
        stopping_tolerance=0.5,
        exclude_algos=excluded,
        sort_metric='auc',
    )
    automl = H2OAutoML(**automl_params)
    automl.train(y=dataset.target, training_frame=dataset.train)

    metric_names = [
        "auc", "logloss", "aucpr", "mean_per_class_error", "rmse", "mse",
    ]
    check_leaderboard(automl, excluded, metric_names, "auc", True)

    base_models = get_partitioned_model_names(automl.leaderboard).base
    check_model_property(base_models, 'stopping_metric', True, "logloss")