def test_columns_not_in_x_and_y_are_ignored():
    ds = import_dataset()
    names = ds.train.names
    x = ["AGE", "RACE", "DPROS"]
    y = ds.target

    def test_with_x_y_as_str_list():
        aml = H2OAutoML(max_models=2, stopping_rounds=3, stopping_tolerance=0.001)
        aml.train(x=x, y=y, training_frame=ds.train, validation_frame=ds.valid, leaderboard_frame=ds.test)
        print("AutoML leaderboard")
        print(aml.leaderboard)
        models = aml.leaderboard["model_id"]
        check_ignore_cols_automl(models, names, x, y)

    def test_with_x_y_as_indices():
        aml = H2OAutoML(max_models=2, stopping_rounds=3, stopping_tolerance=0.001)
        aml.train(x=[2, 3, 4], y=1, training_frame=ds.train, validation_frame=ds.valid, leaderboard_frame=ds.test)
        print("AutoML leaderboard")
        print(aml.leaderboard)
        models = aml.leaderboard["model_id"]
        check_ignore_cols_automl(models, names, x, y)

    def test_with_x_as_str_list_y_as_index():
        aml = H2OAutoML(max_models=2, stopping_rounds=3, stopping_tolerance=0.001)
        aml.train(x=x, y=1, training_frame=ds.train, validation_frame=ds.valid, leaderboard_frame=ds.test)
        print("AutoML leaderboard")
        print(aml.leaderboard)
        models = aml.leaderboard["model_id"]
        check_ignore_cols_automl(models, names, x, y)

    def test_with_x_indices_y_as_str():
        aml = H2OAutoML(max_models=2, stopping_rounds=3, stopping_tolerance=0.001)
        aml.train(x=[2, 3, 4], y=y, training_frame=ds.train, validation_frame=ds.valid, leaderboard_frame=ds.test)
        print("AutoML leaderboard")
        print(aml.leaderboard)
        models = aml.leaderboard["model_id"]
        check_ignore_cols_automl(models, names, x, y)

    pu.run_tests([
        test_with_x_y_as_str_list,
        test_with_x_y_as_indices,
        test_with_x_as_str_list_y_as_index,
        test_with_x_indices_y_as_str
    ], run_in_isolation=False)
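# Hypothetical sketch (not part of the original suite): a plausible implementation of the
# `check_ignore_cols_automl` helper used above. It assumes each non-ensemble leaderboard
# model should report `ignored_columns` equal to the dataset columns outside x and {y}.
import h2o

def check_ignore_cols_automl(models, names, x, y):
    models = models.as_data_frame().values.flatten().tolist()
    ignored_expected = set(names) - set(x) - {y}
    for model_id in models:
        if "StackedEnsemble" in model_id:
            continue  # ensembles consume base-model predictions, not raw columns
        model = h2o.get_model(model_id)
        ignored_actual = set(model.params["ignored_columns"]["actual"] or [])
        assert ignored_actual == ignored_expected, \
            "model {} ignored {} instead of {}".format(model_id, ignored_actual, ignored_expected)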
        assert len(results.test_pclasses) == 1

    def check_stackedensemble_with_GLM_metalearner_with_standardization_disabled(data, models):
        se = H2OStackedEnsembleEstimator(base_models=models,
                                         metalearner_algorithm='GLM',
                                         metalearner_nfolds=5,
                                         metalearner_params=dict(standardize=False),
                                         seed=seed)
        se.train(data.x, data.y, data.train)
        results = scores_and_preds(se, data.test)
        print(results)
        assert data.domain == results.test_pclasses, \
            "expected predicted classes {} but got {}".format(data.domain, results.test_pclasses)

    data = setup_data()
    base_models = train_base_models(data)
    bad_model = train_bad_model(data)
    # print(scores_and_preds(bad_model, data.test))
    all_models = base_models + [bad_model]
    check_stackedensemble_with_AUTO_metalearner(data, all_models)
    check_stackedensemble_with_DRF_metalearner(data, all_models)
    check_stackedensemble_with_GLM_metalearner(data, all_models)
    check_stackedensemble_with_GLM_metalearner_with_standardization_disabled(data, all_models)


pu.run_tests([test_models_not_predicting_some_classes_dont_corrupt_resulting_SE_model])
    ]
    gbm = H2OGradientBoostingEstimator(ntrees=1, offset_column="sulph",
                                       weights_column="alc", fold_column="tp")
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('default')
        gbm.train(x=x, y=y, training_frame=train,
                  offset_column="sulphates", weights_column="alcohol", fold_column="type")
        gbm.train(x=x, y=y, training_frame=train,
                  offset_column="sulphates", weights_column="alcohol", fold_column="type")
    assert len(w) == 2 * 3  # 2 train calls, each overriding 3 constructor params


pu.run_tests([
    test_emit_no_warning_on_param_set_only_on_constructor,
    test_emit_no_warning_on_param_set_only_on_train_method,
    test_emit_no_warning_on_param_set_same_on_both_constructor_and_train_method,
    test_emit_no_warning_if_constructor_param_is_explicitly_set_to_default_value,
    test_emit_warning_on_param_set_differently_on_both_constructor_and_train_method,
    test_emit_warning_for_each_call_to_train_that_is_overriding_params,
])
        se_model = train_stacked_ensemble(ds, base_models)
        # Predict with the ensemble in the Py client
        preds_py = se_model.predict(ds.test)
        tmp_dir = tempfile.mkdtemp()
        try:
            bin_file = h2o.save_model(se_model, tmp_dir)
            # Load the binary model and predict
            bin_model = h2o.load_model(pu.locate(bin_file))
            preds_bin = bin_model.predict(ds.test)
        finally:
            shutil.rmtree(tmp_dir)
        # Predictions from the in-memory model and the reloaded binary model should be the same
        pred_diff = preds_bin - preds_py
        assert pred_diff["p0"].max() < 1e-11
        assert pred_diff["p1"].max() < 1e-11
        assert pred_diff["p0"].min() > -1e-11
        assert pred_diff["p1"].min() > -1e-11

    return [pu.tag_test(test, 'blending' if blending else None) for test in [
        test_saved_binary_model_produces_same_predictions_as_original
    ]]


pu.run_tests([
    test_suite_stackedensemble_binary_model(),
    test_suite_stackedensemble_binary_model(blending=True),
])
def test_several_automl_instances_can_run_in_parallel():
    # this is not a recommended behaviour, but it should work nonetheless.
    parallel = 5
    models_per_run = 3
    amls = [H2OAutoML(max_models=models_per_run, nfolds=0, seed=1) for _ in range(parallel)]
    ds = import_dataset()
    with ThreadPoolExecutor(max_workers=parallel) as executor:
        for i, aml in enumerate(amls):
            train = partial(aml.train, y=ds.target, training_frame=ds.train, leaderboard_frame=ds.test)
            executor.submit(train)

    project_names = [aml.project_name for aml in amls]
    print(project_names)
    assert len(set(project_names)) == parallel
    leaderboards = [aml.leaderboard for aml in amls]
    models = flatten([[lb[i, 0] for i in range(lb.nrows)] for lb in leaderboards])
    print(models)
    assert len(set(models)) == parallel * models_per_run


pu.run_tests([test_several_automl_instances_can_run_in_parallel])
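# Assumed helper (not shown in the original file): `flatten` as used above just chains a
# list of lists into a single flat list.
import itertools

def flatten(list_of_lists):
    return list(itertools.chain.from_iterable(list_of_lists))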
        else:
            assert not _has_method(cls, meth), \
                "Class {} should not have method {}".format(name, meth)
        _check_exposed_in_h2o_sklearn_module(cls)


def test_transformers_exposed_in_h2o_sklearn_transforms_module():
    import h2o.transforms
    mod = importlib.import_module('h2o.sklearn.transforms')
    class_names = [name for name, _ in inspect.getmembers(h2o.transforms, inspect.isclass)]
    for cl_name in class_names:
        for name in _make_transformer_names(cl_name):
            cls = getattr(mod, name, None)
            assert cls, "Class {} is missing in module {}".format(name, mod)
            for meth in sklearn_transformer_methods:
                assert _has_method(cls, meth), \
                    "Class {} is missing method {}".format(name, meth)
            _check_exposed_in_h2o_sklearn_module(cls)


pyunit_utils.run_tests([
    test_automl_estimators_exposed_in_h2o_sklearn_automl_module,
    test_algos_estimators_exposed_in_h2o_sklearn_estimators_module,
    test_transformers_exposed_in_h2o_sklearn_transforms_module,
])
        gbm = h2o.upload_model(glob.glob(TMP_DIR + "/gbm.model/*")[0])
        drf = h2o.upload_model(glob.glob(TMP_DIR + "/drf.model/*")[0])
        train = h2o.import_file(pu.locate("smalldata/iris/iris_train.csv"),
                                "some_other_name_of_training_frame")
        test = h2o.import_file(pu.locate("smalldata/iris/iris_test.csv"),
                               "some_other_name_of_test_frame")
        x = train.columns
        y = "species"
        x.remove(y)

        se_loaded = H2OStackedEnsembleEstimator(training_frame=train,
                                                validation_frame=test,
                                                base_models=[gbm.model_id, drf.model_id])
        se_loaded.train(x=x, y=y, training_frame=train)

        assert len(se_loaded.base_models) == 2
    finally:
        shutil.rmtree(TMP_DIR)


pu.run_tests([
    test_suite_stackedensemble_base_models(),
    test_suite_stackedensemble_base_models(blending=True),
    test_base_models_are_populated,
    test_stacked_ensemble_accepts_mixed_definition_of_base_models(),
    test_stacked_ensemble_is_able_to_use_imported_base_models,
])
print("Best Base-learner Test RMSE: {}".format(baselearner_best_rmse_test)) print("Ensemble Test RMSE: {}".format(stack_rmse_test)) assert_warn(stack_rmse_test < baselearner_best_rmse_test, "expected SE test RMSE would be smaller than the best of base learner test RMSE, but obtained: " \ "RMSE (SE) = {}, RMSE (best base learner) = {}".format(stack_rmse_test, baselearner_best_rmse_test)) def test_validation_frame_produces_same_metric_as_perf_test(): ds = prepare_data(blending) models = train_base_models(ds) se = train_stacked_ensemble(ds, models, validation_frame=ds.test) se_perf = se.model_performance(test_data=ds.test) se_perf_validation_frame = se.model_performance(valid=True) # since the metrics object is not exactly the same, we can just test that RSME is the same assert se_perf.rmse() == se_perf_validation_frame.rmse(), \ "expected SE test RMSE to be the same as SE validation frame RMSE, but obtained: " \ "RMSE (perf on test) = {}, RMSE (test passed as validation frame) = {}".format(se_perf.rmse(), se_perf_validation_frame.rmse()) return [pu.tag_test(test, 'blending' if blending else None) for test in [ test_predict_on_se_model, test_se_performance_is_better_than_individual_models, test_validation_frame_produces_same_metric_as_perf_test ]] pu.run_tests([ test_suite_stackedensemble_gaussian(), test_suite_stackedensemble_gaussian(blending=True) ])
    assert abs(int(aml.training_info['duration_secs'])
               - (int(aml.training_info['stop_epoch']) - int(aml.training_info['start_epoch']))) <= 1


def test_train_verbosity():
    train = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    y = 'CAPSULE'
    train[y] = train[y].asfactor()
    make_aml = lambda verbosity=None: H2OAutoML(
        project_name="test_train_verbosity",
        keep_cross_validation_predictions=True,
        max_models=2,
        stopping_tolerance=0.01,  # triggers a warning event log message
        seed=1234,
        verbosity=verbosity)
    print("verbosity off")
    make_aml().train(y=y, training_frame=train)
    print("verbosity debug")
    make_aml('debug').train(y=y, training_frame=train)
    print("verbosity info")
    make_aml('info').train(y=y, training_frame=train)
    print("verbosity warn")
    make_aml('warn').train(y=y, training_frame=train)


pyunit_utils.run_tests([test_event_log, test_train_verbosity])
    def test_a_better_model_is_produced_with_validation_frame():
        ds = prepare_data(blending)
        base_models = train_base_models(ds)
        se_no_valid = train_stacked_ensemble(ds, base_models)
        se_valid = train_stacked_ensemble(ds, base_models, validation_frame=ds.valid)

        assert se_no_valid.model_performance(valid=True) is None
        assert se_valid.model_performance(valid=True) is not None

        se_no_valid_perf = se_no_valid.model_performance(test_data=ds.test)
        se_valid_perf = se_valid.model_performance(test_data=ds.test)
        tolerance = 1e-3  # ad hoc tolerance, as there's no guarantee perf will actually be better with a validation frame
        assert se_no_valid_perf.auc() < se_valid_perf.auc() or (se_no_valid_perf.auc() - se_valid_perf.auc()) < tolerance, \
            "Expected that a better model would be produced when passing a validation frame, but obtained: " \
            "AUC (no validation) = {}, AUC (validation frame) = {}".format(se_no_valid_perf.auc(), se_valid_perf.auc())

    return [pu.tag_test(test, 'blending' if blending else None) for test in [
        test_validation_metrics_are_computed_when_providing_validation_frame,
        test_a_better_model_is_produced_with_validation_frame
    ]]


pu.run_tests([
    test_suite_stackedensemble_validation_frame(),
    test_suite_stackedensemble_validation_frame(blending=True)
])
        stack_auc_test = perf_se.test.auc()
        print("Best Base-learner Test AUC: {}".format(baselearner_best_auc_test))
        print("Ensemble Test AUC: {}".format(stack_auc_test))
        assert stack_auc_test > baselearner_best_auc_test, \
            "expected SE test AUC would be greater than the best base learner's test AUC, but obtained: " \
            "AUC (SE) = {}, AUC (best base learner) = {}".format(stack_auc_test, baselearner_best_auc_test)

    def test_validation_frame_produces_same_metric_as_perf_test():
        ds = prepare_data(blending)
        models = train_base_models(ds)
        se = train_stacked_ensemble(ds, models, validation_frame=ds.test)
        se_perf = se.model_performance(test_data=ds.test)
        # since the metrics objects are not exactly the same, we just test that AUC is the same
        se_perf_validation_frame = se.model_performance(valid=True)
        assert se_perf.auc() == se_perf_validation_frame.auc(), \
            "expected SE test AUC to be the same as SE validation frame AUC, but obtained: " \
            "AUC (perf on test) = {}, AUC (test passed as validation frame) = {}".format(se_perf.auc(), se_perf_validation_frame.auc())

    return [pu.tag_test(test, 'blending' if blending else None) for test in [
        test_predict_on_se_model,
        test_se_performance_is_better_than_individual_models,
        test_validation_frame_produces_same_metric_as_perf_test
    ]]


pu.run_tests([
    test_suite_stackedensemble_binomial(),
    test_suite_stackedensemble_binomial(blending=True)
])
        bm = train_base_models(datasets)
        se = train_stacked_ensemble(ds, bm)
        assert se.auc() > 0

    def test_se_fails_when_base_models_use_incompatible_training_frames():
        """
        Test that SE fails when passing in base models that were trained with frames of different size.
        """
        ds = prepare_data(blending)
        datasets = pu.ns(gbm=ds.extend(x=None),
                         drf=ds.extend(x=None, train=ds.train[0:ds.train.nrows//2, :]))
        bm = train_base_models(datasets)
        try:
            train_stacked_ensemble(ds, bm)
            assert False, "Stacked Ensembles of models with different training frame sizes should fail"
        except Exception as e:
            assert "Base models are inconsistent: they use different size (number of rows) training frames" in str(e), \
                "wrong error message: {}".format(str(e))
            # raise e

    return [pu.tag_test(test, 'blending' if blending else None) for test in [
        test_base_models_can_use_different_x,
        test_base_models_can_use_different_compatible_training_frames,
        test_se_fails_when_base_models_use_incompatible_training_frames
    ]]


pu.run_tests([
    test_suite_stackedensemble_training_frame(),
    test_suite_stackedensemble_training_frame(blending=True),
])
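# Hypothetical sketch (illustrative only, not the original helper): the shape of a typical
# `prepare_data(blending)` function in these SE suites — split one frame into train/valid/test
# parts, plus a blending frame in blending mode. Dataset path and split ratios are assumptions.
import h2o

def prepare_data(blending=False):
    fr = h2o.import_file(pu.locate("smalldata/higgs/higgs_train_10k.csv"))
    target = "response"
    fr[target] = fr[target].asfactor()
    x = fr.columns
    x.remove(target)
    ds = pu.ns(x=x, y=target)
    if blending:
        train, valid, test, blend = fr.split_frame(ratios=[.5, .2, .15], seed=1)
        return ds.extend(train=train, valid=valid, test=test, blend=blend)
    train, valid, test = fr.split_frame(ratios=[.6, .2], seed=1)
    return ds.extend(train=train, valid=valid, test=test)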
                                     nfolds=nfolds,
                                     keep_cross_validation_predictions=True,
                                     seed=1)
    my_rf.train(x=x, y=y, training_frame=train)

    # Train two SE models with the same metalearner seed
    stack_gbm1 = H2OStackedEnsembleEstimator(base_models=[my_gbm, my_rf],
                                             metalearner_algorithm="gbm",
                                             metalearner_params=gbm_params,
                                             seed=55555)
    stack_gbm2 = H2OStackedEnsembleEstimator(base_models=[my_gbm, my_rf],
                                             metalearner_algorithm="gbm",
                                             metalearner_params=gbm_params,
                                             seed=55555)
    stack_gbm1.train(x=x, y=y, training_frame=train)
    stack_gbm2.train(x=x, y=y, training_frame=train)
    meta_gbm1 = h2o.get_model(stack_gbm1.metalearner()['name'])
    meta_gbm2 = h2o.get_model(stack_gbm2.metalearner()['name'])
    assert meta_gbm1.rmse(train=True) == meta_gbm2.rmse(train=True), "RMSE should match if same seed"

    # Train two SE models with different metalearner seeds
    stack_gbm3 = H2OStackedEnsembleEstimator(base_models=[my_gbm, my_rf],
                                             metalearner_algorithm="gbm",
                                             metalearner_params=gbm_params,
                                             seed=55555)
    stack_gbm4 = H2OStackedEnsembleEstimator(base_models=[my_gbm, my_rf],
                                             metalearner_algorithm="gbm",
                                             metalearner_params=gbm_params,
                                             seed=98765)
    stack_gbm3.train(x=x, y=y, training_frame=train)
    stack_gbm4.train(x=x, y=y, training_frame=train)
    meta_gbm3 = h2o.get_model(stack_gbm3.metalearner()['name'])
    meta_gbm4 = h2o.get_model(stack_gbm4.metalearner()['name'])
    assert meta_gbm3.rmse(train=True) != meta_gbm4.rmse(train=True), "RMSE should NOT match if diff seed"


pyunit_utils.run_tests([stackedensemble_metalearner_seed_test])
        ds = import_dataset()
        aml1 = H2OAutoML(project_name="test_automl_rerun",
                         max_models=max_models,
                         seed=1,
                         keep_cross_validation_predictions=True)
        aml1.train(y=ds.target, training_frame=ds.train)
        lb1 = model_names(aml1.leaderboard)

        aml2 = H2OAutoML(project_name="test_automl_rerun",
                         max_models=max_models,
                         seed=1,
                         keep_cross_validation_predictions=True)
        aml2.train(y=ds.target_alt, training_frame=ds.train)
        lb2 = model_names(aml2.leaderboard)

        assert aml1.project_name == aml2.project_name
        assert_distinct_leaderboard(lb1, lb2, size=max_models)

    return [
        test_rerun_with_same_data_adds_models_to_leaderboard,
        test_rerun_with_different_predictors_adds_models_to_leaderboard,
        test_rerun_with_different_training_frame_adds_models_to_leaderboard,
        test_rerun_with_different_target_resets_leaderboard,
    ]


pu.run_tests([
    suite_reruns_with_same_instance_without_project_name(),
    suite_reruns_with_same_instance_with_project_name(),
    suite_reruns_with_different_instance_without_project_name(),
    suite_reruns_with_different_instances_same_project_name(),
])
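# Assumed helpers (not shown in the original file): `model_names` extracts the leaderboard's
# model ids as a plain list, and `assert_distinct_leaderboard` checks that rerunning with a
# different target produced a fresh leaderboard sharing no models with the first run.
def model_names(leaderboard):
    return [leaderboard[i, "model_id"] for i in range(leaderboard.nrows)]

def assert_distinct_leaderboard(lb1, lb2, size):
    assert len(lb2) >= size, "expected at least {} models, got {}".format(size, len(lb2))
    assert not set(lb1) & set(lb2), "a rerun with a different target should not reuse models"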
                     False):
            print("ERROR !!! " + str(e))
        else:
            raise e


def make_test(test, classifier):
    bound_test = partial(test, classifier)
    bound_test.__name__ = test.__name__
    pyunit_utils.tag_test(bound_test, classifier.__name__)
    return bound_test


def make_tests(classifier):
    return list(map(lambda test: make_test(test, classifier), [
        test_estimator_with_h2o_frames,
        test_estimator_with_numpy_arrays,
        test_scores_are_equivalent
    ]))


failing = [
    'H2OStackedEnsembleClassifier',  # needs a separate test (requires models as parameters)
]
classifiers = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
               if name.endswith('Classifier') and name not in ['H2OAutoMLClassifier'] + failing]
pyunit_utils.run_tests([make_tests(c) for c in classifiers])
    assert model._model_json['output']['stacking_strategy'] == 'blending'

# TODO PUBDEV-5676
# Add a test that checks fold_column like in runit

pyunit_utils.run_tests([
    test_early_stopping_args,
    test_no_x_train_set_only,
    test_no_x_train_and_validation_sets,
    test_no_x_train_and_test_sets,
    test_no_x_train_and_validation_and_test_sets,
    test_no_x_y_as_idx_train_and_validation_and_test_sets,
    test_exclude_algos,
    test_include_algos,
    test_include_exclude_algos,
    test_predict_on_train_set,
    test_nfolds_param,
    test_nfolds_eq_0,
    test_balance_classes,
    test_nfolds_default_and_fold_assignements_skipped_by_default,
    test_keep_cross_validation_fold_assignment_enabled_with_nfolds_neq_0,
    test_keep_cross_validation_fold_assignment_enabled_with_nfolds_eq_0,
    test_automl_stops_after_max_runtime_secs,
    test_no_model_takes_more_than_max_runtime_secs_per_model,
    test_stacked_ensembles_are_trained_after_timeout,
    test_automl_stops_after_max_models,
    test_stacked_ensembles_are_trained_after_max_models,
    test_stacked_ensembles_are_trained_with_blending_frame_even_if_nfolds_eq_0,
])
    assert max_runtime == 12
    assert max_models == 20
    assert aml.leaderboard.nrows < 20
    assert int(aml.training_info['duration_secs']) < 2 * max_runtime  # being generous to avoid errors on slow Jenkins


def test_default_max_runtime_if_no_max_models_provided():
    ds = import_dataset()
    aml = H2OAutoML(project_name="py_no_stopping_constraints", seed=1, verbosity='Info')
    with pu.Timeout(5, on_timeout=lambda: aml._job.cancel()):
        try:
            aml.train(y=ds.target, training_frame=ds.train)
        except H2OJobCancelled:
            pass
    max_runtime = aml._build_resp['build_control']['stopping_criteria']['max_runtime_secs']
    max_models = aml._build_resp['build_control']['stopping_criteria']['max_models']
    assert max_runtime == 3600
    assert max_models == 0


pu.run_tests([
    test_early_stopping_defaults,
    test_early_stopping_args,
    test_automl_stops_after_max_models,
    test_no_time_limit_if_max_models_is_provided,
    test_max_runtime_secs_alone,
    test_max_runtime_secs_can_be_set_in_combination_with_max_models_and_max_models_wins,
    test_max_runtime_secs_can_be_set_in_combination_with_max_models_and_max_runtime_wins,
    test_default_max_runtime_if_no_max_models_provided,
])
                               training_frame=dataset.train,
                               blending_frame=dataset.blend if hasattr(dataset, 'blend') else None,
                               **kwargs)
    return se


def test_suite_stackedensemble_base_models(blending=False):

    def test_base_models_can_be_passed_as_objects_or_as_ids():
        """This test checks the following:
        1) That passing in a list of models for base_models works.
        2) That passing in a list of models and model_ids results in the same stacked ensemble.
        """
        ds = prepare_data(blending)
        base_models = train_base_models(ds)
        se1 = train_stacked_ensemble(ds, [m.model_id for m in base_models])
        se2 = train_stacked_ensemble(ds, base_models)
        # Evaluate train AUC to assess equivalence
        assert se1.auc() == se2.auc()

    return [pu.tag_test(test, 'blending' if blending else None) for test in [
        test_base_models_can_be_passed_as_objects_or_as_ids
    ]]


pu.run_tests([
    test_suite_stackedensemble_base_models(),
    test_suite_stackedensemble_base_models(blending=True),
])
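# Hypothetical sketch (hyperparameters are illustrative, not the original helper): a typical
# `train_base_models(ds)` for these SE suites — a GBM and a DRF trained with cross-validation
# predictions kept, or without CV when the suite runs in blending mode.
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator

def train_base_models(ds):
    nfolds = 0 if hasattr(ds, 'blend') else 3
    common = dict(ntrees=10, nfolds=nfolds,
                  keep_cross_validation_predictions=nfolds > 0, seed=1)
    gbm = H2OGradientBoostingEstimator(**common)
    gbm.train(x=ds.x, y=ds.y, training_frame=ds.train)
    drf = H2ORandomForestEstimator(**common)
    drf.train(x=ds.x, y=ds.y, training_frame=ds.train)
    return [gbm, drf]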
h2o.rapids("(setproperty \"{}\" \"{}\")".format( "sys.ai.h2o.automl.algo_parameters.all.enabled", "true")) ds = import_dataset('regression') aml = H2OAutoML(project_name="py_test", algo_parameters=dict( distribution='poisson', family='poisson', ), exclude_algos=['StackedEnsemble'], max_runtime_secs=60, seed=1) aml.train(y=ds.target, training_frame=ds.train) model_names = [ aml.leaderboard[i, 0] for i in range(0, (aml.leaderboard.nrows)) ] for mn in model_names: m = h2o.get_model(mn) dist = m.params[ 'distribution'] if 'distribution' in m.params else m.params[ 'family'] if 'family' in m.params else None print("{}: distribution = {}".format(mn, dist)) except: h2o.rapids("(setproperty \"{}\" \"{}\")".format( "sys.ai.h2o.automl.algo_parameters.all.enabled", "false")) pu.run_tests([ test_default_automl_with_regression_task, test_workaround_for_distribution, ])
        else:
            cols_to_test.append(col)

    gbm = H2OGradientBoostingEstimator(seed=1234, model_id="my_awesome_model")
    gbm.train(y=y, training_frame=train)

    assert isinstance(gbm.ice_plot(train, 'title').figure(), matplotlib.pyplot.Figure)
    assert isinstance(gbm.ice_plot(train, 'title', show_pdp=True).figure(), matplotlib.pyplot.Figure)
    assert isinstance(gbm.ice_plot(train, 'title', show_pdp=False).figure(), matplotlib.pyplot.Figure)
    assert isinstance(gbm.ice_plot(train, 'age').figure(), matplotlib.pyplot.Figure)
    assert isinstance(gbm.ice_plot(train, 'age', show_pdp=True).figure(), matplotlib.pyplot.Figure)
    assert isinstance(gbm.ice_plot(train, 'age', show_pdp=False).figure(), matplotlib.pyplot.Figure)
    matplotlib.pyplot.close("all")


pyunit_utils.run_tests([
    test_original_values,
    test_handle_orig_values,
    test_display_mode,
    test_binary_response_scale,
    test_show_pdd,
    test_grouping_column
])
    get_aml = get_automl(aml.project_name)

    assert aml.project_name == get_aml["project_name"]
    assert aml.leader.model_id == get_aml["leader"].model_id
    assert aml.leaderboard.get_frame_data() == get_aml["leaderboard"].get_frame_data()
    assert aml.event_log.get_frame_data() == get_aml["event_log"].get_frame_data()
    assert aml.training_info == get_aml['training_info']

    # PUBDEV-6599
    assert aml.project_name == get_aml.project_name
    assert aml.leader.model_id == get_aml.leader.model_id
    assert aml.leaderboard.frame_id == get_aml.leaderboard.frame_id
    assert aml.event_log.frame_id == get_aml.event_log.frame_id
    assert aml.training_info == get_aml.training_info

    # Test predictions
    predictions = aml.predict(ds.test)
    predictions_from_output = get_aml.predict(ds.test)
    assert (predictions == predictions_from_output).all()

    # Test get_leaderboard PUBDEV-7454
    assert (get_leaderboard(aml) == get_leaderboard(get_aml)).all()
    assert (get_leaderboard(aml, 'ALL') == get_leaderboard(get_aml, 'ALL')).all()


pu.run_tests([test_get_automl])
def test_keep_cross_validation_fold_assignment_enabled_with_nfolds_eq_0():
    print("Check that fold assignments were skipped when `keep_cross_validation_fold_assignment` = True and nfolds = 0")
    ds = import_dataset()
    aml = H2OAutoML(project_name="py_aml_keep_cross_validation_fold_assignment_2",
                    nfolds=0,
                    max_models=3,
                    seed=1,
                    keep_cross_validation_fold_assignment=True)
    aml.train(y=ds.target, training_frame=ds.train)
    base_models = get_partitioned_model_names(aml.leaderboard).base
    amodel = h2o.get_model(base_models[0])
    assert amodel.params['keep_cross_validation_fold_assignment']['actual'] == False
    assert amodel._model_json["output"]["cross_validation_fold_assignment_frame_id"] == None


pu.run_tests([
    test_nfolds_param,
    test_nfolds_eq_0,
    test_fold_column,
    test_weights_column,
    test_fold_column_with_weights_column,
    test_nfolds_default_and_fold_assignements_skipped_by_default,
    test_keep_cross_validation_fold_assignment_enabled_with_nfolds_neq_0,
    test_keep_cross_validation_fold_assignment_enabled_with_nfolds_eq_0,
])
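# Assumed helper (not shown in the original file): `get_partitioned_model_names` splits the
# leaderboard's model ids into stacked ensembles and base models.
def get_partitioned_model_names(leaderboard):
    all_models = [leaderboard[i, "model_id"] for i in range(leaderboard.nrows)]
    se = [m for m in all_models if "StackedEnsemble" in m]
    base = [m for m in all_models if m not in se]
    return pu.ns(all=all_models, se=se, base=base)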
            h2o.remove(v[0])
        h2o.remove(aml)
        clean = list_keys_in_memory()
        print(clean['all'].values)
        assert aml.key.startswith(project_name)
        assert not contains_leaderboard(aml.key, clean)
        assert not contains_event_log(aml.key, clean)
        assert len(clean['models_base']) == 0
        assert len(clean['cv_models']) == 0
        assert len(clean['models_all']) == 0
        assert len(clean['metrics']) == 0
        assert len(clean['predictions']) == 0
        assert len(clean['automl']) == 0
        for frame in [ds.train, ds.valid, ds.test]:
            assert frame_in_cluster(frame), "frame {} has been removed from cluster".format(frame.frame_id)

    return [
        test_remove_automl_with_xval,
        test_remove_automl_with_xval_when_keeping_all_cv_details,
        test_remove_automl_no_xval,
        test_remove_automl_after_individual_manual_deletions
    ]


pu.run_tests([
    test_suite_clean_cv_predictions(),
    test_suite_clean_cv_models(),
    test_suite_remove_automl()
])
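# Assumed helper (not shown in the original file): `frame_in_cluster` checks whether a frame
# key is still registered on the H2O backend; h2o.ls() lists all keys in the cluster.
import h2o

def frame_in_cluster(frame):
    return frame.frame_id in h2o.ls()['key'].values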
    score = search.score(data.X_test, data.y_test)
    assert isinstance(score, float)
    skl_score = accuracy_score(data.y_test, preds)
    assert abs(score - skl_score) < 1e-6, "score={}, skl_score={}".format(score, skl_score)
    scores['mixed_pipeline_with_numpy_arrays'] = score


def _assert_test_scores_equivalent(lk, rk):
    if lk in scores and rk in scores:
        # note: compare the absolute difference of the scores, not a nested abs of one score
        assert abs(scores[lk] - scores[rk]) < 1e-6, \
            "expected equivalent scores but got {lk}={lscore} and {rk}={rscore}" \
            .format(lk=lk, rk=rk, lscore=scores[lk], rscore=scores[rk])
    elif lk not in scores:
        print("no scores for {}".format(lk))
    else:
        print("no scores for {}".format(rk))


def test_scores_are_equivalent():
    _assert_test_scores_equivalent('h2o_only_pipeline_with_h2o_frame', 'h2o_only_pipeline_with_numpy_arrays')


pyunit_utils.run_tests([
    test_h2o_only_pipeline_with_h2o_frames,
    test_h2o_only_pipeline_with_numpy_arrays,
    test_mixed_pipeline_with_numpy_arrays,
    test_scores_are_equivalent,
])
    aml.train(y=ds.target, training_frame=ds.train)
    check_leaderboard(aml, exclude_algos, ["mean_per_class_error", "logloss", "rmse", "mse"], "mean_per_class_error")


def test_leaderboard_for_multiclass_with_custom_sorting():
    print("Check leaderboard for multiclass sort by logloss")
    ds = import_dataset('multiclass', split=False)
    exclude_algos = ["DeepLearning"]
    aml = H2OAutoML(project_name="py_aml_lb_test_custom_multiclass_sort",
                    seed=automl_seed,
                    max_models=10,
                    nfolds=2,
                    stopping_rounds=1,
                    stopping_tolerance=0.5,
                    exclude_algos=exclude_algos,
                    sort_metric="logloss")
    aml.train(y=ds.target, training_frame=ds.train)
    check_leaderboard(aml, exclude_algos, ["logloss", "mean_per_class_error", "rmse", "mse"], "logloss")


pu.run_tests([
    test_leaderboard_for_multiclass,
    test_leaderboard_for_multiclass_with_custom_sorting,
])
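# Hypothetical sketch (assumed, not the original helper): `check_leaderboard` verifies the
# leaderboard columns, the sort order, and that excluded algorithms never appear.
def check_leaderboard(aml, excluded_algos, expected_metrics, sort_metric):
    lb = aml.leaderboard
    assert lb.names == ["model_id"] + expected_metrics, "unexpected leaderboard columns: {}".format(lb.names)
    sort_col = lb[sort_metric].as_data_frame()[sort_metric].tolist()
    assert sort_col == sorted(sort_col), "leaderboard is not sorted by {}".format(sort_metric)
    for i in range(lb.nrows):
        model_id = lb[i, "model_id"]
        assert not any(algo.lower() in model_id.lower() for algo in excluded_algos), \
            "found excluded algo in leaderboard: {}".format(model_id)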
print("GAM performance with test_data=train: {0}, with test_data=test: {1} and train=True: " "{2}".format(h2o_model.model_performance(test_data=train)._metric_json["MSE"], h2o_model.model_performance(test_data=test)._metric_json["MSE"], h2o_model.model_performance(train=True)._metric_json["MSE"])) assert abs(h2o_model.model_performance(test_data=train_clone)._metric_json["MSE"] - h2o_model.model_performance(train=True)._metric_json["MSE"]) < 1e-6 def gam_train_metrics_recalculate_poisson(): gam_train_metrics_recalculate("poisson") def gam_train_metrics_recalculate_tweedie(): gam_train_metrics_recalculate("tweedie") def gam_train_metrics_recalculate_gamma(): gam_train_metrics_recalculate("gamma") def gam_train_metrics_recalculate_gaussian(): gam_train_metrics_recalculate("gaussian") pyunit_utils.run_tests([ gam_train_metrics_recalculate_poisson, gam_train_metrics_recalculate_tweedie, gam_train_metrics_recalculate_gamma, gam_train_metrics_recalculate_gaussian ])
    assert aml.get_best_model().model_id == aml.leaderboard[0, "model_id"]

    # Check it works with just a criterion
    assert aml.get_best_model(criterion="mse").model_id == aml.leaderboard.sort(by="mse")[0, "model_id"]

    # Check it works with extra_cols
    top_model = h2o.automl.get_leaderboard(aml, extra_columns=["training_time_ms"]) \
        .sort(by="training_time_ms")[0, "model_id"]
    assert aml.get_best_model(criterion="training_time_ms").model_id == top_model

    # Check validation works
    try:
        aml.get_best_model(algorithm="GXboost")  # deliberately misspelled algorithm name
        assert False, "Algorithm validation does not work!"
    except H2OValueError:
        pass
    try:
        aml.get_best_model(criterion="lorem_ipsum_dolor_sit_amet")
        assert False, "Criterion validation does not work!"
    except H2OValueError:
        pass


pu.run_tests([
    test_get_best_model_per_family,
])
    #     f.write(encoded.get_frame_data())
    golden = h2o.import_file("{}/golden/regression_kfold.csv".format(here))
    assert golden.names == encoded.names
    assert pu.compare_frames(golden, encoded, 0, tol_numeric=1e-5)


def test_regression_with_loo():
    ds = load_dataset()
    te = H2OTargetEncoderEstimator(noise=0, data_leakage_handling="leave_one_out")
    te.train(y=ds.target, training_frame=ds.train)
    encoded = te.transform(ds.train, as_training=True)
    print(encoded)
    col_te_golden = [45.84296, 25.99922, 45.97153, 25.99922, 45.97153]
    col_te = encoded['sex_te'].head(5).as_data_frame().values.reshape(-1).tolist()
    assert_allclose(col_te, col_te_golden, atol=1e-5)
    # with open("{}/golden/regression_loo.csv".format(here), "w") as f:
    #     f.write(encoded.get_frame_data())
    golden = h2o.import_file("{}/golden/regression_loo.csv".format(here))
    assert golden.names == encoded.names
    assert pu.compare_frames(golden, encoded, 0, tol_numeric=1e-5)


pu.run_tests([
    test_regression_with_none,
    test_regression_with_kfold,
    test_regression_with_loo,
])
                                       seed=1234)
    te = H2OTargetEncoderEstimator(k=0.7,
                                   f=0.3,
                                   data_leakage_handling="KFold",
                                   fold_column=foldColumnName)
    te.train(training_frame=trainingFrame, x=teColumns, y=targetColumnName)
    print(te)
    print(trainingFrame)
    model_summary = te._model_json['output']['model_summary'].as_data_frame()
    print(model_summary)
    encoded_column_names = model_summary['encoded_column_name']
    # Checking that we don't have empty entries in the TwoDim table
    assert len(model_summary) == 2
    encoded_columns_with_te_suffix = model_summary[encoded_column_names.str.contains('_te', regex=True)]
    assert len(encoded_columns_with_te_suffix) == 2

    transformed = te.transform(trainingFrame, data_leakage_handling="KFold")
    # Checking that the fold column is not being encoded.
    assert foldColumnName + "_te" not in transformed.col_names


testList = [test_target_encoder_model_summary_does_not_contain_fold_column]
pyunit_utils.run_tests(testList)
        print(clean['all'].values)
        assert aml.key.startswith(project_name)
        assert not contains_leaderboard(aml.key, clean)
        assert not contains_event_log(aml.key, clean)
        assert len(clean['models_base']) == 0
        assert len(clean['cv_models']) == 0
        assert len(clean['models_all']) == 0
        assert len(clean['metrics']) == 0
        assert len(clean['predictions']) == 0
        assert len(clean['automl']) == 0
        for frame in [train, blend, test]:
            assert frame_in_cluster(frame), "frame {} has been removed from cluster".format(frame.frame_id)

    return [
        test_remove_automl_with_xval,
        test_remove_automl_with_xval_when_keeping_all_cv_details,
        test_remove_automl_no_xval,
        test_remove_automl_after_individual_manual_deletions
    ]


pyunit_utils.run_tests(list(iter.chain.from_iterable([
    test_suite_clean_cv_predictions(),
    test_suite_clean_cv_models(),
    test_suite_remove_automl()
])))
        assert actual_value is None or model.params[prop_name]['actual'] == actual_value, \
            "actual value for {prop} in model {model} is {val}, expected {exp}".format(
                prop=prop_name, model=mn, val=model.params[prop_name]['actual'], exp=actual_value)
        assert default_value is None or model.params[prop_name]['default'] == default_value, \
            "default value for {prop} in model {model} is {val}, expected {exp}".format(
                prop=prop_name, model=mn, val=model.params[prop_name]['default'], exp=default_value)
        assert input_value is None or model.params[prop_name]['input'] == input_value, \
            "input value for {prop} in model {model} is {val}, expected {exp}".format(
                prop=prop_name, model=mn, val=model.params[prop_name]['input'], exp=input_value)
    else:
        assert prop_name not in model.params.keys(), \
            "unexpected {prop} in model {model}".format(prop=prop_name, model=mn)


def test_actual_default_input_stopping_rounds():
    train = h2o.import_file(path=pu.locate("smalldata/extdata/australia.csv"))
    target = 'runoffnew'
    exclude_algos = ["DeepLearning", "GLM"]
    aml = H2OAutoML(project_name="actual_default_input_stopping_rounds",
                    exclude_algos=exclude_algos,
                    max_models=10,
                    seed=automl_seed)
    aml.train(y=target, training_frame=train)
    base_models = get_partitioned_model_names(aml.leaderboard).base
    # when using cv, all cv models are trained with stopping_rounds = 3 (the default), but the final
    # model resets stopping_rounds to 0 and uses e.g. the average ntrees, iterations...
    check_model_property(base_models, 'stopping_rounds', True, 0, 0, 3)


pu.run_tests([
    test_actual_default_input_stopping_rounds,
])
    pd.testing.assert_frame_equal(pdf, hdf.as_data_frame(), check_dtype=False)


def test_import_fails_on_unsupported_quotechar():
    try:
        h2o.import_file(path=pyunit_utils.locate("smalldata/parser/single_quotes_mixed.csv"), quotechar="f")
        assert False
    except H2OTypeError as e:
        assert e.var_name == "quotechar"


def test_upload_fails_on_unsupported_quotechar():
    try:
        h2o.upload_file(path=pyunit_utils.locate("smalldata/parser/single_quotes_mixed.csv"), quotechar="f")
        assert False
    except H2OTypeError as e:
        assert e.var_name == "quotechar"


pyunit_utils.run_tests([
    test_import_single_quoted,
    test_upload_single_quoted,
    test_import_single_quoted_with_escaped_quotes,
    test_import_fails_on_unsupported_quotechar,
    test_upload_fails_on_unsupported_quotechar,
])
    y = 'CAPSULE'
    train[y] = train[y].asfactor()
    aml = H2OAutoML(project_name="test_get_automl", max_models=2, seed=1234)
    aml.train(y=y, training_frame=train)

    get_aml = get_automl(aml.project_name)

    assert aml.project_name == get_aml["project_name"]
    assert aml.leader.model_id == get_aml["leader"].model_id
    assert aml.leaderboard.get_frame_data() == get_aml["leaderboard"].get_frame_data()
    assert aml.event_log.get_frame_data() == get_aml["event_log"].get_frame_data()
    assert aml.training_info == get_aml['training_info']

    # PUBDEV-6599
    assert aml.project_name == get_aml.project_name
    assert aml.leader.model_id == get_aml.leader.model_id
    assert aml.leaderboard.frame_id == get_aml.leaderboard.frame_id
    assert aml.event_log.frame_id == get_aml.event_log.frame_id
    assert aml.training_info == get_aml.training_info

    # Test predictions
    predictions = aml.predict(train)
    predictions_from_output = get_aml.predict(train)
    assert (predictions == predictions_from_output).all()


pyunit_utils.run_tests([test_get_automl])
    assert get_leaderboard(aml, extra_columns=["predict_time_per_row_ms", "training_time_ms"]).names \
        == std_columns + ["predict_time_per_row_ms", "training_time_ms"]
    assert get_leaderboard(aml, extra_columns=["unknown", "training_time_ms"]).names \
        == std_columns + ["training_time_ms"]
    lb_ext = get_leaderboard(aml, extra_columns='ALL')
    print(lb_ext)
    assert all(lb_ext[:, 1:].isnumeric()), "metrics and extension columns should all be numeric"
    assert (lb_ext["training_time_ms"].as_data_frame().values >= 0).all()
    assert (lb_ext["predict_time_per_row_ms"].as_data_frame().values > 0).all()


pyunit_utils.run_tests([
    test_warn_on_empty_leaderboard,
    test_leaderboard_for_binomial,
    test_leaderboard_for_multinomial,
    test_leaderboard_for_regression,
    test_leaderboard_with_all_algos,
    test_leaderboard_with_no_algos,
    test_leaderboard_for_binomial_with_custom_sorting,
    test_leaderboard_for_multinomial_with_custom_sorting,
    test_leaderboard_for_regression_with_custom_sorting,
    test_AUTO_stopping_metric_with_no_sorting_metric_binomial,
    test_AUTO_stopping_metric_with_no_sorting_metric_regression,
    test_AUTO_stopping_metric_with_auc_sorting_metric,
    test_AUTO_stopping_metric_with_custom_sorting_metric,
    test_custom_leaderboard,
])
    kw_args = [
        dict(training_frame=ds['train'].frame_id),
        dict(training_frame=ds['train'], validation_frame=ds['valid'].frame_id),
        dict(training_frame=ds['train'], blending_frame=ds['blend'].frame_id),
    ]

    # Constructor validation
    for kwargs in kw_args:
        H2OStackedEnsembleEstimator(base_models=[], **kwargs)

    # train method validation
    base_model_params = dict(ntrees=3, nfolds=3, seed=seed, keep_cross_validation_predictions=True)
    for kwargs in kw_args:
        base_training_args = {k: v for k, v in kwargs.items() if k != 'blending_frame'}
        base_training_args['y'] = ds['target']
        gbm = H2OGradientBoostingEstimator(**base_model_params)
        gbm.train(**base_training_args)
        rf = H2ORandomForestEstimator(**base_model_params)
        rf.train(**base_training_args)
        se = H2OStackedEnsembleEstimator(base_models=[gbm, rf])
        se.train(y=ds['target'], **kwargs)


pu.run_tests([
    test_frames_can_be_passed_to_constructor,
    test_frames_can_be_overridden_in_train_method,
    test_frames_can_be_passed_as_key
])
    uplift1.train(x=predictors, y=response, training_frame=train, validation_frame=valid)
    uplift2 = H2OUpliftRandomForestEstimator(ntrees=10,
                                             max_depth=5,
                                             treatment_column=treatment_column,
                                             uplift_metric="KL",
                                             min_rows=10,
                                             seed=123,
                                             auuc_type="qini")
    uplift2.train(x=predictors, y=response, training_frame=train, validation_frame=valid)
    try:
        print(h2o.make_leaderboard([uplift1, uplift2]))
        assert False, "Should have failed - no support for uplift models"
    except h2o.exceptions.H2OServerError:
        pass


pyunit_utils.run_tests([
    test_leaderboard_with_automl_uses_eventlog,
    test_make_leaderboard_without_leaderboard_frame,
    test_make_leaderboard_with_leaderboard_frame,
    test_make_leaderboard_unsupervised,
    test_make_leaderboard_uplift,
])
        assert se.levelone_frame_id() is not None, \
            "Level one frame should be available when keep_levelone_frame is True."

    def test_levelone_frame_has_expected_dimensions():
        ds = prepare_data(blending)
        models = train_base_models(ds)
        se = train_stacked_ensemble(ds, models, keep_levelone_frame=True)
        level_one_frame = h2o.get_frame(se.levelone_frame_id()["name"])

        se_training_frame = ds.blend if blending else ds.train
        num_col_level_one_frame = (se_training_frame[ds.y].unique().nrow) * len(models) + 1  # count_classes(probabilities) * count_models + 1 (target)
        assert level_one_frame.ncols == num_col_level_one_frame, \
            "The number of columns in a level one frame should be numClasses * numBaseModels + 1."
        assert level_one_frame.nrows == se_training_frame.nrows, \
            "The number of rows in the level one frame should match the training frame's number of rows."

    return [pu.tag_test(test, 'blending' if blending else None) for test in [
        test_levelone_frame_not_accessible_with__keep_levelone_frame__False,
        test_levelone_frame_accessible_with__keep_levelone_frame__True,
        test_levelone_frame_has_expected_dimensions
    ]]


pu.run_tests([
    test_suite_stackedensemble_levelone_frame(),
    test_suite_stackedensemble_levelone_frame(blending=True),
])