def test_get_automl():
    """Check that ``get_automl`` returns state matching the AutoML run it was asked for.

    Trains a two-model AutoML run on the prostate data with ``CAPSULE``
    converted to a factor, fetches the run back by project name, and compares
    project name, leader, leaderboard, event log, training info and
    predictions between the original object and the fetched one.
    """
    response = 'CAPSULE'
    prostate = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    prostate[response] = prostate[response].asfactor()

    automl = H2OAutoML(project_name="test_get_automl", max_models=2, seed=1234)
    automl.train(y=response, training_frame=prostate)

    fetched = get_automl(automl.project_name)

    # Item-style access on the fetched object.
    assert automl.project_name == fetched["project_name"]
    assert automl.leader.model_id == fetched["leader"].model_id
    expected_leaderboard = automl.leaderboard.get_frame_data()
    assert expected_leaderboard == fetched["leaderboard"].get_frame_data()
    expected_event_log = automl.event_log.get_frame_data()
    assert expected_event_log == fetched["event_log"].get_frame_data()
    assert automl.training_info == fetched['training_info']

    # PUBDEV-6599: the same checks through attribute-style access.
    assert automl.project_name == fetched.project_name
    assert automl.leader.model_id == fetched.leader.model_id
    assert automl.leaderboard.frame_id == fetched.leaderboard.frame_id
    assert automl.event_log.frame_id == fetched.event_log.frame_id
    assert automl.training_info == fetched.training_info

    # Test predictions: both objects must score the training frame identically.
    original_preds = automl.predict(prostate)
    fetched_preds = fetched.predict(prostate)
    assert (original_preds == fetched_preds).all()
def test_get_automl():
    """Check that ``get_automl`` returns state matching the AutoML run it was asked for.

    Uses the dataset returned by ``import_dataset()`` (its ``train``, ``test``
    and ``target`` attributes), trains a two-model AutoML run, fetches the run
    back by project name, and compares project name, leader, leaderboard,
    event log, training info, predictions and ``get_leaderboard`` output
    between the original object and the fetched one.
    """
    dataset = import_dataset()

    automl = H2OAutoML(project_name="test_get_automl", max_models=2, seed=1234)
    automl.train(y=dataset.target, training_frame=dataset.train)

    fetched = get_automl(automl.project_name)

    # Item-style access on the fetched object.
    assert automl.project_name == fetched["project_name"]
    assert automl.leader.model_id == fetched["leader"].model_id
    expected_leaderboard = automl.leaderboard.get_frame_data()
    assert expected_leaderboard == fetched["leaderboard"].get_frame_data()
    expected_event_log = automl.event_log.get_frame_data()
    assert expected_event_log == fetched["event_log"].get_frame_data()
    assert automl.training_info == fetched['training_info']

    # PUBDEV-6599: the same checks through attribute-style access.
    assert automl.project_name == fetched.project_name
    assert automl.leader.model_id == fetched.leader.model_id
    assert automl.leaderboard.frame_id == fetched.leaderboard.frame_id
    assert automl.event_log.frame_id == fetched.event_log.frame_id
    assert automl.training_info == fetched.training_info

    # Test predictions: both objects must score the test frame identically.
    original_preds = automl.predict(dataset.test)
    fetched_preds = fetched.predict(dataset.test)
    assert (original_preds == fetched_preds).all()

    # Test get_leaderboard PUBDEV-7454: first with no extra argument, then with 'ALL'.
    for extra_args in ((), ('ALL',)):
        assert (get_leaderboard(automl, *extra_args) == get_leaderboard(fetched, *extra_args)).all()
def prostate_automl_get_automl():
    """Check that ``get_automl`` returns the project name, leader and leaderboard of a trained run.

    Imports the prostate data, trains a two-model AutoML run on the first
    split with ``CAPSULE`` as a factor response, fetches the run back by
    project name, and compares the fetched values with the original object.
    """
    df = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    # Split frames with the same ratios as before; only the first split is
    # used, so the other two are no longer bound or converted.
    train = df.split_frame(ratios=[.8, .1])[0]
    train["CAPSULE"] = train["CAPSULE"].asfactor()

    aml = H2OAutoML(project_name="py_aml0", stopping_rounds=3, stopping_tolerance=0.001,
                    stopping_metric="AUC", max_models=2, seed=1234)
    aml.train(y="CAPSULE", training_frame=train)

    get_aml = get_automl(aml.project_name)

    assert aml.project_name == get_aml["project_name"]
    get_aml_leader = get_aml["leader"]
    assert aml.leader.model_id == get_aml_leader.model_id
    assert aml.leaderboard.get_frame_data() == get_aml["leaderboard"].get_frame_data()
def test_get_automl():
    """Check that ``get_automl`` returns state matching the AutoML run it was asked for.

    Trains a two-model AutoML run on the prostate data with ``CAPSULE``
    converted to a factor, fetches the run back by project name, and compares
    project name, leader model id, leaderboard data and event-log data.
    """
    response = 'CAPSULE'
    prostate = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    prostate[response] = prostate[response].asfactor()

    automl = H2OAutoML(project_name="test_get_automl", max_models=2, seed=1234)
    automl.train(y=response, training_frame=prostate)

    fetched = get_automl(automl.project_name)

    assert automl.project_name == fetched["project_name"]
    assert automl.leader.model_id == fetched["leader"].model_id

    expected_leaderboard = automl.leaderboard.get_frame_data()
    assert expected_leaderboard == fetched["leaderboard"].get_frame_data()

    expected_event_log = automl.event_log.get_frame_data()
    assert expected_event_log == fetched["event_log"].get_frame_data()
def automl_checkpoints():
    """Check that AutoML exports every leaderboard model to ``export_checkpoints_dir``.

    Trains a two-model AutoML run with a fresh temporary directory as the
    checkpoint export target. It then asserts that the number of exported
    files, ignoring names containing ``"_cv"``, is positive and equals the
    number of rows in the leaderboard fetched through ``get_automl``.
    """
    ds = import_dataset()
    checkpoints_dir = tempfile.mkdtemp()
    try:
        aml = H2OAutoML(project_name="py_aml0", stopping_rounds=3, stopping_tolerance=0.001,
                        stopping_metric="AUC", max_models=2, seed=1234,
                        export_checkpoints_dir=checkpoints_dir)
        aml.train(y=ds.target, training_frame=ds.train)

        get_aml = get_automl(aml.project_name)

        # do not count CV models
        num_files = len([f for f in os.listdir(checkpoints_dir) if "_cv" not in f])
    finally:
        # mkdtemp() never cleans up after itself; remove the directory even
        # when training or listing fails so the test does not leak it.
        shutil.rmtree(checkpoints_dir)

    assert aml.project_name == get_aml["project_name"]
    assert num_files > 0, "No models generated by AutoML"
    assert get_aml["leaderboard"].nrows == num_files, "Not all generated autoML models were saved."
def automl_checkpoints():
    """Check that AutoML exports every leaderboard model to ``export_checkpoints_dir``.

    Imports the prostate data and trains a two-model AutoML run on the first
    split with ``CAPSULE`` as a factor response, using a fresh temporary
    directory as the checkpoint export target. It then asserts that the number
    of exported files, ignoring names containing ``"_cv"``, is positive and
    equals the number of rows in the leaderboard fetched through ``get_automl``.
    """
    df = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    # Split frames with the same ratios as before; only the first split is
    # used, so the other two are no longer bound or converted.
    train = df.split_frame(ratios=[.8, .1])[0]
    train["CAPSULE"] = train["CAPSULE"].asfactor()

    checkpoints_dir = tempfile.mkdtemp()
    try:
        aml = H2OAutoML(project_name="py_aml0", stopping_rounds=3, stopping_tolerance=0.001,
                        stopping_metric="AUC", max_models=2, seed=1234,
                        export_checkpoints_dir=checkpoints_dir)
        aml.train(y="CAPSULE", training_frame=train)

        get_aml = get_automl(aml.project_name)

        # do not count CV models
        num_files = len([f for f in os.listdir(checkpoints_dir) if "_cv" not in f])
    finally:
        # mkdtemp() never cleans up after itself; remove the directory even
        # when training or listing fails so the test does not leak it.
        shutil.rmtree(checkpoints_dir)

    assert aml.project_name == get_aml["project_name"]
    assert num_files > 0, "No models generated by AutoML"
    assert get_aml["leaderboard"].nrows == num_files, "Not all generated autoML models were saved."