def test_get_automl():
    """Check that ``get_automl`` hands back the same run that was just trained.

    An AutoML run is trained on the prostate data with ``CAPSULE`` converted
    to a factor, then looked up again by project name. The fetched object is
    compared with the original through key lookup, through attribute lookup,
    and finally through the predictions both produce on the training frame.
    """
    target = 'CAPSULE'
    prostate = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    prostate[target] = prostate[target].asfactor()

    trained = H2OAutoML(project_name="test_get_automl", max_models=2, seed=1234)
    trained.train(y=target, training_frame=prostate)

    fetched = get_automl(trained.project_name)

    # Key-based access on the fetched object
    assert trained.project_name == fetched["project_name"]
    assert trained.leader.model_id == fetched["leader"].model_id
    for frame_key in ("leaderboard", "event_log"):
        assert getattr(trained, frame_key).get_frame_data() == fetched[frame_key].get_frame_data()
    assert trained.training_info == fetched['training_info']

    # PUBDEV-6599: the same checks through attribute access
    assert trained.project_name == fetched.project_name
    assert trained.leader.model_id == fetched.leader.model_id
    for frame_attr in ("leaderboard", "event_log"):
        assert getattr(trained, frame_attr).frame_id == getattr(fetched, frame_attr).frame_id
    assert trained.training_info == fetched.training_info

    # Both objects must score the training frame identically
    expected_preds = trained.predict(prostate)
    fetched_preds = fetched.predict(prostate)
    assert (expected_preds == fetched_preds).all()
def test_get_automl():
    """Check that ``get_automl`` hands back the same run that was just trained.

    The run is trained on the dataset provided by ``import_dataset`` and then
    looked up again by project name. The fetched object is compared with the
    original through key lookup, attribute lookup, predictions on the test
    frame, and ``get_leaderboard`` output.
    """
    data = import_dataset()
    trained = H2OAutoML(project_name="test_get_automl", max_models=2, seed=1234)
    trained.train(y=data.target, training_frame=data.train)

    fetched = get_automl(trained.project_name)

    # Key-based access on the fetched object
    assert trained.project_name == fetched["project_name"]
    assert trained.leader.model_id == fetched["leader"].model_id
    for frame_key in ("leaderboard", "event_log"):
        assert getattr(trained, frame_key).get_frame_data() == fetched[frame_key].get_frame_data()
    assert trained.training_info == fetched['training_info']

    # PUBDEV-6599: the same checks through attribute access
    assert trained.project_name == fetched.project_name
    assert trained.leader.model_id == fetched.leader.model_id
    for frame_attr in ("leaderboard", "event_log"):
        assert getattr(trained, frame_attr).frame_id == getattr(fetched, frame_attr).frame_id
    assert trained.training_info == fetched.training_info

    # Both objects must score the test frame identically
    expected_preds = trained.predict(data.test)
    fetched_preds = fetched.predict(data.test)
    assert (expected_preds == fetched_preds).all()

    # PUBDEV-7454: get_leaderboard must agree, with and without the 'ALL' argument
    for extra_args in ((), ('ALL',)):
        assert (get_leaderboard(trained, *extra_args) == get_leaderboard(fetched, *extra_args)).all()
Example #3
0
def prostate_automl_get_automl():
    """Train AutoML on a prostate training split and verify ``get_automl``.

    The run is looked up again by project name; its project name, leader
    model id and leaderboard contents must match those of the trained object.
    """
    df = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    # Only the training partition is needed; the other partitions of the split
    # were never used, so they are no longer materialised or converted.
    # The split is seeded so the training rows are the same on every run.
    train = df.split_frame(ratios=[.8, .1], seed=1234)[0]
    train["CAPSULE"] = train["CAPSULE"].asfactor()

    aml = H2OAutoML(project_name="py_aml0", stopping_rounds=3, stopping_tolerance=0.001, stopping_metric="AUC", max_models=2, seed=1234)
    aml.train(y="CAPSULE", training_frame=train)

    get_aml = get_automl(aml.project_name)

    assert aml.project_name == get_aml["project_name"]
    get_aml_leader = get_aml["leader"]
    assert aml.leader.model_id == get_aml_leader.model_id
    assert aml.leaderboard.get_frame_data() == get_aml["leaderboard"].get_frame_data()
Example #4
0
def test_get_automl():
    """Check that ``get_automl`` hands back the same run that was just trained.

    An AutoML run is trained on the prostate data with ``CAPSULE`` converted
    to a factor, then looked up again by project name. Project name, leader
    model id, leaderboard data and event-log data must all match.
    """
    target = 'CAPSULE'
    prostate = h2o.import_file(
        path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    prostate[target] = prostate[target].asfactor()

    trained = H2OAutoML(project_name="test_get_automl", max_models=2, seed=1234)
    trained.train(y=target, training_frame=prostate)

    fetched = get_automl(trained.project_name)

    assert trained.project_name == fetched["project_name"]
    assert trained.leader.model_id == fetched["leader"].model_id
    # Frame-valued entries are compared by their full contents
    for frame_key in ("leaderboard", "event_log"):
        assert getattr(trained, frame_key).get_frame_data() == fetched[frame_key].get_frame_data()
def automl_checkpoints():
    """Verify that AutoML exports a checkpoint file for every leaderboard model.

    AutoML is trained with ``export_checkpoints_dir`` pointing at a fresh
    temporary directory. The run is then fetched again with ``get_automl`` and
    the number of exported files (ignoring names containing ``_cv``) must equal
    the number of leaderboard rows.
    """
    ds = import_dataset()
    checkpoints_dir = tempfile.mkdtemp()
    try:
        aml = H2OAutoML(project_name="py_aml0",
                        stopping_rounds=3,
                        stopping_tolerance=0.001,
                        stopping_metric="AUC",
                        max_models=2,
                        seed=1234,
                        export_checkpoints_dir=checkpoints_dir)
        aml.train(y=ds.target, training_frame=ds.train)

        get_aml = get_automl(aml.project_name)
        num_files = len([f for f in os.listdir(checkpoints_dir)
                         if "_cv" not in f])  # do not count CV models
    finally:
        # Remove the temporary directory even when training or the lookup
        # raises, so a failing run does not leave checkpoint files behind.
        shutil.rmtree(checkpoints_dir)

    assert aml.project_name == get_aml["project_name"]
    assert num_files > 0, "No models generated by AutoML"
    assert get_aml[
        "leaderboard"].nrows == num_files, "Not all generated autoML models were saved."
def automl_checkpoints():
    """Verify that AutoML exports a checkpoint file for every leaderboard model.

    AutoML is trained on a training split of the prostate data with
    ``export_checkpoints_dir`` pointing at a fresh temporary directory. The
    run is then fetched again with ``get_automl`` and the number of exported
    files (ignoring names containing ``_cv``) must equal the number of
    leaderboard rows.
    """
    df = h2o.import_file(
        path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    # Only the training partition is needed; the other partitions of the split
    # were never used, so they are no longer materialised or converted.
    # The split is seeded so the training rows are the same on every run.
    train = df.split_frame(ratios=[.8, .1], seed=1234)[0]
    train["CAPSULE"] = train["CAPSULE"].asfactor()

    checkpoints_dir = tempfile.mkdtemp()
    try:
        aml = H2OAutoML(project_name="py_aml0",
                        stopping_rounds=3,
                        stopping_tolerance=0.001,
                        stopping_metric="AUC",
                        max_models=2,
                        seed=1234,
                        export_checkpoints_dir=checkpoints_dir)
        aml.train(y="CAPSULE", training_frame=train)

        get_aml = get_automl(aml.project_name)
        num_files = len([f for f in os.listdir(checkpoints_dir)
                         if "_cv" not in f])  # do not count CV models
    finally:
        # Remove the temporary directory even when training or the lookup
        # raises, so a failing run does not leave checkpoint files behind.
        shutil.rmtree(checkpoints_dir)

    assert aml.project_name == get_aml["project_name"]
    assert num_files > 0, "No models generated by AutoML"
    assert get_aml[
        "leaderboard"].nrows == num_files, "Not all generated autoML models were saved."