def test_stacked_ensemble_is_able_to_use_imported_base_models():
    """Check that a stacked ensemble can be trained on re-imported base models.

    A GBM and a DRF are trained with cross-validation and stacked. Both base
    models are then saved, and their cross-validation holdout predictions
    exported, into a temporary directory. After ``h2o.remove_all()`` the
    holdout frames are imported back under their original frame ids, the
    models are uploaded again, the iris data is re-imported under different
    frame names, and a second ensemble is trained from the uploaded models.
    The temporary directory is always removed at the end.
    """
    import glob
    import shutil
    import tempfile

    response = "species"
    fold_count = 2
    # Both base models share the same cross-validation configuration.
    cv_options = dict(nfolds=fold_count,
                      fold_assignment="Modulo",
                      keep_cross_validation_predictions=True)

    train = h2o.import_file(pu.locate("smalldata/iris/iris_train.csv"))
    test = h2o.import_file(pu.locate("smalldata/iris/iris_test.csv"))
    predictors = train.columns
    predictors.remove(response)

    gbm = H2OGradientBoostingEstimator(**cv_options)
    gbm.train(x=predictors, y=response, training_frame=train)
    drf = H2ORandomForestEstimator(**cv_options)
    drf.train(x=predictors, y=response, training_frame=train)

    se = H2OStackedEnsembleEstimator(
        training_frame=train,
        validation_frame=test,
        base_models=[gbm.model_id, drf.model_id],
    )
    se.train(x=predictors, y=response, training_frame=train)

    assert len(se.base_models) == 2

    work_dir = tempfile.mkdtemp()
    gbm_model_dir = work_dir + "/gbm.model"
    drf_model_dir = work_dir + "/drf.model"
    gbm_holdout_file = work_dir + "/gbm.holdout"
    drf_holdout_file = work_dir + "/drf.holdout"
    try:
        h2o.save_model(gbm, gbm_model_dir)
        h2o.save_model(drf, drf_model_dir)

        # Remember the holdout frame ids so the frames can be restored under
        # the same keys after the cluster has been emptied.
        gbm_holdout_id = gbm.cross_validation_holdout_predictions().frame_id
        drf_holdout_id = drf.cross_validation_holdout_predictions().frame_id
        h2o.export_file(gbm.cross_validation_holdout_predictions(), gbm_holdout_file)
        h2o.export_file(drf.cross_validation_holdout_predictions(), drf_holdout_file)

        h2o.remove_all()

        h2o.import_file(gbm_holdout_file, gbm_holdout_id)
        h2o.import_file(drf_holdout_file, drf_holdout_id)

        # Each model directory is expected to hold the saved model file;
        # the first match is uploaded.
        gbm = h2o.upload_model(glob.glob(gbm_model_dir + "/*")[0])
        drf = h2o.upload_model(glob.glob(drf_model_dir + "/*")[0])

        # Deliberately import the data under frame names that differ from the
        # ones used for the first round of training.
        train = h2o.import_file(pu.locate("smalldata/iris/iris_train.csv"),
                                "some_other_name_of_training_frame")
        test = h2o.import_file(pu.locate("smalldata/iris/iris_test.csv"),
                               "some_other_name_of_test_frame")
        predictors = train.columns
        response = "species"
        predictors.remove(response)

        se_loaded = H2OStackedEnsembleEstimator(
            training_frame=train,
            validation_frame=test,
            base_models=[gbm.model_id, drf.model_id],
        )
        se_loaded.train(x=predictors, y=response, training_frame=train)

        assert len(se_loaded.base_models) == 2
    finally:
        shutil.rmtree(work_dir)
예제 #2
0
def download_model():
    """Round-trip a GBM through download, ``load_model`` and ``upload_model``.

    Trains a bernoulli GBM on the prostate dataset, downloads the model into
    the ``results`` directory, checks that the downloaded file exists, and
    then verifies that both ``h2o.load_model`` and ``h2o.upload_model`` turn
    that file back into an ``H2OGradientBoostingEstimator``.
    """
    prostate = h2o.import_file(
        pyunit_utils.locate("smalldata/prostate/prostate.csv"))
    # The response must be categorical for a bernoulli distribution.
    prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()

    prostate_gbm = H2OGradientBoostingEstimator(distribution="bernoulli",
                                                ntrees=10,
                                                max_depth=8,
                                                min_rows=10,
                                                learn_rate=0.2)
    prostate_gbm.train(x=["AGE", "RACE", "PSA", "VOL", "GLEASON"],
                       y="CAPSULE",
                       training_frame=prostate)

    path = pyunit_utils.locate("results")

    downloaded_model_path = prostate_gbm.download_model(path=path)
    assert os.path.isfile(downloaded_model_path), \
        "Expected load file {0} to exist, but it does not.".format(downloaded_model_path)

    # On failure report the type that was actually returned; the previous
    # messages printed the file path in place of the offending object.
    loaded_model = h2o.load_model(downloaded_model_path)
    assert isinstance(loaded_model, H2OGradientBoostingEstimator), \
        "Expected an H2OGradientBoostingEstimator, but got {0}".format(type(loaded_model))

    uploaded_model = h2o.upload_model(downloaded_model_path)
    assert isinstance(uploaded_model, H2OGradientBoostingEstimator), \
        "Expected an H2OGradientBoostingEstimator, but got {0}".format(type(uploaded_model))
def model_download_with_cv():
    """Download a cross-validated GBM with its holdout predictions and re-upload it.

    Trains a 2-fold GBM on the prostate dataset while keeping the
    cross-validation predictions, downloads the model with
    ``export_cross_validation_predictions=True``, calls ``h2o.remove_all()``,
    uploads the artifact again, and finally checks that the holdout
    prediction frame can be fetched by its id.
    """
    data_file = pyunit_utils.locate("smalldata/prostate/prostate.csv")
    prostate = h2o.import_file(data_file)
    prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()

    cv_gbm = H2OGradientBoostingEstimator(nfolds=2,
                                          keep_cross_validation_predictions=True)
    cv_gbm.train(x=["AGE", "RACE", "PSA", "DCAPS"],
                 y="CAPSULE",
                 training_frame=prostate)

    results_dir = pyunit_utils.locate("results")
    artifact = h2o.download_model(cv_gbm,
                                  path=results_dir,
                                  export_cross_validation_predictions=True)
    assert os.path.isfile(artifact), \
        "Expected model artifact {0} to exist, but it does not.".format(artifact)

    h2o.remove_all()

    reloaded_gbm = h2o.upload_model(artifact)
    assert isinstance(reloaded_gbm, H2OGradientBoostingEstimator), \
        "Expected H2OGradientBoostingEstimator, but got {0}".format(reloaded_gbm)

    # NOTE(review): the holdout frame is looked up through the model handle
    # created before remove_all(), not through the re-uploaded one --
    # presumably to recover the original frame id; confirm this is intended.
    holdout_frame = cv_gbm.cross_validation_holdout_predictions()
    assert h2o.get_frame(holdout_frame.frame_id) is not None
예제 #4
0
파일: h2o.py 프로젝트: wwjiang007/mlflow
def _load_model(path, init=False):
    """Load an H2O model described by the ``h2o.yaml`` file inside *path*.

    :param path: Directory holding ``h2o.yaml``; the YAML's ``model_file``
                 entry names the model file relative to this directory.
    :param init: When truthy, call ``h2o.init`` with the YAML's ``init``
                 mapping as keyword arguments (no arguments when the key is
                 absent) and then ``h2o.no_progress()``.
    :return: The model returned by ``h2o.upload_model``, or by
             ``h2o.load_model`` when the installed ``h2o`` module has no
             ``upload_model`` attribute.
    """
    import h2o

    model_dir = os.path.abspath(path)
    with open(os.path.join(model_dir, "h2o.yaml")) as conf_file:
        raw_conf = conf_file.read()
    params = yaml.safe_load(raw_conf)

    if init:
        init_kwargs = {}
        if "init" in params:
            init_kwargs = params["init"]
        h2o.init(**init_kwargs)
        h2o.no_progress()

    model_path = os.path.join(model_dir, params["model_file"])

    if not hasattr(h2o, "upload_model"):
        # Fall back to load_model when this h2o module lacks upload_model.
        warnings.warn(
            "If your cluster is remote, H2O may not load the model correctly. "
            "Please upgrade H2O version to a newer version")
        return h2o.load_model(model_path)

    return h2o.upload_model(model_path)
예제 #5
0
# %%
# Save the GLM and re-upload the saved model.

model_path = h2o.save_model(glm, path="../mlruns_h2o/", force=True)

print(model_path)

# Load the model from the server (if necessary):
# saved_model = h2o.load_model(model_path)

# Download the model built above to your local machine (if necessary):
# my_local_model = h2o.download_model(saved_model, path="/Users/UserName/Desktop")

# Upload the model saved above to the H2O cluster.
uploaded_model = h2o.upload_model(model_path)

# %%
# Explain the uploaded model on the test frame.
exm = uploaded_model.explain(test)


# %%
# Partial dependence (PDP) for CREDIT_SCORE, computed without plotting.

pdp_table = uploaded_model.partial_plot(
    test,
    cols=["CREDIT_SCORE"],
    nbins=20,
    plot=False,
)

# %%
# Shut down the H2O server without prompting.
h2o.cluster().shutdown(prompt=False)