def test_stacked_ensemble_is_able_to_use_imported_base_models():
    """Verify a Stacked Ensemble can train on base models that were saved to
    disk, wiped from the cluster, and re-uploaded together with their
    cross-validation holdout prediction frames.
    """
    import glob
    import os
    import shutil
    import tempfile

    train = h2o.import_file(pu.locate("smalldata/iris/iris_train.csv"))
    test = h2o.import_file(pu.locate("smalldata/iris/iris_test.csv"))
    x = train.columns
    y = "species"
    x.remove(y)
    nfolds = 2

    # Base learners must keep CV predictions (with Modulo fold assignment) so
    # the ensemble metalearner can be trained on the holdout predictions.
    gbm = H2OGradientBoostingEstimator(nfolds=nfolds,
                                       fold_assignment="Modulo",
                                       keep_cross_validation_predictions=True)
    gbm.train(x=x, y=y, training_frame=train)
    drf = H2ORandomForestEstimator(nfolds=nfolds,
                                   fold_assignment="Modulo",
                                   keep_cross_validation_predictions=True)
    drf.train(x=x, y=y, training_frame=train)

    se = H2OStackedEnsembleEstimator(training_frame=train,
                                     validation_frame=test,
                                     base_models=[gbm.model_id, drf.model_id])
    se.train(x=x, y=y, training_frame=train)
    assert len(se.base_models) == 2

    TMP_DIR = tempfile.mkdtemp()
    try:
        h2o.save_model(gbm, os.path.join(TMP_DIR, "gbm.model"))
        h2o.save_model(drf, os.path.join(TMP_DIR, "drf.model"))

        # Fetch each holdout frame once (avoids a second server round-trip per
        # model) and persist it so it can be restored under the same frame id.
        gbm_holdout = gbm.cross_validation_holdout_predictions()
        drf_holdout = drf.cross_validation_holdout_predictions()
        gbm_holdout_id = gbm_holdout.frame_id
        drf_holdout_id = drf_holdout.frame_id
        h2o.export_file(gbm_holdout, os.path.join(TMP_DIR, "gbm.holdout"))
        h2o.export_file(drf_holdout, os.path.join(TMP_DIR, "drf.holdout"))

        # Wipe the cluster: from here on everything must come back from disk.
        h2o.remove_all()

        h2o.import_file(os.path.join(TMP_DIR, "gbm.holdout"), gbm_holdout_id)
        h2o.import_file(os.path.join(TMP_DIR, "drf.holdout"), drf_holdout_id)
        gbm = h2o.upload_model(glob.glob(os.path.join(TMP_DIR, "gbm.model", "*"))[0])
        drf = h2o.upload_model(glob.glob(os.path.join(TMP_DIR, "drf.model", "*"))[0])

        # Re-import the data under different frame names to prove the ensemble
        # does not depend on the original training/validation frame ids.
        train = h2o.import_file(pu.locate("smalldata/iris/iris_train.csv"),
                                "some_other_name_of_training_frame")
        test = h2o.import_file(pu.locate("smalldata/iris/iris_test.csv"),
                               "some_other_name_of_test_frame")
        x = train.columns
        y = "species"
        x.remove(y)

        se_loaded = H2OStackedEnsembleEstimator(
            training_frame=train,
            validation_frame=test,
            base_models=[gbm.model_id, drf.model_id])
        se_loaded.train(x=x, y=y, training_frame=train)
        assert len(se_loaded.base_models) == 2
    finally:
        shutil.rmtree(TMP_DIR)
def download_model():
    """Train a GBM on the prostate data, download the binary model locally,
    and check that both load_model and upload_model restore a GBM estimator.
    """
    prostate = h2o.import_file(pyunit_utils.locate("smalldata/prostate/prostate.csv"))
    prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()

    model = H2OGradientBoostingEstimator(distribution="bernoulli",
                                         ntrees=10,
                                         max_depth=8,
                                         min_rows=10,
                                         learn_rate=0.2)
    model.train(x=["AGE", "RACE", "PSA", "VOL", "GLEASON"],
                y="CAPSULE",
                training_frame=prostate)

    results_dir = pyunit_utils.locate("results")
    local_path = model.download_model(path=results_dir)
    assert os.path.isfile(local_path), \
        "Expected load file {0} to exist, but it does not.".format(local_path)

    # Both restore paths (server-side load and client-side upload) must yield
    # the same estimator type.
    for restore in (h2o.load_model, h2o.upload_model):
        restored = restore(local_path)
        assert isinstance(restored, H2OGradientBoostingEstimator), \
            "Expected an H2OGradientBoostingEstimator, but got {0}".format(local_path)
def model_download_with_cv():
    """Download a CV-enabled GBM with its cross-validation predictions and
    verify the holdout predictions frame exists again after re-upload.
    """
    frame = h2o.import_file(pyunit_utils.locate("smalldata/prostate/prostate.csv"))
    frame["CAPSULE"] = frame["CAPSULE"].asfactor()

    gbm = H2OGradientBoostingEstimator(nfolds=2,
                                       keep_cross_validation_predictions=True)
    gbm.train(x=["AGE", "RACE", "PSA", "DCAPS"],
              y="CAPSULE",
              training_frame=frame)

    dest = pyunit_utils.locate("results")
    model_path = h2o.download_model(gbm,
                                    path=dest,
                                    export_cross_validation_predictions=True)
    assert os.path.isfile(model_path), \
        "Expected model artifact {0} to exist, but it does not.".format(model_path)

    # Clear the cluster, then bring the model (and its exported CV
    # predictions) back from the downloaded artifact.
    h2o.remove_all()
    reloaded = h2o.upload_model(model_path)
    assert isinstance(reloaded, H2OGradientBoostingEstimator), \
        "Expected H2OGradientBoostingEstimator, but got {0}".format(reloaded)

    # NOTE(review): this queries the holdout frame id via the pre-remove_all
    # model object — presumably the id is cached client-side; the uploaded
    # model is expected to have re-registered the frame under that same id.
    holdout_id = gbm.cross_validation_holdout_predictions().frame_id
    assert h2o.get_frame(holdout_id) is not None
def _load_model(path, init=False):
    """Restore an H2O model saved alongside an ``h2o.yaml`` config file.

    :param path: directory containing ``h2o.yaml`` and the model artifact
        named by the config's ``model_file`` key.
    :param init: when True, start/connect an H2O cluster first, passing the
        config's ``init`` section as kwargs (empty if absent).
    :return: the restored H2O model object.
    """
    import h2o

    path = os.path.abspath(path)
    # safe_load accepts a file object directly; no need to read() first.
    with open(os.path.join(path, "h2o.yaml")) as f:
        params = yaml.safe_load(f)

    if init:
        h2o.init(**params.get("init", {}))
        h2o.no_progress()

    model_path = os.path.join(path, params["model_file"])
    if hasattr(h2o, "upload_model"):
        # upload_model pushes the local artifact to the (possibly remote)
        # cluster, so it works regardless of where the server runs.
        model = h2o.upload_model(model_path)
    else:
        # Older H2O only has load_model, which resolves the path on the
        # server side and therefore can fail against a remote cluster.
        warnings.warn(
            "If your cluster is remote, H2O may not load the model correctly. "
            "Please upgrade H2O version to a newer version")
        model = h2o.load_model(model_path)
    return model
# %% # save GLM & reupload model model_path = h2o.save_model(glm,path='../mlruns_h2o/',force=True) print(model_path) # load the model from server (if necesary) # saved_model = h2o.load_model(model_path) # download the model built above to your local machine (if necessary) # my_local_model = h2o.download_model(saved_model, path="/Users/UserName/Desktop") # upload the model that you just downloded above # to the H2O cluster uploaded_model = h2o.upload_model(model_path) # %% # Explain a model exm = uploaded_model.explain(test) # %% # PDP pdp_table = uploaded_model.partial_plot(test,cols=['CREDIT_SCORE'], nbins = 20, plot=False) # %% # shutdown h2o server h2o.cluster().shutdown(prompt=False)