def model_download_with_cv():
    """Round-trip a cross-validated GBM through download and upload.

    Trains a 2-fold GBM on the prostate dataset while keeping the
    cross-validation predictions, downloads the model together with those
    predictions, removes everything from the cluster, uploads the artifact
    again and finally checks that the holdout prediction frame can be fetched.
    """
    response = "CAPSULE"
    predictors = ["AGE", "RACE", "PSA", "DCAPS"]

    frame = h2o.import_file(
        pyunit_utils.locate("smalldata/prostate/prostate.csv"))
    # The response column is converted to a factor before training.
    frame[response] = frame[response].asfactor()

    gbm = H2OGradientBoostingEstimator(
        nfolds=2, keep_cross_validation_predictions=True)
    gbm.train(x=predictors, y=response, training_frame=frame)

    # Download the model and ask for the CV predictions to be exported too.
    results_dir = pyunit_utils.locate("results")
    artifact = h2o.download_model(
        gbm,
        path=results_dir,
        export_cross_validation_predictions=True,
    )
    artifact_missing_msg = (
        "Expected model artifact {0} to exist, but it does not.".format(artifact)
    )
    assert os.path.isfile(artifact), artifact_missing_msg

    # Clear the cluster so that whatever is found afterwards must have come
    # from the uploaded artifact.
    h2o.remove_all()

    reloaded = h2o.upload_model(artifact)
    wrong_type_msg = "Expected H2OGradientBoostingEstimator, but got {0}".format(
        reloaded)
    assert isinstance(reloaded, H2OGradientBoostingEstimator), wrong_type_msg

    # The frame id is read from the ORIGINAL estimator object (not the
    # reloaded one) and then looked up on the cluster.
    holdout = gbm.cross_validation_holdout_predictions()
    assert h2o.get_frame(holdout.frame_id) is not None
def save_model(
    h2o_model,
    path,
    conda_env=None,
    mlflow_model=None,
    settings=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
):
    """
    Save an H2O model to a path on the local file system.

    :param h2o_model: H2O model to be saved.
    :param path: Local path where the model is to be saved. The path must not
                 exist yet; it is created by this function.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                {
                                    'pip': [
                                        'h2o==3.20.0.8'
                                    ]
                                }
                            ]
                        }
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
                         A new ``Model()`` is created when ``None``.
    :param settings: Optional dictionary of additional settings that is written to
                     ``h2o.yaml`` inside the model data directory. The keys ``full_file``,
                     ``model_file`` and ``model_dir`` are always set by this function and
                     override entries of the same name. The dictionary passed by the caller
                     is copied and never modified.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :raises Exception: If ``path`` already exists.
    """
    import h2o

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise Exception("Path '{}' already exists".format(path))
    model_data_subpath = "model.h2o"
    model_data_path = os.path.join(path, model_data_subpath)
    # Creates ``path`` itself as well as the nested model data directory.
    os.makedirs(model_data_path)
    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    # Save h2o-model
    if hasattr(h2o, "download_model"):
        h2o_save_location = h2o.download_model(model=h2o_model, path=model_data_path)
    else:
        # Fallback for H2O versions that do not provide ``download_model``.
        warnings.warn(
            "If your cluster is remote, H2O may not store the model correctly. "
            "Please upgrade H2O version to a newer version")
        h2o_save_location = h2o.save_model(model=h2o_model, path=model_data_path, force=True)
    model_file = os.path.basename(h2o_save_location)

    # Save h2o-settings
    # Work on a shallow copy so the caller's dictionary is not polluted with
    # the bookkeeping keys added below.
    settings = {} if settings is None else dict(settings)
    settings["full_file"] = h2o_save_location
    settings["model_file"] = model_file
    settings["model_dir"] = model_data_path
    with open(os.path.join(model_data_path, "h2o.yaml"), "w") as settings_file:
        yaml.safe_dump(settings, stream=settings_file)

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        # Anything that is not a dict is treated as a path to a yaml file.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    _log_pip_requirements(conda_env, path)

    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.h2o", data=model_data_subpath,
                        env=conda_env_subpath)
    mlflow_model.add_flavor(FLAVOR_NAME, h2o_version=h2o.__version__, data=model_data_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model(
    h2o_model,
    path,
    conda_env=None,
    mlflow_model=None,
    settings=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save an H2O model to a path on the local file system.

    :param h2o_model: H2O model to be saved.
    :param path: Local path where the model is to be saved. The path must not
                 exist yet; it is created by this function.
    :param conda_env: {{ conda_env }}
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
                         A new ``Model()`` is created when ``None``.
    :param settings: Optional dictionary of additional settings that is written to
                     ``h2o.yaml`` inside the model data directory. The keys ``full_file``,
                     ``model_file`` and ``model_dir`` are always set by this function and
                     override entries of the same name. The dictionary passed by the caller
                     is copied and never modified.
    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param pip_requirements: Pip requirements for the model. Only consulted when ``conda_env``
                             is ``None``. If this is ``None`` as well, the requirements are
                             inferred from the saved model and merged with
                             :func:`get_default_pip_requirements()`.
    :param extra_pip_requirements: Additional pip requirements, handed to the requirement
                                   processing together with the default or explicit
                                   requirements. Only consulted when ``conda_env`` is ``None``.
    :raises Exception: If ``path`` already exists.
    """
    import h2o

    # NOTE(review): presumably rejects conflicting combinations of the three
    # environment arguments -- confirm against `_validate_env_arguments`.
    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise Exception("Path '{}' already exists".format(path))
    model_data_subpath = "model.h2o"
    model_data_path = os.path.join(path, model_data_subpath)
    # Creates ``path`` itself as well as the nested model data directory.
    os.makedirs(model_data_path)
    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    # Save h2o-model
    if hasattr(h2o, "download_model"):
        h2o_save_location = h2o.download_model(model=h2o_model, path=model_data_path)
    else:
        # Fallback for H2O versions that do not provide ``download_model``.
        warnings.warn(
            "If your cluster is remote, H2O may not store the model correctly. "
            "Please upgrade H2O version to a newer version")
        h2o_save_location = h2o.save_model(model=h2o_model, path=model_data_path, force=True)
    model_file = os.path.basename(h2o_save_location)

    # Save h2o-settings
    # Work on a shallow copy so the caller's dictionary is not polluted with
    # the bookkeeping keys added below.
    settings = {} if settings is None else dict(settings)
    settings["full_file"] = h2o_save_location
    settings["model_file"] = model_file
    settings["model_dir"] = model_data_path
    with open(os.path.join(model_data_path, "h2o.yaml"), "w") as settings_file:
        yaml.safe_dump(settings, stream=settings_file)

    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.h2o", data=model_data_subpath,
                        env=_CONDA_ENV_FILE_NAME)
    mlflow_model.add_flavor(FLAVOR_NAME, h2o_version=h2o.__version__, data=model_data_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path, FLAVOR_NAME, fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            # Explicit requirements were given, so no defaults are added.
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs, pip_requirements, extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
# Treat the response column as a factor.
df[y] = df[y].asfactor()

# Split the frame with the given ratios and a fixed seed; the result is
# unpacked into three frames.
splits = df.split_frame(ratios=[0.7, 0.15], seed=1)
train, valid, test = splits
print(*[part.nrow for part in (train, valid, test)])

# Configure and run AutoML on the training split.
automl_options = {"max_models": 8, "max_runtime_secs": 900, "seed": 1}
aml = H2OAutoML(**automl_options)
aml.train(x=x, y=y, training_frame=train)

# Show the complete leaderboard.
leaderboard = aml.leaderboard
print(leaderboard.head(rows=leaderboard.nrows))

# Score the test split with the leading model and collect its AUC and
# confusion matrix (both requested without arguments) before printing.
best_model = aml.leader
reports = (
    best_model.predict(test),
    best_model.auc(),
    best_model.confusion_matrix(),
)
for report in reports:
    print(report)

# Download the leading model, then shut the cluster down.
h2o.download_model(model=best_model, path='test/output/')
h2o.cluster().shutdown()