def model_download_with_cv():
    prostate = h2o.import_file(
        pyunit_utils.locate("smalldata/prostate/prostate.csv"))
    prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()

    prostate_gbm = H2OGradientBoostingEstimator(
        nfolds=2, keep_cross_validation_predictions=True)
    prostate_gbm.train(x=["AGE", "RACE", "PSA", "DCAPS"],
                       y="CAPSULE",
                       training_frame=prostate)
    path = pyunit_utils.locate("results")

    model_path = h2o.download_model(prostate_gbm,
                                    path=path,
                                    export_cross_validation_predictions=True)
    assert os.path.isfile(
        model_path
    ), "Expected model artifact {0} to exist, but it does not.".format(
        model_path)

    h2o.remove_all()

    prostate_gbm_reloaded = h2o.upload_model(model_path)
    assert isinstance(prostate_gbm_reloaded, H2OGradientBoostingEstimator), \
        "Expected H2OGradientBoostingEstimator, but got {0}".format(prostate_gbm_reloaded)

    holdout_frame_id = prostate_gbm.cross_validation_holdout_predictions(
    ).frame_id
    assert h2o.get_frame(holdout_frame_id) is not None
Beispiel #2
0
def save_model(
    h2o_model,
    path,
    conda_env=None,
    mlflow_model=None,
    settings=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
):
    """
    Save an H2O model to a path on the local file system.

    :param h2o_model: H2O model to be saved.
    :param path: Local path where the model is to be saved.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pip': [
                                    'h2o==3.20.0.8'
                                ]
                            ]
                        }

    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.

    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    """
    import h2o

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise Exception("Path '{}' already exists".format(path))
    model_data_subpath = "model.h2o"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    # Save h2o-model
    if hasattr(h2o, "download_model"):
        h2o_save_location = h2o.download_model(model=h2o_model,
                                               path=model_data_path)
    else:
        warnings.warn(
            "If your cluster is remote, H2O may not store the model correctly. "
            "Please upgrade H2O version to a newer version")
        h2o_save_location = h2o.save_model(model=h2o_model,
                                           path=model_data_path,
                                           force=True)
    model_file = os.path.basename(h2o_save_location)

    # Save h2o-settings
    if settings is None:
        settings = {}
    settings["full_file"] = h2o_save_location
    settings["model_file"] = model_file
    settings["model_dir"] = model_data_path
    with open(os.path.join(model_data_path, "h2o.yaml"), "w") as settings_file:
        yaml.safe_dump(settings, stream=settings_file)

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    _log_pip_requirements(conda_env, path)

    pyfunc.add_to_model(mlflow_model,
                        loader_module="mlflow.h2o",
                        data=model_data_subpath,
                        env=conda_env_subpath)
    mlflow_model.add_flavor(FLAVOR_NAME,
                            h2o_version=h2o.__version__,
                            data=model_data_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
Beispiel #3
0
def save_model(
    h2o_model,
    path,
    conda_env=None,
    mlflow_model=None,
    settings=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save an H2O model to a path on the local file system.

    :param h2o_model: H2O model to be saved.
    :param path: Local path where the model is to be saved.
    :param conda_env: {{ conda_env }}
    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.

    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    """
    import h2o

    _validate_env_arguments(conda_env, pip_requirements,
                            extra_pip_requirements)

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise Exception("Path '{}' already exists".format(path))
    model_data_subpath = "model.h2o"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    # Save h2o-model
    if hasattr(h2o, "download_model"):
        h2o_save_location = h2o.download_model(model=h2o_model,
                                               path=model_data_path)
    else:
        warnings.warn(
            "If your cluster is remote, H2O may not store the model correctly. "
            "Please upgrade H2O version to a newer version")
        h2o_save_location = h2o.save_model(model=h2o_model,
                                           path=model_data_path,
                                           force=True)
    model_file = os.path.basename(h2o_save_location)

    # Save h2o-settings
    if settings is None:
        settings = {}
    settings["full_file"] = h2o_save_location
    settings["model_file"] = model_file
    settings["model_dir"] = model_data_path
    with open(os.path.join(model_data_path, "h2o.yaml"), "w") as settings_file:
        yaml.safe_dump(settings, stream=settings_file)

    pyfunc.add_to_model(mlflow_model,
                        loader_module="mlflow.h2o",
                        data=model_data_subpath,
                        env=_CONDA_ENV_FILE_NAME)
    mlflow_model.add_flavor(FLAVOR_NAME,
                            h2o_version=h2o.__version__,
                            data=model_data_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(
            conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME),
                 "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME),
             "\n".join(pip_requirements))
Beispiel #4
0
df[y] = df[y].asfactor()

# h2o-3 dataset partition function
splits = df.split_frame(ratios=[0.7, 0.15], seed=1)

# splits outcome division into 3 new variables
train, valid, test = splits
print(train.nrow, valid.nrow, test.nrow)

# automl estimator
aml = H2OAutoML(max_models=8, max_runtime_secs=900, seed=1)

aml.train(x=x, y=y, training_frame=train)

# performance evaluation: leaderboard
lb = aml.leaderboard
print(lb.head(rows=lb.nrows))

# predict with the leader
yhat = aml.leader.predict(test)
auc = aml.leader.auc()
conf = aml.leader.confusion_matrix()

print(yhat)
print(auc)
print(conf)

# download best model
h2o.download_model(model=aml.leader, path='test/output/')

h2o.cluster().shutdown()