Exemple #1
0
def train(data_path, ntrees, log_as_onnx):
    """Train an H2O random forest and log the run to MLflow.

    The model is trained with ``"quality"`` as the second positional argument
    of ``rf.train`` and validated against the test frame. The ``ntrees``
    parameter, the rmse/r2/mae metrics, version tags and the H2O model itself
    are logged to the active MLflow run.

    Args:
        data_path: Location of the dataset, forwarded to ``prepare_data``.
        ntrees: Number of trees passed to ``H2ORandomForestEstimator``.
        log_as_onnx: When truthy, additionally run the logged model through
            ``onnxmltools.convert.convert_h2o`` and print the result type.
    """
    # Use the function argument, not the global ``args``: the original read
    # ``args.data_path``, which ignored the parameter and failed whenever the
    # function was called without a module-level ``args``.
    train_data, test_data, train_cols = prepare_data(data_path)
    with mlflow.start_run() as run:
        exp = client.get_experiment(run.info.experiment_id)
        print("MLflow:")
        print("  run id:", run.info.run_id)
        print("  experiment id:", run.info.experiment_id)
        print("  experiment name:", exp.name)
        print("  experiment artifact_location:", exp.artifact_location)
        rf = H2ORandomForestEstimator(ntrees=ntrees)
        rf.train(train_cols,
                 "quality",
                 training_frame=train_data,
                 validation_frame=test_data)

        mlflow.log_param("ntrees", ntrees)

        mlflow.log_metric("rmse", rf.rmse())
        mlflow.log_metric("r2", rf.r2())
        mlflow.log_metric("mae", rf.mae())

        mlflow.set_tag("mlflow_version", mlflow.__version__)
        mlflow.set_tag("h2o_version", h2o.__version__)

        mlflow.h2o.log_model(rf, "h2o-model")

        if log_as_onnx:
            # Imported lazily so onnxmltools is only required when requested.
            import onnxmltools
            from onnxmltools.convert import convert_h2o
            print("onnxmltools.version:", onnxmltools.__version__)
            # NOTE(review): assumes the experiment artifact location is a
            # plain local filesystem path (not a URI) -- confirm.
            path = f"{exp.artifact_location}/{run.info.run_id}/artifacts/h2o-model/model.h2o"
            # NOTE(review): convert_h2o is documented as taking the MOJO
            # content loaded into memory; here it receives a path string --
            # confirm against the installed onnxmltools version.
            onnx_model = convert_h2o(path)
            print("onnx_model.type:", type(onnx_model))
f_train_y = h2o.H2OFrame(y_train)
# The label frame is converted to a factor before being bound to the features.
f_train = f_train_x.cbind(f_train_y.asfactor())

# NOTE(review): despite the ``glm_logistic`` name, the estimator built here is
# a gradient boosting one; the name is kept because later code refers to it.
glm_logistic = H2OGradientBoostingEstimator(ntrees=10, max_depth=5)
glm_logistic.train(x=xc, y=yc, training_frame=f_train)

# ``exist_ok=True`` replaces the check-then-create pair, which could fail if
# the directory appeared between the existence test and the mkdir call.
os.makedirs("model", exist_ok=True)
pth = glm_logistic.download_mojo(path="model")

###########################
# Convert a model into ONNX
# +++++++++++++++++++++++++

# Declare a single float tensor input named 'float_input'; ``None`` leaves the
# first dimension unspecified and the second dimension is fixed to 4.
initial_type = [('float_input', FloatTensorType([None, 4]))]
# NOTE(review): ``pth`` is what ``download_mojo`` returned above; convert_h2o
# is documented as taking the MOJO content loaded into memory -- confirm that
# passing this value directly works with the installed onnxmltools version.
onx = convert_h2o(pth, initial_types=initial_type)

# The H2O cluster is shut down only after the conversion has been performed.
h2o.cluster().shutdown()

###################################
# Compute the predictions with onnxruntime
# ++++++++++++++++++++++++++++++++++++++++

# Build an inference session from the serialized ONNX model.
sess = rt.InferenceSession(onx.SerializeToString())
# Only the first declared input and the first declared output are used.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
# The test data is cast to float32 before being fed to the session; ``[0]``
# picks the single requested output from the returned list.
pred_onx = sess.run(
    [label_name], {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)

##################################
def _convert_mojo(mojo_path):
    """Read a MOJO file and convert its content with ``convert_h2o``.

    Args:
        mojo_path: Path of the MOJO file to read in binary mode.

    Returns:
        Whatever ``convert_h2o`` returns for the file's raw bytes.
    """
    # The context manager closes the file even if reading raises, which the
    # original manual open/read/close sequence did not guarantee.
    with open(mojo_path, "rb") as mojo_file:
        mojo_content = mojo_file.read()
    return convert_h2o(mojo_content)