def train(data_path, ntrees, log_as_onnx):
    """Train an H2O random forest inside an MLflow run and log the results.

    Args:
        data_path: Location of the dataset; forwarded to ``prepare_data``.
        ntrees: Number of trees passed to ``H2ORandomForestEstimator``.
        log_as_onnx: When truthy, additionally convert the logged H2O model
            with ``onnxmltools`` and print the type of the converted model.
    """
    # Use the caller-supplied path instead of reaching for a module-level
    # ``args`` object, so the function honours its own parameter.
    train_data, test_data, train_cols = prepare_data(data_path)

    with mlflow.start_run() as run:
        exp = client.get_experiment(run.info.experiment_id)
        print("MLflow:")
        print(" run id:", run.info.run_id)
        print(" experiment id:", run.info.experiment_id)
        print(" experiment name:", exp.name)
        print(" experiment artifact_location:", exp.artifact_location)

        rf = H2ORandomForestEstimator(ntrees=ntrees)
        rf.train(
            train_cols,
            "quality",
            training_frame=train_data,
            validation_frame=test_data,
        )

        mlflow.log_param("ntrees", ntrees)
        mlflow.log_metric("rmse", rf.rmse())
        mlflow.log_metric("r2", rf.r2())
        mlflow.log_metric("mae", rf.mae())

        # Record library versions so the run can be reproduced later.
        mlflow.set_tag("mlflow_version", mlflow.__version__)
        mlflow.set_tag("h2o_version", h2o.__version__)

        mlflow.h2o.log_model(rf, "h2o-model")

        if log_as_onnx:
            # Imported lazily so onnxmltools is only required when requested.
            import onnxmltools
            from onnxmltools.convert import convert_h2o

            print("onnxmltools.version:", onnxmltools.__version__)
            # NOTE(review): assumes the artifact location is a path that
            # convert_h2o can open directly — confirm for remote stores.
            path = f"{exp.artifact_location}/{run.info.run_id}/artifacts/h2o-model/model.h2o"
            onnx_model = convert_h2o(path)
            print("onnx_model.type:", type(onnx_model))
f_train_y = h2o.H2OFrame(y_train)
f_train = f_train_x.cbind(f_train_y.asfactor())

# NOTE: despite the variable name, the estimator is a gradient boosting model.
glm_logistic = H2OGradientBoostingEstimator(ntrees=10, max_depth=5)
glm_logistic.train(x=xc, y=yc, training_frame=f_train)

# Create the output directory in one step; ``exist_ok`` avoids the
# check-then-create race of ``os.path.exists`` followed by ``os.mkdir``.
os.makedirs("model", exist_ok=True)
pth = glm_logistic.download_mojo(path="model")

###########################
# Convert a model into ONNX
# +++++++++++++++++++++++++

initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_h2o(pth, initial_types=initial_type)

h2o.cluster().shutdown()

###################################
# Compute the predictions with onnxruntime
# ++++++++++++++++++++++++++++++++++++++++

sess = rt.InferenceSession(onx.SerializeToString())
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run(
    [label_name], {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)

##################################
def _convert_mojo(mojo_path):
    """Read the file at *mojo_path* and pass its raw bytes to ``convert_h2o``.

    Args:
        mojo_path: Path of the file to open in binary mode.

    Returns:
        Whatever ``convert_h2o`` returns for the file's content.
    """
    # The context manager closes the handle even if ``read()`` raises,
    # which the manual open/close sequence did not guarantee.
    with open(mojo_path, "rb") as f:
        mojo_content = f.read()
    return convert_h2o(mojo_content)