def test_onnx_no_test_data_double(self):
    """Convert a double-input ONNX-ML classifier without passing test data.

    A ``GradientBoostingClassifier`` is fitted on seeded random data (behind
    a float32 ``CastTransformer`` when that class is available), exported
    with a ``DoubleTensorType`` input whose first dimension is ``None``, and
    handed to ``hummingbird.ml.convert`` with no test data. The converted
    model must be truthy.
    """
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2

    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
    if CastTransformer is not None:
        # newer version of sklearn-onnx
        model = make_pipeline(CastTransformer(dtype=np.float32), classifier)
    else:
        model = classifier

    np.random.seed(0)
    X = np.random.rand(100, 200)
    y = np.random.randint(num_classes, size=100)
    model.fit(X, y)

    # Create ONNX-ML model
    input_type = DoubleTensorType([None, X.shape[1]])
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("input", input_type)], target_opset=11)

    # Test onnx requires no test_data
    hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
    assert hb_model
def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    The converter is selected in this order: class name starting with
    ``LGBM`` (lightgbm), instance of scikit-learn's ``BaseEstimator``,
    instance of ``keras.models.Model``, and otherwise coremltools.

    :param model: model, *scikit-learn*, *keras*, or *coremltools* object
    :param name: name forwarded to the selected converter
    :param input_types: input types forwarded to the selected converter
    :return: tuple ``(converted, prefix)`` where *converted* is the value
        returned by the selected converter and *prefix* is one of
        ``"LightGbm"``, ``"Sklearn"``, ``"Keras"`` or ``"Cml"``
    :raises RuntimeError: if the selected converter returns ``None``
    """
    from sklearn.base import BaseEstimator

    # The result is kept in its own variable so that the error message below
    # can still report the type of the *input* model.
    if model.__class__.__name__.startswith("LGBM"):
        from onnxmltools.convert import convert_lightgbm
        converted, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
    elif isinstance(model, BaseEstimator):
        from onnxmltools.convert import convert_sklearn
        converted, prefix = convert_sklearn(model, name, input_types), "Sklearn"
    else:
        from keras.models import Model
        if isinstance(model, Model):
            from onnxmltools.convert import convert_keras
            converted, prefix = convert_keras(model, name, input_types), "Keras"
        else:
            from onnxmltools.convert import convert_coreml
            converted, prefix = convert_coreml(model, name, input_types), "Cml"

    if converted is None:
        raise RuntimeError(
            "Unable to convert model of type '{0}'.".format(type(model)))
    return converted, prefix
def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    The converter is selected in this order: class name starting with
    ``LGBM`` (lightgbm), class name starting with ``XGB`` (xgboost), class
    named ``Booster`` that is a ``lightgbm.Booster``, instance of
    scikit-learn's ``BaseEstimator``, and otherwise coremltools.

    :param model: model
    :param name: name forwarded to the selected converter
    :param input_types: input types forwarded to the selected converter
    :return: tuple ``(converted, prefix)`` where *converted* is the value
        returned by the selected converter and *prefix* is one of
        ``"LightGbm"``, ``"XGB"``, ``"Sklearn"`` or ``"Cml"``
    :raises RuntimeError: if the model is a ``Booster`` that is not a
        ``lightgbm.Booster``, or if the selected converter returns ``None``
    """
    from sklearn.base import BaseEstimator

    class_name = model.__class__.__name__

    # The result is kept in its own variable so that the final error message
    # can still report the type of the *input* model.
    if class_name.startswith("LGBM"):
        from onnxmltools.convert import convert_lightgbm
        converted, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
    elif class_name.startswith("XGB"):
        from onnxmltools.convert import convert_xgboost
        converted, prefix = convert_xgboost(model, name, input_types), "XGB"
    elif class_name == 'Booster':
        import lightgbm
        if isinstance(model, lightgbm.Booster):
            from onnxmltools.convert import convert_lightgbm
            converted, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
        else:
            raise RuntimeError("Unable to convert model of type '{0}'.".format(
                type(model)))
    elif isinstance(model, BaseEstimator):
        from onnxmltools.convert import convert_sklearn
        converted, prefix = convert_sklearn(model, name, input_types), "Sklearn"
    else:
        from onnxmltools.convert import convert_coreml
        converted, prefix = convert_coreml(model, name, input_types), "Cml"

    if converted is None:
        raise RuntimeError("Unable to convert model of type '{0}'.".format(
            type(model)))
    return converted, prefix
def test_sklearn_classifier_multi_discrete_int_labels(self):
    """Convert a random forest trained on non-contiguous integer labels.

    The iris targets 0, 1 and 2 are relabelled to 10, 20 and -30, a
    three-tree ``RandomForestClassifier`` is fitted on the first two
    features, and the converted model is dumped together with the float32
    test data.
    """
    iris = load_iris()
    x = iris.data[:, :2]
    y = iris.target
    # Relabel in place, in the same order as the class indices.
    for old_label, new_label in ((0, 10), (1, 20), (2, -30)):
        y[y == old_label] = new_label

    x_train, x_test, y_train, _ = train_test_split(
        x, y, test_size=0.5, random_state=42)

    forest = RandomForestClassifier(n_estimators=3)
    forest.fit(x_train, y_train)

    input_type = FloatTensorType(shape=[None, x_test.shape[1]])
    conv_model = convert_sklearn(forest, initial_types=[('input', input_type)])
    self.assertTrue(conv_model is not None)

    dump_data_and_model(
        x_test.astype("float32"),
        forest,
        conv_model,
        basename="SklearnRFClassifierMultiDiscreteIntLabels",
        allow_failure="StrictVersion("
        "onnx.__version__)"
        "< StrictVersion('1.3.0')",
    )
def test_onnx_deafault_n_threads(self):
    """Check the thread settings of the session behind a converted model.

    After converting a gradient boosting ONNX-ML model with test data, the
    session options must report ``intra_op_num_threads`` equal to
    ``psutil.cpu_count(logical=False)`` and ``inter_op_num_threads`` equal
    to 1.
    """
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)

    np.random.seed(0)
    X = np.array(np.random.rand(100, 200), dtype=np.float32)
    y = np.random.randint(num_classes, size=100)
    model.fit(X, y)

    # Create ONNX-ML model
    n_rows, n_features = X.shape[0], X.shape[1]
    onnx_ml_model = convert_sklearn(
        model,
        initial_types=[("input", FloatTensorType([n_rows, n_features]))],
        target_opset=9)

    hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X)
    self.assertIsNotNone(hb_model)

    intra_threads = hb_model._session.get_session_options().intra_op_num_threads
    self.assertTrue(intra_threads == psutil.cpu_count(logical=False))

    inter_threads = hb_model._session.get_session_options().inter_op_num_threads
    self.assertTrue(inter_threads == 1)
def common_test_xgboost_10_skl(self, missing, replace=False):
    """Compare XGBRegressor and ONNX predictions for a given ``missing`` value.

    Loads ``data_fail.csv`` from the directory of this file, fills empty
    cells, fits an ``XGBRegressor`` on the transformed training split,
    converts only the regressor step to ONNX and checks that onnxruntime
    reproduces the regressor's predictions on the first five test rows.

    :param missing: value forwarded to ``XGBRegressor(missing=...)``
    :param replace: when true, entries of the ONNX/regressor input equal to
        *missing* are replaced by ``np.nan`` before predicting
    """
    this = os.path.abspath(os.path.dirname(__file__))
    data = pandas.read_csv(os.path.join(this, "data_fail.csv"))

    # Fill empty cells column by column. The result is assigned back to the
    # frame: ``data[col].fillna(..., inplace=True)`` acts on a temporary
    # Series and does not reliably update ``data`` on recent pandas.
    for col in data:
        dtype = data[col].dtype
        if dtype in ['float64', 'float32']:
            data[col] = data[col].fillna(0.)
        elif dtype in ['int64']:
            data[col] = data[col].fillna(0)
        elif dtype in ['O']:
            data[col] = data[col].fillna('N/A')

    # Multiplying by a float turns the column into floating point values.
    data['pclass'] = data['pclass'] * float(1)
    full_df = data.drop('survived', axis=1)
    full_labels = data['survived']

    train_df, test_df, train_labels, _ = train_test_split(
        full_df, full_labels, test_size=.2, random_state=11)

    col_transformer = self._column_tranformer_fitted_from_df(full_df)

    param_distributions = {
        "colsample_bytree": 0.5,
        "gamma": 0.2,
        'learning_rate': 0.3,
        'max_depth': 2,
        'min_child_weight': 1.,
        'n_estimators': 1,
        'missing': missing,
    }

    regressor = XGBRegressor(verbose=0, objective='reg:squarederror',
                             **param_distributions)
    regressor.fit(col_transformer.transform(train_df), train_labels)
    model = Pipeline(steps=[('preprocessor', col_transformer),
                            ('regressor', regressor)])

    update_registered_converter(XGBRegressor, 'XGBRegressor',
                                calculate_linear_regressor_output_shapes,
                                convert_xgb)

    # last step: only the regressor is converted, so its input is the output
    # of the fitted preprocessor.
    input_xgb = model.steps[0][-1].transform(test_df[:5]).astype(
        np.float32)
    if replace:
        input_xgb[input_xgb[:, :] == missing] = np.nan
    onnx_last = convert_sklearn(
        model.steps[1][-1],
        initial_types=[
            ('X', FloatTensorType(shape=[None, input_xgb.shape[1]]))
        ],
        target_opset=get_opset_number_from_onnx())
    session = rt.InferenceSession(onnx_last.SerializeToString())
    pred_skl = model.steps[1][-1].predict(input_xgb).ravel()
    pred_onx = session.run(None, {'X': input_xgb})[0].ravel()
    assert_almost_equal(pred_skl, pred_onx)
def scikit_learn_converter(args):
    """Load a joblib-serialized scikit-learn model and convert it to ONNX.

    :param args: object providing ``source`` (path of the serialized model)
        and ``ONNXVersion`` (passed to ``get_opset`` to pick the target opset)
    :return: the model returned by ``convert_sklearn``
    """
    try:
        # ``sklearn.externals.joblib`` is gone from recent scikit-learn
        # releases; the standalone package is the supported import.
        import joblib
    except ImportError:
        # Fall back to the copy vendored by older scikit-learn releases.
        from sklearn.externals import joblib

    # SECURITY: joblib.load unpickles the file and can therefore execute
    # arbitrary code; only use it on model files from a trusted source.
    source_model = joblib.load(args.source)

    from onnxmltools.convert.common.data_types import FloatTensorType
    from onnxmltools.convert import convert_sklearn

    # NOTE(review): the declared input shape is taken from ``coef_.shape``,
    # so models without a ``coef_`` attribute fail here -- confirm that this
    # shape is the intended input layout.
    onnx_model = convert_sklearn(
        source_model,
        initial_types=[('input', FloatTensorType(source_model.coef_.shape))],
        target_opset=get_opset(args.ONNXVersion))
    return onnx_model
def test_onnx_no_test_data_string(self):
    """Converting a string-input ONNX-ML model without test data must fail.

    A ``OneHotEncoder`` fitted on a single row of strings is exported with a
    ``StringTensorType`` input; ``hummingbird.ml.convert`` called without
    test data is expected to raise ``RuntimeError``.
    """
    warnings.filterwarnings("ignore")
    model = OneHotEncoder()
    X = np.array([["a", "b", "c"]])
    model.fit(X)

    # Create ONNX-ML model
    input_type = StringTensorType([X.shape[0], X.shape[1]])
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("input", input_type)], target_opset=11)

    # Conversion without test data is expected to be rejected here.
    with self.assertRaises(RuntimeError):
        hummingbird.ml.convert(onnx_ml_model, "onnx")
def test_sklearn_classifier_multi(self):
    """Convert a fitted ``RandomForestClassifier`` and dump model and data.

    The classifier and its test inputs come from
    ``_fit_classification_model``; the ONNX input is declared as a float
    tensor with as many columns as the test data.
    """
    forest, x_test = _fit_classification_model(RandomForestClassifier(), 3)

    input_type = FloatTensorType(shape=[None, x_test.shape[1]])
    conv_model = convert_sklearn(forest, initial_types=[('input', input_type)])
    self.assertTrue(conv_model is not None)

    dump_data_and_model(
        x_test,
        forest,
        conv_model,
        basename="SklearnRFClassifierMulti",
        allow_failure="StrictVersion("
        "onnx.__version__)"
        "< StrictVersion('1.3.0')",
    )
def test_onnx_no_test_data_int(self):
    """Convert an int32-input ONNX-ML model without passing test data.

    A ``OneHotEncoder`` fitted on one row of int32 values is exported with
    an ``Int32TensorType`` input and converted by hummingbird with no test
    data; the converted model must be truthy.
    """
    warnings.filterwarnings("ignore")
    model = OneHotEncoder()
    X = np.array([[1, 2, 3]], dtype=np.int32)
    model.fit(X)

    # Create ONNX-ML model
    input_type = Int32TensorType([X.shape[0], X.shape[1]])
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("input", input_type)], target_opset=11)

    # Test onnx requires no test_data
    hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
    assert hb_model
def test_onnx_no_test_data_long(self):
    """Convert an int64-input ONNX-ML model without passing test data.

    A ``StandardScaler`` fitted on seeded random data cast to int64 is
    exported with an ``Int64TensorType`` input and converted by hummingbird
    with no test data; the converted model must be truthy.
    """
    warnings.filterwarnings("ignore")
    model = StandardScaler(with_mean=True, with_std=True)

    np.random.seed(0)
    X = np.array(np.random.rand(100, 200), dtype=np.int64)
    model.fit(X)

    # Create ONNX-ML model
    input_type = Int64TensorType([X.shape[0], X.shape[1]])
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("input", input_type)], target_opset=11)

    # Test onnx requires no test_data
    hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
    assert hb_model
def test_pandas_batch_onnxml(self):
    """Batched hummingbird prediction on a DataFrame-fed ONNX-ML pipeline.

    A pass-through ``ColumnTransformer`` followed by a gradient boosting
    classifier is fitted on three iris columns held in a DataFrame, exported
    with one double input per column, and converted with
    ``constants.BATCH_SIZE`` set to 10. The converted model's probabilities
    must match the pipeline's within 1e-06.
    """
    import pandas

    max_depth = 10
    iris = datasets.load_iris()
    X = iris.data[:, :3]
    y = iris.target
    columns = ["vA", "vB", "vC"]
    X_train = pandas.DataFrame(X, columns=columns)

    preprocessor = ColumnTransformer(transformers=[], remainder="passthrough")
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
    pipeline = Pipeline(steps=[
        ("preprocessor", preprocessor),
        ("classifier", classifier),
    ])
    pipeline.fit(X_train, y)

    # Create ONNX-ML model
    n_rows = X.shape[0]
    onnx_ml_model = convert_sklearn(
        pipeline,
        initial_types=[
            ("vA", DoubleTensorType([n_rows, 1])),
            ("vB", DoubleTensorType([n_rows, 1])),
            ("vC", DoubleTensorType([n_rows, 1])),
        ],
        target_opset=9,
    )

    hb_model = hummingbird.ml.convert(
        onnx_ml_model, "onnx", extra_config={constants.BATCH_SIZE: 10})
    self.assertTrue(hb_model is not None)

    expected = pipeline.predict_proba(X_train)
    actual = hb_model.predict_proba(X_train)
    np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)
def test_onnx_no_test_data_double(self):
    """Convert a double-input ONNX-ML classifier without passing test data.

    A ``GradientBoostingClassifier`` fitted on seeded random data is
    exported with a ``DoubleTensorType`` input sized like the training data
    and converted by hummingbird with no test data; the converted model
    must be truthy.
    """
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)

    np.random.seed(0)
    X = np.random.rand(100, 200)
    y = np.random.randint(num_classes, size=100)
    model.fit(X, y)

    # Create ONNX-ML model
    input_type = DoubleTensorType([X.shape[0], X.shape[1]])
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("input", input_type)], target_opset=11)

    # Test onnx requires no test_data
    hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
    assert hb_model
def test_sklearn_regressor(self):
    """Convert a ``RandomForestRegressor`` fitted on the diabetes dataset.

    Half of the data is held out with a fixed split; the converted model is
    dumped together with the float32 test inputs.
    """
    diabetes = load_diabetes()
    x, y = diabetes.data, diabetes.target
    x_train, x_test, y_train, _ = train_test_split(
        x, y, test_size=0.5, random_state=42)

    forest = RandomForestRegressor()
    forest.fit(x_train, y_train)

    input_type = FloatTensorType(shape=[None, x_test.shape[1]])
    conv_model = convert_sklearn(forest, initial_types=[('input', input_type)])
    self.assertTrue(conv_model is not None)

    dump_data_and_model(
        x_test.astype("float32"),
        forest,
        conv_model,
        basename="SklearnRFRegressor-Dec3",
        allow_failure="StrictVersion("
        "onnx.__version__)"
        "< StrictVersion('1.3.0')",
    )
def _test_decision_tree(self, X, model, extra_config=None):
    """Run a model through ONNX-ML and through the converted ONNX model.

    :param X: 2-D input data; its shape defines the declared ONNX input
    :param model: fitted model handed to ``convert_sklearn``
    :param extra_config: optional configuration forwarded to ``convert``;
        a fresh empty dict is used when omitted
    :return: tuple ``(onnx_ml_pred, onnx_pred, output_names)``. For both
        prediction lists, index 1 holds the output whose name contains
        ``"label"`` and index 0 holds the other output. With a single
        output (regression) ``onnx_pred`` is the plain ``predict`` result.
    """
    # A fresh dict per call: a ``{}`` default would be one object shared by
    # every call and handed to ``convert``.
    if extra_config is None:
        extra_config = {}

    # Create ONNX-ML model
    onnx_ml_model = convert_sklearn(
        model,
        initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
        target_opset=11)

    # Create ONNX model
    onnx_model = convert(onnx_ml_model, "onnx", X, extra_config)

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [output.name for output in session.get_outputs()]
    onnx_ml_pred = [[] for _ in output_names]
    inputs = {session.get_inputs()[0].name: X}
    pred = session.run(output_names, inputs)
    for output_name, value in zip(output_names, pred):
        if "label" in output_name:
            onnx_ml_pred[1] = value
        else:
            onnx_ml_pred[0] = value

    # Get the predictions for the ONNX model
    onnx_pred = [[] for _ in output_names]
    if len(output_names) == 1:  # regression
        onnx_pred = onnx_model.predict(X)
    else:  # classification
        for output_name in output_names:
            if "label" in output_name:
                onnx_pred[1] = onnx_model.predict(X)
            else:
                onnx_pred[0] = onnx_model.predict_proba(X)

    return onnx_ml_pred, onnx_pred, output_names
def test_onnx_test_data(self):
    """Converting a float-input ONNX-ML model without test data must fail.

    A ``GradientBoostingClassifier`` fitted on seeded float32 data is
    exported with a ``FloatTensorType`` input; ``hummingbird.ml.convert``
    called without test data is expected to raise ``RuntimeError``.
    """
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)

    np.random.seed(0)
    X = np.array(np.random.rand(100, 200), dtype=np.float32)
    y = np.random.randint(num_classes, size=100)
    model.fit(X, y)

    # Create ONNX-ML model
    input_type = FloatTensorType([X.shape[0], X.shape[1]])
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("input", input_type)], target_opset=11)

    # Test onnx requires test_data
    with self.assertRaises(RuntimeError):
        hummingbird.ml.convert(onnx_ml_model, "onnx")
def test_pandas_batch_onnxml(self):
    """Batched hummingbird prediction on a DataFrame-fed ONNX-ML pipeline.

    A pass-through ``ColumnTransformer``, an optional float32
    ``CastTransformer`` (only when that class is available) and a gradient
    boosting classifier are fitted on three iris columns held in a
    DataFrame. The exported model is converted with
    ``hummingbird.ml.convert_batch`` using the first 10 rows and the
    remainder size, and its probabilities must match the pipeline's within
    1e-06.
    """
    import pandas

    max_depth = 10
    iris = datasets.load_iris()
    X = iris.data[:, :3]
    y = iris.target
    columns = ["vA", "vB", "vC"]
    X_train = pandas.DataFrame(X, columns=columns)

    steps = [
        ("preprocessor", ColumnTransformer(
            transformers=[], remainder="passthrough")),
    ]
    if CastTransformer is not None:
        # newer version of sklearn-onnx
        steps.append(('cast', CastTransformer(dtype=np.float32)))
    steps.append(
        ("classifier",
         GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)))
    pipeline = Pipeline(steps=steps)
    pipeline.fit(X_train, y)

    # Create ONNX-ML model
    n_rows = X.shape[0]
    onnx_ml_model = convert_sklearn(
        pipeline,
        initial_types=[
            ("vA", DoubleTensorType([n_rows, 1])),
            ("vB", DoubleTensorType([n_rows, 1])),
            ("vC", DoubleTensorType([n_rows, 1])),
        ],
        target_opset=9,
    )

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    first_batch = pandas.DataFrame(X[:batch_size], columns=columns)
    hb_model = hummingbird.ml.convert_batch(
        onnx_ml_model, "onnx", first_batch, remainder_size)
    self.assertTrue(hb_model is not None)

    expected = pipeline.predict_proba(X_train)
    actual = hb_model.predict_proba(X_train)
    np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools.convert import convert_sklearn

# Train a random forest on a random split of the iris data.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = RandomForestClassifier()
clr.fit(X_train, y_train)
print(clr)

###########################
# Convert a model into ONNX
# +++++++++++++++++++++++++

initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)
# Persist the serialized model so the runtime can load it from disk.
with open("rf_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())

###################################
# Compute the prediction with onnxruntime
# +++++++++++++++++++++++++++++++++++++++

sess = rt.InferenceSession("rf_iris.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
# The test data is cast to float32 before being fed to the session.
feeds = {input_name: X_test.astype(numpy.float32)}
pred_onx = sess.run([label_name], feeds)[0]
print(pred_onx)

#######################################