class TestNoExtra(unittest.TestCase):
    """
    These tests are meant to be run on a clean container after doing
    `pip install hummingbird-ml` without any of the `extra` packages.

    Each test is skipped when the corresponding optional package IS installed,
    so that the suite only asserts the "not installed" behavior where it applies.
    """

    # Test no LGBM returns false on lightgbm_installed()
    @unittest.skipIf(lightgbm_installed(), reason="Test when LightGBM is not installed")
    def test_lightgbm_installed_false(self):
        warnings.filterwarnings("ignore")
        # assertFalse (rather than a bare assert) still runs under `python -O`.
        self.assertFalse(lightgbm_installed())

    # Test no XGB returns false on xgboost_installed()
    @unittest.skipIf(xgboost_installed(), reason="Test when XGBoost is not installed")
    def test_xgboost_installed_false(self):
        warnings.filterwarnings("ignore")
        self.assertFalse(xgboost_installed())

    # Test no ONNX Runtime returns false on onnx_runtime_installed()
    @unittest.skipIf(onnx_runtime_installed(), reason="Test when ONNX is not installed")
    def test_onnx_installed_false(self):
        warnings.filterwarnings("ignore")
        self.assertFalse(onnx_runtime_installed())

    # Test no ONNXMLTOOLS returns false on onnx_ml_tools_installed()
    @unittest.skipIf(onnx_ml_tools_installed(), reason="Test when ONNXMLTOOLS is not installed")
    def test_onnx_ml_installed_false(self):
        warnings.filterwarnings("ignore")
        self.assertFalse(onnx_ml_tools_installed())

    # Test no TVM returns false on tvm_installed()
    # The guard must check TVM itself: guarding on ONNXMLTOOLS would skip this
    # test (or run it and fail) based on an unrelated package.
    @unittest.skipIf(tvm_installed(), reason="Test when TVM is not installed")
    def test_tvm_installed_false(self):
        warnings.filterwarnings("ignore")
        self.assertFalse(tvm_installed())

    # Test that we can import the converter successfully without installing [extra]
    def test_import_convert_no_extra(self):
        try:
            from hummingbird.ml import convert  # noqa: F401 (import is the test)
        except Exception:  # TODO something more specific?
            self.fail("Unexpected Error on importing convert without extra packages")
class TestONNXOneHotEncoder(unittest.TestCase):
    """Check the converter's handling of ONNX-ML OneHotEncoder models.

    Integer inputs are converted and compared against the predictions of the
    original ONNX-ML model; string inputs are expected to be rejected.
    """

    # Shared guard: every test below needs ONNX, ONNX Runtime and ONNXMLTOOLS.
    _requires_onnx = unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )

    def _check_int_encoder(self, X, tensor_type, rtol, atol):
        """Fit a OneHotEncoder on X, convert it, and compare both predictions.

        Args:
            X: integer numpy array used both for fitting and for inference.
            tensor_type: ONNX tensor type class matching the dtype of X; it is
                instantiated with ``X.shape`` to declare the model input.
            rtol: relative tolerance passed to ``np.testing.assert_allclose``.
            atol: absolute tolerance passed to ``np.testing.assert_allclose``.
        """
        model = OneHotEncoder()
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", tensor_type(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with ints
    @_requires_onnx
    def test_one_hot_encoder_onnx_int(self, rtol=1e-06, atol=1e-06):
        X = np.array([[1, 2, 3]], dtype=np.int32)
        self._check_int_encoder(X, IntTensorType_onnx, rtol, atol)

    # Test OneHotEncoder with 2 inputs
    @_requires_onnx
    def test_one_hot_encoder_onnx2(self, rtol=1e-06, atol=1e-06):
        X = np.array([[1, 2, 3], [2, 1, 3]], dtype=np.int32)
        self._check_int_encoder(X, IntTensorType_onnx, rtol, atol)

    # Test OneHotEncoder with int64
    @_requires_onnx
    def test_one_hot_encoder_onnx_int64(self, rtol=1e-06, atol=1e-06):
        X = np.array([[1, 2, 3]], dtype=np.int64)
        self._check_int_encoder(X, LongTensorType_onnx, rtol, atol)

    # NOTE: the string test below is kept disabled; the tests that follow it
    # assert that string inputs currently make the converter raise.
    # # Test OneHotEncoder with strings
    # @unittest.skipIf(
    #     not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    # )
    # def test_model_one_hot_encoder_string(self):
    #     model = OneHotEncoder()
    #     data = [['a', 'r', 'x'], ['a', 'r', 'x'], ['aaaa', 'r', 'x'], ['a', 'r', 'xx']]
    #     model.fit(data)
    #     # max word length is the smallest number which is divisible by 4 and larger than or equal to the length of any word
    #     max_word_length = 4
    #     num_columns = 3
    #     # Create ONNX-ML model
    #     onnx_ml_model = convert_sklearn(
    #         model,
    #         initial_types=[("input", StringTensorType_onnx([4, 3]))]
    #     )
    #     pytorch_input = np.array(data, dtype='|S'+str(max_word_length)).view(np.int32).reshape(-1, num_columns, max_word_length // 4)
    #     # Create ONNX model by calling converter
    #     onnx_model = convert(onnx_ml_model, "onnx", pytorch_input)
    #     # Get the predictions for the ONNX-ML model
    #     session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    #     output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
    #     inputs = {session.get_inputs()[0].name: data}
    #     onnx_ml_pred = session.run(output_names, inputs)
    #     # Get the predictions for the ONNX model
    #     session = ort.InferenceSession(onnx_model.SerializeToString())
    #     inputs_pyt = {session.get_inputs()[0].name: pytorch_input}
    #     onnx_pred = session.run(output_names, inputs_pyt)
    #     return onnx_ml_pred, onnx_pred

    # Test OneHotEncoder temporary failcase
    @_requires_onnx
    def test_ohe_string_raises_notimpl_onnx(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create ONNX model by calling converter, should raise error for strings
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", data)

    # Test OneHotEncoder failcase when input data type is not supported
    @_requires_onnx
    def test_ohe_string_raises_type_error_onnx(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create ONNX model by calling converter, should raise error for strings.
        # NOTE(review): unlike the test above, no test data is passed here, and
        # the expected exception is RuntimeError despite the test name - confirm
        # which condition is actually being exercised.
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx")
def test_onnx_ml_installed_false(self):
    # Silence warnings, then check that ONNXMLTOOLS is reported as not installed.
    warnings.filterwarnings("ignore")
    onnxmltools_present = onnx_ml_tools_installed()
    assert not onnxmltools_present
class TestONNXImputer(unittest.TestCase):
    # Shared guard: every test below needs ONNX, ONNX Runtime and ONNXMLTOOLS.
    _requires_onnx = unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )

    def _test_imputer_converter(self, model, mode="onnx"):
        """Fit `model`, export it to ONNX-ML, convert it and predict with both.

        Args:
            model: an unfitted imputer; it is fitted here on a small float32
                matrix containing one NaN.
            mode: backend name handed to `convert` (defaults to "onnx").

        Returns:
            A pair (ONNX-ML prediction, converted-model prediction) computed
            on the same input matrix.
        """
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
        model.fit(X)

        # Create ONNX-ML model
        input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Get the predictions for the ONNX-ML model (first output only)
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        reference_pred = session.run(output_names, feed)[0]

        # Create test model by calling converter, then predict with it
        converted = convert(onnx_ml_model, mode, X)
        converted_pred = converted.transform(X)

        return reference_pred, converted_pred

    @_requires_onnx
    def test_onnx_imputer_const(self, rtol=1e-06, atol=1e-06):
        imputer = SimpleImputer(strategy="constant")
        expected, actual = self._test_imputer_converter(imputer)

        # Check that predicted values match
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    @_requires_onnx
    def test_onnx_imputer_const_nan0(self, rtol=1e-06, atol=1e-06):
        imputer = SimpleImputer(strategy="constant", fill_value=0)
        expected, actual = self._test_imputer_converter(imputer)

        # Check that predicted values match
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    @_requires_onnx
    def test_onnx_imputer_mean(self, rtol=1e-06, atol=1e-06):
        imputer = SimpleImputer(strategy="mean", fill_value="nan")
        expected, actual = self._test_imputer_converter(imputer)

        # Check that predicted values match
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    @_requires_onnx
    def test_onnx_imputer_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        imputer = SimpleImputer(strategy="mean", fill_value="nan")
        X = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
        imputer.fit(X)

        # Create ONNX-ML model
        input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(imputer, initial_types=input_spec)

        # Blank out the name of the first attribute of the first graph node;
        # the conversion is then expected to raise.
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        with self.assertRaises(RuntimeError):
            convert(onnx_ml_model, "onnx", X)

    @_requires_onnx
    def test_onnx_imputer_torch(self, rtol=1e-06, atol=1e-06):
        imputer = SimpleImputer(strategy="constant")
        expected, actual = self._test_imputer_converter(imputer, mode="torch")

        # Check that predicted values match
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)
class TestONNXLightGBMConverter(unittest.TestCase):
    """Compare LightGBM models exported to ONNX-ML with their converted form.

    Each test trains a small LightGBM model, exports it with
    `convert_lightgbm`, converts the result with `convert`, and checks that
    both produce matching predictions.
    """

    # Shared guards, applied in the same order on every test below.
    _requires_onnx = unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    _requires_lightgbm = unittest.skipIf(
        not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")

    # Base test implementation comparing ONNXML and ONNX models.
    def _test_lgbm(self, X, model, extra_config=None):
        """Run `model` through ONNX-ML and through the converter.

        Args:
            X: 2-D float32 numpy array used as inference input.
            model: a trained LightGBM model (sklearn wrapper or Booster).
            extra_config: optional dict forwarded to `convert`; a fresh dict
                is used when omitted so state is never shared between calls.

        Returns:
            (onnx_ml_pred, onnx_pred, output_names). For classifiers both
            predictions are `[probabilities, labels]`; for regressors
            `onnx_ml_pred[0]` holds the values and `onnx_pred` is the raw
            prediction array.
        """
        if extra_config is None:
            extra_config = {}

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
            target_opset=9)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        onnx_ml_pred = [[] for _ in output_names]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        # Normalize output order: slot 1 holds labels, slot 0 everything else.
        for name, values in zip(output_names, pred):
            if "label" in name:
                onnx_ml_pred[1] = values
            else:
                onnx_ml_pred[0] = values

        # Get the predictions for the ONNX model
        onnx_pred = [[] for _ in output_names]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self, X, model, rtol=1e-06, atol=1e-06, extra_config=None):
        onnx_ml_pred, onnx_pred, _ = self._test_lgbm(X, model, extra_config)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(), onnx_pred, rtol=rtol, atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self, X, model, rtol=1e-06, atol=1e-06, extra_config=None):
        onnx_ml_pred, onnx_pred, _ = self._test_lgbm(X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        # ONNX-ML probabilities are read as one mapping per row; flatten each
        # mapping to its list of values before comparing.
        np.testing.assert_allclose(
            [list(row.values()) for row in onnx_ml_pred[0]], onnx_pred[0], rtol=rtol, atol=atol)  # probs

    # Check that ONNXML models can also target other backends.
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_onnx_pytorch(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
            target_opset=9)

        pt_model = convert(onnx_ml_model, "torch", X)
        assert pt_model

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        np.testing.assert_allclose(onnx_ml_pred[0].flatten(), pt_model.predict(X))

    # Basic regression test.
    @_requires_onnx
    @_requires_lightgbm
    def test_lgbm_onnxml_model_regressor(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMRegressor()
        model.fit(X, y)

        import platform

        # TODO bug on newer macOS versions?
        if platform.system() == "Darwin":
            # Looser tolerances on macOS only.
            self._test_regressor(X, model, rtol=1e-05, atol=1e-04)
        else:
            self._test_regressor(X, model)

    # Regression test with 3 estimators (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 1 estimator (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_regressor1(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 2 estimators (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_regressor2(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with gbdt boosting type (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_booster_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 1.1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "regression",
                "n_estimators": 3,
                "min_child_samples": 1,
                "max_depth": 1
            },
            data,
        )
        self._test_regressor(X, model)

    # Binary classification test.
    @_requires_onnx
    @_requires_lightgbm
    def test_lgbm_onnxml_model_binary(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with float64.
    @_requires_onnx
    @_requires_lightgbm
    def test_lgbm_onnxml_model_binary_float64(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        # Keep the data in float64: that is the input type this test targets.
        X = np.array(X, dtype=np.float64)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)

        # Here the LightGBM model itself (not an ONNX-ML export) is converted.
        onnx_model = convert(model, "onnx", X)

        np.testing.assert_allclose(model.predict(X), onnx_model.predict(X))

    # Binary classification test with 3 estimators (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_classifier(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators zipmap (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_booster_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "binary",
                "n_estimators": 3,
                "min_child_samples": 1
            },
            data)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type zipmap (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_booster_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "binary",
                "n_estimators": 3,
                "min_child_samples": 1
            },
            data)
        self._test_classifier(X, model)

    # Multiclass classification test.
    @_requires_onnx
    @_requires_lightgbm
    def test_lgbm_onnxml_model_multi(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_classifier_multi(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @_requires_onnx
    @_requires_lightgbm
    def test_lightgbm_booster_multi_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1, 2, 2]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "multiclass",
                "n_estimators": 3,
                "min_child_samples": 1,
                "num_class": 3
            },
            data,
        )
        self._test_classifier(X, model)
class TestBackends(unittest.TestCase):
    """Check backend-name handling and required arguments of `hummingbird.ml.convert`."""

    def _fit_gbdt_classifier(self):
        """Train the small gradient-boosting classifier shared by the tests.

        The random seed is reset on every call, so each test sees the same
        data.

        Returns:
            (model, X): the fitted GradientBoostingClassifier and the float32
            feature matrix of shape (100, 200) it was trained on.
        """
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)
        return model, X

    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        model, X = self._fit_gbdt_classifier()

        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        model, X = self._fit_gbdt_classifier()

        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test not supported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        model, _ = self._fit_gbdt_classifier()

        # An unknown backend name must raise MissingBackend
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model, "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        model, _ = self._fit_gbdt_classifier()

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch.jit")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_test_data(self):
        warnings.filterwarnings("ignore")
        model, X = self._fit_gbdt_classifier()

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
            target_opset=11)

        # Test onnx requires test_data
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")
import torch import hummingbird.ml from hummingbird.ml._utils import ( onnx_ml_tools_installed, onnx_runtime_installed, pandas_installed, lightgbm_installed, tvm_installed, ) from hummingbird.ml import constants if lightgbm_installed(): import lightgbm as lgb if onnx_ml_tools_installed(): from onnxmltools.convert import convert_sklearn, convert_lightgbm class TestExtraConf(unittest.TestCase): # Test default number of threads. It will only work on mac after 1.6 https://github.com/pytorch/pytorch/issues/43036 @unittest.skipIf( sys.platform == "darwin" and LooseVersion(torch.__version__) <= LooseVersion("1.6.0"), reason="PyTorch has a bug on mac related to multi-threading", ) def test_torch_deafault_n_threads(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0)
class TestBackends(unittest.TestCase):
    """Tests for backend selection, required test data and save/load round trips.

    Most tests share one fixture: a binary `GradientBoostingClassifier`
    (10 estimators, max depth 10) fitted on a seeded 100 x 200 random matrix.
    Save/load tests write `<prefix>.zip` and a `<prefix>` directory into the
    current working directory; both are removed through `addCleanup`, so they
    are deleted even when an assertion fails.
    """

    # Shared skip decorators for tests needing optional dependencies.
    _requires_tvm = unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    _requires_onnx = unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )

    def setUp(self):
        # Silence warnings before each test does any work.
        warnings.filterwarnings("ignore")

    @staticmethod
    def _fit_gbdt(dtype=np.float32):
        """Return `(model, X)` for the shared classifier fixture.

        Args:
            dtype: dtype the random training matrix is cast to before fitting;
                `None` keeps the float64 array returned by `np.random.rand`.
        """
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        if dtype is not None:
            X = np.array(X, dtype=dtype)
        y = np.random.randint(2, size=100)
        model.fit(X, y)
        return model, X

    @staticmethod
    def _remove_artifacts(prefix):
        """Best-effort removal of `<prefix>.zip` and the `<prefix>` directory."""
        try:
            os.remove(prefix + ".zip")
        except FileNotFoundError:
            # Nothing to delete if the test failed before the archive was written.
            pass
        shutil.rmtree(prefix, ignore_errors=True)

    def _assert_matches_sklearn(self, backend):
        """Convert the fixture with `backend` and compare probabilities with scikit-learn."""
        model, X = self._fit_gbdt()
        hb_model = hummingbird.ml.convert(model, backend)
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    def _assert_save_load_roundtrip(self, backend, prefix, load, with_test_input=True, path=None):
        """Convert, save, reload and check the reloaded model predicts identically.

        Args:
            backend: backend name passed to `hummingbird.ml.convert`.
            prefix: base name of the artifacts (`<prefix>.zip`, `<prefix>/`) to clean up.
            load: callable used to reload the saved model from `path`.
            with_test_input: whether the training matrix is passed to `convert` as test input.
            path: location given to `save` and `load`; defaults to `prefix`.
        """
        model, X = self._fit_gbdt()
        convert_args = (model, backend, X) if with_test_input else (model, backend)
        hb_model = hummingbird.ml.convert(*convert_args)
        self.assertIsNotNone(hb_model)

        # Registered before saving so the artifacts are removed even if a later step fails.
        self.addCleanup(self._remove_artifacts, prefix)
        path = prefix if path is None else path
        hb_model.save(path)

        hb_model_loaded = load(path)
        np.testing.assert_allclose(
            hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06
        )

    def _assert_onnx_converts(self, model, tensor_type):
        """Build an ONNX-ML model with one `tensor_type` input and convert it without test data."""
        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", tensor_type)], target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        self.assertTrue(hb_model)

    # Test backends are browsable
    def test_backends(self):
        self.assertGreater(len(hummingbird.ml.backends), 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        self._assert_matches_sklearn("tOrCh")

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        self._assert_matches_sklearn("pytOrCh")

    # Test pytorch save and load
    def test_pytorch_save_load(self):
        self._assert_save_load_roundtrip(
            "torch", "pt-tmp", hummingbird.ml.TorchContainer.load, with_test_input=False
        )

    # Test pytorch save and generic load
    def test_pytorch_save_generic_load(self):
        self._assert_save_load_roundtrip("torch", "pt-tmp", hummingbird.ml.load, with_test_input=False)

    # Test torchscript save and load
    def test_torchscript_save_load(self):
        self._assert_save_load_roundtrip("torch.jit", "ts-tmp", hummingbird.ml.TorchContainer.load)

    # Test torchscript save and generic load
    def test_torchscript_save_generic_load(self):
        self._assert_save_load_roundtrip("torch.jit", "ts-tmp", hummingbird.ml.load)

    # Test not supported backends
    def test_unsupported_backend(self):
        model, _ = self._fit_gbdt()

        # Test scala backend raises an exception
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model, "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        model, _ = self._fit_gbdt()

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch.jit")

    # Test TVM requires test_data
    @_requires_tvm
    def test_tvm_test_data(self):
        model, _ = self._fit_gbdt()

        # Test tvm requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test tvm save and load
    @_requires_tvm
    def test_tvm_save_load(self):
        self._assert_save_load_roundtrip("tvm", "tvm-tmp", hummingbird.ml.TVMContainer.load)

    # Test tvm save and generic load
    @_requires_tvm
    def test_tvm_save_generic_load(self):
        self._assert_save_load_roundtrip("tvm", "tvm-tmp", hummingbird.ml.load)

    # Test tvm save and load zip file
    @_requires_tvm
    def test_tvm_save_load_zip(self):
        self._assert_save_load_roundtrip(
            "tvm", "tvm-tmp", hummingbird.ml.TVMContainer.load, path="tvm-tmp.zip"
        )

    # Test onnx requires test_data or initial_types
    @_requires_onnx
    def test_onnx_no_test_data_float(self):
        model, X = self._fit_gbdt()
        self._assert_onnx_converts(model, FloatTensorType([X.shape[0], X.shape[1]]))

    # Test onnx 0 shape input
    @_requires_onnx
    def test_onnx_zero_shape_input(self):
        model, X = self._fit_gbdt(dtype=None)
        self._assert_onnx_converts(model, DoubleTensorType([0, X.shape[1]]))

    # Test onnx no test_data, double input
    @_requires_onnx
    def test_onnx_no_test_data_double(self):
        model, X = self._fit_gbdt(dtype=None)
        self._assert_onnx_converts(model, DoubleTensorType([X.shape[0], X.shape[1]]))

    # Test onnx no test_data, long input
    @_requires_onnx
    def test_onnx_no_test_data_long(self):
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        # NOTE: samples from [0, 1) truncate to 0 when cast to int64, so X is all zeros.
        X = np.array(X, dtype=np.int64)
        model.fit(X)
        self._assert_onnx_converts(model, Int64TensorType([X.shape[0], X.shape[1]]))

    # Test onnx no test_data, int input
    @_requires_onnx
    def test_onnx_no_test_data_int(self):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)
        self._assert_onnx_converts(model, Int32TensorType([X.shape[0], X.shape[1]]))

    # Test onnx no test_data, string input
    @_requires_onnx
    def test_onnx_no_test_data_string(self):
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", StringTensorType([X.shape[0], X.shape[1]]))],
            target_opset=11,
        )

        # With a string input and no test data, conversion must raise RuntimeError
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")

    # Test ONNX save and load
    @_requires_onnx
    def test_onnx_save_load(self):
        self._assert_save_load_roundtrip("onnx", "onnx-tmp", hummingbird.ml.ONNXContainer.load)

    # Test ONNX save and generic load
    @_requires_onnx
    def test_onnx_save_generic_load(self):
        self._assert_save_load_roundtrip("onnx", "onnx-tmp", hummingbird.ml.load)

    # Test for when the user forgets to add a target (ex: convert(model, output) rather than convert(model, 'torch')) due to API change
    def test_forgotten_backend_string(self):
        from sklearn.preprocessing import LabelEncoder

        model = LabelEncoder()
        data = np.array([1, 4, 5, 2, 0, 2], dtype=np.int32)
        model.fit(data)

        # Passing initial types where the backend string belongs must raise ValueError
        self.assertRaises(ValueError, hummingbird.ml.convert, model, [("input", Int32TensorType([6, 1]))])
class TestONNXSVC(unittest.TestCase):
    def _test_sv(self, classes, mode="torch"):
        """
        Fit a scikit-learn SVC on seeded random data with `classes` labels, export it
        with `convert_sklearn`, and run it both through an ONNX Runtime session and
        through `convert(onnx_ml_model, mode, X)`.

        Returns `(onnx_ml_pred, pred)`: `onnx_ml_pred[1]` holds the session output whose
        name contains "label", `onnx_ml_pred[0]` the other output, and `pred` is the
        result of `predict` on the converted model.
        """
        n_total, n_features = 100, 20
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
        y = np.random.randint(classes, size=n_total)

        # Create SKL model for testing
        svc = SVC()
        svc.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(svc, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        onnx_ml_pred = [[] for _ in output_names]
        feed = {session.get_inputs()[0].name: X}
        raw_outputs = session.run(output_names, feed)

        # Slot 1 receives the label output, slot 0 the remaining output.
        for name, values in zip(output_names, raw_outputs):
            slot = 1 if "label" in name else 0
            onnx_ml_pred[slot] = values

        converted = convert(onnx_ml_model, mode, X)
        return onnx_ml_pred, converted.predict(X)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    # test ai.onnx.ml.SVMClassifier with 2 classes for onnxml-> pytorch
    def test_logistic_regression_onnxml_binary_torch(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_sv(2)

        # Check that predicted labels match
        np.testing.assert_allclose(expected[1], actual, rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    # test ai.onnx.ml.SVMClassifier with 3 classes for onnxml-> pytorch
    def test_logistic_regression_onnxml_multi_torch(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_sv(3)

        # Check that predicted labels match
        np.testing.assert_allclose(expected[1], actual, rtol=rtol, atol=atol)
class TestBackends(unittest.TestCase):
    """Tests for backend selection and for which backends require test data.

    Most tests share one fixture: a binary `GradientBoostingClassifier`
    (10 estimators, max depth 10) fitted on a seeded 100 x 200 random matrix.
    """

    # Shared skip decorator for tests needing the ONNX tool chain.
    _requires_onnx = unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )

    def setUp(self):
        # Silence warnings before each test does any work.
        warnings.filterwarnings("ignore")

    @staticmethod
    def _fit_gbdt(dtype=np.float32):
        """Return `(model, X)` for the shared classifier fixture.

        Args:
            dtype: dtype the random training matrix is cast to before fitting;
                `None` keeps the float64 array returned by `np.random.rand`.
        """
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        if dtype is not None:
            X = np.array(X, dtype=dtype)
        y = np.random.randint(2, size=100)
        model.fit(X, y)
        return model, X

    def _assert_matches_sklearn(self, backend):
        """Convert the fixture with `backend` and compare probabilities with scikit-learn."""
        model, X = self._fit_gbdt()
        hb_model = hummingbird.ml.convert(model, backend)
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    def _assert_onnx_converts(self, model, tensor_type):
        """Build an ONNX-ML model with one `tensor_type` input and convert it without test data."""
        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", tensor_type)], target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        self.assertTrue(hb_model)

    # Test backends are browsable
    def test_backends(self):
        self.assertGreater(len(hummingbird.ml.backends), 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        self._assert_matches_sklearn("tOrCh")

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        self._assert_matches_sklearn("pytOrCh")

    # Test not supported backends
    def test_unsupported_backend(self):
        model, _ = self._fit_gbdt()

        # Test scala backend raises an exception
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model, "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        model, _ = self._fit_gbdt()

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        model, _ = self._fit_gbdt()

        # Test tvm requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test onnx requires test_data or initial_types
    @_requires_onnx
    def test_onnx_no_test_data_float(self):
        model, X = self._fit_gbdt()
        self._assert_onnx_converts(model, FloatTensorType([X.shape[0], X.shape[1]]))

    # Test onnx no test_data, double input
    @_requires_onnx
    def test_onnx_no_test_data_double(self):
        model, X = self._fit_gbdt(dtype=None)
        self._assert_onnx_converts(model, DoubleTensorType([X.shape[0], X.shape[1]]))

    # Test onnx no test_data, long input
    @_requires_onnx
    def test_onnx_no_test_data_long(self):
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        # NOTE: samples from [0, 1) truncate to 0 when cast to int64, so X is all zeros.
        X = np.array(X, dtype=np.int64)
        model.fit(X)
        self._assert_onnx_converts(model, Int64TensorType([X.shape[0], X.shape[1]]))

    # Test onnx no test_data, int input
    @_requires_onnx
    def test_onnx_no_test_data_int(self):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)
        self._assert_onnx_converts(model, Int32TensorType([X.shape[0], X.shape[1]]))

    # Test onnx no test_data, string input
    @_requires_onnx
    def test_onnx_no_test_data_string(self):
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", StringTensorType([X.shape[0], X.shape[1]]))],
            target_opset=11,
        )

        # With a string input and no test data, conversion must raise RuntimeError
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")
class TestBackends(unittest.TestCase): # Test backends are browsable def test_backends(self): warnings.filterwarnings("ignore") self.assertTrue(len(hummingbird.ml.backends) > 0) # Test backends are not case sensitive def test_backends_case_sensitive(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "tOrCh") self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Test pytorch is still a valid backend name def test_backends_pytorch(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "pytOrCh") self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Test pytorch save and load def test_pytorch_save_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(hb_model) hb_model.save("pt-tmp") hb_model_loaded = hummingbird.ml.TorchContainer.load("pt-tmp") np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) os.remove("pt-tmp.zip") # Test pytorch save and generic load def test_pytorch_save_generic_load(self): 
warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(hb_model) hb_model.save("pt-tmp") hb_model_loaded = hummingbird.ml.load("pt-tmp") np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) os.remove("pt-tmp.zip") def test_pytorch_save_load_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(hb_model) hb_model.save("pt-tmp") hummingbird.ml.load("pt-tmp") hummingbird.ml.load("pt-tmp") os.remove("pt-tmp.zip") def test_pytorch_save_load_more_versions(self): from hummingbird.ml.operator_converters import constants warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(hb_model) hb_model.save("pt-tmp") shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip") # Adding a new library does not create problems. 
with open( os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "r") as file: configuration = file.readlines() configuration.append("\nlibx=1.3") os.remove( os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)) with open( os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "w") as file: file.writelines(configuration) shutil.make_archive("pt-tmp", "zip", "pt-tmp") hummingbird.ml.load("pt-tmp") os.remove("pt-tmp.zip") def test_pytorch_save_load_less_versions(self): from hummingbird.ml.operator_converters import constants warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(hb_model) hb_model.save("pt-tmp") shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip") # Removing a library does not create problems. 
with open( os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "r") as file: configuration = file.readlines() configuration = configuration[-1] os.remove( os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)) with open( os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "w") as file: file.writelines(configuration) shutil.make_archive("pt-tmp", "zip", "pt-tmp") hummingbird.ml.load("pt-tmp") os.remove("pt-tmp.zip") def test_pytorch_save_load_different_versions(self): from hummingbird.ml.operator_converters import constants warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(hb_model) hb_model.save("pt-tmp") shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip") # Changing the version of a library does not create problems. 
with open( os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "r") as file: configuration = file.readlines() configuration[0] = "hummingbird=0.0.0.1\n" os.remove( os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)) with open( os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "w") as file: file.writelines(configuration) shutil.make_archive("pt-tmp", "zip", "pt-tmp") hummingbird.ml.load("pt-tmp") os.remove("pt-tmp.zip") # Test torchscript save and load def test_torchscript_save_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch.jit", X) self.assertIsNotNone(hb_model) hb_model.save("ts-tmp") hb_model_loaded = hummingbird.ml.TorchContainer.load("ts-tmp") np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) os.remove("ts-tmp.zip") # Test torchscript save and generic load def test_torchscript_save_generic_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch.jit", X) self.assertIsNotNone(hb_model) hb_model.save("ts-tmp") hb_model_loaded = hummingbird.ml.load("ts-tmp") np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) os.remove("ts-tmp.zip") def test_load_fails_bad_path(self): # Asserts for bad path with extension self.assertRaises(AssertionError, hummingbird.ml.load, "nonsense.zip") self.assertRaises(AssertionError, 
hummingbird.ml.TorchContainer.load, "nonsense.zip") # Asserts for bad path with no extension self.assertRaises(AssertionError, hummingbird.ml.load, "nonsense") self.assertRaises(AssertionError, hummingbird.ml.TorchContainer.load, "nonsense") @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_load_fails_bad_path_onnx(self): self.assertRaises(AssertionError, hummingbird.ml.ONNXContainer.load, "nonsense.zip") self.assertRaises(AssertionError, hummingbird.ml.ONNXContainer.load, "nonsense") @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed") def test_load_fails_bad_path_tvm(self): self.assertRaises(AssertionError, hummingbird.ml.TVMContainer.load, "nonsense.zip") self.assertRaises(AssertionError, hummingbird.ml.TVMContainer.load, "nonsense") # Test not supported backends def test_unsupported_backend(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) # Test scala backend rises an exception self.assertRaises(MissingBackend, hummingbird.ml.convert, model, "scala") # Test torchscript requires test_data def test_torchscript_test_data(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) # Test torcscript requires test_input self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch.jit") # Test TVM requires test_data @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed") def test_tvm_test_data(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes 
= 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) # Test tvm requires test_input self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm") # Test tvm save and load @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed") def test_tvm_save_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "tvm", X) self.assertIsNotNone(hb_model) hb_model.save("tvm-tmp") hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp") np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) os.remove("tvm-tmp.zip") # Test tvm save and generic load @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed") def test_tvm_save_generic_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "tvm", X) self.assertIsNotNone(hb_model) hb_model.save("tvm-tmp") hb_model_loaded = hummingbird.ml.load("tvm-tmp") np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) os.remove("tvm-tmp.zip") # Test tvm save and load zip file @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed") def test_tvm_save_load_zip(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = 
GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "tvm", X) self.assertIsNotNone(hb_model) hb_model.save("tvm-tmp.zip") hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp.zip") np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) os.remove("tvm-tmp.zip") @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed") def test_tvm_save_load_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "tvm", X) self.assertIsNotNone(hb_model) hb_model.save("tvm-tmp.zip") hummingbird.ml.TVMContainer.load("tvm-tmp.zip") hummingbird.ml.TVMContainer.load("tvm-tmp.zip") os.remove("tvm-tmp.zip") @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed") def test_tvm_save_load_no_versions(self): from hummingbird.ml.operator_converters import constants warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "tvm", X) self.assertIsNotNone(hb_model) hb_model.save("tvm-tmp") shutil.unpack_archive("tvm-tmp.zip", "tvm-tmp", format="zip") # Removing the configuration file with the versions does not create problems. 
os.remove( os.path.join("tvm-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)) hummingbird.ml.load("tvm-tmp") os.remove("tvm-tmp.zip") # Test onnx requires test_data or initial_types @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_onnx_no_test_data_float(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) # Create ONNX-ML model onnx_ml_model = convert_sklearn( model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=11) # Test onnx requires no test_data hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx") assert hb_model # Test onnx 0 shape input @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_onnx_zero_shape_input(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) y = np.random.randint(num_classes, size=100) model.fit(X, y) # Create ONNX-ML model onnx_ml_model = convert_sklearn(model, initial_types=[ ("input", DoubleTensorType([0, X.shape[1]])) ], target_opset=11) # Test onnx requires no test_data hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx") assert hb_model # Test onnx no test_data, double input @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_onnx_no_test_data_double(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) y = 
np.random.randint(num_classes, size=100) model.fit(X, y) # Create ONNX-ML model onnx_ml_model = convert_sklearn( model, initial_types=[("input", DoubleTensorType([X.shape[0], X.shape[1]]))], target_opset=11) # Test onnx requires no test_data hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx") assert hb_model # Test onnx no test_data, long input @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_onnx_no_test_data_long(self): warnings.filterwarnings("ignore") model = model = StandardScaler(with_mean=True, with_std=True) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.int64) model.fit(X) # Create ONNX-ML model onnx_ml_model = convert_sklearn( model, initial_types=[("input", Int64TensorType([X.shape[0], X.shape[1]]))], target_opset=11) # Test onnx requires no test_data hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx") assert hb_model # Test onnx no test_data, int input @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_onnx_no_test_data_int(self): warnings.filterwarnings("ignore") model = OneHotEncoder() X = np.array([[1, 2, 3]], dtype=np.int32) model.fit(X) # Create ONNX-ML model onnx_ml_model = convert_sklearn( model, initial_types=[("input", Int32TensorType([X.shape[0], X.shape[1]]))], target_opset=11) # Test onnx requires no test_data hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx") assert hb_model # Test onnx no test_data, string input @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_onnx_no_test_data_string(self): warnings.filterwarnings("ignore") model = OneHotEncoder() X = np.array([["a", "b", "c"]]) model.fit(X) # Create ONNX-ML model onnx_ml_model = convert_sklearn( model, initial_types=[("input", StringTensorType([X.shape[0], 
X.shape[1]]))], target_opset=11) # Test backends are not case sensitive self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx") # Test ONNX save and load @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT") def test_onnx_save_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "onnx", X) self.assertIsNotNone(hb_model) hb_model.save("onnx-tmp") hb_model_loaded = hummingbird.ml.ONNXContainer.load("onnx-tmp") np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) os.remove("onnx-tmp.zip") # Test ONNX save and generic load @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT") def test_onnx_save_generic_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "onnx", X) self.assertIsNotNone(hb_model) hb_model.save("onnx-tmp") hb_model_loaded = hummingbird.ml.load("onnx-tmp") np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) os.remove("onnx-tmp.zip") # Test ONNX save and generic load @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT") def test_onnx_save_load_load(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = 
np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "onnx", X) self.assertIsNotNone(hb_model) hb_model.save("onnx-tmp") hummingbird.ml.load("onnx-tmp") hummingbird.ml.load("onnx-tmp") os.remove("onnx-tmp.zip") @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT") def test_onnx_save_load_no_versions(self): from hummingbird.ml.operator_converters import constants warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "onnx", X) self.assertIsNotNone(hb_model) hb_model.save("onnx-tmp") shutil.unpack_archive("onnx-tmp.zip", "onnx-tmp", format="zip") # Removing the configuration file with the versions does not create problems. os.remove( os.path.join("onnx-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)) hummingbird.ml.load("onnx-tmp") os.remove("onnx-tmp.zip") # Test for when the user forgets to add a target (ex: convert(model, output) rather than convert(model, 'torch')) due to API change def test_forgotten_backend_string(self): from sklearn.preprocessing import LabelEncoder model = LabelEncoder() data = np.array([1, 4, 5, 2, 0, 2], dtype=np.int32) model.fit(data) self.assertRaises(ValueError, hummingbird.ml.convert, model, [("input", Int32TensorType([6, 1]))]) # Test ONNX @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT") def test_onnx(self): import numpy as np import lightgbm as lgb from hummingbird.ml import convert # Create some random data for binary classification. 
num_classes = 2 X = np.array(np.random.rand(10000, 28), dtype=np.float32) y = np.random.randint(num_classes, size=10000) model = lgb.LGBMClassifier() model.fit(X, y) self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "onnx") # Test Spark UDF @unittest.skipIf( os.name == "nt" or not sparkml_installed() or LooseVersion(pyspark.__version__) < LooseVersion("3"), reason="UDF Test requires spark >= 3", ) def test_udf_torch(self): X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, random_state=77, test_size=0.2, ) spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train)) sql_context.registerDataFrameAsTable(spark_df, "IRIS") model = GradientBoostingClassifier(n_estimators=10) model.fit(X_train, y_train) hb_model = hummingbird.ml.convert(model, "torch") # Broadcast the model. broadcasted_model = spark.sparkContext.broadcast(hb_model) # UDF definition. @pandas_udf("long") def udf_hb_predict( iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: model = broadcasted_model.value for args in iterator: data_unmangled = pd.concat([feature for feature in args], axis=1) predictions = model.predict(data_unmangled) yield pd.Series(np.array(predictions)) # Register the UDF. sql_context.udf.register("PREDICT", udf_hb_predict) # Run the query. 
sql_context.sql( "SELECT SUM(prediction) FROM (SELECT PREDICT(*) as prediction FROM IRIS)" ).show() @unittest.skipIf( os.name == "nt" or not sparkml_installed() or LooseVersion(pyspark.__version__) < LooseVersion("3"), reason="UDF Test requires spark >= 3", ) def test_udf_torch_jit_broadcast(self): import pickle X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, random_state=77, test_size=0.2, ) spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train)) sql_context.registerDataFrameAsTable(spark_df, "IRIS") model = GradientBoostingClassifier(n_estimators=10) model.fit(X_train, y_train) hb_model = hummingbird.ml.convert(model, "torch.jit", X_test) # Broadcast the model returns an error. self.assertRaises(pickle.PickleError, spark.sparkContext.broadcast, hb_model) @unittest.skipIf( os.name == "nt" or not sparkml_installed() or LooseVersion(pyspark.__version__) < LooseVersion("3"), reason="UDF Test requires spark >= 3", ) def test_udf_torch_jit_spark_file(self): import dill import torch.jit X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, random_state=77, test_size=0.2, ) spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train)) sql_context.registerDataFrameAsTable(spark_df, "IRIS") model = GradientBoostingClassifier(n_estimators=10) model.fit(X_train, y_train) hb_model = hummingbird.ml.convert(model, "torch.jit", X_test) # Save the file locally. if os.path.exists("deployed_model.zip"): os.remove("deployed_model.zip") torch.jit.save(hb_model.model, "deployed_model.zip") hb_model._model = None # Share the model using spark file and broadcast the container. spark.sparkContext.addFile("deployed_model.zip") broadcasted_container = spark.sparkContext.broadcast(hb_model) # UDF definition. 
@pandas_udf("long") def udf_hb_predict( iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: location = SparkFiles.get("deployed_model.zip") torch_model = torch.jit.load(location) container = broadcasted_container.value container._model = torch_model model = container for args in iterator: data_unmangled = pd.concat([feature for feature in args], axis=1) predictions = model.predict(data_unmangled.values) yield pd.Series(np.array(predictions)) # Register the UDF. sql_context.udf.register("PREDICT", udf_hb_predict) # Run the query. sql_context.sql( "SELECT SUM(prediction) FROM (SELECT PREDICT(*) as prediction FROM IRIS)" ).show() os.remove("deployed_model.zip")
class TestONNXOneHotEncoder(unittest.TestCase):
    """Check that a OneHotEncoder ONNX-ML model converted to the "onnx" backend agrees with ONNX Runtime."""

    def _fit_convert_and_predict(self, data, initial_types):
        """Fit a OneHotEncoder on ``data`` and evaluate it through both execution paths.

        Args:
            data: Training samples; the same object is reused as the inference input.
            initial_types: Value forwarded as ``initial_types`` to ``convert_sklearn``.

        Returns:
            A tuple ``(onnx_ml_pred, onnx_pred)`` where ``onnx_ml_pred`` is the first
            output ONNX Runtime produces for the ONNX-ML model and ``onnx_pred`` is
            the result of ``transform`` on the converted model.
        """
        model = OneHotEncoder()
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=initial_types)

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", data)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        inputs = {session.get_inputs()[0].name: data}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(data)

        return onnx_ml_pred, onnx_pred

    # Test OneHotEncoder with ints
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int(self, rtol=1e-06, atol=1e-06):
        X = np.array([[1, 2, 3]], dtype=np.int32)
        onnx_ml_pred, onnx_pred = self._fit_convert_and_predict(X, [("int_input", IntTensorType_onnx(X.shape))])

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with 2 inputs
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx2(self, rtol=1e-06, atol=1e-06):
        X = np.array([[1, 2, 3], [2, 1, 3]], dtype=np.int32)
        onnx_ml_pred, onnx_pred = self._fit_convert_and_predict(X, [("int_input", IntTensorType_onnx(X.shape))])

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with int64
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int64(self, rtol=1e-06, atol=1e-06):
        X = np.array([[1, 2, 3]], dtype=np.int64)
        onnx_ml_pred, onnx_pred = self._fit_convert_and_predict(X, [("int_input", LongTensorType_onnx(X.shape))])

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with strings. This test only works with pytorch >= 1.8
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.8.0"),
        reason="PyTorch exporter returns an error until version 1.8.0",
    )
    def test_model_one_hot_encoder_string(self):
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        onnx_ml_pred, onnx_pred = self._fit_convert_and_predict(data, [("input", StringTensorType_onnx([4, 3]))])

        # Check that predicted values match. A test method must assert rather than
        # return its results, otherwise a mismatch can never make the test fail.
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=1e-06, atol=1e-06)

    # Test OneHotEncoder failcase when input data type is not supported
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_ohe_string_raises_type_error_onnx(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create ONNX model by calling converter, should raise error for strings
        # because no test data is supplied alongside the string-typed model.
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx")
class TestSklearnNormalizer(unittest.TestCase):
    """Compare LogisticRegression outputs between an ONNX-ML model and its converted ONNX model.

    NOTE(review): the class name mentions a normalizer, yet every test here fits a
    LogisticRegression, and a later class in this file is declared with the same
    name -- confirm whether one of the two should be renamed.
    """

    def _test_regressor(self, classes):
        """Train a classifier on random data and collect predictions from both models.

        Args:
            classes: Number of distinct labels drawn for the random target vector.

        Returns:
            A tuple ``(onnx_ml_pred, onnx_pred)``. Each element is a list in which
            index 1 holds the output named "output_label" and index 0 holds the
            other output.
        """
        warnings.filterwarnings("ignore")
        n_features, n_total = 20, 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features).astype(np.float32)
        y = np.random.randint(classes, size=n_total)

        # Create SKL model for testing
        model = LogisticRegression(solver="liblinear", multi_class="ovr", fit_intercept=True)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        def split_outputs(session):
            """Run ``session`` and place each output in the label (1) or non-label (0) slot."""
            slots = [[] for _ in output_names]
            for name, value in zip(output_names, session.run(output_names, inputs)):
                slots[1 if name == "output_label" else 0] = value
            return slots

        # Output and input names are taken from the ONNX-ML session and reused
        # when querying the converted model.
        ml_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in ml_session.get_outputs()]
        inputs = {ml_session.get_inputs()[0].name: X}

        # Get the predictions for the ONNX-ML model
        onnx_ml_pred = split_outputs(ml_session)

        # Get the predictions for the ONNX model
        onnx_pred = split_outputs(ort.InferenceSession(onnx_model.SerializeToString()))

        return onnx_ml_pred, onnx_pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_logistic_regression_onnxml_binary(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_regressor(2)

        # Labels must agree
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)

        # Probabilities must agree; the ONNX-ML side yields one mapping per row
        ml_probabilities = [list(row.values()) for row in onnx_ml_pred[0]]
        np.testing.assert_allclose(ml_probabilities, onnx_pred[0], rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_logistic_regression_onnxml_multi(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_regressor(3)

        # Labels must agree
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)

        # Probabilities must agree; the ONNX-ML side yields one mapping per row
        ml_probabilities = [list(row.values()) for row in onnx_ml_pred[0]]
        np.testing.assert_allclose(ml_probabilities, onnx_pred[0], rtol=rtol, atol=atol)
class TestONNXScaler(unittest.TestCase):
    """Check that scaler ONNX-ML models converted to the "onnx" backend agree with ONNX Runtime."""

    @staticmethod
    def _sample(dtype):
        """Return the fixed 4x3 sample matrix used by every test, with the requested dtype."""
        rows = [[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0], [1.0, 0.0, -2.0]]
        return np.array(rows, dtype=dtype)

    def _test_scaler_converter(self, model):
        """Fit ``model`` on the float32 sample and evaluate it through both execution paths.

        Args:
            model: An unfitted scikit-learn scaler.

        Returns:
            A tuple ``(onnx_ml_pred, onnx_pred)``: the first output ONNX Runtime
            produces for the ONNX-ML model, and ``transform`` of the converted model.
        """
        warnings.filterwarnings("ignore")
        X = self._sample(np.float32)
        model.fit(X)

        # Create ONNX-ML model
        initial_types = [("float_input", FloatTensorType([None, X.shape[1]]))]
        onnx_ml_model = convert_sklearn(model, initial_types=initial_types)

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        return onnx_ml_pred, onnx_pred

    # Test StandardScaler with_mean=True, with_std=True
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_standard_scaler_onnx_tt(self, rtol=1e-06, atol=1e-06):
        scaler = StandardScaler(with_mean=True, with_std=True)
        expected, actual = self._test_scaler_converter(scaler)

        # Both execution paths must produce the same values
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Test StandardScaler with_mean=True, with_std=False
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_standard_scaler_onnx_tf(self, rtol=1e-06, atol=1e-06):
        scaler = StandardScaler(with_mean=True, with_std=False)
        expected, actual = self._test_scaler_converter(scaler)

        # Both execution paths must produce the same values
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Test StandardScaler with_mean=False, with_std=False
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_standard_scaler_onnx_ff(self, rtol=1e-06, atol=1e-06):
        scaler = StandardScaler(with_mean=False, with_std=False)
        expected, actual = self._test_scaler_converter(scaler)

        # Both execution paths must produce the same values
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Test RobustScaler with with_centering=True
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_robust_scaler_onnx_t(self, rtol=1e-06, atol=1e-06):
        scaler = RobustScaler(with_centering=True)
        expected, actual = self._test_scaler_converter(scaler)

        # Both execution paths must produce the same values
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Test RobustScaler with with_centering=False
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_robust_scaler_onnx_f(self, rtol=1e-06, atol=1e-06):
        scaler = RobustScaler(with_centering=False)
        expected, actual = self._test_scaler_converter(scaler)

        # Both execution paths must produce the same values
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Test MaxAbsScaler
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_max_abs_scaler_onnx(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_scaler_converter(MaxAbsScaler())

        # Both execution paths must produce the same values
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Test MinMaxScaler
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_min_max_scaler_onnx(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_scaler_converter(MinMaxScaler())

        # Both execution paths must produce the same values
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Test that malformed models throw an exception
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_scaler_converter_raises_rt_onnx(self):
        warnings.filterwarnings("ignore")
        X = self._sample(np.float32)

        # Create SKL model for testing
        scaler = StandardScaler()
        scaler.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(scaler, initial_types=[("float_input", FloatTensorType(X.shape))])

        # Blank out the name of the first attribute of the first node so the
        # model is malformed by the time it reaches the converter.
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)

    # Test with float64
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_scaler_converter_float_64(self):
        warnings.filterwarnings("ignore")
        X = self._sample(np.float64)

        # Create SKL model for testing
        scaler = StandardScaler()
        scaler.fit(X)

        # Create ONNX-ML model
        initial_types = [("double_input", DoubleTensorType([None, X.shape[1]]))]
        onnx_ml_model = convert_sklearn(scaler, initial_types=initial_types)

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Both execution paths must produce the same values
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=1e-06, atol=1e-06)
class TestONNXDecisionTreeConverter(unittest.TestCase): def __init__(self, *args, **kwargs): super(TestONNXDecisionTreeConverter, self).__init__(*args, **kwargs) # Base test implementation comparing ONNXML and ONNX models. def _test_decision_tree(self, X, model, extra_config={}): # Create ONNX-ML model onnx_ml_model = convert_sklearn( model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=11) # Create ONNX model onnx_model = convert(onnx_ml_model, "onnx", X, extra_config) # Get the predictions for the ONNX-ML model session = ort.InferenceSession(onnx_ml_model.SerializeToString()) output_names = [ session.get_outputs()[i].name for i in range(len(session.get_outputs())) ] onnx_ml_pred = [[] for i in range(len(output_names))] inputs = {session.get_inputs()[0].name: X} pred = session.run(output_names, inputs) for i in range(len(output_names)): if "label" in output_names[i]: onnx_ml_pred[1] = pred[i] else: onnx_ml_pred[0] = pred[i] # Get the predictions for the ONNX model onnx_pred = [[] for i in range(len(output_names))] if len(output_names) == 1: # regression onnx_pred = onnx_model.predict(X) else: # classification for i in range(len(output_names)): if "label" in output_names[i]: onnx_pred[1] = onnx_model.predict(X) else: onnx_pred[0] = onnx_model.predict_proba(X) return onnx_ml_pred, onnx_pred, output_names # Utility function for testing regression models. def _test_regressor(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}): onnx_ml_pred, onnx_pred, output_names = self._test_decision_tree( X, model, extra_config) # Check that predicted values match np.testing.assert_allclose(onnx_ml_pred[0].ravel(), onnx_pred, rtol=rtol, atol=atol) # Utility function for testing classification models. 
def _test_classifier(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}): onnx_ml_pred, onnx_pred, output_names = self._test_decision_tree( X, model, extra_config) np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol) # labels np.testing.assert_allclose(list( map(lambda x: list(x.values()), onnx_ml_pred[0])), onnx_pred[0], rtol=rtol, atol=atol) # probs # Regression. # Regression test with Decision Tree. @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_decision_tree_regressor(self): warnings.filterwarnings("ignore") model = DecisionTreeRegressor() X = [[0, 1], [1, 1], [2, 0]] X = np.array(X, dtype=np.float32) y = np.array([100, -10, 50], dtype=np.float32) model.fit(X, y) self._test_regressor(X, model) # Basic regression test with decision tree. @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_decision_tree_regressor_random(self): warnings.filterwarnings("ignore") n_features = 28 n_total = 100 np.random.seed(0) X = np.random.rand(n_total, n_features) X = np.array(X, dtype=np.float32) y = np.random.randint(n_total, size=n_total) # Create DecisionTree model model = DecisionTreeRegressor() model.fit(X, y) self._test_regressor(X, model) # Regression test with Random Forest, 1 estimator. @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_random_forest_regressor_1(self): warnings.filterwarnings("ignore") model = RandomForestRegressor(n_estimators=1) X = [[0, 1], [1, 1], [2, 0]] X = np.array(X, dtype=np.float32) y = np.array([100, -10, 50], dtype=np.float32) model.fit(X, y) self._test_regressor(X, model) # Basic regression test with Random Forest. 
@unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_random_forest_regressor_random(self): warnings.filterwarnings("ignore") n_features = 28 n_total = 100 np.random.seed(0) X = np.random.rand(n_total, n_features) X = np.array(X, dtype=np.float32) y = np.random.randint(n_total, size=n_total) # Create RandomForest model model = RandomForestRegressor() model.fit(X, y) self._test_regressor(X, model, rtol=1e-03, atol=1e-03) # Binary. # Binary classication test random. @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_decision_tree_binary_random(self): warnings.filterwarnings("ignore") n_features = 28 n_total = 100 np.random.seed(0) X = np.random.rand(n_total, n_features) X = np.array(X, dtype=np.float32) y = np.random.randint(2, size=n_total) # Create DecisionTree model model = DecisionTreeClassifier() model.fit(X, y) self._test_classifier(X, model) # Binary classification test Decision Tree. @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_decision_tree_binary(self): warnings.filterwarnings("ignore") model = DecisionTreeClassifier() X = [[0, 1], [1, 1], [2, 0]] X = np.array(X, dtype=np.float32) y = [0, 1, 0] model.fit(X, y) self._test_classifier(X, model) # Binary classification test Random Forest with 3 estimators (taken from ONNXMLTOOLS). 
@unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_random_forest_classifier(self): warnings.filterwarnings("ignore") X = [[0, 1], [1, 1], [2, 0], [1, 2]] X = np.array(X, dtype=np.float32) y = [0, 1, 0, 1] model = RandomForestClassifier(n_estimators=3) model.fit(X, y) self._test_classifier(X, model) # Binary classification test Random Forest with 3 estimators random. @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_random_forest_classifier_random(self): warnings.filterwarnings("ignore") n_features = 28 n_total = 100 np.random.seed(0) X = np.random.rand(n_total, n_features) X = np.array(X, dtype=np.float32) y = np.random.randint(2, size=n_total) model = RandomForestClassifier(n_estimators=10) model.fit(X, y) self._test_classifier(X, model) # Multiclass classification test. # Multiclass classification test with DecisionTree, random. @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS") def test_decision_tree_multi_random(self): warnings.filterwarnings("ignore") n_features = 28 n_total = 100 np.random.seed(0) X = np.random.rand(n_total, n_features) X = np.array(X, dtype=np.float32) y = np.random.randint(3, size=n_total) # Create the DecisionTree model model = DecisionTreeClassifier() model.fit(X, y) self._test_classifier(X, model) # Multiclass classification test with DecisionTree (taken from ONNXMLTOOLS). 
@unittest.skipIf(
    not onnx_ml_tools_installed() or not onnx_runtime_installed(),
    reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
)
def test_decision_tree_multi(self):
    warnings.filterwarnings("ignore")
    tree = DecisionTreeClassifier()
    features = np.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=np.float32,
    )
    labels = [0, 1, 2, 1, 1, 2]
    tree.fit(features, labels)
    self._test_classifier(features, tree)


# Multiclass classification test with Random Forest.
@unittest.skipIf(
    not onnx_ml_tools_installed() or not onnx_runtime_installed(),
    reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
)
def test_random_forest_multi_random(self):
    warnings.filterwarnings("ignore")
    sample_count, feature_count = 100, 28
    np.random.seed(0)
    # Features are drawn before labels so the seeded stream is consumed in a fixed order.
    features = np.array(np.random.rand(sample_count, feature_count), dtype=np.float32)
    labels = np.random.randint(3, size=sample_count)

    # Create the RandomForest model
    forest = RandomForestClassifier(n_estimators=10)
    forest.fit(features, labels)
    self._test_classifier(features, forest)
class TestSklearnNormalizer(unittest.TestCase):
    # Tests that an ONNX-ML Normalizer model and its converted ONNX counterpart agree.

    def _test_normalizer_converter(self, norm):
        """
        Fit a scikit-learn `Normalizer(norm=norm)` on a small fixed matrix, export it
        with `convert_sklearn`, convert the result with `convert(..., "onnx", X)`, and
        run both models through `ort.InferenceSession` on the same input.

        Returns the pair `(onnx_ml_pred, onnx_pred)`: the `session.run` results for the
        ONNX-ML model and for the converted model, requested under the same output names.
        """
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32)

        # Create SKL model for testing
        model = Normalizer(norm=norm)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model; the output names and the input feed
        # taken from the ONNX-ML session are reused as-is.
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = session.run(output_names, inputs)

        return onnx_ml_pred, onnx_pred

    # Check the "l1" norm
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_normalizer_l1(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_normalizer_converter("l1")

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Check the "l2" norm
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_normalizer_l2(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_normalizer_converter("l2")

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Check the "max" norm
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_normalizer_max(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_normalizer_converter("max")

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # An ONNX-ML model whose first node attribute has been blanked must make convert raise RuntimeError
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_normalizer_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32)
        model = Normalizer(norm="l1")
        model.fit(X)

        # generate test input
        onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])
        # Overwrite the string payload of the first attribute with empty bytes to corrupt the model.
        onnx_ml_model.graph.node[0].attribute[0].s = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
class TestONNXLinear(unittest.TestCase):
    def _test_linear(self, classes):
        """
        Helper exercising the conversion of `ai.onnx.ml.LinearClassifier`, obtained from a
        scikit-learn LogisticRegression fit on seeded random data with `classes` label values.
        This tests `convert_onnx_linear_model` in `hummingbird.ml.operator_converters.onnxml_linear`

        Returns `(onnx_ml_pred, onnx_pred)`, each ordered as `[probabilities, labels]`.
        """
        feature_count, sample_count = 20, 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.array(np.random.rand(sample_count, feature_count), dtype=np.float32)
        y = np.random.randint(classes, size=sample_count)

        # Create SKL model for testing
        skl_model = LogisticRegression(solver="liblinear", multi_class="ovr", fit_intercept=True)
        skl_model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(skl_model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        raw_outputs = session.run(output_names, feed)

        # Slot 1 receives the output whose name contains "label"; every other output lands in slot 0.
        onnx_ml_pred = [[] for _ in output_names]
        for name, value in zip(output_names, raw_outputs):
            slot = 1 if "label" in name else 0
            onnx_ml_pred[slot] = value

        # Get the predictions for the ONNX model
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred = [[] for _ in output_names]
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred

    def _check_classifier(self, classes, rtol, atol):
        """Run `_test_linear` for `classes` label values and compare labels, then probabilities."""
        onnx_ml_pred, onnx_pred = self._test_linear(classes)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        # Each ONNX-ML probability entry is a mapping; keep only its values for the comparison.
        ml_probs = [list(row.values()) for row in onnx_ml_pred[0]]
        np.testing.assert_allclose(ml_probs, onnx_pred[0], rtol=rtol, atol=atol)  # probs

    # test ai.onnx.ml.LinearClassifier with 2 classes
    @unittest.skipIf(
        not onnx_ml_tools_installed() or not onnx_runtime_installed(),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_logistic_regression_onnxml_binary(self, rtol=1e-06, atol=1e-06):
        self._check_classifier(2, rtol, atol)

    # test ai.onnx.ml.LinearClassifier with 3 classes
    @unittest.skipIf(
        not onnx_ml_tools_installed() or not onnx_runtime_installed(),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_logistic_regression_onnxml_multi(self, rtol=1e-06, atol=1e-06):
        self._check_classifier(3, rtol, atol)

    def _test_regressor(self, values):
        """
        Helper exercising the conversion of `ai.onnx.ml.LinearRegressor`, obtained from a
        scikit-learn LinearRegression fit on seeded random data with targets below `values`.
        This tests `convert_onnx_linear_regression_model` in `hummingbird.ml.operator_converters.onnxml_linear`

        Returns `(onnx_ml_pred, onnx_pred)`.
        """
        feature_count, sample_count = 20, 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.array(np.random.rand(sample_count, feature_count), dtype=np.float32)
        y = np.random.randint(values, size=sample_count)

        # Create SKL model for testing
        skl_model = LinearRegression()
        skl_model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(skl_model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred

    def _check_regressor(self, values, rtol, atol):
        """Run `_test_regressor` and compare the flattened first ONNX-ML output with the ONNX prediction."""
        onnx_ml_pred, onnx_pred = self._test_regressor(values)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(), onnx_pred, rtol=rtol, atol=atol)

    # test ai.onnx.ml.LinearRegressor with 2 values
    @unittest.skipIf(
        not onnx_ml_tools_installed() or not onnx_runtime_installed(),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_linear_regression_onnxml_small(self, rtol=1e-06, atol=1e-06):
        self._check_regressor(2, rtol, atol)

    # test ai.onnx.ml.LinearRegressor with 100 values
    @unittest.skipIf(
        not onnx_ml_tools_installed() or not onnx_runtime_installed(),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_linear_regression_onnxml_large(self, rtol=1e-06, atol=1e-06):
        self._check_regressor(100, rtol, atol)

    # test for malformed model/problem with parsing
    @unittest.skipIf(
        not onnx_ml_tools_installed() or not onnx_runtime_installed(),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_linear_converter_raises_rt(self):
        feature_count, sample_count = 20, 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.array(np.random.rand(sample_count, feature_count), dtype=np.float32)
        y = np.random.randint(3, size=sample_count)
        skl_model = LinearRegression()
        skl_model.fit(X, y)

        # generate test input
        onnx_ml_model = convert_sklearn(skl_model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])
        # Blank the name of the first attribute so the converter is handed a malformed node.
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
class TestONNXBinarizer(unittest.TestCase):
    def _test_binarizer_converter(self, threshold):
        """
        Fit a scikit-learn `Binarizer(threshold=threshold)` on a small fixed matrix, export it
        with `convert_sklearn`, convert it with `convert(..., "onnx", X)`, and evaluate both.

        Returns `(onnx_ml_pred, onnx_pred)`: the first output of the ONNX-ML session run and
        the result of `transform` on the converted model.
        """
        warnings.filterwarnings("ignore")
        data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32)

        # Create SKL model for testing
        binarizer = Binarizer(threshold=threshold)
        binarizer.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(binarizer, initial_types=[("float_input", FloatTensorType_onnx(data.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", data)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        feed = {session.get_inputs()[0].name: data}
        onnx_ml_pred = session.run(output_names, feed)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(data)

        return onnx_ml_pred, onnx_pred

    # Check 0.0 threshold
    @unittest.skipIf(
        not onnx_ml_tools_installed() or not onnx_runtime_installed(),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_binarizer_converter_0thresh(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_binarizer_converter(0.0)

        # Check that predicted values match
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Check positive threshold
    @unittest.skipIf(
        not onnx_ml_tools_installed() or not onnx_runtime_installed(),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_binarizer_converter_posthresh(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_binarizer_converter(2.0)

        # Check that predicted values match
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Check neg threshold
    @unittest.skipIf(
        not onnx_ml_tools_installed() or not onnx_runtime_installed(),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_binarizer_converter_negthresh(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_binarizer_converter(-2.0)

        # Check that predicted values match
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # if the model is corrupt, we should get a RuntimeError
    @unittest.skipIf(
        not onnx_ml_tools_installed() or not onnx_runtime_installed(),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_binarizer_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32)
        binarizer = Binarizer(threshold=0)
        binarizer.fit(data)

        # generate test input
        onnx_ml_model = convert_sklearn(binarizer, initial_types=[("float_input", FloatTensorType_onnx(data.shape))])
        # Blank the name of the first attribute to corrupt the model.
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", data)
# Every test in this class uses `lgb` as well as the ONNX tool chain, so the whole class
# is skipped when any of the three optional packages is missing.
@unittest.skipIf(
    not (lightgbm_installed() and onnx_ml_tools_installed() and onnx_runtime_installed()),
    reason="ONNXML test require LightGBM, ONNX, ORT and ONNXMLTOOLS",
)
class TestONNXLightGBMConverter(unittest.TestCase):
    # Compares LightGBM models exported to ONNX-ML with their hummingbird-converted ONNX version.

    @staticmethod
    def _to_onnx_ml(model, X):
        """Export `model` with `convert_lightgbm`, declaring one float input shaped like `X`."""
        return convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

    @staticmethod
    def _order_outputs(output_names, pred):
        """
        Arrange session outputs so that the output named "label" sits at index 1 and any
        other output at index 0. The list has one slot per output name.
        """
        ordered = [[] for _ in output_names]
        for name, value in zip(output_names, pred):
            ordered[1 if name == "label" else 0] = value
        return ordered

    @staticmethod
    def _tiny_regression_data():
        """Return the 3-sample regression fixture (taken from ONNXMLTOOLS) as float32 arrays."""
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        return X, y

    @staticmethod
    def _random_data(num_targets, n_total=100, n_features=28):
        """
        Seed NumPy's global RNG with 0, then draw float32 features followed by integer
        targets in `[0, num_targets)`.
        """
        np.random.seed(0)
        X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
        y = np.random.randint(num_targets, size=n_total)
        return X, y

    # Base test implementation comparing ONNXML and ONNX models.
    def _test_lgbm(self, X, model, extra_config=None):
        """
        Convert `model` to ONNX-ML and then to ONNX, run both through `ort.InferenceSession`
        on `X`, and return `(onnx_ml_pred, onnx_pred, output_names)`.

        `extra_config` defaults to a fresh empty dict (a `None` sentinel avoids sharing
        one mutable default across calls).
        """
        config = {} if extra_config is None else extra_config

        # Create ONNX-ML model
        onnx_ml_model = self._to_onnx_ml(model, X)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", X, config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = self._order_outputs(output_names, session.run(output_names, inputs))

        # Get the predictions for the ONNX model (same output names and input feed)
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = self._order_outputs(output_names, session.run(output_names, inputs))

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self, X, model, rtol=1e-06, atol=1e-06, extra_config=None):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(X, model, extra_config)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred[0], rtol=rtol, atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self, X, model, rtol=1e-06, atol=1e-06, extra_config=None):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        # Each ONNX-ML probability entry is a mapping; keep only its values for the comparison.
        ml_probs = [list(row.values()) for row in onnx_ml_pred[0]]
        np.testing.assert_allclose(ml_probs, onnx_pred[0], rtol=rtol, atol=atol)  # probs

    # Check that ONNXML models can only target the ONNX backend.
    def test_lightgbm_pytorch(self):
        warnings.filterwarnings("ignore")
        X, y = self._tiny_regression_data()
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = self._to_onnx_ml(model, X)

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "torch")

    # Check converter with extra configs.
    def test_lightgbm_pytorch_extra_config(self):
        warnings.filterwarnings("ignore")
        X, y = self._tiny_regression_data()
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = self._to_onnx_ml(model, X)

        # Create ONNX model
        model_name = "hummingbird.ml.test.lightgbm"
        extra_config = {
            constants.ONNX_OUTPUT_MODEL_NAME: model_name,
            constants.ONNX_INITIAL_TYPES: [("input", FloatTensorType([X.shape[0], X.shape[1]]))],
        }
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        self.assertEqual(onnx_model.graph.name, model_name)

    # Basic regression test.
    def test_lgbm_onnxml_model_regressor(self):
        warnings.filterwarnings("ignore")
        # Seeded like the other random-data tests so a failure is reproducible.
        X, y = self._random_data(100)

        # Create LightGBM model
        model = lgb.LGBMRegressor()
        model.fit(X, y)
        self._test_regressor(X, model, rtol=1e-02, atol=1e-02)  # Looser tolerance to avoid numerical errors

    # Regression test with 3 estimators (taken from ONNXMLTOOLS).
    def test_lightgbm_regressor(self):
        warnings.filterwarnings("ignore")
        X, y = self._tiny_regression_data()
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 1 estimator (taken from ONNXMLTOOLS).
    def test_lightgbm_regressor1(self):
        warnings.filterwarnings("ignore")
        X, y = self._tiny_regression_data()
        model = lgb.LGBMRegressor(n_estimators=1, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 2 estimators (taken from ONNXMLTOOLS).
    def test_lightgbm_regressor2(self):
        warnings.filterwarnings("ignore")
        X, y = self._tiny_regression_data()
        model = lgb.LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with gbdt boosting type (taken from ONNXMLTOOLS).
    def test_lightgbm_booster_regressor(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = [0, 1, 1.1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {"boosting_type": "gbdt", "objective": "regression", "n_estimators": 3, "min_child_samples": 1, "max_depth": 1},
            data,
        )
        self._test_regressor(X, model)

    # Binary classification test.
    def test_lgbm_onnxml_model_binary(self):
        warnings.filterwarnings("ignore")
        X, y = self._random_data(2)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators (taken from ONNXMLTOOLS).
    def test_lightgbm_classifier(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators zipmap (taken from ONNXMLTOOLS).
    def test_lightgbm_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=np.float32)
        y = [0, 1, 0, 1]
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    def test_lightgbm_booster_classifier(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train({"boosting_type": "gbdt", "objective": "binary", "n_estimators": 3, "min_child_samples": 1}, data)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type zipmap (taken from ONNXMLTOOLS).
    def test_lightgbm_booster_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train({"boosting_type": "gbdt", "objective": "binary", "n_estimators": 3, "min_child_samples": 1}, data)
        self._test_classifier(X, model)

    # Multiclass classification test.
    def test_lgbm_onnxml_model_multi(self):
        warnings.filterwarnings("ignore")
        X, y = self._random_data(3)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators (taken from ONNXMLTOOLS).
    def test_lightgbm_classifier_multi(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = np.array([[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]], dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skip('ONNXMLTOOLS fails with "ValueError: unsupported LightGbm objective: multiclass num_class:3"')
    def test_lightgbm_booster_multi_classifier(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]], dtype=np.float32)
        y = [0, 1, 0, 1, 2, 2]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {"boosting_type": "gbdt", "objective": "multiclass", "n_estimators": 3, "min_child_samples": 1, "num_class": 3},
            data,
        )
        self._test_classifier(X, model)
class TestExtraConf(unittest.TestCase): # Test default number of threads. It will only work on mac after 1.6 https://github.com/pytorch/pytorch/issues/43036 @unittest.skipIf( sys.platform == "darwin" and LooseVersion(torch.__version__) <= LooseVersion("1.6.0"), reason="PyTorch has a bug on mac related to multi-threading", ) def test_torch_deafault_n_threads(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(hb_model) self.assertTrue(torch.get_num_threads() == psutil.cpu_count(logical=False)) self.assertTrue(torch.get_num_interop_threads() == 1) # Test one thread in pytorch. @unittest.skipIf( sys.platform == "darwin" and LooseVersion(torch.__version__) > LooseVersion("1.6.0"), reason="Setting threading multi times will break on mac", ) def test_torch_one_thread(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch", extra_config={constants.N_THREADS: 1}) self.assertIsNotNone(hb_model) self.assertTrue(torch.get_num_threads() == 1) self.assertTrue(torch.get_num_interop_threads() == 1) # Test default number of threads onnx. 
@unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS" ) def test_onnx_deafault_n_threads(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) # Create ONNX-ML model onnx_ml_model = convert_sklearn( model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9 ) hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X) self.assertIsNotNone(hb_model) self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == psutil.cpu_count(logical=False)) self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1) # Test one thread onnx. @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS" ) def test_onnx_one_thread(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "onnx", X, extra_config={constants.N_THREADS: 1}) self.assertIsNotNone(hb_model) self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == 1) self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1) # Test pytorch regressor with batching. 
def test_torch_regression_batch(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) # Test pytorch classifier with batching. def test_torch_classification_batch(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Test pytorch classifier with batching. 
def test_torch_iforest_batch(self): warnings.filterwarnings("ignore") num_classes = 2 model = IsolationForest(n_estimators=10, max_samples=2) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06) # Test pytorch regressor with batching and uneven rows. def test_torch_batch_regression_uneven(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(105, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=105) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) # Test pytorch classification with batching and uneven rows. 
def test_torch_batch_classification_uneven(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(105, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=105) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) # Test pytorch transform with batching and uneven rows. def test_torch_batch_transform(self): warnings.filterwarnings("ignore") model = StandardScaler(with_mean=True, with_std=True) np.random.seed(0) X = np.random.rand(105, 200) X = np.array(X, dtype=np.float32) model.fit(X) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06) # Test torchscript regression with batching. def test_torchscript_regression_batch(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(103, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=103) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) # Test torchscript classification with batching. 
def test_torchscript_classification_batch(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(103, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=103) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Test torchscript iforest with batching. def test_torchscript_iforest_batch(self): warnings.filterwarnings("ignore") num_classes = 2 model = IsolationForest(n_estimators=10, max_samples=2) np.random.seed(0) X = np.random.rand(103, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=103) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06) # Test torchscript transform with batching and uneven rows. 
def test_torchscript_batch_transform(self):
    # 101 rows with batches of 10 leaves a single remainder row.
    warnings.filterwarnings("ignore")
    scaler = StandardScaler(with_mean=True, with_std=True)
    np.random.seed(0)
    X = np.random.rand(101, 200).astype(np.float32)
    scaler.fit(X)

    batch_rows = 10
    leftover = len(X) % batch_rows
    converted = hummingbird.ml.convert_batch(scaler, "torch.jit", X[:batch_rows], remainder_size=leftover)

    self.assertIsNotNone(converted)
    expected = scaler.transform(X)
    np.testing.assert_allclose(expected, converted.transform(X), rtol=1e-06, atol=1e-06)

# Test onnx transform with batching and uneven rows.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_onnx_batch_transform(self):
    warnings.filterwarnings("ignore")
    scaler = StandardScaler(with_mean=True, with_std=True)
    np.random.seed(0)
    X = np.random.rand(101, 200).astype(np.float32)
    scaler.fit(X)

    batch_rows = 10
    leftover = len(X) % batch_rows
    converted = hummingbird.ml.convert_batch(scaler, "onnx", X[:batch_rows], remainder_size=leftover)

    self.assertIsNotNone(converted)
    expected = scaler.transform(X)
    np.testing.assert_allclose(expected, converted.transform(X), rtol=1e-06, atol=1e-06)

# Test onnx regression with batching.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_onnx_regression_batch(self):
    warnings.filterwarnings("ignore")
    regressor = GradientBoostingRegressor(n_estimators=10, max_depth=10)
    np.random.seed(0)
    X = np.random.rand(103, 200).astype(np.float32)
    y = np.random.randint(2, size=103)
    regressor.fit(X, y)

    batch_rows = 10
    leftover = len(X) % batch_rows
    converted = hummingbird.ml.convert_batch(regressor, "onnx", X[:batch_rows], remainder_size=leftover)

    self.assertIsNotNone(converted)
    expected = regressor.predict(X)
    np.testing.assert_allclose(expected, converted.predict(X), rtol=1e-06, atol=1e-06)

# Test onnx classification with batching.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_onnx_classification_batch(self):
    warnings.filterwarnings("ignore")
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=10)
    np.random.seed(0)
    X = np.random.rand(103, 200).astype(np.float32)
    y = np.random.randint(2, size=103)
    classifier.fit(X, y)

    batch_rows = 10
    leftover = len(X) % batch_rows
    converted = hummingbird.ml.convert_batch(classifier, "onnx", X[:batch_rows], remainder_size=leftover)

    self.assertIsNotNone(converted)
    # Compare labels first, then class probabilities.
    for method in ("predict", "predict_proba"):
        expected = getattr(classifier, method)(X)
        actual = getattr(converted, method)(X)
        np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test onnx iforest with batching.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_onnx_iforest_batch(self):
    warnings.filterwarnings("ignore")
    forest = IsolationForest(n_estimators=10, max_samples=2)
    np.random.seed(0)
    X = np.random.rand(103, 200).astype(np.float32)
    y = np.random.randint(2, size=103)
    forest.fit(X, y)

    batch_rows = 10
    leftover = len(X) % batch_rows
    converted = hummingbird.ml.convert_batch(forest, "onnx", X[:batch_rows], remainder_size=leftover)

    self.assertIsNotNone(converted)
    # Check every scoring entry point exposed by the converted model.
    for method in ("predict", "decision_function", "score_samples"):
        expected = getattr(forest, method)(X)
        actual = getattr(converted, method)(X)
        np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test tvm transform with batching.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_batch_transform(self):
    # 100 rows split evenly into batches of 10, so no remainder size is passed.
    warnings.filterwarnings("ignore")
    scaler = StandardScaler(with_mean=True, with_std=True)
    np.random.seed(0)
    X = np.random.rand(100, 200).astype(np.float32)
    scaler.fit(X)

    batch_rows = 10
    converted = hummingbird.ml.convert_batch(scaler, "tvm", X[:batch_rows])

    self.assertIsNotNone(converted)
    expected = scaler.transform(X)
    np.testing.assert_allclose(expected, converted.transform(X), rtol=1e-06, atol=1e-06)

# Test tvm regression with batching.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_regression_batch(self):
    warnings.filterwarnings("ignore")
    regressor = GradientBoostingRegressor(n_estimators=10, max_depth=10)
    np.random.seed(0)
    X = np.random.rand(103, 200).astype(np.float32)
    y = np.random.randint(2, size=103)
    regressor.fit(X, y)

    batch_rows = 10
    leftover = len(X) % batch_rows
    converted = hummingbird.ml.convert_batch(regressor, "tvm", X[:batch_rows], remainder_size=leftover)

    self.assertIsNotNone(converted)
    expected = regressor.predict(X)
    np.testing.assert_allclose(expected, converted.predict(X), rtol=1e-06, atol=1e-06)

# Test tvm classification with batching.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_classification_batch(self):
    # 100 rows split evenly into batches of 10, so no remainder size is passed.
    warnings.filterwarnings("ignore")
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=10)
    np.random.seed(0)
    X = np.random.rand(100, 200).astype(np.float32)
    y = np.random.randint(2, size=100)
    classifier.fit(X, y)

    batch_rows = 10
    converted = hummingbird.ml.convert_batch(classifier, "tvm", X[:batch_rows])

    self.assertIsNotNone(converted)
    # Compare labels first, then class probabilities.
    for method in ("predict", "predict_proba"):
        expected = getattr(classifier, method)(X)
        actual = getattr(converted, method)(X)
        np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test tvm iforest with batching.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_iforest_batch(self):
    # 100 rows split evenly into batches of 10, so no remainder size is passed.
    warnings.filterwarnings("ignore")
    forest = IsolationForest(n_estimators=10, max_samples=2)
    np.random.seed(0)
    X = np.random.rand(100, 200).astype(np.float32)
    y = np.random.randint(2, size=100)
    forest.fit(X, y)

    batch_rows = 10
    converted = hummingbird.ml.convert_batch(forest, "tvm", X[:batch_rows])

    self.assertIsNotNone(converted)
    # Check every scoring entry point exposed by the converted model.
    for method in ("predict", "decision_function", "score_samples"):
        expected = getattr(forest, method)(X)
        actual = getattr(converted, method)(X)
        np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test tvm transform with batching and uneven number of records.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_batch_remainder_transform(self):
    warnings.filterwarnings("ignore")
    scaler = StandardScaler(with_mean=True, with_std=True)
    np.random.seed(0)
    X = np.random.rand(105, 200).astype(np.float32)
    scaler.fit(X)

    batch_rows = 10
    leftover = len(X) % batch_rows
    # The remainder size is passed positionally here, as in the original test.
    converted = hummingbird.ml.convert_batch(scaler, "tvm", X[:batch_rows], leftover)

    self.assertIsNotNone(converted)
    expected = scaler.transform(X)
    np.testing.assert_allclose(expected, converted.transform(X), rtol=1e-06, atol=1e-06)

# Test tvm regression with batching and uneven number of records.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_regression_remainder_batch(self):
    warnings.filterwarnings("ignore")
    regressor = GradientBoostingRegressor(n_estimators=10, max_depth=10)
    np.random.seed(0)
    X = np.random.rand(105, 200).astype(np.float32)
    y = np.random.randint(2, size=105)
    regressor.fit(X, y)

    batch_rows = 10
    leftover = len(X) % batch_rows
    # The remainder size is passed positionally here, as in the original test.
    converted = hummingbird.ml.convert_batch(regressor, "tvm", X[:batch_rows], leftover)

    self.assertIsNotNone(converted)
    expected = regressor.predict(X)
    np.testing.assert_allclose(expected, converted.predict(X), rtol=1e-06, atol=1e-06)

# Test tvm classification with batching and uneven number of records.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_classification_remainder_batch(self):
    warnings.filterwarnings("ignore")
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=10)
    np.random.seed(0)
    X = np.random.rand(105, 200).astype(np.float32)
    y = np.random.randint(2, size=105)
    classifier.fit(X, y)

    batch_rows = 10
    leftover = len(X) % batch_rows
    converted = hummingbird.ml.convert_batch(classifier, "tvm", X[:batch_rows], leftover)

    self.assertIsNotNone(converted)
    # Compare labels first, then class probabilities.
    for method in ("predict", "predict_proba"):
        expected = getattr(classifier, method)(X)
        actual = getattr(converted, method)(X)
        np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test tvm iforest with batching and uneven number of records.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_iforest_remainder_batch(self):
    warnings.filterwarnings("ignore")
    forest = IsolationForest(n_estimators=10, max_samples=2)
    np.random.seed(0)
    X = np.random.rand(105, 200).astype(np.float32)
    y = np.random.randint(2, size=105)
    forest.fit(X, y)

    batch_rows = 10
    leftover = len(X) % batch_rows
    converted = hummingbird.ml.convert_batch(forest, "tvm", X[:batch_rows], leftover)

    self.assertIsNotNone(converted)
    # Check every scoring entry point exposed by the converted model.
    for method in ("predict", "decision_function", "score_samples"):
        expected = getattr(forest, method)(X)
        actual = getattr(converted, method)(X)
        np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test batch with pandas.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
def test_pandas_batch(self):
    import pandas

    feature_names = ["vA", "vB", "vC"]
    iris = datasets.load_iris()
    # 149 rows with batches of 10 leaves a remainder of 9 rows.
    X = iris.data[:149, :3]
    y = iris.target[:149]
    X_train = pandas.DataFrame(X, columns=feature_names)

    passthrough = ColumnTransformer(transformers=[], remainder="passthrough")
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=10)
    pipeline = Pipeline(steps=[("preprocessor", passthrough), ("classifier", classifier)])
    pipeline.fit(X_train, y)

    batch_rows = 10
    leftover = X.shape[0] % batch_rows
    sample_batch = pandas.DataFrame(X[:batch_rows], columns=feature_names)
    torch_model = hummingbird.ml.convert_batch(pipeline, "torch", sample_batch, leftover)

    self.assertTrue(torch_model is not None)
    expected = pipeline.predict_proba(X_train)
    actual = torch_model.predict_proba(X_train)
    np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test batch with pandas ts.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
def test_pandas_batch_ts(self):
    import pandas

    feature_names = ["vA", "vB", "vC"]
    iris = datasets.load_iris()
    # 149 rows with batches of 10 leaves a remainder of 9 rows.
    X = iris.data[:149, :3]
    y = iris.target[:149]
    X_train = pandas.DataFrame(X, columns=feature_names)

    passthrough = ColumnTransformer(transformers=[], remainder="passthrough")
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=10)
    pipeline = Pipeline(steps=[("preprocessor", passthrough), ("classifier", classifier)])
    pipeline.fit(X_train, y)

    batch_rows = 10
    leftover = X.shape[0] % batch_rows
    sample_batch = pandas.DataFrame(X[:batch_rows], columns=feature_names)
    torch_model = hummingbird.ml.convert_batch(pipeline, "torch.jit", sample_batch, leftover)

    self.assertTrue(torch_model is not None)
    expected = pipeline.predict_proba(X_train)
    actual = torch_model.predict_proba(X_train)
    np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test batch with pandas onnx.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
@unittest.skipIf(not onnx_runtime_installed(), reason="ONNXML test require ONNX and ORT")
def test_pandas_batch_onnx(self):
    import pandas

    feature_names = ["vA", "vB", "vC"]
    iris = datasets.load_iris()
    X = iris.data[:149, :3]
    y = iris.target[:149]
    X_train = pandas.DataFrame(X, columns=feature_names)

    passthrough = ColumnTransformer(transformers=[], remainder="passthrough")
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=10)
    pipeline = Pipeline(steps=[("preprocessor", passthrough), ("classifier", classifier)])
    pipeline.fit(X_train, y)

    batch_rows = 10
    leftover = X.shape[0] % batch_rows
    sample_batch = pandas.DataFrame(X[:batch_rows], columns=feature_names)
    hb_model = hummingbird.ml.convert_batch(pipeline, "onnx", sample_batch, leftover)

    self.assertTrue(hb_model is not None)
    expected = pipeline.predict_proba(X_train)
    actual = hb_model.predict_proba(X_train)
    np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test batch with pandas from onnxml.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_pandas_batch_onnxml(self):
    import pandas

    feature_names = ["vA", "vB", "vC"]
    iris = datasets.load_iris()
    # Unlike the sibling pandas tests, this one keeps every row of the dataset.
    X = iris.data[:, :3]
    y = iris.target
    X_train = pandas.DataFrame(X, columns=feature_names)

    passthrough = ColumnTransformer(transformers=[], remainder="passthrough")
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=10)
    pipeline = Pipeline(steps=[("preprocessor", passthrough), ("classifier", classifier)])
    pipeline.fit(X_train, y)

    # Create ONNX-ML model: one single-column double input per feature.
    column_types = [(name, DoubleTensorType([X.shape[0], 1])) for name in feature_names]
    onnx_ml_model = convert_sklearn(pipeline, initial_types=column_types, target_opset=9)

    batch_rows = 10
    leftover = X.shape[0] % batch_rows
    sample_batch = pandas.DataFrame(X[:batch_rows], columns=feature_names)
    hb_model = hummingbird.ml.convert_batch(onnx_ml_model, "onnx", sample_batch, leftover)

    self.assertTrue(hb_model is not None)
    expected = pipeline.predict_proba(X_train)
    actual = hb_model.predict_proba(X_train)
    np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Test batch with pandas tvm.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
def test_pandas_batch_tvm(self):
    import pandas

    feature_names = ["vA", "vB", "vC"]
    iris = datasets.load_iris()
    # 149 rows with batches of 10 leaves a remainder of 9 rows.
    X = iris.data[:149, :3]
    y = iris.target[:149]
    X_train = pandas.DataFrame(X, columns=feature_names)

    passthrough = ColumnTransformer(transformers=[], remainder="passthrough")
    classifier = GradientBoostingClassifier(n_estimators=10, max_depth=10)
    pipeline = Pipeline(steps=[("preprocessor", passthrough), ("classifier", classifier)])
    pipeline.fit(X_train, y)

    batch_rows = 10
    leftover = X.shape[0] % batch_rows
    sample_batch = pandas.DataFrame(X[:batch_rows], columns=feature_names)
    hb_model = hummingbird.ml.convert_batch(pipeline, "tvm", sample_batch, leftover)

    self.assertTrue(hb_model is not None)
    expected = pipeline.predict_proba(X_train)
    actual = hb_model.predict_proba(X_train)
    np.testing.assert_allclose(expected, actual, rtol=1e-06, atol=1e-06)

# Check converter with model name set as extra_config.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_lightgbm_pytorch_extra_config(self):
    # NOTE(review): despite "pytorch" in the name, this test targets the "onnx" backend.
    warnings.filterwarnings("ignore")
    X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)
    regressor = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    regressor.fit(X, y)

    # Create ONNX-ML model
    input_types = [("input", FloatTensorType(list(X.shape)))]
    onnx_ml_model = convert_lightgbm(regressor, initial_types=input_types, target_opset=9)

    # Create ONNX model
    model_name = "hummingbird.ml.test.lightgbm"
    extra_config = {constants.ONNX_OUTPUT_MODEL_NAME: model_name}
    onnx_model = hummingbird.ml.convert(onnx_ml_model, "onnx", extra_config=extra_config)

    assert onnx_model.model.graph.name == model_name

# Test max fuse depth configuration in TVM.
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_tvm_max_fuse(self):
    # Convert a tiny LightGBM regressor to TVM with a custom max fuse depth
    # and check that predictions still match the original model.
    # The LightGBM guard is needed because the model comes from `lgb`.
    warnings.filterwarnings("ignore")
    X = [[0, 1], [1, 1], [2, 0]]
    X = np.array(X, dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)
    model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    model.fit(X, y)

    hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

# Test TVM without padding returns an error if sizes don't match.
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_tvm_no_padding(self):
    # The model is converted with 100 sample rows and then asked to predict
    # on 98 rows; without padding this is expected to raise AssertionError.
    warnings.filterwarnings("ignore")
    np.random.seed(0)
    X = np.random.rand(100, 20)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(2, size=100)
    model = lgb.LGBMRegressor(n_estimators=10)
    model.fit(X, y)

    hb_model = hummingbird.ml.convert(model, "tvm", X)
    self.assertIsNotNone(hb_model)
    self.assertRaises(AssertionError, hb_model.predict, X[:98])

# Test padding in TVM.
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_tvm_padding(self):
    # Same setup as the no-padding test, but with TVM_PAD_INPUT enabled the
    # 98-row prediction is expected to succeed and match LightGBM.
    warnings.filterwarnings("ignore")
    np.random.seed(0)
    X = np.random.rand(100, 20)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(2, size=100)
    model = lgb.LGBMRegressor(n_estimators=10)
    model.fit(X, y)

    hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X[:98]), hb_model.predict(X[:98]), rtol=1e-06, atol=1e-06)

# Test padding in TVM does not create problems when not necessary.
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_tvm_padding_2(self):
    # Padding is enabled but the prediction input has the same number of rows
    # as the conversion sample, so no padding should actually be needed.
    # The LightGBM guard is needed because the model comes from `lgb`.
    warnings.filterwarnings("ignore")
    X = [[0, 1], [1, 1], [2, 0]]
    X = np.array(X, dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)
    model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    model.fit(X, y)

    hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

# Test max string length.
def test_max_str_length(self):
    # Convert a LabelEncoder fitted on strings with an explicit
    # MAX_STRING_LENGTH and compare its transform with scikit-learn's.
    model = LabelEncoder()
    data = [
        "paris",
        "tokyo",
        "amsterdam",
        "tokyo",
    ]
    model.fit(data)

    torch_model = hummingbird.ml.convert(model, "torch", extra_config={constants.MAX_STRING_LENGTH: 20})

    np.testing.assert_allclose(model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06)
class TestONNXLabelEncoder(unittest.TestCase):
    # Test LabelEncoder with longs
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_model_label_encoder_int_onnxml(self):
        encoder = LabelEncoder()
        X = np.array([1, 4, 5, 2, 0, 2], dtype=np.int64)
        encoder.fit(X)

        # Create ONNX-ML model
        input_types = [("input", LongTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(encoder, initial_types=input_types)

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = np.array(session.run(output_names, feed)).ravel()

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X).ravel()

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=1e-06, atol=1e-06)

    # Test LabelEncoder with strings on Pytorch >=1.8.0
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.8.0"),
        reason="PyTorch exporter don't support nonzero until version 1.8.0",
    )
    def test_model_label_encoder_str_onnxml(self):
        encoder = LabelEncoder()
        data = ["paris", "milan", "amsterdam", "tokyo"]
        encoder.fit(data)

        input_types = [("input", StringTensorType_onnx([4]))]
        onnx_ml_model = convert_sklearn(encoder, initial_types=input_types)

        onnx_model = convert(onnx_ml_model, "onnx", data)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        feed = {session.get_inputs()[0].name: data}
        onnx_ml_pred = session.run(output_names, feed)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(data)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred, rtol=1e-06, atol=1e-06)

    # Test LabelEncoder String failcase for torch < 1.8.0
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(
        LooseVersion(torch.__version__) >= LooseVersion("1.8.0"),
        reason="PyTorch exporter supports nonzero only from version 1.8.0 and should fail on older versions",
    )
    def test_le_string_raises_rt_onnx(self):
        warnings.filterwarnings("ignore")
        encoder = LabelEncoder()
        data = ["paris", "milan", "amsterdam", "tokyo"]
        encoder.fit(data)

        input_types = [("input", StringTensorType_onnx([4]))]
        onnx_ml_model = convert_sklearn(encoder, initial_types=input_types)

        # Create ONNX model by calling converter, should raise error for strings
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", data)

    # if the model is corrupt, we should get a RuntimeError
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_label_encoder_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        encoder = LabelEncoder()
        X = np.array([1, 4, 5, 2, 0, 2], dtype=np.int64)
        encoder.fit(X)

        # generate test input
        input_types = [("float_input", FloatTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(encoder, initial_types=input_types)

        # Blank out the first attribute name of the first node to corrupt the model.
        first_attribute = onnx_ml_model.graph.node[0].attribute[0]
        first_attribute.name = b""

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)