class TestNoExtra(unittest.TestCase):
    """
    These tests are meant to be run on a clean container after doing
    `pip install hummingbird-ml` without any of the `extra` packages.

    Each "not installed" test is skipped when the corresponding optional
    package is present, so the suite is a no-op on a fully equipped machine.
    """

    # Test no LGBM returns false on lightgbm_installed()
    @unittest.skipIf(lightgbm_installed(), reason="Test when LightGBM is not installed")
    def test_lightgbm_installed_false(self):
        warnings.filterwarnings("ignore")

        # Use a TestCase assertion rather than a bare `assert`: bare asserts are
        # removed under `python -O`, which would make this test pass vacuously.
        self.assertFalse(lightgbm_installed())

    # Test no XGB returns false on xgboost_installed()
    @unittest.skipIf(xgboost_installed(), reason="Test when XGBoost is not installed")
    def test_xgboost_installed_false(self):
        warnings.filterwarnings("ignore")

        self.assertFalse(xgboost_installed())

    # Test that we can import the converter successfully without installing [extra]
    def test_import_convert_no_extra(self):
        try:
            from hummingbird.ml import convert  # noqa: F401
        except Exception as e:
            # Deliberately broad: any error raised while importing (not only
            # ImportError) means the base install is broken. The original cause
            # is included so the failure is diagnosable from the test report.
            self.fail("Unexpected Error on importing convert without extra packages: {!r}".format(e))
class TestExtraConf(unittest.TestCase):
    """
    Tests for the `extra_config` options and the batch API of `hummingbird.ml`.

    Covered here:
      * thread-count configuration for the PyTorch and ONNX backends;
      * `convert_batch` on the torch, torch.jit, onnx and tvm backends, with
        numpy and pandas inputs, with and without a remainder batch;
      * backend-specific options (ONNX output model name, TVM max fuse depth,
        TVM input padding, maximum string length).

    The data / fit / convert / compare steps shared by most tests live in the
    private `_...` helpers below; unittest does not collect them as tests.
    """

    # Number of rows of the sample input handed to `convert_batch`.
    _BATCH_SIZE = 10

    # Column names used by the pandas-based tests.
    _IRIS_COLUMNS = ["vA", "vB", "vC"]

    def setUp(self):
        # Silence library warnings for every test in this class.
        warnings.filterwarnings("ignore")

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _random_features(n_rows, n_cols=200):
        """Seed numpy's global RNG with 0 and return a float32 array of shape (n_rows, n_cols)."""
        np.random.seed(0)
        X = np.random.rand(n_rows, n_cols)
        return np.array(X, dtype=np.float32)

    @classmethod
    def _fit_on_random_data(cls, model, n_rows, n_cols=200, with_labels=True, num_classes=2):
        """
        Fit `model` in place on reproducible random data and return the feature matrix.

        The draw order (seed, features, labels, fit) is kept fixed so that results
        are reproducible. With `with_labels=False` no labels are drawn and
        `model.fit(X)` is called (used for transformers).
        """
        X = cls._random_features(n_rows, n_cols)
        if with_labels:
            y = np.random.randint(num_classes, size=n_rows)
            model.fit(X, y)
        else:
            model.fit(X)
        return X

    @staticmethod
    def _fit_tiny_lgbm_regressor():
        """Fit a 3-tree LightGBM regressor on a fixed 3x2 dataset; return (model, X)."""
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        return model, X

    def _assert_same_outputs(self, expected_model, hb_model, X, methods):
        """Check that each method named in `methods` gives matching results on `X` for both models."""
        for method in methods:
            np.testing.assert_allclose(
                getattr(expected_model, method)(X),
                getattr(hb_model, method)(X),
                rtol=1e-06,
                atol=1e-06,
                err_msg="Mismatch in `{}`".format(method),
            )

    def _check_batch(self, model, backend, X, methods, pass_remainder=True):
        """
        Convert `model` with `convert_batch` and compare its outputs with the original on `X`.

        The first `_BATCH_SIZE` rows of `X` are used as sample input. When
        `pass_remainder` is true, `remainder_size` is set to the number of rows
        left over after splitting `X` into full batches; otherwise the argument
        is omitted so the converter's own default is exercised.
        """
        sample = X[: self._BATCH_SIZE, :]
        if pass_remainder:
            remainder_size = X.shape[0] % self._BATCH_SIZE
            hb_model = hummingbird.ml.convert_batch(model, backend, sample, remainder_size=remainder_size)
        else:
            hb_model = hummingbird.ml.convert_batch(model, backend, sample)

        self.assertIsNotNone(hb_model)
        self._assert_same_outputs(model, hb_model, X, methods)

    def _fit_iris_pipeline(self, n_rows=149):
        """
        Fit a passthrough ColumnTransformer + GradientBoostingClassifier pipeline on iris.

        Only the first three features and the first `n_rows` rows are used
        (`n_rows=None` keeps every row). Returns `(pipeline, X, X_train)` where
        `X` is the numpy data and `X_train` the same data as a pandas DataFrame.
        """
        import pandas

        iris = datasets.load_iris()
        X = iris.data[:n_rows, :3]
        y = iris.target[:n_rows]
        X_train = pandas.DataFrame(X, columns=self._IRIS_COLUMNS)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=10)),
            ]
        )
        pipeline.fit(X_train, y)
        return pipeline, X, X_train

    def _check_pandas_batch(self, pipeline, X, X_train, backend, source_model=None):
        """
        Batch-convert with a pandas sample input and compare `predict_proba` with `pipeline`.

        `source_model` is the object handed to `convert_batch`; it defaults to
        `pipeline` itself and is overridden when an intermediate (ONNX-ML) model
        is converted instead.
        """
        import pandas

        sample = pandas.DataFrame(X[: self._BATCH_SIZE], columns=self._IRIS_COLUMNS)
        remainder_size = X.shape[0] % self._BATCH_SIZE
        hb_model = hummingbird.ml.convert_batch(
            pipeline if source_model is None else source_model, backend, sample, remainder_size
        )

        self.assertIsNotNone(hb_model)
        self._assert_same_outputs(pipeline, hb_model, X_train, ["predict_proba"])

    # ------------------------------------------------------------ thread config

    # Test default number of threads. It will only work on mac after 1.6 https://github.com/pytorch/pytorch/issues/43036
    @unittest.skipIf(
        sys.platform == "darwin" and LooseVersion(torch.__version__) <= LooseVersion("1.6.0"),
        reason="PyTorch has a bug on mac related to multi-threading",
    )
    def test_torch_deafault_n_threads(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        self._fit_on_random_data(model, 100)

        hb_model = hummingbird.ml.convert(model, "torch")

        self.assertIsNotNone(hb_model)
        self.assertEqual(torch.get_num_threads(), psutil.cpu_count(logical=False))
        self.assertEqual(torch.get_num_interop_threads(), 1)

    # Test one thread in pytorch.
    @unittest.skipIf(
        sys.platform == "darwin" and LooseVersion(torch.__version__) > LooseVersion("1.6.0"),
        reason="Setting threading multi times will break on mac",
    )
    def test_torch_one_thread(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        self._fit_on_random_data(model, 100)

        hb_model = hummingbird.ml.convert(model, "torch", extra_config={constants.N_THREADS: 1})

        self.assertIsNotNone(hb_model)
        self.assertEqual(torch.get_num_threads(), 1)
        self.assertEqual(torch.get_num_interop_threads(), 1)

    # Test default number of threads onnx.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_deafault_n_threads(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 100)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X)

        self.assertIsNotNone(hb_model)
        session_options = hb_model._session.get_session_options()
        self.assertEqual(session_options.intra_op_num_threads, psutil.cpu_count(logical=False))
        self.assertEqual(session_options.inter_op_num_threads, 1)

    # Test one thread onnx.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_one_thread(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 100)

        hb_model = hummingbird.ml.convert(model, "onnx", X, extra_config={constants.N_THREADS: 1})

        self.assertIsNotNone(hb_model)
        session_options = hb_model._session.get_session_options()
        self.assertEqual(session_options.intra_op_num_threads, 1)
        self.assertEqual(session_options.inter_op_num_threads, 1)

    # ------------------------------------------------------------ torch batching

    # Test pytorch regressor with batching.
    def test_torch_regression_batch(self):
        model = GradientBoostingRegressor(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 100)
        self._check_batch(model, "torch", X, ["predict"])

    # Test pytorch classifier with batching.
    def test_torch_classification_batch(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 100)
        self._check_batch(model, "torch", X, ["predict", "predict_proba"])

    # Test pytorch iforest with batching.
    def test_torch_iforest_batch(self):
        model = IsolationForest(n_estimators=10, max_samples=2)
        X = self._fit_on_random_data(model, 100)
        self._check_batch(model, "torch", X, ["predict", "decision_function", "score_samples"])

    # Test pytorch regressor with batching and uneven rows.
    def test_torch_batch_regression_uneven(self):
        model = GradientBoostingRegressor(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 105)
        self._check_batch(model, "torch", X, ["predict"])

    # Test pytorch classification with batching and uneven rows.
    def test_torch_batch_classification_uneven(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 105)
        self._check_batch(model, "torch", X, ["predict"])

    # Test pytorch transform with batching and uneven rows.
    def test_torch_batch_transform(self):
        model = StandardScaler(with_mean=True, with_std=True)
        X = self._fit_on_random_data(model, 105, with_labels=False)
        self._check_batch(model, "torch", X, ["transform"])

    # ------------------------------------------------------ torchscript batching

    # Test torchscript regression with batching.
    def test_torchscript_regression_batch(self):
        model = GradientBoostingRegressor(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 103)
        self._check_batch(model, "torch.jit", X, ["predict"])

    # Test torchscript classification with batching.
    def test_torchscript_classification_batch(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 103)
        self._check_batch(model, "torch.jit", X, ["predict", "predict_proba"])

    # Test torchscript iforest with batching.
    def test_torchscript_iforest_batch(self):
        model = IsolationForest(n_estimators=10, max_samples=2)
        X = self._fit_on_random_data(model, 103)
        self._check_batch(model, "torch.jit", X, ["predict", "decision_function", "score_samples"])

    # Test torchscript transform with batching and uneven rows.
    def test_torchscript_batch_transform(self):
        model = StandardScaler(with_mean=True, with_std=True)
        X = self._fit_on_random_data(model, 101, with_labels=False)
        self._check_batch(model, "torch.jit", X, ["transform"])

    # ------------------------------------------------------------- onnx batching

    # Test onnx transform with batching and uneven rows.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_batch_transform(self):
        model = StandardScaler(with_mean=True, with_std=True)
        X = self._fit_on_random_data(model, 101, with_labels=False)
        self._check_batch(model, "onnx", X, ["transform"])

    # Test onnx regression with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_regression_batch(self):
        model = GradientBoostingRegressor(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 103)
        self._check_batch(model, "onnx", X, ["predict"])

    # Test onnx classification with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_classification_batch(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 103)
        self._check_batch(model, "onnx", X, ["predict", "predict_proba"])

    # Test onnx iforest with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_iforest_batch(self):
        model = IsolationForest(n_estimators=10, max_samples=2)
        X = self._fit_on_random_data(model, 103)
        self._check_batch(model, "onnx", X, ["predict", "decision_function", "score_samples"])

    # -------------------------------------------------------------- tvm batching

    # Test tvm transform with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_batch_transform(self):
        model = StandardScaler(with_mean=True, with_std=True)
        X = self._fit_on_random_data(model, 100, with_labels=False)
        self._check_batch(model, "tvm", X, ["transform"], pass_remainder=False)

    # Test tvm regression with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_regression_batch(self):
        model = GradientBoostingRegressor(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 103)
        self._check_batch(model, "tvm", X, ["predict"])

    # Test tvm classification with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_classification_batch(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 100)
        self._check_batch(model, "tvm", X, ["predict", "predict_proba"], pass_remainder=False)

    # Test tvm iforest with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_iforest_batch(self):
        model = IsolationForest(n_estimators=10, max_samples=2)
        X = self._fit_on_random_data(model, 100)
        self._check_batch(
            model, "tvm", X, ["predict", "decision_function", "score_samples"], pass_remainder=False
        )

    # Test tvm transform with batching and uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_batch_remainder_transform(self):
        model = StandardScaler(with_mean=True, with_std=True)
        X = self._fit_on_random_data(model, 105, with_labels=False)
        self._check_batch(model, "tvm", X, ["transform"])

    # Test tvm regression with batching and uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_regression_remainder_batch(self):
        model = GradientBoostingRegressor(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 105)
        self._check_batch(model, "tvm", X, ["predict"])

    # Test tvm classification with batching and uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_classification_remainder_batch(self):
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        X = self._fit_on_random_data(model, 105)
        self._check_batch(model, "tvm", X, ["predict", "predict_proba"])

    # Test tvm iforest with batching and uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_iforest_remainder_batch(self):
        model = IsolationForest(n_estimators=10, max_samples=2)
        X = self._fit_on_random_data(model, 105)
        self._check_batch(model, "tvm", X, ["predict", "decision_function", "score_samples"])

    # ----------------------------------------------------------- pandas batching

    # Test batch with pandas.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pandas_batch(self):
        pipeline, X, X_train = self._fit_iris_pipeline()
        self._check_pandas_batch(pipeline, X, X_train, "torch")

    # Test batch with pandas ts.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pandas_batch_ts(self):
        pipeline, X, X_train = self._fit_iris_pipeline()
        self._check_pandas_batch(pipeline, X, X_train, "torch.jit")

    # Test batch with pandas onnx.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(not onnx_runtime_installed(), reason="ONNXML test require ONNX and ORT")
    def test_pandas_batch_onnx(self):
        pipeline, X, X_train = self._fit_iris_pipeline()
        self._check_pandas_batch(pipeline, X, X_train, "onnx")

    # Test batch with pandas from onnxml.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_pandas_batch_onnxml(self):
        # This test uses every iris row (no [:149] truncation).
        pipeline, X, X_train = self._fit_iris_pipeline(n_rows=None)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            pipeline,
            initial_types=[
                ("vA", DoubleTensorType([X.shape[0], 1])),
                ("vB", DoubleTensorType([X.shape[0], 1])),
                ("vC", DoubleTensorType([X.shape[0], 1])),
            ],
            target_opset=9,
        )

        self._check_pandas_batch(pipeline, X, X_train, "onnx", source_model=onnx_ml_model)

    # Test batch with pandas tvm.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_pandas_batch_tvm(self):
        pipeline, X, X_train = self._fit_iris_pipeline()
        self._check_pandas_batch(pipeline, X, X_train, "tvm")

    # -------------------------------------------------- backend-specific options

    # Check converter with model name set as extra_config.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_pytorch_extra_config(self):
        model, X = self._fit_tiny_lgbm_regressor()

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Create ONNX model
        model_name = "hummingbird.ml.test.lightgbm"
        onnx_model = hummingbird.ml.convert(onnx_ml_model, "onnx", extra_config={constants.ONNX_OUTPUT_MODEL_NAME: model_name})

        # assertEqual instead of a bare assert: it survives `python -O` and reports both values.
        self.assertEqual(onnx_model.model.graph.name, model_name)

    # Test max fuse depth configuration in TVM.
    # The model is a LightGBM one, so LightGBM is required in addition to TVM;
    # without this guard the test dies with a NameError on `lgb`.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_tvm_max_fuse(self):
        model, X = self._fit_tiny_lgbm_regressor()

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

        self.assertIsNotNone(hb_model)
        self._assert_same_outputs(model, hb_model, X, ["predict"])

    # Test TVM without padding returns an error if sizes don't match.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_tvm_no_padding(self):
        model = lgb.LGBMRegressor(n_estimators=10)
        X = self._fit_on_random_data(model, 100, n_cols=20)

        hb_model = hummingbird.ml.convert(model, "tvm", X)

        self.assertIsNotNone(hb_model)
        # The model was compiled for 100 rows; 98 rows must be rejected.
        with self.assertRaises(AssertionError):
            hb_model.predict(X[:98])

    # Test padding in TVM.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_tvm_padding(self):
        model = lgb.LGBMRegressor(n_estimators=10)
        X = self._fit_on_random_data(model, 100, n_cols=20)

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})

        self.assertIsNotNone(hb_model)
        self._assert_same_outputs(model, hb_model, X[:98], ["predict"])

    # Test padding in TVM does not create problems when not necessary.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_tvm_padding_2(self):
        model, X = self._fit_tiny_lgbm_regressor()

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})

        self.assertIsNotNone(hb_model)
        self._assert_same_outputs(model, hb_model, X, ["predict"])

    # Test max string length.
    def test_max_str_length(self):
        model = LabelEncoder()
        data = [
            "paris",
            "tokyo",
            "amsterdam",
            "tokyo",
        ]
        model.fit(data)

        torch_model = hummingbird.ml.convert(model, "torch", extra_config={constants.MAX_STRING_LENGTH: 20})

        np.testing.assert_allclose(model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06)
from sklearn.preprocessing import StandardScaler, LabelEncoder from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline import torch import hummingbird.ml from hummingbird.ml._utils import ( onnx_ml_tools_installed, onnx_runtime_installed, pandas_installed, lightgbm_installed, tvm_installed, ) from hummingbird.ml import constants if lightgbm_installed(): import lightgbm as lgb if onnx_ml_tools_installed(): from onnxmltools.convert import convert_sklearn, convert_lightgbm class TestExtraConf(unittest.TestCase): # Test default number of threads. It will only work on mac after 1.6 https://github.com/pytorch/pytorch/issues/43036 @unittest.skipIf( sys.platform == "darwin" and LooseVersion(torch.__version__) <= LooseVersion("1.6.0"), reason="PyTorch has a bug on mac related to multi-threading", ) def test_torch_deafault_n_threads(self): warnings.filterwarnings("ignore") max_depth = 10
class TestLGBMConverter(unittest.TestCase):
    """Compare LightGBM models against their Hummingbird torch conversions."""

    # Check tree implementation
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(10, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=10)

        models = [
            lgb.LGBMClassifier(n_estimators=1, max_depth=1),
            lgb.LGBMRegressor(n_estimators=1, max_depth=1),
        ]
        for model in models:
            for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model, "torch", extra_config={"tree_implementation": extra_config_param}
                )
                self.assertIsNotNone(torch_model)
                # The first converted operator must be of the requested implementation type.
                first_operator = list(torch_model.operator_map.values())[0]
                self.assertEqual(str(type(first_operator)), gbdt_implementation_map[extra_config_param])

    def _run_lgbm_classifier_converter(self, num_classes, extra_config=None):
        """Fit LGBMClassifier at several depths and compare predict_proba with the torch model.

        Args:
            num_classes: Exclusive upper bound of the random integer labels.
            extra_config: Optional dict forwarded to hummingbird.ml.convert
                (an empty dict is used when omitted).
        """
        extra_config = {} if extra_config is None else extra_config
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            # Re-seeding on every iteration keeps the data identical across depths.
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch", extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Binary classifier
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_classifier_converter(self):
        self._run_lgbm_classifier_converter(2)

    # Gemm classifier
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_classifier_converter(self):
        self._run_lgbm_classifier_converter(2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Multi classifier
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(3)

    # Gemm multi classifier
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(3, extra_config={"tree_implementation": "gemm"})

    # Tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(3, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(3, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_lgbm_ranker_converter(self, num_classes, extra_config=None, label_gain=None):
        """Fit LGBMRanker at several depths and compare predict with the torch model.

        Args:
            num_classes: Exclusive upper bound of the random integer labels.
            extra_config: Optional dict forwarded to hummingbird.ml.convert
                (an empty dict is used when omitted).
            label_gain: Passed unchanged to the LGBMRanker constructor.
        """
        extra_config = {} if extra_config is None else extra_config
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRanker(n_estimators=10, max_depth=max_depth, label_gain=label_gain)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            # All rows are placed in a single query group.
            model.fit(X, y, group=[X.shape[0]], eval_set=[(X, y)], eval_group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model, "torch", extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06)

    # Ranker - small, no label gain
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter_no_label(self):
        self._run_lgbm_ranker_converter(30)

    # Ranker
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter(self):
        self._run_lgbm_ranker_converter(1000, label_gain=list(range(1000)))

    # Gemm ranker
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000, extra_config={"tree_implementation": "gemm"}, label_gain=list(range(1000))
        )

    # Tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000, extra_config={"tree_implementation": "tree_trav"}, label_gain=list(range(1000))
        )

    # Perf_tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"}, label_gain=list(range(1000))
        )

    def _run_lgbm_regressor_converter(self, num_classes, extra_config=None):
        """Fit LGBMRegressor at several depths and compare predict with the torch model.

        Args:
            num_classes: Exclusive upper bound of the random integer targets.
            extra_config: Optional dict forwarded to hummingbird.ml.convert
                (an empty dict is used when omitted).
        """
        extra_config = {} if extra_config is None else extra_config
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch", extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06)

    # Regressor
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_regressor_converter(self):
        self._run_lgbm_regressor_converter(1000)

    # Gemm regressor
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_regressor_converter(self):
        self._run_lgbm_regressor_converter(1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(1000, extra_config={"tree_implementation": "perf_tree_trav"})
def test_lightgbm_installed_false(self):
    warnings.filterwarnings("ignore")
    # Use a unittest assertion: a bare `assert` is stripped when Python runs with -O,
    # which would turn this test into a no-op.
    self.assertFalse(lightgbm_installed())
class TestONNXLightGBMConverter(unittest.TestCase):
    """Compare ONNX-ML LightGBM models (run with ONNX Runtime) against their Hummingbird conversions."""

    # Base test implementation comparing ONNXML and ONNX models.
    def _test_lgbm(self, X, model, extra_config=None):
        """Convert `model` to ONNX-ML, translate it with Hummingbird, and run both on `X`.

        Args:
            X: Input array; its first two shape entries define the ONNX input type.
            model: Trained LightGBM model accepted by `convert_lightgbm`.
            extra_config: Optional dict forwarded to `convert`
                (an empty dict is used when omitted).

        Returns:
            Tuple `(onnx_ml_pred, onnx_pred, output_names)`. `onnx_ml_pred` holds the
            ONNX-ML outputs with any output whose name contains "label" at index 1 and
            the other output at index 0. `onnx_pred` is the raw prediction when there is
            a single output, otherwise `[predict_proba, predict]`.
        """
        extra_config = {} if extra_config is None else extra_config

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        onnx_ml_pred = [[] for _ in output_names]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for name, values in zip(output_names, pred):
            if "label" in name:
                onnx_ml_pred[1] = values
            else:
                onnx_ml_pred[0] = values

        # Get the predictions for the ONNX model
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred = [[] for _ in output_names]
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self, X, model, rtol=1e-06, atol=1e-06, extra_config=None):
        """Assert that ONNX-ML and Hummingbird regression outputs agree within tolerance."""
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(X, model, extra_config)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(), onnx_pred, rtol=rtol, atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self, X, model, rtol=1e-06, atol=1e-06, extra_config=None):
        """Assert that ONNX-ML and Hummingbird labels and probabilities agree within tolerance."""
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        # Each ONNX-ML probability entry is a mapping; compare its values row by row.
        onnx_ml_probs = [list(row.values()) for row in onnx_ml_pred[0]]
        np.testing.assert_allclose(onnx_ml_probs, onnx_pred[0], rtol=rtol, atol=atol)  # probs

    # Check that ONNXML models can also target other backends.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_onnx_pytorch(self):
        warnings.filterwarnings("ignore")

        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        pt_model = convert(onnx_ml_model, "torch", X)
        self.assertIsNotNone(pt_model)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        np.testing.assert_allclose(onnx_ml_pred[0].flatten(), pt_model.predict(X))

    # Basic regression test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_regressor(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMRegressor()
        model.fit(X, y)

        import platform

        # TODO bug on newer macOS versions?
        # Looser tolerances are used on macOS.
        if platform.system() == "Darwin":
            self._test_regressor(X, model, rtol=1e-05, atol=1e-04)
        else:
            self._test_regressor(X, model)

    # Regression test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 1 estimator (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor1(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=1, min_child_samples=1)
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 2 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor2(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with gbdt boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_regressor(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = [0, 1, 1.1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "regression",
                "n_estimators": 3,
                "min_child_samples": 1,
                "max_depth": 1,
            },
            data,
        )
        self._test_regressor(X, model)

    # Binary classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_binary(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with float64.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_binary_float64(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        # NOTE(review): the data is cast to float32 even though the test name says
        # float64 - confirm whether the cast is intentional.
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)

        # Here the LightGBM model is handed to `convert` directly, without the ONNX-ML step.
        onnx_model = convert(model, "onnx", X)

        np.testing.assert_allclose(model.predict(X), onnx_model.predict(X))

    # Binary classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=np.float32)
        y = [0, 1, 0, 1]
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_classifier(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {"boosting_type": "gbdt", "objective": "binary", "n_estimators": 3, "min_child_samples": 1},
            data,
        )
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0], [1, 2]], dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {"boosting_type": "gbdt", "objective": "binary", "n_estimators": 3, "min_child_samples": 1},
            data,
        )
        self._test_classifier(X, model)

    # Multiclass classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_multi(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier_multi(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = np.array([[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]], dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_multi_classifier(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]], dtype=np.float32)
        y = [0, 1, 0, 1, 2, 2]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "multiclass",
                "n_estimators": 3,
                "min_child_samples": 1,
                "num_class": 3,
            },
            data,
        )
        self._test_classifier(X, model)
class TestLGBMConverter(unittest.TestCase): # Check tree implementation @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_implementation(self): warnings.filterwarnings("ignore") np.random.seed(0) X = np.random.rand(10, 1) X = np.array(X, dtype=np.float32) y = np.random.randint(2, size=10) for model in [ lgb.LGBMClassifier(n_estimators=1, max_depth=1), lgb.LGBMRegressor(n_estimators=1, max_depth=1) ]: for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]: model.fit(X, y) torch_model = hummingbird.ml.convert( model, "torch", extra_config={"tree_implementation": extra_config_param}) self.assertIsNotNone(torch_model) self.assertEqual( str(type(list(torch_model.model._operators)[0])), gbdt_implementation_map[extra_config_param]) def _run_lgbm_classifier_converter(self, num_classes, extra_config={}): warnings.filterwarnings("ignore") for max_depth in [1, 3, 8, 10, 12, None]: model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) torch_model = hummingbird.ml.convert(model, "torch", extra_config=extra_config) self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Binary classifier @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_binary_classifier_converter(self): self._run_lgbm_classifier_converter(2) # Gemm classifier @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_gemm_classifier_converter(self): self._run_lgbm_classifier_converter( 2, extra_config={"tree_implementation": "gemm"}) # Tree_trav classifier @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_tree_trav_classifier_converter(self): 
self._run_lgbm_classifier_converter( 2, extra_config={"tree_implementation": "tree_trav"}) # Perf_tree_trav classifier @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_perf_tree_trav_classifier_converter(self): self._run_lgbm_classifier_converter( 2, extra_config={"tree_implementation": "perf_tree_trav"}) # Multi classifier @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_multi_classifier_converter(self): self._run_lgbm_classifier_converter(3) # Gemm multi classifier @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_gemm_multi_classifier_converter(self): self._run_lgbm_classifier_converter( 3, extra_config={"tree_implementation": "gemm"}) # Tree_trav multi classifier @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_tree_trav_multi_classifier_converter(self): self._run_lgbm_classifier_converter( 3, extra_config={"tree_implementation": "tree_trav"}) # Perf_tree_trav multi classifier @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_perf_tree_trav_multi_classifier_converter(self): self._run_lgbm_classifier_converter( 3, extra_config={"tree_implementation": "perf_tree_trav"}) def _run_lgbm_ranker_converter(self, num_classes, extra_config={}, label_gain=None): warnings.filterwarnings("ignore") for max_depth in [1, 3, 8, 10, 12, None]: model = lgb.LGBMRanker(n_estimators=10, max_depth=max_depth, label_gain=label_gain) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y, group=[X.shape[0]], eval_set=[(X, y)], eval_group=[X.shape[0]]) torch_model = hummingbird.ml.convert(model, "torch", extra_config=extra_config) self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict(X), 
torch_model.predict(X), rtol=1e-06, atol=1e-06) # Ranker - small, no label gain @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_binary_ranker_converter_no_label(self): self._run_lgbm_ranker_converter(30) # Ranker @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_binary_ranker_converter(self): self._run_lgbm_ranker_converter(1000, label_gain=list(range(1000))) # Gemm ranker @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_gemm_ranker_converter(self): self._run_lgbm_ranker_converter( 1000, extra_config={"tree_implementation": "gemm"}, label_gain=list(range(1000))) # Tree_trav ranker @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_tree_trav_ranker_converter(self): self._run_lgbm_ranker_converter( 1000, extra_config={"tree_implementation": "tree_trav"}, label_gain=list(range(1000))) # Perf_tree_trav ranker @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_perf_tree_trav_ranker_converter(self): self._run_lgbm_ranker_converter( 1000, extra_config={"tree_implementation": "perf_tree_trav"}, label_gain=list(range(1000))) def _run_lgbm_regressor_converter(self, num_classes, extra_config={}): warnings.filterwarnings("ignore") for max_depth in [1, 3, 8, 10, 12, None]: model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) torch_model = hummingbird.ml.convert(model, "torch", extra_config=extra_config) self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06) # Regressor @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def 
test_lgbm_binary_regressor_converter(self): self._run_lgbm_regressor_converter(1000) # Gemm regressor @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_gemm_regressor_converter(self): self._run_lgbm_regressor_converter( 1000, extra_config={"tree_implementation": "gemm"}) # Tree_trav regressor @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_tree_trav_regressor_converter(self): self._run_lgbm_regressor_converter( 1000, extra_config={"tree_implementation": "tree_trav"}) # Perf_tree_trav regressor @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_perf_tree_trav_regressor_converter(self): self._run_lgbm_regressor_converter( 1000, extra_config={"tree_implementation": "perf_tree_trav"}) # Float 64 classification test helper def _run_float64_lgbm_classifier_converter(self, num_classes, extra_config={}): warnings.filterwarnings("ignore") for max_depth in [1, 3, 8, 10, 12, None]: model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) y = np.random.randint(num_classes, size=100) model.fit(X, y) torch_model = hummingbird.ml.convert(model, "torch", extra_config=extra_config) self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Gemm classifier (float64 data) @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_float64_lgbm_gemm_classifier_converter(self): self._run_float64_lgbm_classifier_converter( 2, extra_config={"tree_implementation": "gemm"}) # Tree_trav classifier (float64 data) @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_float64_lgbm_tree_trav_classifier_converter(self): self._run_float64_lgbm_classifier_converter( 2, 
extra_config={"tree_implementation": "tree_trav"}) # Perf_tree_trav classifier (float64 data) @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_float64_lgbm_perf_tree_trav_classifier_converter(self): self._run_float64_lgbm_classifier_converter( 2, extra_config={"tree_implementation": "perf_tree_trav"}) # Float 64 regression test helper def _run_float64_lgbm_regressor_converter(self, num_classes, extra_config={}): warnings.filterwarnings("ignore") for max_depth in [1, 3, 8, 10, 12, None]: model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) y = np.random.randint(num_classes, size=100) model.fit(X, y) torch_model = hummingbird.ml.convert(model, "torch", extra_config=extra_config) self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06) # Gemm regressor (float64 data) @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_float64_lgbm_gemm_regressor_converter(self): self._run_float64_lgbm_regressor_converter( 1000, extra_config={"tree_implementation": "gemm"}) # Tree_trav regressor (float64 data) @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_float64_lgbm_tree_trav_regressor_converter(self): self._run_float64_lgbm_regressor_converter( 1000, extra_config={"tree_implementation": "tree_trav"}) # Perf_tree_trav regressor (float64 data) @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_float64_lgbm_perf_tree_trav_regressor_converter(self): self._run_float64_lgbm_regressor_converter( 1000, extra_config={"tree_implementation": "perf_tree_trav"}) # Random forest in lgbm, the conversion fails with the latest # version of lightgbm. 
The direct converter to pytorch should be # updated or the model could be converted into ONNX then # converted into pytorch. # For more details, see ONNX converter at https://github.com/onnx/ # onnxmltools/blob/master/onnxmltools/convert/lightgbm/ # operator_converters/LightGbm.py#L313. @unittest.skipIf( True, reason="boosting_type=='rf' produces different probabilites.") @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_classifier_random_forest_rf(self): warnings.filterwarnings("ignore") model = lgb.LGBMClassifier(boosting_type="rf", n_estimators=128, max_depth=5, subsample=0.3, bagging_freq=1) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(2, size=100) model.fit(X, y) torch_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Random forest in lgbm @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_classifier_random_forest_gbdt(self): warnings.filterwarnings("ignore") model = lgb.LGBMClassifier(boosting_type="gbdt", n_estimators=128, max_depth=5, subsample=0.3, bagging_freq=1) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(2, size=100) model.fit(X, y) torch_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Test Tweedie loss in lgbm @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_tweedie(self): warnings.filterwarnings("ignore") model = lgb.LGBMRegressor(objective="tweedie", n_estimators=2, max_depth=5) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(100, 
size=100) model.fit(X, y) torch_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06) # Backend tests. # Test TorchScript backend regression. @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_regressor_converter_torchscript(self): warnings.filterwarnings("ignore") for max_depth in [1, 3, 8, 10, 12]: model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(1000, size=100) model.fit(X, y) torch_model = hummingbird.ml.convert(model, "torchscript", X, extra_config={}) self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06) # Test TorchScript backend classification. @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed") def test_lgbm_classifier_converter_torchscript(self): warnings.filterwarnings("ignore") for max_depth in [1, 3, 8, 10, 12]: model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(2, size=100) model.fit(X, y) torch_model = hummingbird.ml.convert(model, "torchscript", X, extra_config={}) self.assertIsNotNone(torch_model) np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Check that we can export into ONNX. 
@unittest.skipIf(not onnx_runtime_installed(), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_lightgbm_onnx(self):
    warnings.filterwarnings("ignore")

    # Tiny hand-written regression problem: 3 samples, 2 features.
    X = [[0, 1], [1, 1], [2, 0]]
    X = np.array(X, dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)
    model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    model.fit(X, y)

    # Create ONNX model
    onnx_model = hummingbird.ml.convert(model, "onnx", X)

    # The converted model's output is flattened before being compared with LightGBM's.
    np.testing.assert_allclose(onnx_model.predict(X).flatten(), model.predict(X))


# TVM backend tests.
# These tests build LightGBM models, so they are guarded on LightGBM as well as
# on TVM; otherwise they would error instead of skip when only TVM is installed.
@unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_lightgbm_tvm_regressor(self):
    warnings.filterwarnings("ignore")

    # The input data is constant, so it is built once for all tree implementations.
    X = [[0, 1], [1, 1], [2, 0]]
    X = np.array(X, dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)

    for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
        # A fresh model is fitted per implementation, as in the original test.
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create TVM model.
        tvm_model = hummingbird.ml.convert(model, "tvm", X, extra_config={"tree_implementation": tree_implementation})

        # Check results.
        np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))


@unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM installed")
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_lightgbm_tvm_classifier(self):
    warnings.filterwarnings("ignore")

    # The input data is constant, so it is built once for all tree implementations.
    X = [[0, 1], [1, 1], [2, 0]]
    X = np.array(X, dtype=np.float32)
    y = np.array([0, 1, 0], dtype=np.float32)

    for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
        # A fresh model is fitted per implementation, as in the original test.
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create TVM model.
        tvm_model = hummingbird.ml.convert(model, "tvm", X, extra_config={"tree_implementation": tree_implementation})

        # Check results: both the predicted labels and the class probabilities.
        np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))
        np.testing.assert_allclose(tvm_model.predict_proba(X), model.predict_proba(X))


# Test TVM with large input datasets.
# This test builds a LightGBM model, so it is guarded on LightGBM as well as on
# TVM; otherwise it would error instead of skip when only TVM is installed.
@unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM installed")
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_lightgbm_tvm_classifier_large_dataset(self):
    warnings.filterwarnings("ignore")

    # Seed the generator (as the other random-data tests do) so that a
    # tolerance failure can be reproduced, and build the dataset once:
    # it is identical for every tree implementation.
    np.random.seed(0)
    size = 200000
    X = np.random.rand(size, 28)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(2, size=size)

    for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
        # A fresh model is fitted per implementation, as in the original test.
        model = lgb.LGBMClassifier(n_estimators=100, max_depth=3)
        model.fit(X, y)

        # Create TVM model.
        tvm_model = hummingbird.ml.convert(
            model,
            "tvm",
            X,
            extra_config={
                constants.TREE_IMPLEMENTATION: tree_implementation,
                constants.TREE_OP_PRECISION_DTYPE: "float64",
            },
        )

        # Check results. Looser tolerance than the small-dataset TVM tests.
        np.testing.assert_allclose(tvm_model.predict_proba(X), model.predict_proba(X), rtol=1e-04, atol=1e-04)