def __init__(self, model, n_threads=None, batch_size=None, extra_config=None):
    """Create an ONNX runtime inference session wrapping *model*.

    Args:
        model: ONNX model (protobuf) to serve; must support ``SerializeToString``.
        n_threads: Optional intra-op thread count. When set, execution is
            forced to sequential mode with a single inter-op thread so the
            intra-op setting is actually respected.
        batch_size: Optional batch size, forwarded to the base container.
        extra_config: Optional dict of extra configuration options.
    """
    # BUG FIX: the default used to be the mutable literal `{}`, which is
    # created once and shared across every instance constructed without an
    # explicit extra_config — mutations would leak between containers.
    if extra_config is None:
        extra_config = {}
    super(ONNXSklearnContainer, self).__init__(model, n_threads, batch_size, extra_config)
    assert onnx_runtime_installed(), "ONNX Container requires ONNX runtime installed."

    sess_options = ort.SessionOptions()
    if self._n_threads is not None:
        sess_options.intra_op_num_threads = self._n_threads
        sess_options.inter_op_num_threads = 1
        sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    self._session = ort.InferenceSession(self._model.SerializeToString(), sess_options=sess_options)
    # Iterate the session metadata directly instead of indexing by range();
    # also avoid shadowing the `input` builtin.
    self._output_names = [model_output.name for model_output in self._session.get_outputs()]
    self._input_names = [model_input.name for model_input in self._session.get_inputs()]
    self._extra_config = extra_config
class TestNoExtra(unittest.TestCase):
    """
    These tests are meant to be run on a clean container after doing
    `pip install hummingbird-ml` without any of the `extra` packages
    """

    # Test no LGBM returns false on lightgbm_installed()
    @unittest.skipIf(lightgbm_installed(), reason="Test when LightGBM is not installed")
    def test_lightgbm_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not lightgbm_installed()

    # Test no XGB returns false on xgboost_installed()
    @unittest.skipIf(xgboost_installed(), reason="Test when XGBoost is not installed")
    def test_xgboost_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not xgboost_installed()

    # Test no ONNX returns false on onnx_installed()
    @unittest.skipIf(onnx_runtime_installed(), reason="Test when ONNX is not installed")
    def test_onnx_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not onnx_runtime_installed()

    # Test no ONNXMLTOOLS returns false on onnx_ml_tools_installed()
    @unittest.skipIf(onnx_ml_tools_installed(), reason="Test when ONNXMLTOOLS is not installed")
    def test_onnx_ml_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not onnx_ml_tools_installed()

    # Test no TVM returns false on tvm_installed()
    # BUG FIX: the skip condition previously checked onnx_ml_tools_installed(),
    # so this test would run (and fail) on machines that have TVM but not
    # ONNXMLTOOLS, and be wrongly skipped on machines with ONNXMLTOOLS but no TVM.
    @unittest.skipIf(tvm_installed(), reason="Test when TVM is not installed")
    def test_tvm_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not tvm_installed()

    # Test that we can import the converter successfully without installing [extra]
    def test_import_convert_no_extra(self):
        try:
            from hummingbird.ml import convert
        except Exception:  # TODO something more specific?
            self.fail("Unexpected Error on importing convert without extra packages")
class TestBackends(unittest.TestCase):
    """Smoke tests for backend discovery, backend-name handling, and input requirements."""

    @staticmethod
    def _fitted_classifier():
        """Fit a small GradientBoostingClassifier on random float32 data; return (model, X)."""
        np.random.seed(0)
        X = np.random.rand(100, 200).astype(np.float32)
        y = np.random.randint(2, size=100)
        clf = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        clf.fit(X, y)
        return clf, X

    # The list of available backends can be enumerated.
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Backend names are accepted regardless of letter case.
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        clf, X = self._fitted_classifier()
        hb_model = hummingbird.ml.convert(clf, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(clf.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # "pytorch" (any casing) remains a valid alias for the torch backend.
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        clf, X = self._fitted_classifier()
        hb_model = hummingbird.ml.convert(clf, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(clf.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # An unknown backend name must raise MissingBackend.
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        clf, _ = self._fitted_classifier()
        self.assertRaises(MissingBackend, hummingbird.ml.convert, clf, "scala")

    # The torch.jit backend needs sample input data to trace the model.
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        clf, _ = self._fitted_classifier()
        self.assertRaises(RuntimeError, hummingbird.ml.convert, clf, "torch.jit")

    # ONNX conversion requires test_data (or initial_types on the ONNX-ML side).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_test_data(self):
        warnings.filterwarnings("ignore")
        clf, X = self._fitted_classifier()
        onnx_ml_model = convert_sklearn(
            clf, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")
class TestONNXBinarizer(unittest.TestCase):
    """Parity tests between the ONNX-ML Binarizer operator and its Hummingbird conversion."""

    def _test_binarizer_converter(self, threshold):
        """Fit a Binarizer at *threshold*; return (ONNX-ML predictions, Hummingbird predictions)."""
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32)

        # Reference scikit-learn model.
        skl_model = Binarizer(threshold=threshold)
        skl_model.fit(X)

        # ONNX-ML reference and its Hummingbird conversion.
        onnx_ml_model = convert_sklearn(
            skl_model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))]
        )
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Score the ONNX-ML reference through onnxruntime.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        onnx_ml_pred = session.run(output_names, {session.get_inputs()[0].name: X})[0]

        return onnx_ml_pred, onnx_model.transform(X)

    # Zero threshold.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_binarizer_converter_0thresh(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_binarizer_converter(0.0)
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Positive threshold.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_binarizer_converter_posthresh(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_binarizer_converter(2.0)
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Negative threshold.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_binarizer_converter_negthresh(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_binarizer_converter(-2.0)
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # A corrupted ONNX-ML model must raise RuntimeError during conversion.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_binarizer_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32)
        skl_model = Binarizer(threshold=0)
        skl_model.fit(X)

        onnx_ml_model = convert_sklearn(
            skl_model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))]
        )
        # Blank out the first node attribute name so the converter cannot parse it.
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
class TestBackends(unittest.TestCase):
    """Tests for backend conversion, save/load round-trips, and input validation.

    NOTE(review): this file defines more than one class named ``TestBackends``;
    later definitions shadow earlier ones, so only the last one is collected by
    unittest. Consider renaming the duplicates so every test actually runs.
    """

    @staticmethod
    def _fitted_gbdt(as_float32=True):
        """Fit a small GradientBoostingClassifier on random data; return (model, X)."""
        np.random.seed(0)
        X = np.random.rand(100, 200)
        if as_float32:
            X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)
        model = GradientBoostingClassifier(n_estimators=10, max_depth=10)
        model.fit(X, y)
        return model, X

    def _save_load_roundtrip(self, backend, save_name, loader, needs_input):
        """Convert to *backend*, save to *save_name*, reload via *loader*, compare predictions.

        Cleans up the ``<base>.zip`` archive produced by save() and the ``<base>/``
        directory produced by load(), mirroring the original per-test cleanup.
        """
        model, X = self._fitted_gbdt()
        if needs_input:
            hb_model = hummingbird.ml.convert(model, backend, X)
        else:
            hb_model = hummingbird.ml.convert(model, backend)
        self.assertIsNotNone(hb_model)
        hb_model.save(save_name)
        hb_model_loaded = loader(save_name)
        np.testing.assert_allclose(
            hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06
        )
        base = save_name[: -len(".zip")] if save_name.endswith(".zip") else save_name
        os.remove(base + ".zip")
        shutil.rmtree(base)

    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        model, X = self._fitted_gbdt()
        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        model, X = self._fitted_gbdt()
        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch save and load
    def test_pytorch_save_load(self):
        warnings.filterwarnings("ignore")
        self._save_load_roundtrip("torch", "pt-tmp", hummingbird.ml.TorchContainer.load, needs_input=False)

    # Test pytorch save and generic load
    def test_pytorch_save_generic_load(self):
        warnings.filterwarnings("ignore")
        self._save_load_roundtrip("torch", "pt-tmp", hummingbird.ml.load, needs_input=False)

    # Test torchscript save and load
    def test_torchscript_save_load(self):
        warnings.filterwarnings("ignore")
        self._save_load_roundtrip("torch.jit", "ts-tmp", hummingbird.ml.TorchContainer.load, needs_input=True)

    # Test torchscript save and generic load
    def test_torchscript_save_generic_load(self):
        warnings.filterwarnings("ignore")
        self._save_load_roundtrip("torch.jit", "ts-tmp", hummingbird.ml.load, needs_input=True)

    # Test not supported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        model, _ = self._fitted_gbdt()
        # Test scala backend raises an exception
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model, "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        model, _ = self._fitted_gbdt()
        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        warnings.filterwarnings("ignore")
        model, _ = self._fitted_gbdt()
        # Test tvm requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test tvm save and load
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_save_load(self):
        warnings.filterwarnings("ignore")
        self._save_load_roundtrip("tvm", "tvm-tmp", hummingbird.ml.TVMContainer.load, needs_input=True)

    # Test tvm save and generic load
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_save_generic_load(self):
        warnings.filterwarnings("ignore")
        self._save_load_roundtrip("tvm", "tvm-tmp", hummingbird.ml.load, needs_input=True)

    # Test tvm save and load with an explicit .zip file name
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_save_load_zip(self):
        warnings.filterwarnings("ignore")
        self._save_load_roundtrip("tvm", "tvm-tmp.zip", hummingbird.ml.TVMContainer.load, needs_input=True)

    # Test onnx requires test_data or initial_types (float input)
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_no_test_data_float(self):
        warnings.filterwarnings("ignore")
        model, X = self._fitted_gbdt()
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )
        # With initial_types on the ONNX-ML model, no test_data is required.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx 0 shape input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_zero_shape_input(self):
        warnings.filterwarnings("ignore")
        model, X = self._fitted_gbdt(as_float32=False)
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", DoubleTensorType([0, X.shape[1]]))], target_opset=11
        )
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, double input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        model, X = self._fitted_gbdt(as_float32=False)
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", DoubleTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, long input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        # BUG FIX: was `model = model = StandardScaler(...)` (duplicated assignment).
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)
        model.fit(X)
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int64TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, int input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int32TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, string input (not supported -> RuntimeError)
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", StringTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")

    # Test ONNX save and load
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_save_load(self):
        warnings.filterwarnings("ignore")
        self._save_load_roundtrip("onnx", "onnx-tmp", hummingbird.ml.ONNXContainer.load, needs_input=True)

    # Test ONNX save and generic load
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_save_generic_load(self):
        warnings.filterwarnings("ignore")
        self._save_load_roundtrip("onnx", "onnx-tmp", hummingbird.ml.load, needs_input=True)

    # Test for when the user forgets to add a target (ex: convert(model, output)
    # rather than convert(model, 'torch')) due to API change
    def test_forgotten_backend_string(self):
        from sklearn.preprocessing import LabelEncoder

        model = LabelEncoder()
        data = np.array([1, 4, 5, 2, 0, 2], dtype=np.int32)
        model.fit(data)
        self.assertRaises(ValueError, hummingbird.ml.convert, model, [("input", Int32TensorType([6, 1]))])
class TestONNXScaler(unittest.TestCase):
    """Parity tests between ONNX-ML scaler operators and their Hummingbird conversions."""

    def _test_scaler_converter(self, model):
        """Fit *model*, convert via ONNX-ML and Hummingbird, return both prediction arrays."""
        warnings.filterwarnings("ignore")
        X = np.array(
            [[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0], [1.0, 0.0, -2.0]], dtype=np.float32
        )
        model.fit(X)

        # ONNX-ML reference model with a dynamic batch dimension.
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("float_input", FloatTensorType([None, X.shape[1]]))]
        )
        # Hummingbird conversion of that reference.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Score the reference through onnxruntime.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)[0]

        return onnx_ml_pred, onnx_model.transform(X)

    # StandardScaler with_mean=True, with_std=True
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_standard_scaler_onnx_tt(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_scaler_converter(StandardScaler(with_mean=True, with_std=True))
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # StandardScaler with_mean=True, with_std=False
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_standard_scaler_onnx_tf(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_scaler_converter(StandardScaler(with_mean=True, with_std=False))
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # StandardScaler with_mean=False, with_std=False
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_standard_scaler_onnx_ff(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_scaler_converter(StandardScaler(with_mean=False, with_std=False))
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # RobustScaler with with_centering=True
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_robust_scaler_onnx_t(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_scaler_converter(RobustScaler(with_centering=True))
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # RobustScaler with with_centering=False
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_robust_scaler_onnx_f(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_scaler_converter(RobustScaler(with_centering=False))
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # MaxAbsScaler
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_max_abs_scaler_onnx(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_scaler_converter(MaxAbsScaler())
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # MinMaxScaler
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_min_max_scaler_onnx(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_scaler_converter(MinMaxScaler())
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # A malformed ONNX-ML model must raise RuntimeError on conversion.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_scaler_converter_raises_rt_onnx(self):
        warnings.filterwarnings("ignore")
        X = np.array(
            [[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0], [1.0, 0.0, -2.0]], dtype=np.float32
        )
        scaler = StandardScaler()
        scaler.fit(X)

        onnx_ml_model = convert_sklearn(
            scaler, initial_types=[("float_input", FloatTensorType(X.shape))]
        )
        # Blank out the first node attribute name so the converter cannot parse it.
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)

    # Same parity check with float64 input.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_scaler_converter_float_64(self):
        warnings.filterwarnings("ignore")
        X = np.array(
            [[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0], [1.0, 0.0, -2.0]], dtype=np.float64
        )
        scaler = StandardScaler()
        scaler.fit(X)

        onnx_ml_model = convert_sklearn(
            scaler, initial_types=[("double_input", DoubleTensorType([None, X.shape[1]]))]
        )
        onnx_model = convert(onnx_ml_model, "onnx", X)

        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)[0]

        np.testing.assert_allclose(onnx_ml_pred, onnx_model.transform(X), rtol=1e-06, atol=1e-06)
# onnx_runtime_installed() must report False when the ONNX runtime is absent.
def test_onnx_installed_false(self):
    warnings.filterwarnings("ignore")
    ort_available = onnx_runtime_installed()
    assert not ort_available
class TestONNXImputer(unittest.TestCase):
    """Parity tests between the ONNX-ML Imputer operator and Hummingbird conversions."""

    def _test_imputer_converter(self, model, mode="onnx"):
        """Fit *model*, convert through ONNX-ML and Hummingbird (*mode*), return both outputs."""
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
        model.fit(X)

        # ONNX-ML reference model.
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))]
        )

        # Reference predictions via onnxruntime.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        onnx_ml_pred = session.run(output_names, {session.get_inputs()[0].name: X})[0]

        # Hummingbird conversion and its predictions.
        converted = convert(onnx_ml_model, mode, X)
        return onnx_ml_pred, converted.transform(X)

    # Constant strategy with the default fill value.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_imputer_const(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_imputer_converter(SimpleImputer(strategy="constant"))
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Constant strategy filling NaNs with 0.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_imputer_const_nan0(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_imputer_converter(SimpleImputer(strategy="constant", fill_value=0))
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # Mean strategy.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_imputer_mean(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_imputer_converter(SimpleImputer(strategy="mean", fill_value="nan"))
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)

    # A corrupted ONNX-ML model must raise RuntimeError during conversion.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_imputer_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        imputer = SimpleImputer(strategy="mean", fill_value="nan")
        X = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
        imputer.fit(X)

        onnx_ml_model = convert_sklearn(
            imputer, initial_types=[("float_input", FloatTensorType_onnx(X.shape))]
        )
        # Blank out the first node attribute name so the converter cannot parse it.
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)

    # Same constant-strategy parity check against the torch backend.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS",
    )
    def test_onnx_imputer_torch(self, rtol=1e-06, atol=1e-06):
        expected, actual = self._test_imputer_converter(SimpleImputer(strategy="constant"), mode="torch")
        np.testing.assert_allclose(expected, actual, rtol=rtol, atol=atol)
class TestBackends(unittest.TestCase):
    """Smoke tests for backend discovery and the ``convert`` entry point.

    NOTE(review): a second class named ``TestBackends`` is defined later in
    this file and rebinds the name, so the tests in THIS class are never
    collected by unittest — confirm and remove the duplicate definition.
    """

    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        # Mixed-case backend name must still resolve to the torch backend.
        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        # "pytorch" (any case) is accepted as an alias for "torch".
        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test not supported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # An unknown backend ("scala") raises MissingBackend.
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model, "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # torchscript conversion without test_input must raise.
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # TVM conversion without test_input must raise.
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_float(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # With float initial_types already in the model, no test_data is needed.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, double input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", DoubleTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Double-typed input also works without test_data.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, long input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        # NOTE(review): redundant double assignment kept verbatim from the original.
        model = model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int64TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Int64-typed input also works without test_data.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, int input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int32TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Int32-typed input also works without test_data.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, string input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", StringTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # String inputs are not supported: conversion must raise.
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")
class TestBackends(unittest.TestCase):
    """End-to-end tests for backend selection, container save/load round-trips
    (torch, torchscript, TVM, ONNX), and Spark pandas-UDF deployment.

    NOTE(review): this class shadows an earlier class of the same name in
    this file; only the tests defined here are collected by unittest.
    """

    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        # Mixed-case backend name must still resolve to the torch backend.
        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch save and load
    def test_pytorch_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        # save() produces "pt-tmp.zip"; the backend-specific container reloads it.
        hb_model.save("pt-tmp")

        hb_model_loaded = hummingbird.ml.TorchContainer.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

        os.remove("pt-tmp.zip")

    # Test pytorch save and generic load
    def test_pytorch_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        # The generic loader must dispatch to the torch container.
        hb_model_loaded = hummingbird.ml.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_load(self):
        """Loading the same saved model twice must not fail."""
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hummingbird.ml.load("pt-tmp")
        hummingbird.ml.load("pt-tmp")

        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_more_versions(self):
        """Extra entries in the saved version-configuration file are tolerated."""
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip")

        # Adding a new library does not create problems.
        with open(os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "r") as file:
            configuration = file.readlines()
        configuration.append("\nlibx=1.3")
        os.remove(os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))
        with open(os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "w") as file:
            file.writelines(configuration)
        shutil.make_archive("pt-tmp", "zip", "pt-tmp")

        hummingbird.ml.load("pt-tmp")
        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_less_versions(self):
        """Missing entries in the saved version-configuration file are tolerated."""
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip")

        # Removing a library does not create problems.
        with open(os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "r") as file:
            configuration = file.readlines()
        # Keep only the last configuration line (drops the other library pins).
        configuration = configuration[-1]
        os.remove(os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))
        with open(os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "w") as file:
            file.writelines(configuration)
        shutil.make_archive("pt-tmp", "zip", "pt-tmp")

        hummingbird.ml.load("pt-tmp")
        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_different_versions(self):
        """Mismatched library versions in the configuration file are tolerated."""
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip")

        # Changing the version of a library does not create problems.
        with open(os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "r") as file:
            configuration = file.readlines()
        configuration[0] = "hummingbird=0.0.0.1\n"
        os.remove(os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))
        with open(os.path.join("pt-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "w") as file:
            file.writelines(configuration)
        shutil.make_archive("pt-tmp", "zip", "pt-tmp")

        hummingbird.ml.load("pt-tmp")
        os.remove("pt-tmp.zip")

    # Test torchscript save and load
    def test_torchscript_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        # torchscript ("torch.jit") requires sample input for tracing.
        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.TorchContainer.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

        os.remove("ts-tmp.zip")

    # Test torchscript save and generic load
    def test_torchscript_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

        os.remove("ts-tmp.zip")

    def test_load_fails_bad_path(self):
        """Loading from a nonexistent path must raise AssertionError."""
        # Asserts for bad path with extension
        self.assertRaises(AssertionError, hummingbird.ml.load, "nonsense.zip")
        self.assertRaises(AssertionError, hummingbird.ml.TorchContainer.load, "nonsense.zip")

        # Asserts for bad path with no extension
        self.assertRaises(AssertionError, hummingbird.ml.load, "nonsense")
        self.assertRaises(AssertionError, hummingbird.ml.TorchContainer.load, "nonsense")

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_load_fails_bad_path_onnx(self):
        """ONNX container load from a nonexistent path must raise AssertionError."""
        self.assertRaises(AssertionError, hummingbird.ml.ONNXContainer.load, "nonsense.zip")
        self.assertRaises(AssertionError, hummingbird.ml.ONNXContainer.load, "nonsense")

    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_load_fails_bad_path_tvm(self):
        """TVM container load from a nonexistent path must raise AssertionError."""
        self.assertRaises(AssertionError, hummingbird.ml.TVMContainer.load, "nonsense.zip")
        self.assertRaises(AssertionError, hummingbird.ml.TVMContainer.load, "nonsense")

    # Test not supported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # An unknown backend ("scala") raises MissingBackend.
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model, "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # torchscript conversion without test_input must raise.
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # TVM conversion without test_input must raise.
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test tvm save and load
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

        os.remove("tvm-tmp.zip")

    # Test tvm save and generic load
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

        os.remove("tvm-tmp.zip")

    # Test tvm save and load zip file
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_save_load_zip(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        # Saving/loading with an explicit ".zip" suffix also works.
        hb_model.save("tvm-tmp.zip")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp.zip")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

        os.remove("tvm-tmp.zip")

    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_save_load_load(self):
        """Loading the same saved TVM model twice must not fail."""
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp.zip")

        hummingbird.ml.TVMContainer.load("tvm-tmp.zip")
        hummingbird.ml.TVMContainer.load("tvm-tmp.zip")

        os.remove("tvm-tmp.zip")

    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_save_load_no_versions(self):
        """A saved model with no version-configuration file still loads."""
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        shutil.unpack_archive("tvm-tmp.zip", "tvm-tmp", format="zip")

        # Removing the configuration file with the versions does not create problems.
        os.remove(os.path.join("tvm-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))

        hummingbird.ml.load("tvm-tmp")
        os.remove("tvm-tmp.zip")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_float(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=11)

        # With float initial_types already in the model, no test_data is needed.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx 0 shape input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_zero_shape_input(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # Create ONNX-ML model with a 0 (dynamic) batch dimension.
        onnx_ml_model = convert_sklearn(model, initial_types=[
            ("input", DoubleTensorType([0, X.shape[1]]))
        ], target_opset=11)

        # Conversion works without test_data despite the 0-sized dimension.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, double input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", DoubleTensorType([X.shape[0], X.shape[1]]))], target_opset=11)

        # Double-typed input also works without test_data.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, long input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        # NOTE(review): redundant double assignment kept verbatim from the original.
        model = model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int64TensorType([X.shape[0], X.shape[1]]))], target_opset=11)

        # Int64-typed input also works without test_data.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, int input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int32TensorType([X.shape[0], X.shape[1]]))], target_opset=11)

        # Int32-typed input also works without test_data.
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, string input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", StringTensorType([X.shape[0], X.shape[1]]))], target_opset=11)

        # String inputs are not supported: conversion must raise.
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")

    # Test ONNX save and load
    @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT")
    def test_onnx_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.ONNXContainer.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

        os.remove("onnx-tmp.zip")

    # Test ONNX save and generic load
    @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT")
    def test_onnx_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

        os.remove("onnx-tmp.zip")

    # Test ONNX save and load twice
    @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT")
    def test_onnx_save_load_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hummingbird.ml.load("onnx-tmp")
        hummingbird.ml.load("onnx-tmp")

        os.remove("onnx-tmp.zip")

    @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT")
    def test_onnx_save_load_no_versions(self):
        """A saved ONNX model with no version-configuration file still loads."""
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)
        model.fit(X, y)
        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        shutil.unpack_archive("onnx-tmp.zip", "onnx-tmp", format="zip")

        # Removing the configuration file with the versions does not create problems.
        os.remove(os.path.join("onnx-tmp", constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))

        hummingbird.ml.load("onnx-tmp")
        os.remove("onnx-tmp.zip")

    # Test for when the user forgets to add a target (ex: convert(model, output) rather than convert(model, 'torch')) due to API change
    def test_forgotten_backend_string(self):
        from sklearn.preprocessing import LabelEncoder

        model = LabelEncoder()
        data = np.array([1, 4, 5, 2, 0, 2], dtype=np.int32)
        model.fit(data)

        # Passing initial_types where the backend string belongs raises ValueError.
        self.assertRaises(ValueError, hummingbird.ml.convert, model, [("input", Int32TensorType([6, 1]))])

    # Test ONNX
    @unittest.skipIf(not onnx_runtime_installed(), reason="ONNX test requires ORT")
    def test_onnx(self):
        import numpy as np
        import lightgbm as lgb
        from hummingbird.ml import convert

        # Create some random data for binary classification.
        num_classes = 2
        X = np.array(np.random.rand(10000, 28), dtype=np.float32)
        y = np.random.randint(num_classes, size=10000)

        model = lgb.LGBMClassifier()
        model.fit(X, y)

        # ONNX conversion of an LGBM model without test input must raise.
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "onnx")

    # Test Spark UDF
    @unittest.skipIf(
        os.name == "nt" or not sparkml_installed() or LooseVersion(pyspark.__version__) < LooseVersion("3"),
        reason="UDF Test requires spark >= 3",
    )
    def test_udf_torch(self):
        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=77, test_size=0.2,)
        spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train))
        sql_context.registerDataFrameAsTable(spark_df, "IRIS")

        model = GradientBoostingClassifier(n_estimators=10)
        model.fit(X_train, y_train)
        hb_model = hummingbird.ml.convert(model, "torch")

        # Broadcast the model.
        broadcasted_model = spark.sparkContext.broadcast(hb_model)

        # UDF definition.
        @pandas_udf("long")
        def udf_hb_predict(iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
            model = broadcasted_model.value
            for args in iterator:
                # Re-assemble the per-column Series into one frame for predict.
                data_unmangled = pd.concat([feature for feature in args], axis=1)
                predictions = model.predict(data_unmangled)
                yield pd.Series(np.array(predictions))

        # Register the UDF.
        sql_context.udf.register("PREDICT", udf_hb_predict)

        # Run the query.
        sql_context.sql("SELECT SUM(prediction) FROM (SELECT PREDICT(*) as prediction FROM IRIS)").show()

    @unittest.skipIf(
        os.name == "nt" or not sparkml_installed() or LooseVersion(pyspark.__version__) < LooseVersion("3"),
        reason="UDF Test requires spark >= 3",
    )
    def test_udf_torch_jit_broadcast(self):
        """Broadcasting a torchscript container directly is not picklable."""
        import pickle

        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=77, test_size=0.2,)
        spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train))
        sql_context.registerDataFrameAsTable(spark_df, "IRIS")

        model = GradientBoostingClassifier(n_estimators=10)
        model.fit(X_train, y_train)
        hb_model = hummingbird.ml.convert(model, "torch.jit", X_test)

        # Broadcast the model returns an error.
        self.assertRaises(pickle.PickleError, spark.sparkContext.broadcast, hb_model)

    @unittest.skipIf(
        os.name == "nt" or not sparkml_installed() or LooseVersion(pyspark.__version__) < LooseVersion("3"),
        reason="UDF Test requires spark >= 3",
    )
    def test_udf_torch_jit_spark_file(self):
        """Ship the torchscript model via Spark files and rebuild it in the UDF."""
        import dill
        import torch.jit

        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=77, test_size=0.2,)
        spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train))
        sql_context.registerDataFrameAsTable(spark_df, "IRIS")

        model = GradientBoostingClassifier(n_estimators=10)
        model.fit(X_train, y_train)
        hb_model = hummingbird.ml.convert(model, "torch.jit", X_test)

        # Save the file locally.
        if os.path.exists("deployed_model.zip"):
            os.remove("deployed_model.zip")
        torch.jit.save(hb_model.model, "deployed_model.zip")
        # Drop the non-picklable torchscript module so the container can be broadcast.
        hb_model._model = None

        # Share the model using spark file and broadcast the container.
        spark.sparkContext.addFile("deployed_model.zip")
        broadcasted_container = spark.sparkContext.broadcast(hb_model)

        # UDF definition.
        @pandas_udf("long")
        def udf_hb_predict(iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
            # Rebuild the container on the executor: torchscript module comes
            # from the Spark-distributed file, the rest from the broadcast.
            location = SparkFiles.get("deployed_model.zip")
            torch_model = torch.jit.load(location)
            container = broadcasted_container.value
            container._model = torch_model
            model = container
            for args in iterator:
                data_unmangled = pd.concat([feature for feature in args], axis=1)
                predictions = model.predict(data_unmangled.values)
                yield pd.Series(np.array(predictions))

        # Register the UDF.
        sql_context.udf.register("PREDICT", udf_hb_predict)

        # Run the query.
        sql_context.sql("SELECT SUM(prediction) FROM (SELECT PREDICT(*) as prediction FROM IRIS)").show()

        os.remove("deployed_model.zip")
class TestSklearnPipeline(unittest.TestCase):
    """Conversion tests for sklearn Pipeline / FeatureUnion / ColumnTransformer.

    Each test fits a small sklearn model, converts it with
    ``hummingbird.ml.convert`` and checks numeric agreement within 1e-06.
    """

    def test_pipeline(self):
        data = np.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    # NOTE(review): effectively identical to test_pipeline above — the float
    # literals produce the same float32 array. Candidate for removal.
    def test_pipeline2(self):
        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_combine_inputs_union_in_pipeline(self):
        from sklearn.preprocessing import StandardScaler
        from sklearn.pipeline import Pipeline

        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=np.float32)
        model = Pipeline([
            ("scaler1", StandardScaler()),
            ("union", FeatureUnion([("scaler2", StandardScaler()), ("scaler3", MinMaxScaler())])),
        ])
        model.fit(data)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_combine_inputs_floats_ints(self):
        # Mixed int/float columns in a plain Python list.
        data = [[0, 0.0], [0, 0.0], [1, 1.0], [1, 1.0]]
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_1(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(transformers=[("num", numeric_transformer, numeric_features)])
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_string(self):
        """
        TODO: Hummingbird does not yet support strings in this context. Should raise error.
        When this feature is complete, change this test.
        """
        # NOTE(review): this test downloads a CSV over the network at run time.
        # fit
        titanic_url = "https://raw.githubusercontent.com/amueller/scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv"
        data = pandas.read_csv(titanic_url)
        X = data.drop("survived", axis=1)
        y = data["survived"]

        # SimpleImputer on string is not available for string
        # in ONNX-ML specifications.
        # So we do it beforehand.
        X["pclass"].fillna("missing", inplace=True)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

        numeric_features = ["age", "fare"]
        numeric_transformer = Pipeline(
            steps=[("imputer", SimpleImputer(
                strategy="median")), ("scaler", StandardScaler())])

        categorical_features = ["pclass"]
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])

        clf = Pipeline(
            steps=[("preprocessor", preprocessor
                    ), ("classifier", LogisticRegression(solver="liblinear"))])

        to_drop = {
            "parch", "sibsp", "cabin", "ticket", "name", "body", "home.dest", "boat", "sex", "embarked"
        }

        X_train = X_train.copy()
        X_test = X_test.copy()
        X_train["pclass"] = X_train["pclass"].astype(np.int64)
        X_test["pclass"] = X_test["pclass"].astype(np.int64)
        X_train = X_train.drop(to_drop, axis=1)
        X_test = X_test.drop(to_drop, axis=1)

        clf.fit(X_train, y_train)

        torch_model = hummingbird.ml.convert(clf, "torch", X_test)

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            clf.predict(X_test),
            torch_model.predict(X_test),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        # NOTE(review): `sparse=` was renamed to `sparse_output=` in
        # scikit-learn 1.2 and removed in 1.4 — verify pinned sklearn version.
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_pandas(self):
        # Same as above but the converted model consumes the DataFrame directly.
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch", X_test)

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_pandas_ts(self):
        # TorchScript backend variant.
        iris = datasets.load_iris()
        X = np.array(
            iris.data[:, :3], np.float32
        )  # If we don't use float32 here, with python 3.5 and torch 1.5.1 will fail.
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])
        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch.jit", X_test)

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_weights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
        )
        model = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_weights_pandas(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch", X_test)

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="drop",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop_noweights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="drop",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="passthrough",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_noweights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="passthrough",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_slice(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        # NOTE(review): slice(0, 1) selects only column 0 ("vA") and
        # slice(3, 4) only column 3 ("vcat") — the stop index is exclusive.
        numeric_features = slice(0, 1)
        categorical_features = slice(3, 4)
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="passthrough",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    # Taken from https://github.com/microsoft/hummingbird/issues/388
    def test_pipeline_pca_rf(self):
        X, y = make_regression(n_samples=1000, n_features=8, n_informative=5, n_targets=1, random_state=0, shuffle=True)
        pca = PCA(n_components=8, svd_solver="randomized", whiten=True)
        clf = make_pipeline(
            StandardScaler(), pca, RandomForestRegressor(n_estimators=10, max_depth=30, random_state=0))
        clf.fit(X, y)

        model = hummingbird.ml.convert(clf, "pytorch")

        prediction_sk = clf.predict([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
        prediction_hb = model.predict(
            [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])

        np.testing.assert_allclose(prediction_sk, prediction_hb, rtol=1e-06, atol=1e-06)

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not onnx_runtime_installed(), reason="Test requires ORT installed")
    def test_pipeline_many_inputs(self):
        # One ONNX graph input per feature column.
        n_features = 18
        X = np.random.rand(100, n_features)
        y = np.random.randint(1000, size=100)

        scaler_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(
            transformers=[("scaling", scaler_transformer, list(range(n_features)))])
        model = RandomForestRegressor(n_estimators=10, max_depth=9)
        pipeline = Pipeline(steps=[("preprocessor", preprocessor), ("model", model)])

        pipeline.fit(X, y)

        X_test = tuple(np.split(X, n_features, axis=1))

        hb_model = hummingbird.ml.convert(pipeline, "onnx", X_test)

        assert len(hb_model.model.graph.input) == n_features

        np.testing.assert_allclose(
            pipeline.predict(X),
            np.array(hb_model.predict(X_test)).flatten(),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not onnx_runtime_installed(), reason="Test requires ORT installed")
    def test_pipeline_many_inputs_with_schema(self):
        # Input/output names supplied via extra_config must appear in the graph.
        n_features = 5
        X = np.random.rand(100, n_features)
        y = np.random.randint(1000, size=100)
        input_column_names = ["A", "B", "C", "D", "E"]
        output_column_names = ["score"]

        scaler_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(
            transformers=[("scaling", scaler_transformer, list(range(n_features)))])
        model = RandomForestRegressor(n_estimators=10, max_depth=9)
        pipeline = Pipeline(steps=[("preprocessor", preprocessor), ("model", model)])

        pipeline.fit(X, y)

        X_test = tuple(np.split(X, n_features, axis=1))
        extra_config = {
            constants.INPUT_NAMES: input_column_names,
            constants.OUTPUT_NAMES: output_column_names
        }

        hb_model = hummingbird.ml.convert(pipeline, "onnx", X_test, extra_config=extra_config)

        graph_inputs = [input.name for input in hb_model.model.graph.input]
        graph_outputs = [output.name for output in hb_model.model.graph.output]

        assert len(hb_model.model.graph.input) == n_features
        assert graph_inputs == input_column_names
        assert graph_outputs == output_column_names
class TestONNXOneHotEncoder(unittest.TestCase):
    """Parity tests between ONNX-ML OneHotEncoder models and their hummingbird
    ONNX conversions, compared through ONNX Runtime."""

    # Test OneHotEncoder with ints
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int(self, rtol=1e-06, atol=1e-06):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", IntTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with 2 inputs
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx2(self, rtol=1e-06, atol=1e-06):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3], [2, 1, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", IntTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with int64
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int64(self, rtol=1e-06, atol=1e-06):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int64)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", LongTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with strings. This test only works with pytorch >= 1.8
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.8.0"),
        reason="PyTorch exporter returns an error until version 1.8.0",
    )
    def test_model_one_hot_encoder_string(self):
        # NOTE(review): this test returns the two prediction sets without any
        # assertion, so it only checks that conversion/inference do not raise.
        # Consider adding an assert_allclose comparison — verify intent.
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", data)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: data}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(data)

        return onnx_ml_pred, onnx_pred

    # Test OneHotEncoder failcase when input data type is not supported
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_ohe_string_raises_type_error_onnx(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create ONNX model by calling converter, should raise error for strings
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx")
class TestIsolationForestConverter(unittest.TestCase):
    """Tests IsolationForest conversion across tree implementations and
    the torch / torch.jit / ONNX / TVM backends."""

    # Check tree implementation
    def test_iforest_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(10, 1)
        X = np.array(X, dtype=np.float32)
        model = IsolationForest(n_estimators=1, max_samples=2)
        for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
            model.fit(X)
            torch_model = hummingbird.ml.convert(
                model, "torch", extra_config={"tree_implementation": extra_config_param})
            self.assertIsNotNone(torch_model)
            # The requested implementation must be reflected in the operator
            # that ends up in the converted model.
            self.assertEqual(
                str(type(list(torch_model.model._operator_map.values())[0])),
                iforest_implementation_map[extra_config_param])

    def _run_isolation_forest_converter(self, extra_config={}):
        """Shared driver: fit forests over a range of max_samples and compare
        decision_function / score_samples / predict against sklearn."""
        warnings.filterwarnings("ignore")
        # max_samples is swept over powers of two to exercise different tree depths.
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            torch_model = hummingbird.ml.convert(model, "torch", extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       torch_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       torch_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X), torch_model.predict(X))

    # Isolation Forest
    def test_isolation_forest_converter(self):
        self._run_isolation_forest_converter()

    # Gemm Isolation Forest
    def test_isolation_forest_gemm_converter(self):
        self._run_isolation_forest_converter(
            extra_config={"tree_implementation": "gemm"})

    # Tree_trav Isolation Forest
    def test_isolation_forest_tree_trav_converter(self):
        self._run_isolation_forest_converter(
            extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav Isolation Forest
    def test_isolation_forest_perf_tree_trav_converter(self):
        self._run_isolation_forest_converter(
            extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 data tests
    def test_float64_isolation_forest_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            # X deliberately left as float64 (no float32 cast here).
            X = np.random.rand(100, 200)
            model.fit(X)
            torch_model = hummingbird.ml.convert(model, "torch", extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       torch_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       torch_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X), torch_model.predict(X))

    # Test TorchScript backend.
    def test_isolation_forest_ts_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            torch_model = hummingbird.ml.convert(model, "torch.jit", X, extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       torch_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       torch_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X), torch_model.predict(X))

    # Test ONNX backend.
    @unittest.skipIf(not (onnx_runtime_installed()), reason="ONNX tests require ORT")
    def test_isolation_forest_onnx_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            onnx_model = hummingbird.ml.convert(model, "onnx", X, extra_config={})
            self.assertIsNotNone(onnx_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       onnx_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       onnx_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X), onnx_model.predict(X))

    # Test TVM backend.
    @unittest.skipIf(not (tvm_installed()), reason="TVM test requires TVM")
    def test_isolation_forest_tvm_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            # Fuse depth capped so TVM compilation stays tractable on deep trees.
            hb_model = hummingbird.ml.convert(
                model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
            self.assertIsNotNone(hb_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       hb_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       hb_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X), hb_model.predict(X))
class TestSklearnNormalizer(unittest.TestCase):
    """
    Compares an ONNX-ML model produced from a scikit-learn LogisticRegression
    against its Hummingbird-converted ONNX counterpart.

    NOTE(review): despite the class name, nothing here tests a Normalizer —
    both tests exercise LogisticRegression. Likely a copy/paste or
    file-concatenation artifact; the name is kept because renaming would
    change which tests unittest discovers. TODO confirm and rename upstream.
    """

    def _test_regressor(self, classes):
        """Fit a LogisticRegression with `classes` labels, convert it to
        ONNX-ML and then to ONNX, and return (onnx_ml_pred, onnx_pred) where
        slot 0 holds probabilities and slot 1 holds labels."""
        n_features = 20
        n_total = 100
        np.random.seed(0)  # deterministic data
        warnings.filterwarnings("ignore")
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(classes, size=n_total)

        # Create SKL model for testing
        model = LogisticRegression(solver="liblinear", multi_class="ovr", fit_intercept=True)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        # Route each output into a fixed slot: labels -> [1], probabilities -> [0],
        # so callers can compare the two runs regardless of output ordering.
        for i in range(len(output_names)):
            if output_names[i] == "output_label":
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = [[] for i in range(len(output_names))]
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if output_names[i] == "output_label":
                onnx_pred[1] = pred[i]
            else:
                onnx_pred[0] = pred[i]

        return onnx_ml_pred, onnx_pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_logistic_regression_onnxml_binary(self, rtol=1e-06, atol=1e-06):
        # Two-class problem.
        onnx_ml_pred, onnx_pred = self._test_regressor(2)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        # ONNX-ML emits probabilities as a per-sample dict (ZipMap); flatten its
        # values before comparing with the plain ONNX probability array.
        np.testing.assert_allclose(
            list(map(lambda x: list(x.values()), onnx_ml_pred[0])), onnx_pred[0], rtol=rtol, atol=atol
        )  # probs

    @unittest.skipIf(
        not
        (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_logistic_regression_onnxml_multi(self, rtol=1e-06, atol=1e-06):
        # Three-class problem; same comparison as the binary case.
        onnx_ml_pred, onnx_pred = self._test_regressor(3)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        np.testing.assert_allclose(
            list(map(lambda x: list(x.values()), onnx_ml_pred[0])), onnx_pred[0], rtol=rtol, atol=atol
        )  # probs
class TestONNXLightGBMConverter(unittest.TestCase):
    """
    Compares ONNX-ML LightGBM models (via onnxmltools) with their
    Hummingbird-converted ONNX counterparts, scoring both through onnxruntime.

    NOTE(review): a class with this exact name is defined again later in this
    file; if both live in the same module, the later definition shadows this
    one and these tests are never discovered. Likely a file-concatenation
    artifact — TODO confirm and deduplicate.
    """

    def __init__(self, *args, **kwargs):
        super(TestONNXLightGBMConverter, self).__init__(*args, **kwargs)

    # Base test implementation comparing ONNXML and ONNX models.
    # NOTE(review): `extra_config={}` is a mutable default argument; it is
    # never mutated here so this is harmless, but `None` would be safer.
    def _test_lgbm(self, X, model, extra_config={}):
        """Convert `model` to ONNX-ML and to ONNX, run both through
        onnxruntime, and return (onnx_ml_pred, onnx_pred, output_names),
        with labels in slot 1 and scores/probabilities in slot 0."""
        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", X, extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if output_names[i] == "label":
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = [[] for i in range(len(output_names))]
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if output_names[i] == "label":
                onnx_pred[1] = pred[i]
            else:
                onnx_pred[0] = pred[i]

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(X, model, extra_config)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred[0], rtol=rtol, atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        # ONNX-ML probabilities come back as per-sample dicts (ZipMap);
        # flatten the values before comparing.
        np.testing.assert_allclose(
            list(map(lambda x: list(x.values()), onnx_ml_pred[0])), onnx_pred[0], rtol=rtol, atol=atol
        )  # probs

    # Check that ONNXML models can only target the ONNX backend.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_pytorch(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Converting an ONNX-ML model to the "torch" backend must fail here.
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "torch")

    # Check converter with extra configs.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_pytorch_extra_config(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Create ONNX model: the extra config overrides the output model name
        # and supplies the initial types in place of a sample input.
        model_name = "hummingbird.ml.test.lightgbm"
        extra_config = {}
        extra_config[constants.ONNX_OUTPUT_MODEL_NAME] = model_name
        extra_config[constants.ONNX_INITIAL_TYPES] = [("input", FloatTensorType([X.shape[0], X.shape[1]]))]
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        assert onnx_model.graph.name == model_name

    # Basic regression test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lgbm_onnxml_model_regressor(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        # NOTE(review): no np.random.seed here, so the data differs per run —
        # which is why the tolerance below is loosened.
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMRegressor()
        model.fit(X, y)
        self._test_regressor(X, model, rtol=1e-02, atol=1e-02)  # Lower tolerance to avoid random errors

    # Regression test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 1 estimator (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_regressor1(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 2 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_regressor2(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with gbdt boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_booster_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 1.1]
        data = lgb.Dataset(X, label=y)
        # Train through the low-level Booster API rather than the sklearn wrapper.
        model = lgb.train(
            {"boosting_type": "gbdt", "objective": "regression", "n_estimators": 3, "min_child_samples": 1, "max_depth": 1},
            data,
        )
        self._test_regressor(X, model)

    # Binary classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lgbm_onnxml_model_binary(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_classifier(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_booster_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train({"boosting_type": "gbdt", "objective": "binary", "n_estimators": 3, "min_child_samples": 1}, data)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_booster_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train({"boosting_type": "gbdt", "objective": "binary", "n_estimators": 3, "min_child_samples": 1}, data)
        self._test_classifier(X, model)

    # Multiclass classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lgbm_onnxml_model_multi(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_classifier_multi(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(
        True, reason='ONNXMLTOOLS fails with "ValueError: unsupported LightGbm objective: multiclass num_class:3"'
    )
    def test_lightgbm_booster_multi_classifier(self):
        # Permanently skipped (see decorator above) until onnxmltools supports
        # the multiclass booster objective.
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1, 2, 2]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {"boosting_type": "gbdt", "objective": "multiclass", "n_estimators": 3, "min_child_samples": 1, "num_class": 3},
            data,
        )
        self._test_classifier(X, model)
class TestONNXSVC(unittest.TestCase):
    def _test_sv(self, classes, mode="torch"):
        """
        This helper function tests conversion of `ai.onnx.ml.SVMClassifier`
        which is created from a scikit-learn SVC.
        This then calls either "_to_onnx" or "_to_torch"
        """
        samples, features = 100, 20
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.random.rand(samples, features).astype(np.float32)
        y = np.random.randint(classes, size=samples)

        # Reference scikit-learn model.
        skl_model = SVC()
        skl_model.fit(X, y)

        # ONNX-ML model derived from the scikit-learn one.
        onnx_ml_model = convert_sklearn(
            skl_model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))]
        )

        # Score the ONNX-ML model through onnxruntime, routing labels into
        # slot 1 and the remaining (score) output into slot 0.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        onnx_ml_pred = [[] for _ in output_names]
        feed = {session.get_inputs()[0].name: X}
        for name, value in zip(output_names, session.run(output_names, feed)):
            onnx_ml_pred[1 if "label" in name else 0] = value

        # Convert ONNX-ML to the requested backend and predict with it.
        hb_model = convert(onnx_ml_model, mode, X)
        return onnx_ml_pred, hb_model.predict(X)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.SVMClassifier with 2 classes for onnxml-> pytorch
    def test_logistic_regression_onnxml_binary_torch(self, rtol=1e-06, atol=1e-06):
        ml_pred, hb_pred = self._test_sv(2)

        # The backend model must reproduce the ONNX-ML labels.
        np.testing.assert_allclose(ml_pred[1], hb_pred, rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.SVMClassifier with 3 classes for onnxml-> pytorch
    def test_logistic_regression_onnxml_multi_torch(self, rtol=1e-06, atol=1e-06):
        ml_pred, hb_pred = self._test_sv(3)

        # The backend model must reproduce the ONNX-ML labels.
        np.testing.assert_allclose(ml_pred[1], hb_pred, rtol=rtol, atol=atol)
class TestONNXLightGBMConverter(unittest.TestCase):
    """
    Compares ONNX-ML LightGBM models with Hummingbird's ONNX container,
    scoring the converted model through the container's predict APIs.

    NOTE(review): this class re-uses a name already defined earlier in this
    file; if both live in the same module this later definition shadows the
    earlier one, so only these tests run. Likely a file-concatenation
    artifact — TODO confirm and deduplicate.
    """

    def __init__(self, *args, **kwargs):
        super(TestONNXLightGBMConverter, self).__init__(*args, **kwargs)

    # Base test implementation comparing ONNXML and ONNX models.
    # NOTE(review): `extra_config={}` is a mutable default argument; harmless
    # here since it is never mutated.
    def _test_lgbm(self, X, model, extra_config={}):
        """Convert `model` to ONNX-ML and then to Hummingbird's ONNX container,
        returning (onnx_ml_pred, onnx_pred, output_names). For classifiers,
        slot 0 holds probabilities and slot 1 labels; for regressors the raw
        prediction array is returned directly."""
        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
            target_opset=9)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if "label" in output_names[i]:
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model: a single output means
        # regression, otherwise the container exposes predict/predict_proba.
        onnx_pred = [[] for i in range(len(output_names))]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(
            X, model, extra_config)

        # Check that predicted values match (ravel flattens the (n, 1)
        # ONNX-ML output for comparison with the container's 1-D output).
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(), onnx_pred, rtol=rtol, atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(
            X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        # ONNX-ML probabilities are per-sample dicts (ZipMap); flatten first.
        np.testing.assert_allclose(list(
            map(lambda x: list(x.values()), onnx_ml_pred[0])),
            onnx_pred[0],
            rtol=rtol,
            atol=atol)  # probs

    # Check that ONNXML models can also target other backends.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_onnx_pytorch(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
            target_opset=9)

        # ONNX-ML -> torch conversion must now succeed.
        pt_model = convert(onnx_ml_model, "torch", X)
        assert pt_model

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        np.testing.assert_allclose(onnx_ml_pred[0].flatten(), pt_model.predict(X))

    # Basic regression test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_regressor(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMRegressor()
        model.fit(X, y)
        import platform
        # TODO bug on newer macOS versions?
        if platform.system() == "Darwin":
            # Looser tolerance on macOS only; root cause not yet identified.
            self._test_regressor(X, model, rtol=1e-05, atol=1e-04)
        else:
            self._test_regressor(X, model)

    # Regression test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 1 estimator (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor1(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 2 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor2(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with gbdt boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 1.1]
        data = lgb.Dataset(X, label=y)
        # Low-level Booster API rather than the sklearn wrapper.
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "regression",
                "n_estimators": 3,
                "min_child_samples": 1,
                "max_depth": 1
            },
            data,
        )
        self._test_regressor(X, model)

    # Binary classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_binary(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with float64.
    # NOTE(review): despite the name, X below is cast to float32 — presumably
    # the float64 path is exercised inside convert(); TODO confirm.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_binary_float64(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)

        # Convert the LightGBM model directly (not through ONNX-ML).
        onnx_model = convert(model, "onnx", X)

        np.testing.assert_allclose(model.predict(X), onnx_model.predict(X))

    # Binary classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "binary",
                "n_estimators": 3,
                "min_child_samples": 1
            }, data)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "binary",
                "n_estimators": 3,
                "min_child_samples": 1
            }, data)
        self._test_classifier(X, model)

    # Multiclass classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_multi(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier_multi(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_multi_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1, 2, 2]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "multiclass",
                "n_estimators": 3,
                "min_child_samples": 1,
                "num_class": 3
            },
            data,
        )
        self._test_classifier(X, model)
class TestONNXDecisionTreeConverter(unittest.TestCase):
    """
    Compares ONNX-ML decision-tree / random-forest models (via skl2onnx)
    with their Hummingbird-converted ONNX container counterparts.
    """

    def __init__(self, *args, **kwargs):
        super(TestONNXDecisionTreeConverter, self).__init__(*args, **kwargs)

    # Base test implementation comparing ONNXML and ONNX models.
    # NOTE(review): `extra_config={}` is a mutable default argument; harmless
    # here since it is never mutated.
    def _test_decision_tree(self, X, model, extra_config={}):
        """Convert `model` to ONNX-ML and to Hummingbird's ONNX container,
        returning (onnx_ml_pred, onnx_pred, output_names). Labels land in
        slot 1, scores/probabilities in slot 0; pure regressors return the
        prediction array directly."""
        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
            target_opset=11)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", X, extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if "label" in output_names[i]:
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model: one output means regression,
        # otherwise route predict()/predict_proba() into the matching slots.
        onnx_pred = [[] for i in range(len(output_names))]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            for i in range(len(output_names)):
                if "label" in output_names[i]:
                    onnx_pred[1] = onnx_model.predict(X)
                else:
                    onnx_pred[0] = onnx_model.predict_proba(X)

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_decision_tree(
            X, model, extra_config)

        # Check that predicted values match (ravel flattens the (n, 1)
        # ONNX-ML output for comparison with the container's 1-D output).
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(), onnx_pred, rtol=rtol, atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_decision_tree(
            X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        # ONNX-ML probabilities are per-sample dicts (ZipMap); flatten first.
        np.testing.assert_allclose(list(
            map(lambda x: list(x.values()), onnx_ml_pred[0])),
            onnx_pred[0],
            rtol=rtol,
            atol=atol)  # probs

    # Regression.
    # Regression test with Decision Tree.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_regressor(self):
        warnings.filterwarnings("ignore")
        model = DecisionTreeRegressor()
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Basic regression test with decision tree.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_regressor_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create DecisionTree model
        model = DecisionTreeRegressor()
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with Random Forest, 1 estimator.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_regressor_1(self):
        warnings.filterwarnings("ignore")
        model = RandomForestRegressor(n_estimators=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Basic regression test with Random Forest.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_regressor_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create RandomForest model
        model = RandomForestRegressor()
        model.fit(X, y)
        # Looser tolerance: forest averaging amplifies float32 rounding.
        self._test_regressor(X, model, rtol=1e-03, atol=1e-03)

    # Binary.
    # Binary classification test random.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_binary_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create DecisionTree model
        model = DecisionTreeClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test Decision Tree.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_binary(self):
        warnings.filterwarnings("ignore")
        model = DecisionTreeClassifier()
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test Random Forest with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        model = RandomForestClassifier(n_estimators=3)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test Random Forest with 3 estimators random.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_classifier_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        model = RandomForestClassifier(n_estimators=10)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test.
    # Multiclass classification test with DecisionTree, random.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_multi_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create the DecisionTree model
        model = DecisionTreeClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with DecisionTree (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_multi(self):
        warnings.filterwarnings("ignore")
        model = DecisionTreeClassifier()
        X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with Random Forest.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_multi_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create the RandomForest model
        model = RandomForestClassifier(n_estimators=10)
        model.fit(X, y)
        self._test_classifier(X, model)
class TestSklearnNormalizer(unittest.TestCase):
    """Tests conversion of scikit-learn ``Normalizer`` to torch, ONNX, and TVM backends."""

    def test_normalizer_converter(self):
        """float32 input: torch backend matches scikit-learn for every norm."""
        # Random 2D float32 array with values in [0, 1000).
        np.random.seed(0)
        features = np.array(np.random.rand(100, 200) * 1000, dtype=np.float32)
        features_tensor = torch.from_numpy(features)

        for norm_type in ["l1", "l2", "max"]:
            skl_model = Normalizer(norm=norm_type)
            skl_model.fit(features)

            converted = hummingbird.ml.convert(skl_model, "torch")
            self.assertIsNotNone(converted)
            np.testing.assert_allclose(
                skl_model.transform(features),
                converted.transform(features_tensor),
                rtol=1e-06,
                atol=1e-06,
            )

    def test_normalizer_converter_raises_wrong_type(self):
        """An unsupported norm string makes the converted operator raise at runtime."""
        np.random.seed(0)
        features = np.array(np.random.rand(100, 200) * 1000, dtype=np.float32)

        skl_model = Normalizer(norm="invalid")
        skl_model.fit(features)
        converted = hummingbird.ml.convert(skl_model, "torch")

        # The converted torch operator itself rejects the bogus norm.
        self.assertRaises(
            RuntimeError,
            converted.model._operator_map.SklearnNormalizer,
            torch.from_numpy(features),
        )

    # Float 64 data tests
    def test_float64_normalizer_converter(self):
        """float64 input (no float32 cast): torch backend still matches scikit-learn."""
        np.random.seed(0)
        features = np.random.rand(100, 200) * 1000
        features_tensor = torch.from_numpy(features)

        for norm_type in ["l1", "l2", "max"]:
            skl_model = Normalizer(norm=norm_type)
            skl_model.fit(features)

            converted = hummingbird.ml.convert(skl_model, "torch")
            self.assertIsNotNone(converted)
            np.testing.assert_allclose(
                skl_model.transform(features),
                converted.transform(features_tensor),
                rtol=1e-06,
                atol=1e-06,
            )

    # ONNX backend
    @unittest.skipIf(not (onnx_runtime_installed()), reason="ONNX test requires ONNX and ORT")
    def test_normalizer_converter_onnx(self):
        """ONNX backend matches scikit-learn for every norm."""
        np.random.seed(0)
        features = np.array(np.random.rand(100, 200) * 1000, dtype=np.float32)
        features_tensor = torch.from_numpy(features)

        for norm_type in ["l1", "l2", "max"]:
            skl_model = Normalizer(norm=norm_type)
            skl_model.fit(features)

            converted = hummingbird.ml.convert(skl_model, "onnx", features)
            self.assertIsNotNone(converted)
            np.testing.assert_allclose(
                skl_model.transform(features),
                converted.transform(features_tensor),
                rtol=1e-06,
                atol=1e-06,
            )

    # TVM backend
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_normalizer_converter_tvm(self):
        """TVM backend matches scikit-learn for every norm."""
        np.random.seed(0)
        features = np.array(np.random.rand(100, 200) * 1000, dtype=np.float32)
        features_tensor = torch.from_numpy(features)

        for norm_type in ["l1", "l2", "max"]:
            skl_model = Normalizer(norm=norm_type)
            skl_model.fit(features)

            converted = hummingbird.ml.convert(
                skl_model, "tvm", features, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30}
            )
            self.assertIsNotNone(converted)
            np.testing.assert_allclose(
                skl_model.transform(features),
                converted.transform(features_tensor),
                rtol=1e-06,
                atol=1e-06,
            )
class TestSklearnPipeline(unittest.TestCase):
    """Tests conversion of scikit-learn Pipeline / ColumnTransformer / FeatureUnion models."""

    def test_pipeline(self):
        """Two chained StandardScalers (int-valued float32 data) convert and match."""
        data = np.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_pipeline2(self):
        """Same as test_pipeline but with explicitly float-valued input literals."""
        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_combine_inputs_union_in_pipeline(self):
        """FeatureUnion inside a Pipeline converts and matches."""
        from sklearn.preprocessing import StandardScaler
        from sklearn.pipeline import Pipeline

        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=np.float32)
        model = Pipeline([
            ("scaler1", StandardScaler()),
            ("union", FeatureUnion([("scaler2", StandardScaler()), ("scaler3", MinMaxScaler())])),
        ])
        model.fit(data)
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_combine_inputs_floats_ints(self):
        """Mixed int/float columns in a plain Python list still convert and match."""
        data = [[0, 0.0], [0, 0.0], [1, 1.0], [1, 1.0]]
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_1(self):
        """ColumnTransformer with a single numeric transformer feeding a classifier."""
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(transformers=[("num", numeric_transformer, numeric_features)])
        # NOTE(review): step name "precprocessor" is a typo, kept as-is throughout this class.
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer(self):
        """ColumnTransformer with numeric scaler + categorical one-hot encoder."""
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_weights(self):
        """Same as test_pipeline_column_transformer plus transformer_weights."""
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop(self):
        """ColumnTransformer with remainder="drop" (column 2 unused) and weights."""
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="drop",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop_noweights(self):
        """Same as test_pipeline_column_transformer_drop but without weights."""
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="drop",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough(self):
        """ColumnTransformer with remainder="passthrough" and weights."""
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="passthrough",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_noweights(self):
        """Same as test_pipeline_column_transformer_passthrough but without weights."""
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="passthrough",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_slice(self):
        """ColumnTransformer selecting columns by slice instead of list."""
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        # NOTE(review): slice(0, 1) selects column 0 only ("vA") and slice(3, 4)
        # column 3 only ("vcat") — narrower than the list-based tests above, where
        # the old comments claimed two columns each. Verify this is intentional.
        numeric_features = slice(0, 1)
        categorical_features = slice(3, 4)
        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )
        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="passthrough",
        )
        model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)])
        model.fit(X_train, y_train)
        X_test = X_train[:11]
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not onnx_runtime_installed(), reason="Test requires ORT installed")
    def test_pipeline_many_inputs(self):
        """ONNX conversion of a pipeline fed with one graph input per feature column."""
        n_features = 18
        X = np.random.rand(100, n_features)
        y = np.random.randint(1000, size=100)
        scaler_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(
            transformers=[("scaling", scaler_transformer, list(range(n_features)))])
        model = RandomForestRegressor(n_estimators=10, max_depth=9)
        pipeline = Pipeline(steps=[("preprocessor", preprocessor), ("model", model)])
        pipeline.fit(X, y)
        # One (100, 1) array per feature column — each becomes a separate ONNX input.
        X_test = tuple(np.split(X, n_features, axis=1))
        hb_model = hummingbird.ml.convert(pipeline, "onnx", X_test)
        assert len(hb_model.model.graph.input) == n_features
        np.testing.assert_allclose(
            pipeline.predict(X),
            np.array(hb_model.predict(X_test)).flatten(),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None, reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not onnx_runtime_installed(), reason="Test requires ORT installed")
    def test_pipeline_many_inputs_with_schema(self):
        """Custom input/output names supplied via extra_config end up in the ONNX graph."""
        n_features = 5
        X = np.random.rand(100, n_features)
        y = np.random.randint(1000, size=100)
        input_column_names = ["A", "B", "C", "D", "E"]
        output_column_names = ["score"]
        scaler_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(
            transformers=[("scaling", scaler_transformer, list(range(n_features)))])
        model = RandomForestRegressor(n_estimators=10, max_depth=9)
        pipeline = Pipeline(steps=[("preprocessor", preprocessor), ("model", model)])
        pipeline.fit(X, y)
        X_test = tuple(np.split(X, n_features, axis=1))
        extra_config = {
            constants.INPUT_NAMES: input_column_names,
            constants.OUTPUT_NAMES: output_column_names
        }
        hb_model = hummingbird.ml.convert(pipeline, "onnx", X_test, extra_config=extra_config)
        graph_inputs = [input.name for input in hb_model.model.graph.input]
        graph_outputs = [output.name for output in hb_model.model.graph.output]
        assert len(hb_model.model.graph.input) == n_features
        assert graph_inputs == input_column_names
        assert graph_outputs == output_column_names
class TestONNXOneHotEncoder(unittest.TestCase):
    """Tests conversion of ONNX-ML OneHotEncoder models to pure ONNX via Hummingbird."""

    # Test OneHotEncoder with ints
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int(self, rtol=1e-06, atol=1e-06):
        """Single-row int32 input: converted model matches the ONNX-ML reference."""
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", IntTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with two input rows
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx2(self, rtol=1e-06, atol=1e-06):
        """Two-row int32 input (multiple categories per column)."""
        model = OneHotEncoder()
        X = np.array([[1, 2, 3], [2, 1, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", IntTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with int64
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int64(self, rtol=1e-06, atol=1e-06):
        """int64 input exercises the Long tensor code path."""
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int64)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", LongTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # # Test OneHotEncoder with strings
    # @unittest.skipIf(
    #     not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    # )
    # def test_model_one_hot_encoder_string(self):
    #     model = OneHotEncoder()
    #     data = [['a', 'r', 'x'], ['a', 'r', 'x'], ['aaaa', 'r', 'x'], ['a', 'r', 'xx']]
    #     model.fit(data)
    #     # max word length is the smallest number which is divisible by 4 and larger than or equal to the length of any word
    #     max_word_length = 4
    #     num_columns = 3
    #     # Create ONNX-ML model
    #     onnx_ml_model = convert_sklearn(
    #         model,
    #         initial_types=[("input", StringTensorType_onnx([4, 3]))]
    #     )
    #     pytorch_input = np.array(data, dtype='|S'+str(max_word_length)).view(np.int32).reshape(-1, num_columns, max_word_length // 4)
    #     # Create ONNX model by calling converter
    #     onnx_model = convert(onnx_ml_model, "onnx", pytorch_input)
    #     # Get the predictions for the ONNX-ML model
    #     session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    #     output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
    #     inputs = {session.get_inputs()[0].name: data}
    #     onnx_ml_pred = session.run(output_names, inputs)
    #     # Get the predictions for the ONNX model
    #     session = ort.InferenceSession(onnx_model.SerializeToString())
    #     inputs_pyt = {session.get_inputs()[0].name: pytorch_input}
    #     onnx_pred = session.run(output_names, inputs_pyt)
    #     return onnx_ml_pred, onnx_pred

    # Test OneHotEncoder temporary failcase
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_ohe_string_raises_notimpl_onnx(self):
        """String categories are not yet supported: conversion must raise RuntimeError."""
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create ONNX model by calling converter, should raise error for strings
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", data)

    # Test OneHotEncoder failcase when input data type is not supported
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_ohe_string_raises_type_error_onnx(self):
        """Conversion without test input data must raise RuntimeError.

        NOTE(review): unlike the test above, no test data is passed to convert
        here — presumably the failure is about missing input information rather
        than string handling; confirm against the converter's error paths.
        """
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Conversion is invoked without input data and must fail.
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx")
class TestProphet(unittest.TestCase):
    """Tests conversion of Facebook Prophet models to torch and ONNX backends."""

    def _get_data(self):
        """Return the Peyton Manning example dataset, downloading it on first use.

        The CSV is cached under tests/resources so repeated runs work offline.
        """
        local_path = "tests/resources"
        local_data = os.path.join(local_path, "example_wp_log_peyton_manning.csv")
        url = "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv"
        if not os.path.isfile(local_data):
            # exist_ok=True: the directory can already exist even when the CSV is
            # missing; the original os.makedirs(local_path) raised FileExistsError.
            os.makedirs(local_path, exist_ok=True)
            urlretrieve(url, local_data)
        data = pd.read_csv(local_data)
        return data

    @unittest.skipIf(not (pandas_installed() and prophet_installed()), reason="Test requires Prophet and Pandas")
    def test_prophet_trend(self):
        """Trend predictions of the torch-converted model match Prophet's."""
        df = self._get_data()

        m = Prophet()
        m.fit(df)

        # Convert with Hummingbird.
        hb_model = hummingbird.ml.convert(m, "torch")

        # Predictions.
        future = m.make_future_dataframe(periods=365)
        prophet_trend = m.predict(future)["trend"].values
        hb_trend = hb_model.predict(future)

        np.testing.assert_allclose(prophet_trend, hb_trend, rtol=1e-06, atol=1e-06)

    @unittest.skipIf(
        not (pandas_installed() and prophet_installed()),
        reason="Test requires Prophet, Pandas and ONNX runtime.",
    )
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.8.1"),
        reason="Test requires Torch 1.8.1.",
    )
    @unittest.skipIf(
        not onnx_runtime_installed() or LooseVersion(onnxruntime.__version__) < LooseVersion("1.7.0"),
        reason="Prophet test requires onnxruntime => 1.7.0",
    )
    def test_prophet_trend_onnx(self):
        """Trend predictions of the ONNX-converted model match Prophet's."""
        df = self._get_data()

        m = Prophet()
        m.fit(df)

        future = m.make_future_dataframe(periods=365)
        # Prophet consumes datetimes; the converted model takes Unix epoch seconds.
        future_np = (future.values - np.datetime64("1970-01-01T00:00:00.000000000")).astype(
            np.int64) / 1000000000

        # Convert with Hummingbird.
        hb_model = hummingbird.ml.convert(m, "onnx", future_np)

        # Predictions.
        prophet_trend = m.predict(future)["trend"]
        hb_trend = hb_model.predict(future_np)

        # NOTE(review): a leftover debug dump (onnx.save to "prophet.onnx") was
        # removed here — the test should not write artifacts into the CWD.
        np.testing.assert_allclose(prophet_trend, hb_trend, rtol=1e-06, atol=1e-06)
class TestSklearnNormalizer(unittest.TestCase):
    """ONNXML tests: scikit-learn Normalizer converted through an ONNX-ML model.

    NOTE(review): another ``TestSklearnNormalizer`` class exists earlier in this
    repository; if both end up in the same module, the later definition silently
    shadows the earlier one and its tests never run — confirm they live in
    separate files.
    """

    def _test_normalizer_converter(self, norm):
        """Convert a fitted Normalizer via ONNXML; return (onnx_ml_pred, onnx_pred).

        Args:
            norm: norm string forwarded to ``Normalizer`` ("l1", "l2", or "max").
        """
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32)

        # Create SKL model for testing.
        model = Normalizer(norm=norm)
        model.fit(X)

        # Create ONNX-ML model.
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))]
        )

        # Create ONNX model by calling converter.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model.
        # (Dead list pre-initializations that were immediately overwritten by
        # session.run were removed.)
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name for i in range(len(session.get_outputs()))
        ]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model.
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = session.run(output_names, inputs)

        return onnx_ml_pred, onnx_pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_normalizer_l1(self, rtol=1e-06, atol=1e-06):
        """L1 norm round-trips through ONNXML conversion."""
        onnx_ml_pred, onnx_pred = self._test_normalizer_converter("l1")

        # Check that predicted values match.
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_normalizer_l2(self, rtol=1e-06, atol=1e-06):
        """L2 norm round-trips through ONNXML conversion."""
        onnx_ml_pred, onnx_pred = self._test_normalizer_converter("l2")

        # Check that predicted values match.
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_normalizer_max(self, rtol=1e-06, atol=1e-06):
        """Max norm round-trips through ONNXML conversion."""
        onnx_ml_pred, onnx_pred = self._test_normalizer_converter("max")

        # Check that predicted values match.
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_normalizer_converter_raises_rt(self):
        """Blanking the norm attribute of the ONNX-ML node makes conversion fail."""
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32)
        model = Normalizer(norm="l1")
        model.fit(X)

        # Generate test input.
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))]
        )
        # Clear the norm attribute so the converter sees an unsupported value.
        onnx_ml_model.graph.node[0].attribute[0].s = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
class TestONNXLinear(unittest.TestCase):
    """Tests conversion of ai.onnx.ml linear operators (LinearClassifier and
    LinearRegressor) into ONNX models through Hummingbird."""

    def _test_linear(self, classes):
        """
        This helper function tests conversion of `ai.onnx.ml.LinearClassifier`
        which is created from a scikit-learn LogisticRegression.

        This tests `convert_onnx_linear_model` in
        `hummingbird.ml.operator_converters.onnxml_linear`
        """
        n_features = 20
        n_total = 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(classes, size=n_total)

        # Reference scikit-learn model.
        skl_model = LogisticRegression(solver="liblinear", multi_class="ovr",
                                       fit_intercept=True)
        skl_model.fit(X, y)

        # ONNX-ML model produced by onnxmltools.
        onnx_ml_model = convert_sklearn(
            skl_model,
            initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # ONNX model produced by the Hummingbird converter.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Predictions of the ONNX-ML model, reordered so that index 0 holds
        # probabilities and index 1 holds labels.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feeds = {session.get_inputs()[0].name: X}
        raw = session.run(output_names, feeds)
        onnx_ml_pred = [[] for _ in output_names]
        for name, value in zip(output_names, raw):
            if "label" in name:
                onnx_ml_pred[1] = value
            else:
                onnx_ml_pred[0] = value

        # Predictions of the converted ONNX model, same layout.
        onnx_pred = [[] for _ in output_names]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.LinearClassifier with 2 classes
    def test_logistic_regression_onnxml_binary(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_linear(2)

        # Labels must match.
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1],
                                   rtol=rtol, atol=atol)
        # Probabilities must match (ONNX-ML returns a list of dicts).
        probs = list(map(lambda x: list(x.values()), onnx_ml_pred[0]))
        np.testing.assert_allclose(probs, onnx_pred[0], rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.LinearClassifier with 3 classes
    def test_logistic_regression_onnxml_multi(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_linear(3)

        # Labels must match.
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1],
                                   rtol=rtol, atol=atol)
        # Probabilities must match (ONNX-ML returns a list of dicts).
        probs = list(map(lambda x: list(x.values()), onnx_ml_pred[0]))
        np.testing.assert_allclose(probs, onnx_pred[0], rtol=rtol, atol=atol)

    def _test_regressor(self, values):
        """
        This helper function tests conversion of `ai.onnx.ml.LinearRegressor`
        which is created from a scikit-learn LinearRegression.

        This tests `convert_onnx_linear_regression_model` in
        `hummingbird.ml.operator_converters.onnxml_linear`
        """
        n_features = 20
        n_total = 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(values, size=n_total)

        # Reference scikit-learn model.
        skl_model = LinearRegression()
        skl_model.fit(X, y)

        # ONNX-ML model produced by onnxmltools.
        onnx_ml_model = convert_sklearn(
            skl_model,
            initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # ONNX model produced by the Hummingbird converter.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Predictions of the ONNX-ML model.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feeds = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feeds)

        # Predictions of the converted ONNX model.
        onnx_pred = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.LinearRegressor with 2 values
    def test_linear_regression_onnxml_small(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_regressor(2)

        # Predicted values must match.
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(), onnx_pred,
                                   rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.LinearRegressor with 100 values
    def test_linear_regression_onnxml_large(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_regressor(100)

        # Predicted values must match.
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(), onnx_pred,
                                   rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test for malformed model/problem with parsing
    def test_onnx_linear_converter_raises_rt(self):
        n_features = 20
        n_total = 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)
        skl_model = LinearRegression()
        skl_model.fit(X, y)

        # Generate the ONNX-ML test input and corrupt the first attribute name
        # so the converter cannot parse the model.
        onnx_ml_model = convert_sklearn(
            skl_model,
            initial_types=[("float_input", FloatTensorType_onnx(X.shape))])
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
""" Tests onnxml Binarizer converter """ import unittest import warnings import numpy as np import torch from sklearn.preprocessing import Binarizer from hummingbird.ml._utils import onnx_ml_tools_installed, onnx_runtime_installed, lightgbm_installed from hummingbird.ml import convert if onnx_runtime_installed(): import onnxruntime as ort if onnx_ml_tools_installed(): from onnxmltools import convert_sklearn from onnxmltools.convert.common.data_types import FloatTensorType as FloatTensorType_onnx class TestONNXBinarizer(unittest.TestCase): def _test_binarizer_converter(self, threshold): warnings.filterwarnings("ignore") X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32) # Create SKL model for testing model = Binarizer(threshold=threshold) model.fit(X) # Create ONNX-ML model
class TestLGBMConverter(unittest.TestCase):
    """Tests conversion of LightGBM models (classifier, ranker, regressor)
    to PyTorch, TorchScript, ONNX and TVM backends.

    Fix over the previous revision: the private ``_run_*`` helpers used the
    mutable default argument ``extra_config={}`` (a shared dict across calls);
    they now default to ``None`` and create a fresh dict per call. Callers are
    unaffected since they either omit the argument or pass their own dict.
    """

    # Check tree implementation
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(10, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=10)

        for model in [lgb.LGBMClassifier(n_estimators=1, max_depth=1),
                      lgb.LGBMRegressor(n_estimators=1, max_depth=1)]:
            for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model, "torch",
                    extra_config={"tree_implementation": extra_config_param})
                self.assertIsNotNone(torch_model)
                # The operator chosen by the converter must match the
                # implementation requested through extra_config.
                self.assertEqual(
                    str(type(list(torch_model.model._operators)[0])),
                    gbdt_implementation_map[extra_config_param])

    def _run_lgbm_classifier_converter(self, num_classes, extra_config=None):
        """Train LGBMClassifiers over several depths, convert each to PyTorch,
        and check that predict_proba outputs match the originals."""
        if extra_config is None:  # avoid a shared mutable default
            extra_config = {}
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06, atol=1e-06)

    # Binary classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_classifier_converter(self):
        self._run_lgbm_classifier_converter(2)

    # Gemm classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(3)

    # Gemm multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "gemm"})

    # Tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_lgbm_ranker_converter(self, num_classes, extra_config=None,
                                   label_gain=None):
        """Train LGBMRankers over several depths, convert each to PyTorch,
        and check that predict outputs match the originals."""
        if extra_config is None:  # avoid a shared mutable default
            extra_config = {}
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRanker(n_estimators=10, max_depth=max_depth,
                                   label_gain=label_gain)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            # A single query group containing every row.
            model.fit(X, y, group=[X.shape[0]], eval_set=[(X, y)],
                      eval_group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model, "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06, atol=1e-06)

    # Ranker - small, no label gain
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter_no_label(self):
        self._run_lgbm_ranker_converter(30)

    # Ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter(self):
        self._run_lgbm_ranker_converter(1000, label_gain=list(range(1000)))

    # Gemm ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000, extra_config={"tree_implementation": "gemm"},
            label_gain=list(range(1000)))

    # Tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000, extra_config={"tree_implementation": "tree_trav"},
            label_gain=list(range(1000)))

    # Perf_tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"},
            label_gain=list(range(1000)))

    def _run_lgbm_regressor_converter(self, num_classes, extra_config=None):
        """Train LGBMRegressors over several depths, convert each to PyTorch,
        and check that predict outputs match the originals."""
        if extra_config is None:  # avoid a shared mutable default
            extra_config = {}
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06, atol=1e-06)

    # Regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_regressor_converter(self):
        self._run_lgbm_regressor_converter(1000)

    # Gemm regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 classification test helper
    def _run_float64_lgbm_classifier_converter(self, num_classes,
                                               extra_config=None):
        """Same as _run_lgbm_classifier_converter but keeps X as float64
        to exercise the double-precision path."""
        if extra_config is None:  # avoid a shared mutable default
            extra_config = {}
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)  # float64 on purpose
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06, atol=1e-06)

    # Gemm classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_gemm_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_tree_trav_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 regression test helper
    def _run_float64_lgbm_regressor_converter(self, num_classes,
                                              extra_config=None):
        """Same as _run_lgbm_regressor_converter but keeps X as float64
        to exercise the double-precision path."""
        if extra_config is None:  # avoid a shared mutable default
            extra_config = {}
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)  # float64 on purpose
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06, atol=1e-06)

    # Gemm regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_gemm_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_tree_trav_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Random forest in lgbm, the conversion fails with the latest
    # version of lightgbm. The direct converter to pytorch should be
    # updated or the model could be converted into ONNX then
    # converted into pytorch.
    # For more details, see ONNX converter at https://github.com/onnx/
    # onnxmltools/blob/master/onnxmltools/convert/lightgbm/
    # operator_converters/LightGbm.py#L313.
    @unittest.skipIf(
        True, reason="boosting_type=='rf' produces different probabilites.")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_random_forest_rf(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(boosting_type="rf", n_estimators=128,
                                   max_depth=5, subsample=0.3, bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06, atol=1e-06)

    # Random forest in lgbm
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_random_forest_gbdt(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(boosting_type="gbdt", n_estimators=128,
                                   max_depth=5, subsample=0.3, bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06, atol=1e-06)

    # Test Tweedie loss in lgbm
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tweedie(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(objective="tweedie", n_estimators=2,
                                  max_depth=5)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(100, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict(X), torch_model.predict(X),
                                   rtol=1e-06, atol=1e-06)

    # Backend tests.
    # Test TorchScript backend regression.
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_regressor_converter_torchscript(self):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(1000, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torchscript", X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06, atol=1e-06)

    # Test TorchScript backend classification.
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_converter_torchscript(self):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torchscript", X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06, atol=1e-06)

    # Check that we can export into ONNX.
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_onnx(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX model
        onnx_model = hummingbird.ml.convert(model, "onnx", X)

        np.testing.assert_allclose(onnx_model.predict(X).flatten(),
                                   model.predict(X))

    # TVM backend tests.
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_lightgbm_tvm_regressor(self):
        warnings.filterwarnings("ignore")
        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            X = [[0, 1], [1, 1], [2, 0]]
            X = np.array(X, dtype=np.float32)
            y = np.array([100, -10, 50], dtype=np.float32)
            model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
            model.fit(X, y)

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model, "tvm", X,
                extra_config={"tree_implementation": tree_implementation})

            # Check results.
            np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))

    @unittest.skipIf(not (tvm_installed()),
                     reason="TVM tests require TVM installed")
    def test_lightgbm_tvm_classifier(self):
        warnings.filterwarnings("ignore")
        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            X = [[0, 1], [1, 1], [2, 0]]
            X = np.array(X, dtype=np.float32)
            y = np.array([0, 1, 0], dtype=np.float32)
            model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
            model.fit(X, y)

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model, "tvm", X,
                extra_config={"tree_implementation": tree_implementation})

            # Check results.
            np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))
            np.testing.assert_allclose(tvm_model.predict_proba(X),
                                       model.predict_proba(X))

    # Test TVM with large input datasets.
    @unittest.skipIf(not (tvm_installed()),
                     reason="TVM tests require TVM installed")
    def test_lightgbm_tvm_classifier_large_dataset(self):
        warnings.filterwarnings("ignore")
        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            size = 200000
            X = np.random.rand(size, 28)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=size)
            model = lgb.LGBMClassifier(n_estimators=100, max_depth=3)
            model.fit(X, y)

            # Create TVM model forcing double precision for the tree ops.
            tvm_model = hummingbird.ml.convert(
                model, "tvm", X,
                extra_config={
                    constants.TREE_IMPLEMENTATION: tree_implementation,
                    constants.TREE_OP_PRECISION_DTYPE: "float64"
                })

            # Check results.
            np.testing.assert_allclose(tvm_model.predict_proba(X),
                                       model.predict_proba(X),
                                       rtol=1e-04, atol=1e-04)
class TestExtraConf(unittest.TestCase): # Test default number of threads. It will only work on mac after 1.6 https://github.com/pytorch/pytorch/issues/43036 @unittest.skipIf( sys.platform == "darwin" and LooseVersion(torch.__version__) <= LooseVersion("1.6.0"), reason="PyTorch has a bug on mac related to multi-threading", ) def test_torch_deafault_n_threads(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch") self.assertIsNotNone(hb_model) self.assertTrue(torch.get_num_threads() == psutil.cpu_count(logical=False)) self.assertTrue(torch.get_num_interop_threads() == 1) # Test one thread in pytorch. @unittest.skipIf( sys.platform == "darwin" and LooseVersion(torch.__version__) > LooseVersion("1.6.0"), reason="Setting threading multi times will break on mac", ) def test_torch_one_thread(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "torch", extra_config={constants.N_THREADS: 1}) self.assertIsNotNone(hb_model) self.assertTrue(torch.get_num_threads() == 1) self.assertTrue(torch.get_num_interop_threads() == 1) # Test default number of threads onnx. 
@unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS" ) def test_onnx_deafault_n_threads(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) # Create ONNX-ML model onnx_ml_model = convert_sklearn( model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9 ) hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X) self.assertIsNotNone(hb_model) self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == psutil.cpu_count(logical=False)) self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1) # Test one thread onnx. @unittest.skipIf( not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS" ) def test_onnx_one_thread(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) hb_model = hummingbird.ml.convert(model, "onnx", X, extra_config={constants.N_THREADS: 1}) self.assertIsNotNone(hb_model) self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == 1) self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1) # Test pytorch regressor with batching. 
def test_torch_regression_batch(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) # Test pytorch classifier with batching. def test_torch_classification_batch(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Test pytorch classifier with batching. 
def test_torch_iforest_batch(self): warnings.filterwarnings("ignore") num_classes = 2 model = IsolationForest(n_estimators=10, max_samples=2) np.random.seed(0) X = np.random.rand(100, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=100) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06) # Test pytorch regressor with batching and uneven rows. def test_torch_batch_regression_uneven(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(105, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=105) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) # Test pytorch classification with batching and uneven rows. 
def test_torch_batch_classification_uneven(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(105, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=105) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) # Test pytorch transform with batching and uneven rows. def test_torch_batch_transform(self): warnings.filterwarnings("ignore") model = StandardScaler(with_mean=True, with_std=True) np.random.seed(0) X = np.random.rand(105, 200) X = np.array(X, dtype=np.float32) model.fit(X) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06) # Test torchscript regression with batching. def test_torchscript_regression_batch(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(103, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=103) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) # Test torchscript classification with batching. 
def test_torchscript_classification_batch(self): warnings.filterwarnings("ignore") max_depth = 10 num_classes = 2 model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth) np.random.seed(0) X = np.random.rand(103, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=103) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06) # Test torchscript iforest with batching. def test_torchscript_iforest_batch(self): warnings.filterwarnings("ignore") num_classes = 2 model = IsolationForest(n_estimators=10, max_samples=2) np.random.seed(0) X = np.random.rand(103, 200) X = np.array(X, dtype=np.float32) y = np.random.randint(num_classes, size=103) model.fit(X, y) batch_size = 10 remainder_size = X.shape[0] % batch_size hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size) self.assertIsNotNone(hb_model) np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06) np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06) # Test torchscript transform with batching and uneven rows. 
def test_torchscript_batch_transform(self):
    """Batched torch.jit transform (StandardScaler) with a partial last batch."""
    warnings.filterwarnings("ignore")
    model = StandardScaler(with_mean=True, with_std=True)
    np.random.seed(0)
    X = np.random.rand(101, 200)
    X = np.array(X, dtype=np.float32)
    model.fit(X)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

# Test onnx transform with batching and uneven rows.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_onnx_batch_transform(self):
    """Batched ONNX transform (StandardScaler) with a partial last batch."""
    warnings.filterwarnings("ignore")
    model = StandardScaler(with_mean=True, with_std=True)
    np.random.seed(0)
    X = np.random.rand(101, 200)
    X = np.array(X, dtype=np.float32)
    model.fit(X)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

# Test onnx regression with batching.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_onnx_regression_batch(self):
    """Batched ONNX regression (GradientBoostingRegressor) with a partial last batch."""
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
    np.random.seed(0)
    X = np.random.rand(103, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=103)
    model.fit(X, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

# Test onnx classification with batching.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_onnx_classification_batch(self):
    """Batched ONNX classification; checks both labels and class probabilities."""
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
    np.random.seed(0)
    X = np.random.rand(103, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=103)
    model.fit(X, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

# Test onnx iforest with batching.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_onnx_iforest_batch(self):
    """Batched ONNX IsolationForest; checks predict, decision_function and score_samples."""
    warnings.filterwarnings("ignore")
    num_classes = 2
    model = IsolationForest(n_estimators=10, max_samples=2)
    np.random.seed(0)
    X = np.random.rand(103, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=103)
    model.fit(X, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

# Test tvm transform with batching.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_batch_transform(self):
    """Batched TVM transform (StandardScaler); row count divides evenly, so no remainder."""
    warnings.filterwarnings("ignore")
    model = StandardScaler(with_mean=True, with_std=True)
    np.random.seed(0)
    X = np.random.rand(100, 200)
    X = np.array(X, dtype=np.float32)
    model.fit(X)

    batch_size = 10
    hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

# Test tvm regression with batching.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_regression_batch(self):
    """Batched TVM regression (GradientBoostingRegressor) with a partial last batch."""
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
    np.random.seed(0)
    X = np.random.rand(103, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=103)
    model.fit(X, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

# Test tvm classification with batching.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_classification_batch(self):
    """Batched TVM classification; rows divide evenly, checks labels and probabilities."""
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
    np.random.seed(0)
    X = np.random.rand(100, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=100)
    model.fit(X, y)

    batch_size = 10
    hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

# Test tvm iforest with batching.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_iforest_batch(self):
    """Batched TVM IsolationForest; checks predict, decision_function and score_samples."""
    warnings.filterwarnings("ignore")
    num_classes = 2
    model = IsolationForest(n_estimators=10, max_samples=2)
    np.random.seed(0)
    X = np.random.rand(100, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=100)
    model.fit(X, y)

    batch_size = 10
    hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

# Test tvm transform with batching and uneven number of records.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_batch_remainder_transform(self):
    """Batched TVM transform (StandardScaler) with a leftover partial batch.

    Passes ``remainder_size`` by keyword for consistency with the other batched tests.
    """
    warnings.filterwarnings("ignore")
    model = StandardScaler(with_mean=True, with_std=True)
    np.random.seed(0)
    X = np.random.rand(105, 200)
    X = np.array(X, dtype=np.float32)
    model.fit(X)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

# Test tvm regression with batching and uneven number of records.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_regression_remainder_batch(self):
    """Batched TVM regression with a leftover partial batch.

    Passes ``remainder_size`` by keyword for consistency with the other batched tests.
    """
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
    np.random.seed(0)
    X = np.random.rand(105, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=105)
    model.fit(X, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

# Test tvm classification with batching and uneven number of records.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_classification_remainder_batch(self):
    """Batched TVM classification with a leftover partial batch; checks labels and probabilities."""
    warnings.filterwarnings("ignore")
    max_depth = 10
    num_classes = 2
    model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
    np.random.seed(0)
    X = np.random.rand(105, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=105)
    model.fit(X, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

# Test tvm iforest with batching and uneven number of records.
@unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
def test_tvm_iforest_remainder_batch(self):
    """Batched TVM IsolationForest with a leftover partial batch.

    Passes ``remainder_size`` by keyword for consistency with the other batched tests.
    """
    warnings.filterwarnings("ignore")
    num_classes = 2
    model = IsolationForest(n_estimators=10, max_samples=2)
    np.random.seed(0)
    X = np.random.rand(105, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=105)
    model.fit(X, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size=remainder_size)
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

# Test batch with pandas.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
def test_pandas_batch(self):
    """Batched torch conversion of a sklearn Pipeline fed with a pandas DataFrame."""
    import pandas

    max_depth = 10
    iris = datasets.load_iris()
    X = iris.data[:149, :3]
    y = iris.target[:149]
    columns = ["vA", "vB", "vC"]
    X_train = pandas.DataFrame(X, columns=columns)
    pipeline = Pipeline(
        steps=[
            ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
            ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
        ]
    )
    pipeline.fit(X_train, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    torch_model = hummingbird.ml.convert_batch(
        pipeline, "torch", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size=remainder_size
    )
    self.assertIsNotNone(torch_model)
    np.testing.assert_allclose(
        pipeline.predict_proba(X_train), torch_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
    )

# Test batch with pandas ts.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
def test_pandas_batch_ts(self):
    """Batched torch.jit conversion of a sklearn Pipeline fed with a pandas DataFrame."""
    import pandas

    max_depth = 10
    iris = datasets.load_iris()
    X = iris.data[:149, :3]
    y = iris.target[:149]
    columns = ["vA", "vB", "vC"]
    X_train = pandas.DataFrame(X, columns=columns)
    pipeline = Pipeline(
        steps=[
            ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
            ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
        ]
    )
    pipeline.fit(X_train, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    torch_model = hummingbird.ml.convert_batch(
        pipeline, "torch.jit", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
    )
    self.assertTrue(torch_model is not None)
    np.testing.assert_allclose(
        pipeline.predict_proba(X_train), torch_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
    )

# Test batch with pandas onnx.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
@unittest.skipIf(not onnx_runtime_installed(), reason="ONNXML test require ONNX and ORT")
def test_pandas_batch_onnx(self):
    """Batched ONNX conversion of a sklearn Pipeline fed with a pandas DataFrame."""
    import pandas

    max_depth = 10
    iris = datasets.load_iris()
    X = iris.data[:149, :3]
    y = iris.target[:149]
    columns = ["vA", "vB", "vC"]
    X_train = pandas.DataFrame(X, columns=columns)
    pipeline = Pipeline(
        steps=[
            ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
            ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
        ]
    )
    pipeline.fit(X_train, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(
        pipeline, "onnx", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
    )
    self.assertTrue(hb_model is not None)
    np.testing.assert_allclose(
        pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
    )

# Test batch with pandas from onnxml.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
def test_pandas_batch_onnxml(self):
    """Batched conversion of an ONNX-ML model (exported from a sklearn Pipeline) with pandas input."""
    import pandas

    max_depth = 10
    iris = datasets.load_iris()
    X = iris.data[:, :3]
    y = iris.target
    columns = ["vA", "vB", "vC"]
    X_train = pandas.DataFrame(X, columns=columns)
    pipeline = Pipeline(
        steps=[
            ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
            ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
        ]
    )
    pipeline.fit(X_train, y)

    # Export the fitted pipeline to ONNX-ML first; one double column per input feature.
    onnx_ml_model = convert_sklearn(
        pipeline,
        initial_types=[
            ("vA", DoubleTensorType([X.shape[0], 1])),
            ("vB", DoubleTensorType([X.shape[0], 1])),
            ("vC", DoubleTensorType([X.shape[0], 1])),
        ],
        target_opset=9,
    )

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(
        onnx_ml_model, "onnx", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
    )
    self.assertTrue(hb_model is not None)
    np.testing.assert_allclose(
        pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
    )

# Test batch with pandas tvm.
@unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
def test_pandas_batch_tvm(self):
    """Batched TVM conversion of a sklearn Pipeline fed with a pandas DataFrame."""
    import pandas

    max_depth = 10
    iris = datasets.load_iris()
    X = iris.data[:149, :3]
    y = iris.target[:149]
    columns = ["vA", "vB", "vC"]
    X_train = pandas.DataFrame(X, columns=columns)
    pipeline = Pipeline(
        steps=[
            ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
            ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
        ]
    )
    pipeline.fit(X_train, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size
    hb_model = hummingbird.ml.convert_batch(
        pipeline, "tvm", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
    )
    self.assertTrue(hb_model is not None)
    np.testing.assert_allclose(
        pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
    )

# Check converter with model name set as extra_config.
@unittest.skipIf(
    not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
)
@unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
def test_lightgbm_pytorch_extra_config(self):
    """The ONNX_OUTPUT_MODEL_NAME extra_config entry must end up as the ONNX graph name."""
    warnings.filterwarnings("ignore")
    X = [[0, 1], [1, 1], [2, 0]]
    X = np.array(X, dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)
    model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    model.fit(X, y)

    # Create the ONNX-ML model from the fitted booster.
    onnx_ml_model = convert_lightgbm(
        model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
    )

    # Convert to ONNX with a custom model name and verify it is propagated.
    model_name = "hummingbird.ml.test.lightgbm"
    onnx_model = hummingbird.ml.convert(onnx_ml_model, "onnx", extra_config={constants.ONNX_OUTPUT_MODEL_NAME: model_name})
    assert onnx_model.model.graph.name == model_name

# Test max fuse depth configuration in TVM.
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
def test_tvm_max_fuse(self):
    """TVM conversion honours the TVM_MAX_FUSE_DEPTH extra_config setting."""
    warnings.filterwarnings("ignore")
    X = [[0, 1], [1, 1], [2, 0]]
    X = np.array(X, dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)
    model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    model.fit(X, y)

    hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

# Test TVM without padding returns an error if sizes don't match.
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
def test_tvm_no_padding(self):
    """Without input padding, predicting on fewer rows than the compiled shape must fail."""
    warnings.filterwarnings("ignore")
    np.random.seed(0)
    X = np.random.rand(100, 20)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(2, size=100)
    model = lgb.LGBMRegressor(n_estimators=10)
    model.fit(X, y)

    hb_model = hummingbird.ml.convert(model, "tvm", X)
    self.assertIsNotNone(hb_model)
    # TVM compiled the model for 100 rows; 98 rows should be rejected.
    self.assertRaises(AssertionError, hb_model.predict, X[:98])

# Test padding in TVM.
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
def test_tvm_padding(self):
    """With TVM_PAD_INPUT enabled, smaller inputs are padded and still predict correctly."""
    warnings.filterwarnings("ignore")
    np.random.seed(0)
    X = np.random.rand(100, 20)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(2, size=100)
    model = lgb.LGBMRegressor(n_estimators=10)
    model.fit(X, y)

    hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X[:98]), hb_model.predict(X[:98]), rtol=1e-06, atol=1e-06)

# Test padding in TVM does not create problems when not necessary.
@unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
def test_tvm_padding_2(self):
    """TVM_PAD_INPUT must be a no-op when the input already matches the compiled shape."""
    warnings.filterwarnings("ignore")
    X = [[0, 1], [1, 1], [2, 0]]
    X = np.array(X, dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)
    model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    model.fit(X, y)

    hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
    self.assertIsNotNone(hb_model)
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

# Test max string length.
def test_max_str_length(self):
    """LabelEncoder over strings with an explicit MAX_STRING_LENGTH extra_config entry."""
    model = LabelEncoder()
    data = [
        "paris",
        "tokyo",
        "amsterdam",
        "tokyo",
    ]
    model.fit(data)

    torch_model = hummingbird.ml.convert(model, "torch", extra_config={constants.MAX_STRING_LENGTH: 20})
    np.testing.assert_allclose(model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06)
class TestONNXLabelEncoder(unittest.TestCase):
    """Tests for converting scikit-learn LabelEncoder models that were first exported to ONNX-ML."""

    # Test LabelEncoder with longs
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_model_label_encoder_int_onnxml(self):
        """int64 LabelEncoder: Hummingbird's ONNX output must match onnxruntime on the ONNX-ML model."""
        model = LabelEncoder()
        X = np.array([1, 4, 5, 2, 0, 2], dtype=np.int64)
        model.fit(X)

        # Export to ONNX-ML.
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", LongTensorType_onnx(X.shape))])

        # Convert the ONNX-ML model with Hummingbird.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Predictions for the ONNX-ML model via onnxruntime.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = np.array(session.run(output_names, inputs)).ravel()

        # Predictions for the converted ONNX model.
        onnx_pred = onnx_model.transform(X).ravel()

        # Both backends must agree.
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=1e-06, atol=1e-06)

    # Test LabelEncoder with strings on Pytorch >=1.8.0
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.8.0"),
        reason="PyTorch exporter don't support nonzero until version 1.8.0",
    )
    def test_model_label_encoder_str_onnxml(self):
        """String LabelEncoder: requires PyTorch >= 1.8.0 (nonzero export support)."""
        model = LabelEncoder()
        data = [
            "paris",
            "milan",
            "amsterdam",
            "tokyo",
        ]
        model.fit(data)

        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4]))])
        onnx_model = convert(onnx_ml_model, "onnx", data)

        # Predictions for the ONNX-ML model via onnxruntime.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: data}
        onnx_ml_pred = session.run(output_names, inputs)

        # Predictions for the converted ONNX model.
        onnx_pred = onnx_model.transform(data)

        # Both backends must agree.
        np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred, rtol=1e-06, atol=1e-06)

    # Test LabelEncoder String failcase for torch < 1.8.0
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(
        LooseVersion(torch.__version__) >= LooseVersion("1.8.0"),
        reason="PyTorch exporter supports nonzero only from version 1.8.0 and should fail on older versions",
    )
    def test_le_string_raises_rt_onnx(self):
        """On PyTorch < 1.8.0 a string LabelEncoder conversion must raise RuntimeError."""
        warnings.filterwarnings("ignore")
        model = LabelEncoder()
        data = [
            "paris",
            "milan",
            "amsterdam",
            "tokyo",
        ]
        model.fit(data)

        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4]))])

        # Create ONNX model by calling converter, should raise error for strings
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", data)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # if the model is corrupt, we should get a RuntimeError
    def test_onnx_label_encoder_converter_raises_rt(self):
        """Corrupting an attribute name in the ONNX-ML graph must make the converter raise RuntimeError."""
        warnings.filterwarnings("ignore")
        model = LabelEncoder()
        X = np.array([1, 4, 5, 2, 0, 2], dtype=np.int64)
        model.fit(X)

        # Generate test input, then deliberately corrupt the first node's attribute name.
        onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)