def _test_binarizer_converter(self, threshold):
    """Run a fitted Binarizer through ONNX-ML and through Hummingbird's ONNX backend.

    Args:
        threshold: Value forwarded to ``Binarizer(threshold=...)``.

    Returns:
        Tuple ``(onnx_ml_pred, onnx_pred)``: the first onnxruntime output of
        the ONNX-ML model and the ``transform`` result of the converted
        model, both computed on the same fixed 4x3 float32 input.
    """
    warnings.filterwarnings("ignore")
    X = np.array(
        [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
        dtype=np.float32,
    )

    # Reference scikit-learn model
    skl_model = Binarizer(threshold=threshold)
    skl_model.fit(X)

    # scikit-learn -> ONNX-ML
    input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
    onnx_ml_model = convert_sklearn(skl_model, initial_types=input_spec)

    # ONNX-ML -> ONNX through the converter under test
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Predictions of the ONNX-ML model
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: X}
    onnx_ml_pred = ort_session.run(output_names, feed)[0]

    # Predictions of the converted ONNX model
    onnx_pred = onnx_model.transform(X)

    return onnx_ml_pred, onnx_pred
def test_model_label_encoder_int_onnxml(self):
    """An int64 LabelEncoder must give the same output via ONNX-ML and via the converted ONNX model."""
    encoder = LabelEncoder()
    X = np.array([1, 4, 5, 2, 0, 2], dtype=np.int64)
    encoder.fit(X)

    # scikit-learn -> ONNX-ML
    input_spec = [("input", LongTensorType_onnx(X.shape))]
    onnx_ml_model = convert_sklearn(encoder, initial_types=input_spec)

    # ONNX-ML -> ONNX through the converter under test
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Predictions of the ONNX-ML model, flattened to 1-D
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: X}
    onnx_ml_pred = np.array(ort_session.run(output_names, feed)).ravel()

    # Predictions of the converted ONNX model, flattened to 1-D
    onnx_pred = onnx_model.transform(X).ravel()

    np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=1e-06, atol=1e-06)
def test_vectorassembler_converter(self):
    """Compare Spark's VectorAssembler output on iris with the Hummingbird torch conversion."""
    iris = load_iris()
    features = ["sepal_length", "sepal_width", "petal_length", "petal_width"]

    # Build the frame with the target column, then keep only the four feature columns.
    full_df = pd.DataFrame(
        data=np.c_[iris["data"], iris["target"]],
        columns=features + ["target"],
    )
    pd_df = full_df[features]
    df = sql.createDataFrame(pd_df)

    assembler = VectorAssembler(inputCols=features, outputCol="features")

    torch_model = convert(assembler, "torch", df)
    self.assertIsNotNone(torch_model)

    # Spark side: turn each assembled vector into a numpy array
    spark_output = assembler.transform(df).toPandas()
    spark_output["features"] = spark_output["features"].map(
        lambda vec: np.array(vec.toArray())
    )
    spark_output_np = spark_output["features"].to_numpy()

    # Hummingbird side
    torch_output_np = torch_model.transform(pd_df)

    np.testing.assert_allclose(
        np.vstack(spark_output_np),
        torch_output_np,
        rtol=1e-06,
        atol=1e-06,
    )
def test_model_label_encoder_str_onnxml(self):
    """A string LabelEncoder must give the same output via ONNX-ML and via the converted ONNX model."""
    encoder = LabelEncoder()
    data = ["paris", "milan", "amsterdam", "tokyo"]
    encoder.fit(data)

    # scikit-learn -> ONNX-ML, then ONNX-ML -> ONNX
    input_spec = [("input", StringTensorType_onnx([4]))]
    onnx_ml_model = convert_sklearn(encoder, initial_types=input_spec)
    onnx_model = convert(onnx_ml_model, "onnx", data)

    # Predictions of the ONNX-ML model
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: data}
    onnx_ml_pred = ort_session.run(output_names, feed)

    # Predictions of the converted ONNX model
    onnx_pred = onnx_model.transform(data)

    # Only the first ONNX-ML output is compared.
    np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred, rtol=1e-06, atol=1e-06)
def _test_lgbm(self, X, model, extra_config=None):
    """Convert a LightGBM model to ONNX-ML and to ONNX, and collect both predictions.

    Args:
        X: Input array; ``X.shape[0]`` and ``X.shape[1]`` define the ONNX input type.
        model: Fitted LightGBM model passed to ``convert_lightgbm``.
        extra_config: Optional dict forwarded to ``convert``. Defaults to a
            fresh empty dict per call, so no state is shared between calls.

    Returns:
        Tuple ``(onnx_ml_pred, onnx_pred, output_names)``. ``onnx_ml_pred`` is
        a list with one slot per ONNX-ML output: slot 1 holds the output whose
        name contains ``"label"``, slot 0 holds the other output. ``onnx_pred``
        is the ``predict`` result when there is a single output, otherwise a
        list holding ``predict_proba`` in slot 0 and ``predict`` in slot 1.
    """
    if extra_config is None:
        extra_config = {}

    # Create ONNX-ML model
    onnx_ml_model = convert_lightgbm(
        model,
        initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
        target_opset=9,
    )

    # Create ONNX model
    onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in session.get_outputs()]
    onnx_ml_pred = [[] for _ in output_names]
    inputs = {session.get_inputs()[0].name: X}
    pred = session.run(output_names, inputs)
    for name, value in zip(output_names, pred):
        if "label" in name:
            onnx_ml_pred[1] = value
        else:
            onnx_ml_pred[0] = value

    # Get the predictions for the ONNX model
    onnx_pred = [[] for _ in output_names]
    if len(output_names) == 1:  # regression
        onnx_pred = onnx_model.predict(X)
    else:  # classification
        onnx_pred[0] = onnx_model.predict_proba(X)
        onnx_pred[1] = onnx_model.predict(X)

    return onnx_ml_pred, onnx_pred, output_names
def test_lightgbm_onnx_pytorch(self):
    """An ONNX-ML LightGBM regressor converted to torch must match onnxruntime's output."""
    warnings.filterwarnings("ignore")
    X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)

    regressor = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    regressor.fit(X, y)

    # LightGBM -> ONNX-ML
    input_spec = [("input", FloatTensorType([X.shape[0], X.shape[1]]))]
    onnx_ml_model = convert_lightgbm(regressor, initial_types=input_spec, target_opset=9)

    # ONNX-ML -> torch through the converter under test
    pt_model = convert(onnx_ml_model, "torch", X)
    assert pt_model

    # Predictions of the ONNX-ML model
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: X}
    onnx_ml_pred = ort_session.run(output_names, feed)

    np.testing.assert_allclose(onnx_ml_pred[0].flatten(), pt_model.predict(X))
def compile(self):
    """Convert the LightGBM model to a PyTorch model and store internally.

    The trained booster is wrapped in a scikit-learn style LightGBM estimator
    (regressor when the first output feature's type is ``NUMBER``, classifier
    otherwise), converted with Hummingbird's ``convert(..., "torch")``, and
    the ``model`` attribute of the result is stored in ``self.compiled_model``.

    Raises:
        ValueError: If ``self.lgb_booster`` is ``None``.
    """
    if self.lgb_booster is None:
        raise ValueError("Model has not been trained yet.")

    # ``keys()`` may return a list or a dict view; a view is not
    # subscriptable, so materialise it before taking the first name.
    output_feature_name = list(self.output_features.keys())[0]
    output_feature = self.output_features[output_feature_name]

    # https://github.com/microsoft/LightGBM/issues/1942#issuecomment-453975607
    if output_feature.type() == NUMBER:
        gbm_sklearn_cls = lgb.LGBMRegressor
    else:
        gbm_sklearn_cls = lgb.LGBMClassifier
    gbm_sklearn = gbm_sklearn_cls(feature_name=list(self.input_features.keys()))  # , **params)

    # Inject the already-trained booster so the wrapper is treated as fitted.
    gbm_sklearn._Booster = self.lgb_booster
    gbm_sklearn.fitted_ = True
    gbm_sklearn._n_features = len(self.input_features)
    if isinstance(gbm_sklearn, lgb.LGBMClassifier):
        # Non-CATEGORY classifier outputs are given two classes.
        gbm_sklearn._n_classes = output_feature.num_classes if output_feature.type() == CATEGORY else 2

    hb_model = convert(gbm_sklearn, "torch", extra_config={"tree_implementation": "gemm"})
    self.compiled_model = hb_model.model
def test_quantilediscretizer_converter(self):
    """Compare Spark's fitted QuantileDiscretizer on ``sepal_length`` with its torch conversion."""
    iris = load_iris()
    features = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    pd_df = pd.DataFrame(
        data=np.c_[iris["data"], iris["target"]],
        columns=features + ["target"],
    )
    # Only the discretised column is sent to Spark.
    df = sql.createDataFrame(pd_df).select("sepal_length")

    discretizer = QuantileDiscretizer(
        inputCol="sepal_length",
        outputCol="sepal_length_bucket",
        numBuckets=2,
    )
    fitted = discretizer.fit(df)

    torch_model = convert(fitted, "torch", df)
    self.assertIsNotNone(torch_model)

    spark_output = fitted.transform(df).select("sepal_length_bucket").toPandas()
    torch_output_np = torch_model.transform(pd_df[["sepal_length"]])

    np.testing.assert_allclose(
        spark_output.to_numpy(),
        torch_output_np,
        rtol=1e-06,
        atol=1e-06,
    )
def test_one_hot_encoder_onnx_int(self, rtol=1e-06, atol=1e-06):
    """An int32 OneHotEncoder must give the same output via ONNX-ML and via the converted ONNX model.

    Args:
        rtol: Relative tolerance passed to ``np.testing.assert_allclose``.
        atol: Absolute tolerance passed to ``np.testing.assert_allclose``.
    """
    encoder = OneHotEncoder()
    X = np.array([[1, 2, 3]], dtype=np.int32)
    encoder.fit(X)

    # scikit-learn -> ONNX-ML
    input_spec = [("int_input", IntTensorType_onnx(X.shape))]
    onnx_ml_model = convert_sklearn(encoder, initial_types=input_spec)

    # ONNX-ML -> ONNX through the converter under test
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Predictions of the ONNX-ML model (full list of outputs)
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: X}
    onnx_ml_pred = ort_session.run(output_names, feed)

    # Predictions of the converted ONNX model
    onnx_pred = onnx_model.transform(X)

    np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)
def test_pipeline_1(self):
    """A Spark Pipeline with one LogisticRegression stage must match its torch conversion."""
    n_features = 10
    n_total = 100
    classes = 2
    np.random.seed(0)
    warnings.filterwarnings("ignore")

    X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
    y = np.random.randint(classes, size=(n_total, 1))

    # Each row is [label, feature_0, ..., feature_9].
    arr = np.concatenate([y, X], axis=1).reshape(n_total, -1)
    rows = [(int(row[0]), Vectors.dense(row[1:])) for row in arr]
    df = sql.createDataFrame(rows, schema=["label", "features"])

    pipeline = Pipeline(stages=[LogisticRegression()])
    model = pipeline.fit(df)

    # One feature row is used as the sample input for conversion.
    test_df = df.select("features").limit(1)
    torch_model = convert(model, "torch", test_df)
    self.assertIsNotNone(torch_model)

    scored = model.transform(df)

    spark_labels = np.array(scored.select("prediction").collect()).reshape(-1)
    np.testing.assert_allclose(
        spark_labels,
        torch_model.predict(X),
        rtol=1e-06,
        atol=1e-06,
    )

    spark_probs = np.array(scored.select("probability").collect()).reshape(-1, classes)
    np.testing.assert_allclose(
        spark_probs,
        torch_model.predict_proba(X),
        rtol=1e-06,
        atol=1e-06,
    )
def _test_scaler_converter(self, model):
    """Fit a scaler and return its ONNX-ML and converted-ONNX outputs.

    Args:
        model: Unfitted estimator; it is fitted here on a fixed 4x3 float32 array.

    Returns:
        Tuple ``(onnx_ml_pred, onnx_pred)``: the first onnxruntime output of
        the ONNX-ML model and the ``transform`` result of the converted model.
    """
    warnings.filterwarnings("ignore")
    X = np.array(
        [[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0], [1.0, 0.0, -2.0]],
        dtype=np.float32,
    )
    model.fit(X)

    # scikit-learn -> ONNX-ML; the first input dimension is left as None
    input_spec = [("float_input", FloatTensorType([None, X.shape[1]]))]
    onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

    # ONNX-ML -> ONNX through the converter under test
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Predictions of the ONNX-ML model
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: X}
    onnx_ml_pred = ort_session.run(output_names, feed)[0]

    # Predictions of the converted ONNX model
    onnx_pred = onnx_model.transform(X)

    return onnx_ml_pred, onnx_pred
def test_lightgbm_pytorch_extra_config(self):
    """The graph name set through ``extra_config`` must appear on the converted ONNX model."""
    warnings.filterwarnings("ignore")
    X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
    y = np.array([100, -10, 50], dtype=np.float32)

    regressor = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
    regressor.fit(X, y)

    # LightGBM -> ONNX-ML
    onnx_ml_model = convert_lightgbm(
        regressor,
        initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
        target_opset=9,
    )

    # ONNX-ML -> ONNX, with the model name and initial types given via extra_config
    model_name = "hummingbird.ml.test.lightgbm"
    extra_config = {
        constants.ONNX_OUTPUT_MODEL_NAME: model_name,
        constants.ONNX_INITIAL_TYPES: [
            ("input", FloatTensorType([X.shape[0], X.shape[1]])),
        ],
    }
    onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

    assert onnx_model.model.graph.name == model_name
def _test_linear(self, classes):
    """
    Helper for testing conversion of `ai.onnx.ml.LinearClassifier`, obtained
    here from a scikit-learn LogisticRegression.

    This tests `convert_onnx_linear_model` in
    `hummingbird.ml.operator_converters.onnxml_linear`.

    Args:
        classes: Upper bound (exclusive) for the random integer labels.

    Returns:
        Tuple ``(onnx_ml_pred, onnx_pred)``. Each is a list with one slot per
        output: slot 1 holds the output named ``"output_label"``, slot 0 holds
        the other output.
    """
    n_features = 20
    n_total = 100
    np.random.seed(0)
    warnings.filterwarnings("ignore")

    X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
    y = np.random.randint(classes, size=n_total)

    # Reference scikit-learn model
    skl_model = LogisticRegression(solver="liblinear", multi_class="ovr", fit_intercept=True)
    skl_model.fit(X, y)

    # scikit-learn -> ONNX-ML
    input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
    onnx_ml_model = convert_sklearn(skl_model, initial_types=input_spec)

    # ONNX-ML -> ONNX through the converter under test
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Predictions of the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in session.get_outputs()]
    inputs = {session.get_inputs()[0].name: X}
    onnx_ml_pred = [[] for _ in output_names]
    for name, value in zip(output_names, session.run(output_names, inputs)):
        slot = 1 if name == "output_label" else 0
        onnx_ml_pred[slot] = value

    # Predictions of the converted ONNX model, using the same output names and feed
    session = ort.InferenceSession(onnx_model.SerializeToString())
    onnx_pred = [[] for _ in output_names]
    for name, value in zip(output_names, session.run(output_names, inputs)):
        slot = 1 if name == "output_label" else 0
        onnx_pred[slot] = value

    return onnx_ml_pred, onnx_pred
def convert(self, model, data, args, model_name):
    """Convert ``model`` for ``self.backend`` and return how long the conversion took.

    ``self.configure`` is called first, then the test split is prepared with
    ``self.get_data``. Only the conversion call itself runs inside the
    ``Timer`` context. The converted model is stored in ``self.model``.

    NOTE(review): the ``convert`` called in the body is presumably the
    module-level converter rather than this method (the call is not made
    through ``self``) — confirm against the file's imports.

    Args:
        model: Model to convert.
        data: Dataset object; its ``X_test`` attribute is used as sample input.
        args: Forwarded to ``self.configure``.
        model_name: Not used in this method.

    Returns:
        ``t.interval`` of the ``Timer`` wrapped around the conversion.
    """
    self.configure(data, model, args)
    test_data = self.get_data(data.X_test)

    with Timer() as t:
        converted = convert(
            model,
            self.backend,
            test_data,
            device=self.params["device"],
            extra_config={
                constants.N_THREADS: self.params["nthread"],
                constants.BATCH_SIZE: self.params["batch_size"],
            },
        )
        self.model = converted

    return t.interval
def get_all_backends(constructor, clf, include_sklearn=True, include_hummingbird=False, **kwargs):
    """Build a name -> model mapping of ``clf`` wrapped for each backend.

    Args:
        constructor: Callable invoked as ``constructor(clf, backend=..., **kwargs)``
            for numpy, and additionally with ``device="cpu"`` / ``device="cuda"``
            for torch.
        clf: The classifier to wrap.
        include_sklearn: When true, ``clf`` itself is stored under ``"sklearn"``.
        include_hummingbird: When true, two separate ``convert(clf, "pytorch")``
            results are added, the second one after calling ``.to("cuda")`` on it.
        **kwargs: Extra keyword arguments forwarded to every ``constructor`` call.

    Returns:
        Dict with keys in this order: ``"sklearn"`` (optional), ``"numpy"``,
        ``"torch_cpu"``, ``"torch_cuda"``, then optionally
        ``"hummingbird_cpu"`` and ``"hummingbird_cuda"``.
    """
    backends = {"sklearn": clf} if include_sklearn else {}

    backends["numpy"] = constructor(clf, backend="numpy", **kwargs)
    backends["torch_cpu"] = constructor(clf, backend="torch", device="cpu", **kwargs)
    backends["torch_cuda"] = constructor(clf, backend="torch", device="cuda", **kwargs)

    if include_hummingbird:
        backends["hummingbird_cpu"] = convert(clf, "pytorch")
        backends["hummingbird_cuda"] = convert(clf, "pytorch").to("cuda")

    return backends
def test_lgbm_onnxml_model_binary_float64(self):
    """A binary LGBMClassifier converted straight to ONNX must predict the same labels.

    NOTE(review): the name says float64, but the input is cast to float32
    before fitting — confirm which dtype this test is meant to cover.
    """
    warnings.filterwarnings("ignore")
    n_features = 28
    n_total = 100
    np.random.seed(0)

    X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
    y = np.random.randint(2, size=n_total)

    # Reference LightGBM model
    classifier = lgb.LGBMClassifier()
    classifier.fit(X, y)

    onnx_model = convert(classifier, "onnx", X)

    np.testing.assert_allclose(classifier.predict(X), onnx_model.predict(X))
def test_pipeline3(self):
    """A Spark pipeline (two discretizers, an assembler, logistic regression) must match its torch conversion on iris."""
    iris = load_iris()
    raw_features = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    pd_df = pd.DataFrame(
        data=np.c_[iris["data"], iris["target"]],
        columns=raw_features + ["label"],
    )
    df = sql.createDataFrame(pd_df)

    quantile1 = QuantileDiscretizer(
        inputCol="sepal_length", outputCol="sepal_length_bucket", numBuckets=2
    )
    quantile2 = QuantileDiscretizer(
        inputCol="sepal_width", outputCol="sepal_width_bucket", numBuckets=2
    )

    # The assembler consumes both bucket columns followed by the raw columns.
    assembler_inputs = ["sepal_length_bucket", "sepal_width_bucket"] + raw_features
    assembler = VectorAssembler(inputCols=assembler_inputs, outputCol="features")

    pipeline = Pipeline(stages=[quantile1, quantile2, assembler, LogisticRegression()])
    model = pipeline.fit(df)

    # Drop the label: only the four raw columns are used from here on.
    kept_columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    df = df.select(kept_columns)
    pd_df = pd_df[kept_columns]

    torch_model = convert(model, "torch", df)
    self.assertIsNotNone(torch_model)

    spark_labels = np.array(model.transform(df).select("prediction").collect()).reshape(-1)
    np.testing.assert_allclose(
        spark_labels,
        torch_model.predict(pd_df),
        rtol=1e-06,
        atol=1e-06,
    )

    spark_probs = np.array(model.transform(df).select("probability").collect()).reshape(-1, 3)
    np.testing.assert_allclose(
        spark_probs,
        torch_model.predict_proba(pd_df),
        rtol=1e-06,
        atol=1e-05,
    )
def __init__(self, map_data, sensors: set, k_names=None, acq="gaussian_ei", acq_mod="masked", acq_fusion="decoupled", d=1.0):
    """Set up per-sensor Gaussian-process models and position grids for ``map_data``.

    Args:
        map_data: 2-D array; cells equal to 0 are collected into ``self.vector_pos``.
        sensors: Set of sensor identifiers; one GP is created per sensor.
        k_names: Kernel name per sensor, paired with ``sensors`` via ``zip``.
            Defaults to ``"RBF"`` for every sensor. Only ``"RBF"`` is implemented.
        acq: Acquisition function name, stored in ``self.acquisition``.
        acq_mod: Acquisition modifier name, stored in ``self.acq_mod``.
        acq_fusion: Acquisition fusion strategy, stored in ``self.acq_fusion``.
        d: Stored in ``self.proportion``.

    Raises:
        NotImplementedError: If a sensor is paired with a kernel other than
            ``"RBF"``. Previously such a sensor was left without a valid model
            and failed later with an unrelated error.
    """
    if k_names is None:
        k_names = ["RBF"] * len(sensors)

    self.map_data = map_data
    self.acquisition = acq  # 'gaussian_sei' 'gaussian_ei' 'maxvalue_entropy_search' 'gaussian_pi'
    self.acq_mod = acq_mod  # 'masked' 'split_path' 'truncated' 'normal'
    self.k_names = k_names  # "RBF" "Matern" "RQ"
    self.sensors = sensors
    self.gps = dict()
    self.train_inputs = [np.array([[], []])]
    self.train_targets = dict()
    self.proportion = d
    self.mus = dict()
    self.stds = dict()
    self.has_calculated = dict()

    for sensor, kernel in zip(sensors, k_names):
        if kernel != "RBF":
            # Fail fast instead of leaving this sensor half-initialised.
            raise NotImplementedError(
                f"Unsupported kernel {kernel!r} for sensor {sensor!r}; only 'RBF' is implemented."
            )
        helper = gpr.GaussianProcessRegressor(kernel=kernels.RBF(100), alpha=1e-7)
        # The GP is converted to a torch model and moved to the GPU.
        self.gps[sensor] = convert(helper, 'torch')
        self.gps[sensor].to('cuda')
        self.train_targets[sensor] = np.array([])
        self.mus[sensor] = np.array([])
        self.stds[sensor] = np.array([])
        self.has_calculated[sensor] = False

    # Every (x, y) grid position, with x running over map_data.shape[1].
    self.all_vector_pos = np.mgrid[0:self.map_data.shape[1]:1,
                                   0:self.map_data.shape[0]:1].reshape(2, -1).T
    # Positions whose map value is 0; fliplr turns (row, col) into (col, row).
    self.vector_pos = np.fliplr(
        np.asarray(np.where(self.map_data == 0)).reshape(2, -1).T)

    self.acq_fusion = acq_fusion
    # simple_max: maximum value found
    # max_sum: sum of acq on max for each maximum
    self.splitted_goals = []
    self.nans = None
def _test_sv(self, classes, mode="torch"):
    """
    Helper for testing conversion of `ai.onnx.ml.SVMClassifier`, obtained
    here from a scikit-learn SVC.

    Args:
        classes: Upper bound (exclusive) for the random integer labels.
        mode: Backend name passed to ``convert`` (default ``"torch"``).

    Returns:
        Tuple ``(onnx_ml_pred, pred)``. ``onnx_ml_pred`` is a list with one
        slot per ONNX-ML output: slot 1 holds the output whose name contains
        ``"label"``, slot 0 holds the other output. ``pred`` is the
        ``predict`` result of the converted model.
    """
    n_features = 20
    n_total = 100
    np.random.seed(0)
    warnings.filterwarnings("ignore")

    X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
    y = np.random.randint(classes, size=n_total)

    # Reference scikit-learn model
    svc = SVC()
    svc.fit(X, y)

    # scikit-learn -> ONNX-ML
    input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
    onnx_ml_model = convert_sklearn(svc, initial_types=input_spec)

    # Predictions of the ONNX-ML model
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: X}
    onnx_ml_pred = [[] for _ in output_names]
    for name, value in zip(output_names, ort_session.run(output_names, feed)):
        slot = 1 if "label" in name else 0
        onnx_ml_pred[slot] = value

    # Convert for the requested backend and predict
    converted = convert(onnx_ml_model, mode, X)
    converted_pred = converted.predict(X)

    return onnx_ml_pred, converted_pred
def _test_regressor(self, values):
    """
    Helper for testing conversion of `ai.onnx.ml.LinearRegressor`, obtained
    here from a scikit-learn LinearRegression.

    This tests `convert_onnx_linear_regression_model` in
    `hummingbird.ml.operator_converters.onnxml_linear`.

    Args:
        values: Upper bound (exclusive) for the random integer targets.

    Returns:
        Tuple ``(onnx_ml_pred, onnx_pred)``: the onnxruntime output lists of
        the ONNX-ML model and of the converted ONNX model.
    """
    n_features = 20
    n_total = 100
    np.random.seed(0)
    warnings.filterwarnings("ignore")

    X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
    y = np.random.randint(values, size=n_total)

    # Reference scikit-learn model
    regressor = LinearRegression()
    regressor.fit(X, y)

    # scikit-learn -> ONNX-ML
    input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
    onnx_ml_model = convert_sklearn(regressor, initial_types=input_spec)

    # ONNX-ML -> ONNX through the converter under test
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Predictions of the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in session.get_outputs()]
    inputs = {session.get_inputs()[0].name: X}
    onnx_ml_pred = session.run(output_names, inputs)

    # Predictions of the converted ONNX model, using the same output names and feed
    session = ort.InferenceSession(onnx_model.SerializeToString())
    onnx_pred = session.run(output_names, inputs)

    return onnx_ml_pred, onnx_pred
def test_model_one_hot_encoder_string(self):
    """Run a string OneHotEncoder through ONNX-ML and through the converted ONNX model.

    NOTE(review): this test returns both predictions and asserts nothing,
    so it only checks that conversion and inference run — confirm intent.

    Returns:
        Tuple ``(onnx_ml_pred, onnx_pred)``.
    """
    encoder = OneHotEncoder()
    data = [
        ["a", "r", "x"],
        ["a", "r", "x"],
        ["aaaa", "r", "x"],
        ["a", "r", "xx"],
    ]
    encoder.fit(data)

    # scikit-learn -> ONNX-ML
    input_spec = [("input", StringTensorType_onnx([4, 3]))]
    onnx_ml_model = convert_sklearn(encoder, initial_types=input_spec)

    # ONNX-ML -> ONNX through the converter under test
    onnx_model = convert(onnx_ml_model, "onnx", data)

    # Predictions of the ONNX-ML model
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: data}
    onnx_ml_pred = ort_session.run(output_names, feed)

    # Predictions of the converted ONNX model
    onnx_pred = onnx_model.transform(data)

    return onnx_ml_pred, onnx_pred
def _test_imputer_converter(self, model, mode="onnx"):
    """Fit an imputer and return its ONNX-ML output and its converted-model output.

    Args:
        model: Unfitted estimator; it is fitted here on a fixed 3x2 float32
            array that contains one NaN.
        mode: Backend name passed to ``convert`` (default ``"onnx"``).

    Returns:
        Tuple ``(onnx_ml_pred, pred)``: the first onnxruntime output of the
        ONNX-ML model and the ``transform`` result of the converted model.
    """
    warnings.filterwarnings("ignore")
    X = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
    model.fit(X)

    # scikit-learn -> ONNX-ML
    input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
    onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

    # Predictions of the ONNX-ML model
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: X}
    onnx_ml_pred = ort_session.run(output_names, feed)[0]

    # Convert for the requested backend and transform
    converted = convert(onnx_ml_model, mode, X)
    converted_pred = converted.transform(X)

    return onnx_ml_pred, converted_pred
def test_scaler_converter_float_64(self):
    """A float64 StandardScaler must give the same output via ONNX-ML and via the converted ONNX model."""
    warnings.filterwarnings("ignore")
    X = np.array(
        [[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0], [1.0, 0.0, -2.0]],
        dtype=np.float64,
    )

    # Reference scikit-learn model
    scaler = StandardScaler()
    scaler.fit(X)

    # scikit-learn -> ONNX-ML with a double input; the first dimension is left as None
    input_spec = [("double_input", DoubleTensorType([None, X.shape[1]]))]
    onnx_ml_model = convert_sklearn(scaler, initial_types=input_spec)

    # ONNX-ML -> ONNX through the converter under test
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Predictions of the ONNX-ML model
    ort_session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [out.name for out in ort_session.get_outputs()]
    feed = {ort_session.get_inputs()[0].name: X}
    onnx_ml_pred = ort_session.run(output_names, feed)[0]

    # Predictions of the converted ONNX model
    onnx_pred = onnx_model.transform(X)

    np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=1e-06, atol=1e-06)
import lightgbm as lgb
import pandas as pd
import torch
from hummingbird.ml import convert

# Script: time prediction with a stored LightGBM model, with its Hummingbird
# PyTorch conversion, and with a torch.jit traced version of that conversion.
# NOTE(review): Path, joblib and time are used below but not imported in this
# excerpt — presumably imported elsewhere in the file; confirm.

path = Path('data')
X_test = pd.read_csv(path / 'X_test.csv')
y_test = pd.read_csv(path / 'y_test.csv')
lgb_model = joblib.load('model.pkl')

# 1) Native LightGBM prediction time.
begin = time()
pred = lgb_model.predict(X_test)
total = time() - begin
print('LightGBM time:', total, 's')

# 2) Hummingbird conversion; only predict() is timed, not convert().
torch_model = convert(lgb_model, 'pytorch')
begin = time()
torch_pred = torch_model.predict(X_test.to_numpy())
total = time() - begin
print('PyTorch time:', total, 's')
# NOTE(review): if both are arrays, `pred == torch_pred` compares element-wise
# and prints an array, not a single True/False — confirm intent.
print('Are predictions equal:', pred == torch_pred)

# 3) Traced model; the example input has 30 columns.
# NOTE(review): presumably X_test has 30 feature columns — confirm.
torch_model = torch.jit.trace(torch_model, example_inputs=torch.randn(1, 30))
begin = time()
torch_pred = torch_model.forward(torch.tensor(X_test.to_numpy()))
total = time() - begin
# This figure is for the traced model, although the label is the same as above.
print('PyTorch time:', total, 's')
import pprint
def convert(model, name=None, initial_types=None, doc_string='', target_opset=None,
            targeted_onnx=onnx.__version__, custom_conversion_functions=None,
            custom_shape_calculators=None, without_onnx_ml=False, zipmap=True):
    '''
    Produce an ONNX model equivalent to the given lightgbm model.

    The supported lightgbm modules are listed below.

    * `LGBMClassifiers <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html>`_
    * `LGBMRegressor <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMRegressor.html>`_
    * `Booster <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html>`_

    :param model: A LightGBM model. A ``lightgbm.Booster`` is wrapped in ``WrappedBooster`` first.
    :param name: The name of the graph (type: GraphProto) in the produced ONNX model (type: ModelProto).
        A random hex UUID is used when omitted.
    :param initial_types: a python list. Each element is a tuple of a variable name and a type defined in
        data_types.py. Required.
    :param doc_string: A string attached onto the produced ONNX model
    :param target_opset: number, for example, 7 for ONNX 1.2, and 8 for ONNX 1.3. A falsy value selects
        ``get_maximum_opset_supported()``.
    :param targeted_onnx: A string (for example, '1.1.2' and '1.2') used to specify the targeted ONNX version of the
        produced model. If ONNXMLTools cannot find a compatible ONNX python package, an error may be thrown.
    :param custom_conversion_functions: a dictionary for specifying the user customized conversion function
    :param custom_shape_calculators: a dictionary for specifying the user customized shape calculator
    :param without_onnx_ml: whether to generate a model composed by ONNX operators only, or to allow the converter
        to use ONNX-ML operators as well. When true, the ONNX-ML model is converted further with Hummingbird.
    :param zipmap: forwarded to ``parse_lightgbm``; controls operator ZipMap in the ONNX graph
    :return: An ONNX model (type: ModelProto) which is equivalent to the input lightgbm model
    :raises ValueError: if ``initial_types`` is None
    :raises RuntimeError: if ``without_onnx_ml`` is set and Hummingbird is not installed
    '''
    if initial_types is None:
        raise ValueError('Initial types are required. See usage of convert(...) in '
                         'onnxmltools.convert.lightgbm.convert for details')
    if without_onnx_ml and not hummingbird_installed():
        raise RuntimeError(
            'Hummingbird is not installed. Please install hummingbird to use this feature: pip install hummingbird-ml'
        )

    if isinstance(model, lightgbm.Booster):
        model = WrappedBooster(model)
    if name is None:
        name = uuid4().hex
    target_opset = target_opset or get_maximum_opset_supported()

    topology = parse_lightgbm(
        model,
        initial_types,
        target_opset,
        custom_conversion_functions,
        custom_shape_calculators,
        zipmap=zipmap,
    )
    topology.compile()
    onnx_ml_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)

    if not without_onnx_ml:
        return onnx_ml_model

    # Imported lazily and under an alias so it does not shadow this function's own name.
    from hummingbird.ml import constants, convert as hummingbird_convert

    # ONNX_INITIAL_TYPES is not forwarded here; only the graph name and the opset are.
    extra_config = {
        constants.ONNX_OUTPUT_MODEL_NAME: name,
        constants.ONNX_TARGET_OPSET: target_opset,
    }
    return hummingbird_convert(onnx_ml_model, "onnx", extra_config=extra_config).model
import numpy as np
import lightgbm as lgb
from hummingbird.ml import convert

# Notebook export: train a LightGBM binary classifier on random data and run
# it through Hummingbird's torch backend. The `# In[n]:` markers are the
# original notebook cell boundaries.

# Create some random data for binary classification.
num_classes = 2
X = np.random.rand(200000, 28)
y = np.random.randint(num_classes, size=200000)

# In[2]:

# Create and train a model (LightGBM in this case).
model = lgb.LGBMClassifier()
model.fit(X, y)

# In[3]:

# Use Hummingbird to convert the model to PyTorch.
hb_model = convert(model, 'torch')

# In[4]:

# The %%timeit cell magic is commented out; the prediction is run once directly instead.
# get_ipython().run_cell_magic('timeit', '-r 3', '\n# Run Hummingbird on CPU - By default CPU execution is used in Hummingbird.\nhb_model.predict(X)')
hb_model.predict(X)

# In[5]:

# The GPU cell is commented out entirely, so nothing runs on 'cuda' in this script.
# get_ipython().run_cell_magic('timeit', '-r 3', "\n# Run Hummingbird on GPU (Note that you must have a GPU-enabled machine).\nhb_model.to('cuda')\nhb_model.predict(X)")
from hummingbird.ml import convert
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from time import time

# Script: compare the time of 50 predict() calls for a scikit-learn random
# forest, its Hummingbird torch conversion on CPU, and the same model on 'cuda'.

X, y = load_breast_cancer(return_X_y=True)
skl_model = RandomForestClassifier(n_estimators=1000, max_depth=7)
skl_model.fit(X, y)

# Baseline: 50 scikit-learn predictions (fitting is not timed).
t0 = time()
for i in range(50):
    pred = skl_model.predict(X)
print(time() - t0)

# t0 is taken before convert(), so the CPU figure printed below includes the
# conversion time, which is also kept separately in tf.
t0 = time()
model = convert(skl_model, 'torch')
tf = time()-t0
for i in range(50):
    pred_cpu_hb = model.predict(X)
print(time() - t0)

# GPU run: the timer covers the move to 'cuda' and 50 predictions; tf is added
# back, presumably so this figure also includes the conversion time.
t0 = time()
model.to('cuda')
for i in range(50):
    pred_gpu_hb = model.predict(X)
print(time() - t0 + tf)
from hummingbird.ml import convert
import pickle
import torch

# Script: load a pickled scikit-learn model, convert it with Hummingbird's
# 'pytorch' backend and save the converted model's state dict.

print("loading the sklearn model: ")
# Open the file in a context manager so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load trusted model files.
with open("D:/Chinmay/ML Pipeline/Trained model/mode_1_20201006-083222 - Copy", "rb") as model_file:
    rud_model = pickle.load(model_file)
rud_model.verbose = False
print(rud_model.n_estimators, rud_model.max_depth, rud_model.max_features)

print("Model loaded successfully:: Now converting to hummingbird model")
# rud_model is rebound to the converted model from here on.
rud_model = convert(rud_model, 'pytorch')
print("Converted Sklearn model to : ", type(rud_model))

torch.save(rud_model.state_dict(), "hummingbird_models/rud")
# In[3]: # Use ONNXMLTOOLS to convert the model to ONNXML. initial_types = [("input", FloatTensorType([X.shape[0], X.shape[1]]))] # Define the inputs for the ONNX onnx_ml_model = convert_lightgbm( model, initial_types=initial_types, target_opset=9 ) # In[4]: # Use Hummingbird to convert the ONNXML model to ONNX. onnx_model = convert(onnx_ml_model, "onnx", X) # In[5]: # Alternatively we can set the inital types using the extra_config parameters as in the ONNXMLTOOL converter. extra_config = {} extra_config[constants.ONNX_INITIAL_TYPES] = initial_types onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config) # In[6]: get_ipython().run_cell_magic('timeit', '-r 3', '\n# Run the ONNX model on CPU \nonnx_model.predict(X)')
import numpy as np from sklearn import datasets from lightgbm.sklearn import LGBMClassifier from hummingbird.ml import convert import onnxruntime import torch x, y = datasets.load_wine(return_X_y=True) x = x.astype(np.float32) model = LGBMClassifier(n_estimators=5) model.fit(x, y) preds = model.predict_proba(x) pytorch_model = convert(model, "pytorch") torch.onnx.export( pytorch_model.model, (torch.from_numpy(x)), "model.onnx", input_names=["input"], output_names=["output", "probabilities"], dynamic_axes={ "input": { 0: "batch" }, "output": { 0: "batch" }, "probabilities": { 0: "batch"