def _test_binarizer_converter(self, threshold):
        """Run a fitted scikit-learn ``Binarizer`` through both ONNX paths.

        The model is fitted on a small fixed float32 matrix, exported to
        ONNX-ML with ``convert_sklearn`` and then handed to
        ``convert(..., "onnx", X)``.

        :param threshold: forwarded to ``Binarizer(threshold=...)``.
        :return: ``(onnx_ml_pred, onnx_pred)`` -- the first onnxruntime output
            of the ONNX-ML model and the result of ``onnx_model.transform(X)``.
        """
        warnings.filterwarnings("ignore")
        rows = [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]]
        X = np.array(rows, dtype=np.float32)

        # Scikit-learn reference model.
        model = Binarizer(threshold=threshold)
        model.fit(X)

        # Export to ONNX-ML, declaring a single float input shaped like X.
        input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Translate the ONNX-ML graph with the converter under test.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Score the ONNX-ML model with onnxruntime, requesting every output.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)[0]

        # Score the converted model.
        onnx_pred = onnx_model.transform(X)

        return onnx_ml_pred, onnx_pred
Beispiel #2
0
    def test_model_label_encoder_int_onnxml(self):
        """Check that a converted integer ``LabelEncoder`` matches ONNX-ML.

        The encoder is fitted on an int64 vector, exported with
        ``convert_sklearn`` and converted with ``convert(..., "onnx", X)``;
        the flattened outputs of both models must agree within 1e-06.
        """
        X = np.array([1, 4, 5, 2, 0, 2], dtype=np.int64)
        model = LabelEncoder()
        model.fit(X)

        # Export to ONNX-ML with a single int64 input shaped like X.
        input_spec = [("input", LongTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Translate the ONNX-ML graph with the converter under test.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Reference predictions: all onnxruntime outputs, flattened.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        raw_outputs = session.run(output_names, feed)
        onnx_ml_pred = np.array(raw_outputs).ravel()

        # Predictions of the converted model, flattened the same way.
        onnx_pred = onnx_model.transform(X).ravel()

        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=1e-06,
                                   atol=1e-06)
Beispiel #3
0
    def test_vectorassembler_converter(self):
        """Check that a converted Spark ``VectorAssembler`` matches Spark.

        The four iris measurement columns are assembled into a ``features``
        vector by Spark; the stacked Spark vectors must equal the output of
        the converted torch model within 1e-06.
        """
        iris = load_iris()
        features = [
            "sepal_length", "sepal_width", "petal_length", "petal_width"
        ]

        # Build the full frame (measurements + target), then keep only the
        # four measurement columns.
        full_pd_df = pd.DataFrame(data=np.c_[iris["data"], iris["target"]],
                                  columns=features + ["target"])
        pd_df = full_pd_df[[
            "sepal_length", "sepal_width", "petal_length", "petal_width"
        ]]
        test_df = sql.createDataFrame(pd_df)

        model = VectorAssembler(inputCols=features, outputCol="features")

        torch_model = convert(model, "torch", test_df)
        self.assertTrue(torch_model is not None)

        # Turn each Spark vector into a numpy array so the rows can be stacked.
        spark_output = model.transform(test_df).toPandas()
        spark_output["features"] = spark_output["features"].map(
            lambda vec: np.array(vec.toArray()))
        spark_rows = spark_output["features"].to_numpy()

        torch_result = torch_model.transform(pd_df)

        np.testing.assert_allclose(np.vstack(spark_rows),
                                   torch_result,
                                   rtol=1e-06,
                                   atol=1e-06)
Beispiel #4
0
    def test_model_label_encoder_str_onnxml(self):
        """Check that a converted string ``LabelEncoder`` matches ONNX-ML.

        The encoder is fitted on four city names, exported with
        ``convert_sklearn`` and converted with ``convert(..., "onnx", data)``;
        the first ONNX-ML output must equal the converted model's output
        within 1e-06.
        """
        data = ["paris", "milan", "amsterdam", "tokyo"]
        model = LabelEncoder()
        model.fit(data)

        # Export to ONNX-ML with a single string input of length 4.
        input_spec = [("input", StringTensorType_onnx([4]))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        onnx_model = convert(onnx_ml_model, "onnx", data)

        # Reference predictions from onnxruntime (all outputs requested).
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: data}
        onnx_ml_pred = session.run(output_names, feed)

        # Predictions of the converted model.
        onnx_pred = onnx_model.transform(data)

        np.testing.assert_allclose(onnx_ml_pred[0],
                                   onnx_pred,
                                   rtol=1e-06,
                                   atol=1e-06)
Beispiel #5
0
    def _test_lgbm(self, X, model, extra_config=None):
        """Collect ONNX-ML and converted-ONNX predictions for a LightGBM model.

        :param X: 2-D input array; ``X.shape[0]`` and ``X.shape[1]`` define
            the declared ONNX input shape.
        :param model: LightGBM model passed to ``convert_lightgbm``.
        :param extra_config: optional dict forwarded to ``convert``; a fresh
            empty dict is used when omitted.
        :return: ``(onnx_ml_pred, onnx_pred, output_names)``. ``onnx_ml_pred``
            holds the non-label output at index 0 and the label output at
            index 1. ``onnx_pred`` is the ``predict`` result when the model
            has a single output, otherwise ``[predict_proba, predict]``.
        """
        # A default of ``{}`` would be one dict shared by every call; create
        # a new one per call instead.
        if extra_config is None:
            extra_config = {}

        # Create ONNX-ML model
        input_spec = [("input", FloatTensorType([X.shape[0], X.shape[1]]))]
        onnx_ml_model = convert_lightgbm(model,
                                         initial_types=input_spec,
                                         target_opset=9)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        raw_outputs = session.run(output_names, feed)

        # Slot 1 is reserved for the label output, slot 0 for anything else.
        onnx_ml_pred = [[] for _ in output_names]
        for name, value in zip(output_names, raw_outputs):
            onnx_ml_pred[1 if "label" in name else 0] = value

        # Get the predictions for the ONNX model
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred = [[] for _ in output_names]
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred, output_names
Beispiel #6
0
    def test_lightgbm_onnx_pytorch(self):
        """Check ONNX-ML LightGBM regressor -> torch conversion.

        A three-tree ``LGBMRegressor`` is fitted on three samples, exported
        with ``convert_lightgbm`` and converted with
        ``convert(..., "torch", X)``; the flattened first ONNX-ML output must
        match ``pt_model.predict(X)``.
        """
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        input_spec = [("input", FloatTensorType([X.shape[0], X.shape[1]]))]
        onnx_ml_model = convert_lightgbm(model,
                                         initial_types=input_spec,
                                         target_opset=9)

        pt_model = convert(onnx_ml_model, "torch", X)
        assert pt_model

        # Get the predictions for the ONNX-ML model. The result of
        # ``session.run`` is used directly; no placeholder list is needed.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)

        np.testing.assert_allclose(onnx_ml_pred[0].flatten(),
                                   pt_model.predict(X))
Beispiel #7
0
    def compile(self):
        """Convert the LightGBM model to a PyTorch model and store internally.

        ``self.lgb_booster`` is attached to a LightGBM scikit-learn estimator
        (regressor when the first output feature's type is ``NUMBER``,
        classifier otherwise), which is then passed to ``convert`` with the
        ``"gemm"`` tree implementation. The resulting ``hb_model.model`` is
        stored on ``self.compiled_model``.

        :raises ValueError: if ``self.lgb_booster`` is ``None``.
        """
        if self.lgb_booster is None:
            raise ValueError("Model has not been trained yet.")

        # A dict keys view cannot be indexed; materialising it first works
        # whether ``keys()`` returns a view or a list.
        output_feature_name = list(self.output_features.keys())[0]
        output_feature = self.output_features[output_feature_name]

        # https://github.com/microsoft/LightGBM/issues/1942#issuecomment-453975607
        gbm_sklearn_cls = lgb.LGBMRegressor if output_feature.type(
        ) == NUMBER else lgb.LGBMClassifier
        gbm_sklearn = gbm_sklearn_cls(feature_name=list(
            self.input_features.keys()))

        # Populate the attributes the wrapper would normally get from fit().
        gbm_sklearn._Booster = self.lgb_booster
        gbm_sklearn.fitted_ = True
        gbm_sklearn._n_features = len(self.input_features)
        if isinstance(gbm_sklearn, lgb.LGBMClassifier):
            # Non-CATEGORY classifier outputs are treated as two-class.
            gbm_sklearn._n_classes = output_feature.num_classes if output_feature.type(
            ) == CATEGORY else 2

        hb_model = convert(gbm_sklearn,
                           "torch",
                           extra_config={"tree_implementation": "gemm"})

        self.compiled_model = hb_model.model
Beispiel #8
0
    def test_quantilediscretizer_converter(self):
        """Check that a converted Spark ``QuantileDiscretizer`` matches Spark.

        A two-bucket discretizer is fitted on the iris ``sepal_length``
        column; the Spark bucket column must equal the output of the
        converted torch model within 1e-06.
        """
        iris = load_iris()
        features = [
            "sepal_length", "sepal_width", "petal_length", "petal_width"
        ]

        pd_df = pd.DataFrame(data=np.c_[iris["data"], iris["target"]],
                             columns=features + ["target"])
        # Only the column being discretized is kept on the Spark side.
        test_df = sql.createDataFrame(pd_df).select("sepal_length")

        quantile = QuantileDiscretizer(inputCol="sepal_length",
                                       outputCol="sepal_length_bucket",
                                       numBuckets=2)
        model = quantile.fit(test_df)

        torch_model = convert(model, "torch", test_df)
        self.assertTrue(torch_model is not None)

        spark_buckets = model.transform(test_df).select(
            "sepal_length_bucket").toPandas()
        torch_buckets = torch_model.transform(pd_df[["sepal_length"]])

        np.testing.assert_allclose(spark_buckets.to_numpy(),
                                   torch_buckets,
                                   rtol=1e-06,
                                   atol=1e-06)
Beispiel #9
0
    def test_one_hot_encoder_onnx_int(self, rtol=1e-06, atol=1e-06):
        """Check that a converted integer ``OneHotEncoder`` matches ONNX-ML.

        :param rtol: relative tolerance passed to ``assert_allclose``.
        :param atol: absolute tolerance passed to ``assert_allclose``.
        """
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model = OneHotEncoder()
        model.fit(X)

        # Export to ONNX-ML with a single int input shaped like X.
        input_spec = [("int_input", IntTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Translate the ONNX-ML graph with the converter under test.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Reference predictions from onnxruntime (all outputs requested).
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)

        # Predictions of the converted model.
        onnx_pred = onnx_model.transform(X)

        # The whole list of ONNX-ML outputs is compared against onnx_pred.
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)
Beispiel #10
0
    def test_pipeline_1(self):
        """Check a one-stage Spark pipeline (LogisticRegression) after conversion.

        Random float32 features and binary labels (seed 0) are used to fit the
        pipeline; both ``prediction`` and ``probability`` columns from Spark
        must match the converted torch model within 1e-06.
        """
        warnings.filterwarnings("ignore")
        n_features = 10
        n_total = 100
        classes = 2

        np.random.seed(0)
        X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
        y = np.random.randint(classes, size=(n_total, 1))

        # First column is the label, the remaining columns are the features.
        arr = np.concatenate([y, X], axis=1).reshape(n_total, -1)
        records = map(lambda row: (int(row[0]), Vectors.dense(row[1:])), arr)
        df = sql.createDataFrame(records, schema=["label", "features"])

        model = Pipeline(stages=[LogisticRegression()]).fit(df)

        test_df = df.select("features").limit(1)
        torch_model = convert(model, "torch", test_df)
        self.assertTrue(torch_model is not None)

        # Hard predictions.
        spark_labels = np.array(
            model.transform(df).select("prediction").collect()).reshape(-1)
        np.testing.assert_allclose(spark_labels,
                                   torch_model.predict(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        # Class probabilities, one row per sample.
        spark_probs = np.array(
            model.transform(df).select("probability").collect()).reshape(
                -1, classes)
        np.testing.assert_allclose(spark_probs,
                                   torch_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)
    def _test_scaler_converter(self, model):
        """Run a scaler-like scikit-learn model through both ONNX paths.

        :param model: unfitted estimator; it is fitted here on a fixed 4x3
            float32 matrix.
        :return: ``(onnx_ml_pred, onnx_pred)`` -- the first onnxruntime output
            of the ONNX-ML model and the result of ``onnx_model.transform(X)``.
        """
        warnings.filterwarnings("ignore")
        rows = [[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0],
                [1.0, 0.0, -2.0]]
        X = np.array(rows, dtype=np.float32)
        model.fit(X)

        # Export to ONNX-ML; the first input dimension is left as None.
        input_spec = [("float_input", FloatTensorType([None, X.shape[1]]))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Translate the ONNX-ML graph with the converter under test.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Reference predictions from onnxruntime.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)[0]

        # Predictions of the converted model.
        onnx_pred = onnx_model.transform(X)

        return onnx_ml_pred, onnx_pred
    def test_lightgbm_pytorch_extra_config(self):
        """Check that ``ONNX_OUTPUT_MODEL_NAME`` sets the converted graph name.

        A small ``LGBMRegressor`` is exported with ``convert_lightgbm`` and
        converted to "onnx" with an ``extra_config`` carrying a model name and
        initial types; the resulting graph name must equal that model name.
        """
        warnings.filterwarnings("ignore")
        X = np.array([[0, 1], [1, 1], [2, 0]], dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input",
                            FloatTensorType([X.shape[0], X.shape[1]]))],
            target_opset=9)

        # Create ONNX model, naming the output graph through extra_config.
        model_name = "hummingbird.ml.test.lightgbm"
        extra_config = {
            constants.ONNX_OUTPUT_MODEL_NAME: model_name,
            constants.ONNX_INITIAL_TYPES: [
                ("input", FloatTensorType([X.shape[0], X.shape[1]]))
            ],
        }
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        assert onnx_model.model.graph.name == model_name
Beispiel #13
0
    def _test_linear(self, classes):
        """
        Helper that compares an ONNX-ML ``LogisticRegression`` export with the
        model produced by ``convert(..., "onnx", X)``.

        Both models are executed with onnxruntime on the same random float32
        input (seed 0, 100 samples, 20 features).

        :param classes: upper bound passed to ``np.random.randint`` when
            drawing the labels.
        :return: ``(onnx_ml_pred, onnx_pred)``; in each list index 1 holds the
            ``"output_label"`` output and index 0 holds the other output.
        """
        warnings.filterwarnings("ignore")
        n_features = 20
        n_total = 100

        np.random.seed(0)
        X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
        y = np.random.randint(classes, size=n_total)

        # Scikit-learn reference model.
        model = LogisticRegression(solver="liblinear",
                                   multi_class="ovr",
                                   fit_intercept=True)
        model.fit(X, y)

        # Export to ONNX-ML with a single float input shaped like X.
        input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Translate the ONNX-ML graph with the converter under test.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Run the ONNX-ML model and sort its outputs into [other, label].
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = [[] for _ in output_names]
        for name, value in zip(output_names, session.run(output_names, feed)):
            slot = 1 if name == "output_label" else 0
            onnx_ml_pred[slot] = value

        # Run the converted model with the same output names and feed.
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = [[] for _ in output_names]
        for name, value in zip(output_names, session.run(output_names, feed)):
            slot = 1 if name == "output_label" else 0
            onnx_pred[slot] = value

        return onnx_ml_pred, onnx_pred
Beispiel #14
0
    def convert(self, model, data, args, model_name):
        """Convert ``model`` for ``self.backend`` and report how long it took.

        ``self.configure`` is called first; the converted model is stored on
        ``self.model``. ``model_name`` is not used in this method.

        :return: ``interval`` of the ``Timer`` wrapping the conversion call.
        """
        self.configure(data, model, args)
        sample_input = self.get_data(data.X_test)

        # Only the conversion call itself is timed.
        with Timer() as timer:
            self.model = convert(
                model,
                self.backend,
                sample_input,
                device=self.params["device"],
                extra_config={
                    constants.N_THREADS: self.params["nthread"],
                    constants.BATCH_SIZE: self.params["batch_size"],
                },
            )

        return timer.interval
Beispiel #15
0
def get_all_backends(constructor,
                     clf,
                     include_sklearn=True,
                     include_hummingbird=False,
                     **kwargs):
    """Build a name -> model mapping of ``clf`` wrapped for several backends.

    :param constructor: callable invoked as
        ``constructor(clf, backend=..., [device=...], **kwargs)``.
    :param clf: the original model.
    :param include_sklearn: when true, ``clf`` itself is stored under
        ``"sklearn"`` as the first entry.
    :param include_hummingbird: when true, ``convert(clf, "pytorch")`` results
        are added under ``"hummingbird_cpu"`` and ``"hummingbird_cuda"``.
    :param kwargs: extra keyword arguments forwarded to ``constructor``.
    :return: dict with keys ``"numpy"``, ``"torch_cpu"``, ``"torch_cuda"``
        plus the optional entries above.
    """
    backends = {"sklearn": clf} if include_sklearn else {}

    backends["numpy"] = constructor(clf, backend="numpy", **kwargs)
    backends["torch_cpu"] = constructor(clf,
                                        backend="torch",
                                        device="cpu",
                                        **kwargs)
    backends["torch_cuda"] = constructor(clf,
                                         backend="torch",
                                         device="cuda",
                                         **kwargs)

    if include_hummingbird:
        backends["hummingbird_cpu"] = convert(clf, "pytorch")
        backends["hummingbird_cuda"] = convert(clf, "pytorch").to("cuda")

    return backends
Beispiel #16
0
    def test_lgbm_onnxml_model_binary_float64(self):
        """Check binary ``LGBMClassifier`` predictions after conversion to ONNX.

        Random data (seed 0, 100 samples, 28 features) is used to fit the
        classifier; ``model.predict`` must match ``onnx_model.predict``.
        """
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100

        np.random.seed(0)
        # NOTE(review): despite the "float64" in the test name, the features
        # are cast to float32 here -- confirm this is intended.
        X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)

        onnx_model = convert(model, "onnx", X)

        expected = model.predict(X)
        np.testing.assert_allclose(expected, onnx_model.predict(X))
Beispiel #17
0
    def test_pipeline3(self):
        """Check a four-stage Spark pipeline after conversion to torch.

        Pipeline: two ``QuantileDiscretizer`` stages, a ``VectorAssembler``
        over the two bucket columns plus the four raw iris columns, and a
        ``LogisticRegression``. Spark's ``prediction`` and ``probability``
        columns must match the converted model.
        """
        iris = load_iris()
        raw_columns = [
            "sepal_length", "sepal_width", "petal_length", "petal_width"
        ]

        pd_df = pd.DataFrame(data=np.c_[iris["data"], iris["target"]],
                             columns=raw_columns + ["label"])
        df = sql.createDataFrame(pd_df)

        quantile1 = QuantileDiscretizer(inputCol="sepal_length",
                                        outputCol="sepal_length_bucket",
                                        numBuckets=2)
        quantile2 = QuantileDiscretizer(inputCol="sepal_width",
                                        outputCol="sepal_width_bucket",
                                        numBuckets=2)
        # Bucketized columns come first, followed by the raw measurements.
        assembler_inputs = ["sepal_length_bucket", "sepal_width_bucket"
                            ] + raw_columns
        assembler = VectorAssembler(inputCols=assembler_inputs,
                                    outputCol="features")
        stages = [quantile1, quantile2, assembler, LogisticRegression()]
        model = Pipeline(stages=stages).fit(df)

        # Drop the label column before converting and scoring.
        df = df.select(
            ["sepal_length", "sepal_width", "petal_length", "petal_width"])
        pd_df = pd_df[[
            "sepal_length", "sepal_width", "petal_length", "petal_width"
        ]]
        torch_model = convert(model, "torch", df)
        self.assertTrue(torch_model is not None)

        # Hard predictions.
        spark_labels = np.array(
            model.transform(df).select("prediction").collect()).reshape(-1)
        np.testing.assert_allclose(
            spark_labels,
            torch_model.predict(pd_df),
            rtol=1e-06,
            atol=1e-06,
        )

        # Class probabilities, reshaped to three columns per sample.
        spark_probs = np.array(
            model.transform(df).select("probability").collect()).reshape(
                -1, 3)
        np.testing.assert_allclose(
            spark_probs,
            torch_model.predict_proba(pd_df),
            rtol=1e-06,
            atol=1e-05,
        )
Beispiel #18
0
    def __init__(self,
                 map_data,
                 sensors: set,
                 k_names=None,
                 acq="gaussian_ei",
                 acq_mod="masked",
                 acq_fusion="decoupled",
                 d=1.0):
        """Set up per-sensor Gaussian-process state and map coordinate grids.

        :param map_data: 2-D array-like map; cells equal to 0 are collected
            into ``self.vector_pos``.
        :param sensors: sensor identifiers, paired with ``k_names`` via ``zip``.
        :param k_names: kernel name per sensor; defaults to ``"RBF"`` for each
            sensor. Only ``"RBF"`` entries get a regressor created here.
        :param acq: stored as ``self.acquisition``.
        :param acq_mod: stored as ``self.acq_mod``.
        :param acq_fusion: stored as ``self.acq_fusion``.
        :param d: stored as ``self.proportion``.
        """
        if k_names is None:
            k_names = ["RBF"] * len(sensors)

        self.map_data = map_data
        self.sensors = sensors
        self.k_names = k_names  # "RBF" Matern" "RQ"
        # 'gaussian_sei' 'gaussian_ei' 'maxvalue_entropy_search''gaussian_pi'
        self.acquisition = acq
        # 'masked' 'split_path' 'truncated', 'normal'
        self.acq_mod = acq_mod
        self.proportion = d

        # Per-sensor containers, filled in the loop below.
        self.gps = dict()
        self.train_targets = dict()
        self.mus = dict()
        self.stds = dict()
        self.has_calculated = dict()
        self.train_inputs = [np.array([[], []])]

        for sensor, kernel in zip(sensors, k_names):
            if kernel != "RBF":  # "RBF" Matern" "RQ"
                continue
            helper = gpr.GaussianProcessRegressor(kernel=kernels.RBF(100),
                                                  alpha=1e-7)
            converted = convert(helper, 'torch')
            self.gps[sensor] = converted
            converted.to('cuda')
            self.train_targets[sensor] = np.array([])
            self.mus[sensor] = np.array([])
            self.stds[sensor] = np.array([])
            self.has_calculated[sensor] = False

        # Every grid coordinate, first axis spanning shape[1] and second
        # spanning shape[0], flattened to one pair per row.
        n_rows = self.map_data.shape[0]
        n_cols = self.map_data.shape[1]
        grid = np.mgrid[0:n_cols:1, 0:n_rows:1]
        self.all_vector_pos = grid.reshape(2, -1).T

        # Coordinates of zero-valued cells, with the two columns swapped.
        zero_cells = np.asarray(np.where(self.map_data == 0))
        self.vector_pos = np.fliplr(zero_cells.reshape(2, -1).T)

        self.acq_fusion = acq_fusion
        # simple_max: maximum value found
        # max_sum: sum of acq on max for each maximum

        self.splitted_goals = []
        self.nans = None
Beispiel #19
0
    def _test_sv(self, classes, mode="torch"):
        """
        Helper that compares an ONNX-ML export of a scikit-learn ``SVC`` with
        the model produced by ``convert(onnx_ml_model, mode, X)``.

        :param classes: upper bound passed to ``np.random.randint`` when
            drawing the labels.
        :param mode: backend name forwarded to ``convert``.
        :return: ``(onnx_ml_pred, pred)`` where ``onnx_ml_pred`` holds the
            label output at index 1 and the other output at index 0, and
            ``pred`` is the converted model's ``predict(X)``.
        """
        warnings.filterwarnings("ignore")
        n_features = 20
        n_total = 100

        np.random.seed(0)
        X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
        y = np.random.randint(classes, size=n_total)

        # Scikit-learn reference model.
        model = SVC()
        model.fit(X, y)

        # Export to ONNX-ML with a single float input shaped like X.
        input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Run the ONNX-ML model and sort its outputs into [other, label].
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = [[] for _ in output_names]
        for name, value in zip(output_names, session.run(output_names, feed)):
            slot = 1 if "label" in name else 0
            onnx_ml_pred[slot] = value

        # Convert for the requested backend and predict.
        model = convert(onnx_ml_model, mode, X)
        pred = model.predict(X)

        return onnx_ml_pred, pred
Beispiel #20
0
    def _test_regressor(self, values):
        """
        Helper that compares an ONNX-ML export of a scikit-learn
        ``LinearRegression`` with the model produced by
        ``convert(..., "onnx", X)``.

        Both models are executed with onnxruntime on the same random float32
        input (seed 0, 100 samples, 20 features).

        :param values: upper bound passed to ``np.random.randint`` when
            drawing the targets.
        :return: ``(onnx_ml_pred, onnx_pred)`` -- the raw ``session.run``
            results of the ONNX-ML model and of the converted model.
        """
        warnings.filterwarnings("ignore")
        n_features = 20
        n_total = 100

        np.random.seed(0)
        X = np.array(np.random.rand(n_total, n_features), dtype=np.float32)
        y = np.random.randint(values, size=n_total)

        # Scikit-learn reference model.
        model = LinearRegression()
        model.fit(X, y)

        # Export to ONNX-ML with a single float input shaped like X.
        input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Translate the ONNX-ML graph with the converter under test.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Run the ONNX-ML model.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)

        # Run the converted model with the same output names and feed.
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = session.run(output_names, feed)

        return onnx_ml_pred, onnx_pred
Beispiel #21
0
    def test_model_one_hot_encoder_string(self):
        """Run a string ``OneHotEncoder`` through both ONNX paths.

        No assertion is made here; the predictions are returned.

        :return: ``(onnx_ml_pred, onnx_pred)`` -- the onnxruntime outputs of
            the ONNX-ML model and ``onnx_model.transform(data)``.
        """
        data = [
            ["a", "r", "x"],
            ["a", "r", "x"],
            ["aaaa", "r", "x"],
            ["a", "r", "xx"],
        ]
        model = OneHotEncoder()
        model.fit(data)

        # Export to ONNX-ML with a single 4x3 string input.
        input_spec = [("input", StringTensorType_onnx([4, 3]))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Translate the ONNX-ML graph with the converter under test.
        onnx_model = convert(onnx_ml_model, "onnx", data)

        # Reference predictions from onnxruntime (all outputs requested).
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: data}
        onnx_ml_pred = session.run(output_names, feed)

        # Predictions of the converted model.
        onnx_pred = onnx_model.transform(data)

        return onnx_ml_pred, onnx_pred
    def _test_imputer_converter(self, model, mode="onnx"):
        """Run an imputer-like scikit-learn model through both paths.

        :param model: unfitted estimator; it is fitted here on a 3x2 float32
            matrix containing one NaN.
        :param mode: backend name forwarded to ``convert``.
        :return: ``(onnx_ml_pred, pred)`` -- the first onnxruntime output of
            the ONNX-ML model and the converted model's ``transform(X)``.
        """
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
        model.fit(X)

        # Export to ONNX-ML with a single float input shaped like X.
        input_spec = [("float_input", FloatTensorType_onnx(X.shape))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Reference predictions from onnxruntime.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)[0]

        # Convert for the requested backend and transform the same input.
        model = convert(onnx_ml_model, mode, X)
        pred = model.transform(X)

        return onnx_ml_pred, pred
    def test_scaler_converter_float_64(self):
        """Check that a converted float64 ``StandardScaler`` matches ONNX-ML.

        The scaler is fitted on a fixed 4x3 float64 matrix, exported with a
        ``DoubleTensorType`` input and converted with
        ``convert(..., "onnx", X)``; outputs must agree within 1e-06.
        """
        warnings.filterwarnings("ignore")
        rows = [[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0],
                [1.0, 0.0, -2.0]]
        X = np.array(rows, dtype=np.float64)

        # Scikit-learn reference model.
        model = StandardScaler()
        model.fit(X)

        # Export to ONNX-ML; the first input dimension is left as None.
        input_spec = [("double_input", DoubleTensorType([None, X.shape[1]]))]
        onnx_ml_model = convert_sklearn(model, initial_types=input_spec)

        # Translate the ONNX-ML graph with the converter under test.
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Reference predictions from onnxruntime.
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [out.name for out in session.get_outputs()]
        feed = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, feed)[0]

        # Predictions of the converted model.
        onnx_pred = onnx_model.transform(X)

        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=1e-06,
                                   atol=1e-06)
Beispiel #24
0
import lightgbm as lgb
import pandas as pd
import torch
from hummingbird.ml import convert

# Benchmark script: time a pickled LightGBM model against its Hummingbird
# PyTorch conversion, then against a torch.jit-traced version.
# NOTE(review): `Path`, `joblib` and `time` are not imported in the visible
# part of this file -- confirm they are imported elsewhere.
path = Path('data')
X_test = pd.read_csv(path / 'X_test.csv')
# y_test is loaded but not used in the visible part of the script.
y_test = pd.read_csv(path / 'y_test.csv')

# Baseline: time predict() on the original model loaded from disk.
lgb_model = joblib.load('model.pkl')
begin = time()
pred = lgb_model.predict(X_test)
total = time() - begin
print('LightGBM time:', total, 's')

# Conversion happens outside the timed sections.
torch_model = convert(lgb_model, 'pytorch')

# Time predict() on the converted model, fed a numpy array.
begin = time()
torch_pred = torch_model.predict(X_test.to_numpy())
total = time() - begin
print('PyTorch time:', total, 's')

# NOTE(review): if both predictions are numpy arrays, `==` is element-wise
# and exact, so this prints a boolean array rather than one verdict -- confirm.
print('Are predictions equal:', pred == torch_pred)

# Trace the converted model and time its forward() on a tensor input.
# NOTE(review): the example input assumes 30 feature columns -- confirm this
# matches X_test. This section reuses the 'PyTorch time:' label.
torch_model = torch.jit.trace(torch_model, example_inputs=torch.randn(1, 30))
begin = time()
torch_pred = torch_model.forward(torch.tensor(X_test.to_numpy()))
total = time() - begin
print('PyTorch time:', total, 's')
import pprint
Beispiel #25
0
def convert(model,
            name=None,
            initial_types=None,
            doc_string='',
            target_opset=None,
            targeted_onnx=onnx.__version__,
            custom_conversion_functions=None,
            custom_shape_calculators=None,
            without_onnx_ml=False,
            zipmap=True):
    '''
    This function produces an equivalent ONNX model of the given lightgbm model.
    The supported lightgbm modules are listed below.

    * `LGBMClassifier <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html>`_
    * `LGBMRegressor <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMRegressor.html>`_
    * `Booster <https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html>`_

    :param model: A LightGBM model. A raw ``lightgbm.Booster`` is wrapped in ``WrappedBooster`` before parsing.
    :param name: The name of the graph (type: GraphProto) in the produced ONNX model (type: ModelProto).
        When None, a random UUID hex string is used.
    :param initial_types: a python list. Each element is a tuple of a variable name and a type defined in
        data_types.py. Required.
    :param doc_string: A string attached onto the produced ONNX model
    :param target_opset: number, for example, 7 for ONNX 1.2, and 8 for ONNX 1.3. When omitted (or falsy), the
        value returned by ``get_maximum_opset_supported()`` is used.
    :param targeted_onnx: A string (for example, '1.1.2' and '1.2') used to specify the targeted ONNX version of the
        produced model. If ONNXMLTools cannot find a compatible ONNX python package, an error may be thrown.
    :param custom_conversion_functions: a dictionary for specifying the user customized conversion function
    :param custom_shape_calculators: a dictionary for specifying the user customized shape calculator
    :param without_onnx_ml: whether to generate a model composed by ONNX operators only, or to allow the converter
        to use ONNX-ML operators as well. The ONNX-only path requires Hummingbird.
    :param zipmap: forwarded to ``parse_lightgbm``; controls the handling of the ZipMap operator in the ONNX graph
        (NOTE(review): confirm which value removes ZipMap).
    :return: An ONNX model (type: ModelProto) which is equivalent to the input lightgbm model
    :raises ValueError: if ``initial_types`` is None
    :raises RuntimeError: if ``without_onnx_ml`` is set but Hummingbird is not installed
    '''
    if initial_types is None:
        raise ValueError(
            'Initial types are required. See usage of convert(...) in '
            'onnxmltools.convert.lightgbm.convert for details')
    # Fail before doing any conversion work if the ONNX-only path cannot run.
    if without_onnx_ml and not hummingbird_installed():
        raise RuntimeError(
            'Hummingbird is not installed. Please install hummingbird to use this feature: pip install hummingbird-ml'
        )
    if isinstance(model, lightgbm.Booster):
        model = WrappedBooster(model)
    if name is None:
        # uuid4().hex is already a str, no extra conversion needed.
        name = uuid4().hex

    target_opset = target_opset or get_maximum_opset_supported()
    topology = parse_lightgbm(model,
                              initial_types,
                              target_opset,
                              custom_conversion_functions,
                              custom_shape_calculators,
                              zipmap=zipmap)
    topology.compile()
    onnx_ml_model = convert_topology(topology, name, doc_string, target_opset,
                                     targeted_onnx)

    if not without_onnx_ml:
        return onnx_ml_model

    # Imported lazily (Hummingbird is an optional dependency) and aliased so
    # the import does not shadow this function's own name.
    from hummingbird.ml import constants, convert as hummingbird_convert

    # NOTE(review): initial_types is not forwarded through
    # constants.ONNX_INITIAL_TYPES here; confirm Hummingbird derives the input
    # types from the ONNX-ML graph itself.
    extra_config = {
        constants.ONNX_OUTPUT_MODEL_NAME: name,
        constants.ONNX_TARGET_OPSET: target_opset,
    }
    return hummingbird_convert(onnx_ml_model, "onnx",
                               extra_config=extra_config).model
Beispiel #26
0
import numpy as np
import lightgbm as lgb
from hummingbird.ml import convert

# Synthetic binary-classification data set: uniformly random features with
# random integer labels in [0, num_classes).
num_classes = 2
num_rows, num_features = 200000, 28
X = np.random.rand(num_rows, num_features)
y = np.random.randint(num_classes, size=num_rows)

# In[2]:

# Train the model to be converted (LightGBM in this case).
model = lgb.LGBMClassifier().fit(X, y)

# In[3]:

# Convert the trained model to PyTorch with Hummingbird.
hb_model = convert(model, 'torch')

# In[4]:

# Run Hummingbird on CPU - by default CPU execution is used in Hummingbird.
# (In the notebook this call was wrapped in the cell magic `%%timeit -r 3`.)
hb_model.predict(X)

# In[5]:

# Notebook-only GPU variant (needs a GPU-enabled machine), originally wrapped
# in `%%timeit -r 3`:
#     hb_model.to('cuda')
#     hb_model.predict(X)
Beispiel #27
0
from hummingbird.ml import convert
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from time import time
# Benchmark: repeated prediction with scikit-learn vs. the Hummingbird
# conversion of the same random forest, on CPU and then on GPU.
N_RUNS = 50

X, y = load_breast_cancer(return_X_y=True)
skl_model = RandomForestClassifier(n_estimators=1000, max_depth=7).fit(X, y)

# 1) scikit-learn baseline.
start = time()
for _ in range(N_RUNS):
    pred = skl_model.predict(X)
print(time() - start)

# 2) Hummingbird on CPU; the printed time includes the conversion.
start = time()
model = convert(skl_model, 'torch')
conversion_time = time() - start
for _ in range(N_RUNS):
    pred_cpu_hb = model.predict(X)
print(time() - start)

# 3) Hummingbird on GPU; the conversion time is added back so this figure is
#    comparable with the CPU one.
start = time()
model.to('cuda')
for _ in range(N_RUNS):
    pred_gpu_hb = model.predict(X)
print(time() - start + conversion_time)
from hummingbird.ml import convert
import pickle
import torch


# Load a pickled scikit-learn model, convert it to a Hummingbird/PyTorch
# model and save the converted model's state dict.
print("loading the sklearn model: ")
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
# The context manager closes the file handle once the model is loaded.
with open("D:/Chinmay/ML Pipeline/Trained model/mode_1_20201006-083222 - Copy", "rb") as model_file:
    rud_model = pickle.load(model_file)
rud_model.verbose = False
print(rud_model.n_estimators, rud_model.max_depth, rud_model.max_features)
print("Model loaded successfully:: Now converting to hummingbird model")
rud_model = convert(rud_model, 'pytorch')
print("Converted Sklearn model to : ", type(rud_model))
torch.save(rud_model.state_dict(), "hummingbird_models/rud")


Beispiel #29
0
# In[3]:


# Convert the model to ONNX-ML with ONNXMLTools. The single ONNX input is
# declared as a float tensor with the same two dimensions as X.
input_shape = [X.shape[0], X.shape[1]]
initial_types = [("input", FloatTensorType(input_shape))]
onnx_ml_model = convert_lightgbm(model,
                                 initial_types=initial_types,
                                 target_opset=9)


# In[4]:


# Convert the ONNX-ML model to ONNX with Hummingbird, passing X as test input.
onnx_model = convert(onnx_ml_model, "onnx", X)


# In[5]:


# Alternatively, set the initial types through the extra_config parameter,
# as in the ONNXMLTools converter.
extra_config = {constants.ONNX_INITIAL_TYPES: initial_types}
onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)


# In[6]:


# Time the ONNX model's prediction on CPU via the notebook %%timeit cell magic.
get_ipython().run_cell_magic('timeit', '-r 3', '\n# Run the ONNX model on CPU \nonnx_model.predict(X)')
Beispiel #30
0
import numpy as np
from sklearn import datasets
from lightgbm.sklearn import LGBMClassifier
from hummingbird.ml import convert
import onnxruntime
import torch

# Load the wine data set as a (features, target) pair and cast the features
# to float32; the same array is later passed to torch.onnx.export.
x, y = datasets.load_wine(return_X_y=True)
x = x.astype(np.float32)

# Fit a small LightGBM classifier (5 estimators) and keep its predicted
# probabilities on the training data.
model = LGBMClassifier(n_estimators=5)
model.fit(x, y)
preds = model.predict_proba(x)

# Convert the fitted model to Hummingbird's "pytorch" backend.
pytorch_model = convert(model, "pytorch")

torch.onnx.export(
    pytorch_model.model,
    (torch.from_numpy(x)),
    "model.onnx",
    input_names=["input"],
    output_names=["output", "probabilities"],
    dynamic_axes={
        "input": {
            0: "batch"
        },
        "output": {
            0: "batch"
        },
        "probabilities": {
            0: "batch"