Example #1
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        if CastTransformer is None:
            model = GradientBoostingClassifier(n_estimators=10,
                                               max_depth=max_depth)
        else:
            # newer version of sklearn-onnx
            model = make_pipeline(
                CastTransformer(dtype=np.float32),
                GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth))
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", DoubleTensorType([None, X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model
Example #2
def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.
    
    :param model: model, *scikit-learn*, *keras*, or *coremltools* object
    :return: *onnx* model
    """
    from sklearn.base import BaseEstimator
    if model.__class__.__name__.startswith("LGBM"):
        from onnxmltools.convert import convert_lightgbm
        model, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
    elif isinstance(model, BaseEstimator):
        from onnxmltools.convert import convert_sklearn
        model, prefix = convert_sklearn(model, name, input_types), "Sklearn"
    else:
        from keras.models import Model
        if isinstance(model, Model):
            from onnxmltools.convert import convert_keras
            model, prefix = convert_keras(model, name, input_types), "Keras"
        else:
            from onnxmltools.convert import convert_coreml
            model, prefix = convert_coreml(model, name, input_types), "Cml"
    if model is None:
        raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model)))
    return model, prefix
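A minimal usage sketch for the dispatcher above (the estimator, graph name, and input layout are illustrative, not taken from the original source); a plain scikit-learn estimator falls into the BaseEstimator branch and comes back with the "Sklearn" prefix.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from onnxmltools.convert.common.data_types import FloatTensorType

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=200).fit(X, y)

# LogisticRegression is a BaseEstimator, so convert_model dispatches to
# convert_sklearn and returns the "Sklearn" prefix.
onnx_model, prefix = convert_model(
    clf, "logreg_iris",
    [("input", FloatTensorType([None, X.shape[1]]))])
print(prefix)  # Sklearn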
Example #3
def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    :param model: model
    :return: *onnx* model
    """
    from sklearn.base import BaseEstimator
    if model.__class__.__name__.startswith("LGBM"):
        from onnxmltools.convert import convert_lightgbm
        model, prefix = convert_lightgbm(model, name, input_types), "LightGbm"
    elif model.__class__.__name__.startswith("XGB"):
        from onnxmltools.convert import convert_xgboost
        model, prefix = convert_xgboost(model, name, input_types), "XGB"
    elif model.__class__.__name__ == 'Booster':
        import lightgbm
        if isinstance(model, lightgbm.Booster):
            from onnxmltools.convert import convert_lightgbm
            model, prefix = convert_lightgbm(model, name,
                                             input_types), "LightGbm"
        else:
            raise RuntimeError("Unable to convert model of type '{0}'.".format(
                type(model)))
    elif isinstance(model, BaseEstimator):
        from onnxmltools.convert import convert_sklearn
        model, prefix = convert_sklearn(model, name, input_types), "Sklearn"
    else:
        from onnxmltools.convert import convert_coreml
        model, prefix = convert_coreml(model, name, input_types), "Cml"
    if model is None:
        raise RuntimeError("Unable to convert model of type '{0}'.".format(
            type(model)))
    return model, prefix
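A hypothetical call exercising the XGBoost branch of this extended dispatcher (the model, graph name, and input layout are illustrative); because the class name starts with "XGB", convert_xgboost is chosen before the generic BaseEstimator fallback.

from sklearn.datasets import load_iris
from xgboost import XGBClassifier
from onnxmltools.convert.common.data_types import FloatTensorType

X, y = load_iris(return_X_y=True)
booster = XGBClassifier(n_estimators=5, max_depth=3).fit(X, y)

# The class name starts with "XGB", so the XGBoost converter is selected.
onnx_model, prefix = convert_model(
    booster, "xgb_iris",
    [("input", FloatTensorType([None, X.shape[1]]))])
print(prefix)  # XGB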
Example #4
    def test_sklearn_classifier_multi_discrete_int_labels(self):
        iris = load_iris()
        x = iris.data[:, :2]
        y = iris.target
        y[y == 0] = 10
        y[y == 1] = 20
        y[y == 2] = -30
        x_train, x_test, y_train, _ = train_test_split(
            x, y, test_size=0.5, random_state=42)
        model = RandomForestClassifier(n_estimators=3)
        model.fit(x_train, y_train)
        conv_model = convert_sklearn(
            model,
            initial_types=[
                ('input', FloatTensorType(shape=[None, x_test.shape[1]]))])
        self.assertTrue(conv_model is not None)
        dump_data_and_model(
            x_test.astype("float32"),
            model,
            conv_model,
            basename="SklearnRFClassifierMultiDiscreteIntLabels",
            allow_failure="StrictVersion("
            "onnx.__version__)"
            "< StrictVersion('1.3.0')",
        )
Example #5
    def test_onnx_default_n_threads(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=9)

        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X)

        self.assertIsNotNone(hb_model)
        self.assertTrue(hb_model._session.get_session_options(
        ).intra_op_num_threads == psutil.cpu_count(logical=False))
        self.assertTrue(
            hb_model._session.get_session_options().inter_op_num_threads == 1)
Example #6
    def common_test_xgboost_10_skl(self, missing, replace=False):
        this = os.path.abspath(os.path.dirname(__file__))
        data = os.path.join(this, "data_fail.csv")
        data = pandas.read_csv(data)

        for col in data:
            dtype = data[col].dtype
            if dtype in ['float64', 'float32']:
                data[col].fillna(0., inplace=True)
            if dtype in ['int64']:
                data[col].fillna(0, inplace=True)
            elif dtype in ['O']:
                data[col].fillna('N/A', inplace=True)

        data['pclass'] = data['pclass'] * float(1)
        full_df = data.drop('survived', axis=1)
        full_labels = data['survived']

        train_df, test_df, train_labels, test_labels = train_test_split(
            full_df, full_labels, test_size=.2, random_state=11)

        col_transformer = self._column_tranformer_fitted_from_df(full_df)

        param_distributions = {
            "colsample_bytree": 0.5,
            "gamma": 0.2,
            'learning_rate': 0.3,
            'max_depth': 2,
            'min_child_weight': 1.,
            'n_estimators': 1,
            'missing': missing,
        }

        regressor = XGBRegressor(verbose=0,
                                 objective='reg:squarederror',
                                 **param_distributions)
        regressor.fit(col_transformer.transform(train_df), train_labels)
        model = Pipeline(steps=[('preprocessor',
                                 col_transformer), ('regressor', regressor)])

        update_registered_converter(XGBRegressor, 'XGBRegressor',
                                    calculate_linear_regressor_output_shapes,
                                    convert_xgb)

        # last step
        input_xgb = model.steps[0][-1].transform(test_df[:5]).astype(
            np.float32)
        if replace:
            input_xgb[input_xgb[:, :] == missing] = np.nan
        onnx_last = convert_sklearn(
            model.steps[1][-1],
            initial_types=[
                ('X', FloatTensorType(shape=[None, input_xgb.shape[1]]))
            ],
            target_opset=get_opset_number_from_onnx())
        session = rt.InferenceSession(onnx_last.SerializeToString())
        pred_skl = model.steps[1][-1].predict(input_xgb).ravel()
        pred_onx = session.run(None, {'X': input_xgb})[0].ravel()
        assert_almost_equal(pred_skl, pred_onx)
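The wrapper tests that invoke this helper are not part of the snippet; a plausible pair, assuming they live in the same test class, would exercise both the NaN and the replaced-zero missing markers.

    def test_xgboost_10_skl_missing(self):
        # Train with NaN as the missing-value marker; inputs are left as-is.
        self.common_test_xgboost_10_skl(np.nan)

    def test_xgboost_10_skl_zero_replaced(self):
        # Train with 0 as the missing-value marker, then swap it for NaN
        # before scoring (replace=True).
        self.common_test_xgboost_10_skl(0.0, replace=True)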
Example #7
def scikit_learn_converter(args):
    import joblib  # sklearn.externals.joblib was removed in newer scikit-learn
    source_model = joblib.load(args.source)
    from onnxmltools.convert.common.data_types import FloatTensorType
    from onnxmltools.convert import convert_sklearn

    onnx_model = convert_sklearn(
        source_model,
        initial_types=[('input', FloatTensorType(source_model.coef_.shape))],
        target_opset=get_opset(args.ONNXVersion))
    return onnx_model
Example #8
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", StringTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Test onnx with string inputs requires test_data
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")
Example #9
    def test_sklearn_classifier_multi(self):
        model, x_test = _fit_classification_model(RandomForestClassifier(), 3)
        conv_model = convert_sklearn(
            model,
            initial_types=[
                ('input', FloatTensorType(shape=[None, x_test.shape[1]]))])
        self.assertTrue(conv_model is not None)
        dump_data_and_model(
            x_test,
            model,
            conv_model,
            basename="SklearnRFClassifierMulti",
            allow_failure="StrictVersion("
            "onnx.__version__)"
            "< StrictVersion('1.3.0')",
        )
Example #10
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int32TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model
Example #11
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)

        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int64TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model
Example #12
    def test_pandas_batch_onnxml(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(steps=[
            ("preprocessor",
             ColumnTransformer(
                 transformers=[],
                 remainder="passthrough",
             )),
            ("classifier",
             GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
        ])

        pipeline.fit(X_train, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            pipeline,
            initial_types=[
                ("vA", DoubleTensorType([X.shape[0], 1])),
                ("vB", DoubleTensorType([X.shape[0], 1])),
                ("vC", DoubleTensorType([X.shape[0], 1])),
            ],
            target_opset=9,
        )

        hb_model = hummingbird.ml.convert(
            onnx_ml_model, "onnx", extra_config={constants.BATCH_SIZE: 10})

        self.assertTrue(hb_model is not None)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train),
            hb_model.predict_proba(X_train),
            rtol=1e-06,
            atol=1e-06,
        )
Example #13
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", DoubleTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model
Example #14
    def test_sklearn_regressor(self):
        diabetes = load_diabetes()
        x = diabetes.data
        y = diabetes.target
        x_train, x_test, y_train, _ = train_test_split(
            x, y, test_size=0.5, random_state=42)
        model = RandomForestRegressor()
        model.fit(x_train, y_train)
        conv_model = convert_sklearn(
            model,
            initial_types=[
                ('input', FloatTensorType(shape=[None, x_test.shape[1]]))])
        self.assertTrue(conv_model is not None)
        dump_data_and_model(
            x_test.astype("float32"),
            model,
            conv_model,
            basename="SklearnRFRegressor-Dec3",
            allow_failure="StrictVersion("
            "onnx.__version__)"
            "< StrictVersion('1.3.0')",
        )
Example #15
    def _test_decision_tree(self, X, model, extra_config={}):
        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", X, extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if "label" in output_names[i]:
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        onnx_pred = [[] for i in range(len(output_names))]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            for i in range(len(output_names)):
                if "label" in output_names[i]:
                    onnx_pred[1] = onnx_model.predict(X)
                else:
                    onnx_pred[0] = onnx_model.predict_proba(X)

        return onnx_ml_pred, onnx_pred, output_names
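A minimal sketch of how a test could call this helper (the classifier, data shapes, and the label-only check are illustrative and assume the method sits in the same unittest.TestCase with numpy available as np).

    def test_decision_tree_classifier_labels(self):
        from sklearn.tree import DecisionTreeClassifier

        np.random.seed(0)
        X = np.random.rand(100, 20).astype(np.float32)
        y = np.random.randint(2, size=100)
        model = DecisionTreeClassifier(max_depth=5).fit(X, y)

        onnx_ml_pred, onnx_pred, output_names = self._test_decision_tree(
            X, model)

        # Index 1 holds the predicted labels for both backends (see the
        # helper above); the probability outputs differ in layout and are
        # not compared here.
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1])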
Example #16
    def test_onnx_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Test onnx requires test_data
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model,
                          "onnx")
Example #17
    def test_pandas_batch_onnxml(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        if CastTransformer is None:
            pipeline = Pipeline(steps=[
                ("preprocessor",
                 ColumnTransformer(
                     transformers=[],
                     remainder="passthrough",
                 )),
                ("classifier",
                 GradientBoostingClassifier(n_estimators=10,
                                            max_depth=max_depth)),
            ])
        else:
            # newer version of sklearn-onnx
            pipeline = Pipeline(steps=[
                ("preprocessor",
                 ColumnTransformer(
                     transformers=[],
                     remainder="passthrough",
                 )),
                ('cast', CastTransformer(dtype=np.float32)),
                ("classifier",
                 GradientBoostingClassifier(n_estimators=10,
                                            max_depth=max_depth)),
            ])

        pipeline.fit(X_train, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            pipeline,
            initial_types=[
                ("vA", DoubleTensorType([X.shape[0], 1])),
                ("vB", DoubleTensorType([X.shape[0], 1])),
                ("vC", DoubleTensorType([X.shape[0], 1])),
            ],
            target_opset=9,
        )

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            onnx_ml_model, "onnx",
            pandas.DataFrame(X[:batch_size], columns=columns), remainder_size)

        self.assertTrue(hb_model is not None)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train),
            hb_model.predict_proba(X_train),
            rtol=1e-06,
            atol=1e-06,
        )
Example #18
import numpy
import onnxruntime as rt
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools.convert import convert_sklearn

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = RandomForestClassifier()
clr.fit(X_train, y_train)
print(clr)

###########################
# Convert a model into ONNX
# +++++++++++++++++++++++++

initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)

with open("rf_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())

###################################
# Compute the prediction with onnxruntime
# +++++++++++++++++++++++++++++++++++++++
sess = rt.InferenceSession("rf_iris.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run([label_name],
                    {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)
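
############################################
# Fetch the probability output as well
# ++++++++++++++++++++++++++++++++++++++++++
# A hedged follow-up, not part of the original snippet: with the default
# conversion options the classifier's second output is a sequence of
# {class: probability} maps.
prob_name = sess.get_outputs()[1].name
prob_onx = sess.run([prob_name],
                    {input_name: X_test.astype(numpy.float32)})[0]
print(prob_onx[:2])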
