Example no. 1
    def test_xgb_classifier_reglog(self):
        xgb, x_test = _fit_classification_model(
            XGBClassifier(objective='reg:logistic'), 2)
        conv_model = convert_xgboost(
            xgb,
            initial_types=[
                ('input', FloatTensorType(shape=[None, None]))
            ])
        self.assertTrue(conv_model is not None)
        dump_data_and_model(
            x_test,
            xgb,
            conv_model,
            basename="SklearnXGBClassifierRegLog",
            allow_failure="StrictVersion("
            "onnx.__version__)"
            "< StrictVersion('1.3.0')",
        )

    def test_xgb_classifier_multi_reglog(self):
        iris = load_iris()
        X = iris.data[:, :2]
        y = iris.target

        xgb = XGBClassifier(objective='reg:logistic')
        xgb.fit(X, y)
        conv_model = convert_xgboost(xgb,
                                     initial_types=[
                                         ('input',
                                          FloatTensorType(shape=[1, 2]))
                                     ])
        self.assertTrue(conv_model is not None)
        dump_multiple_classification(
            xgb,
            suffix="RegLog",
            allow_failure=
            "StrictVersion(onnx.__version__) < StrictVersion('1.3.0')")

    def test_xgb_classifier_multi_str_labels(self):
        xgb, x_test = _fit_classification_model(
            XGBClassifier(n_estimators=4), 5, is_str=True)
        conv_model = convert_xgboost(
            xgb,
            initial_types=[
                ('input', FloatTensorType(shape=[None, None]))
            ])
        self.assertTrue(conv_model is not None)
        dump_data_and_model(
            x_test,
            xgb,
            conv_model,
            basename="SklearnXGBClassifierMultiStrLabels",
            allow_failure="StrictVersion("
            "onnx.__version__)"
            "< StrictVersion('1.3.0')",
        )
Example no. 4
    def convert(self, model, data, args, model_name):
        from onnxmltools.convert import convert_xgboost
        from onnxmltools.convert import convert_lightgbm
        from skl2onnx import convert_sklearn
        from onnxmltools.convert.common.data_types import FloatTensorType

        self.configure(data, model, args)

        with Timer() as t:
            if self.params["operator"] == "xgb":
                initial_type = [
                    ("input", FloatTensorType([1, self.params["input_size"]]))
                ]
                fixed_names = list(
                    map(lambda x: str(x),
                        range(len(model._Booster.feature_names))))
                model._Booster.feature_names = fixed_names
                self.model = convert_xgboost(model,
                                             initial_types=initial_type,
                                             target_opset=11)
            else:
                batch = min(len(data.X_test), self.params["batch_size"])
                remainder = len(data.X_test) % batch
                initial_type = [
                    ("input",
                     FloatTensorType([batch, self.params["input_size"]]))
                ]

                if self.params["operator"] == "lgbm":
                    converter = convert_lightgbm
                elif self.params["operator"] == "rf":
                    converter = convert_sklearn

                self.model = converter(model, initial_types=initial_type)
                if remainder > 0:
                    initial_type = [("input",
                                     FloatTensorType([
                                         remainder, self.params["input_size"]
                                     ]))]
                    self.remainder_model = converter(
                        model, initial_types=initial_type, target_opset=11)
        return t.interval
Example no. 5
    def test_xgb_regressor(self):
        diabetes = load_diabetes()
        x = diabetes.data
        y = diabetes.target
        x_train, x_test, y_train, _ = train_test_split(
            x, y, test_size=0.5, random_state=42)
        xgb = XGBRegressor()
        xgb.fit(x_train, y_train)
        conv_model = convert_xgboost(
            xgb,
            initial_types=[('input', FloatTensorType(shape=[None, None]))])
        self.assertTrue(conv_model is not None)
        dump_data_and_model(
            x_test.astype("float32"),
            xgb,
            conv_model,
            basename="SklearnXGBRegressor-Dec4",
            allow_failure="StrictVersion("
            "onnx.__version__)"
            "< StrictVersion('1.3.0')",
        )
Example no. 6
def convert_model(model, name, input_types, without_onnx_ml=False):
    """
    Runs the appropriate conversion method.

    :param model: model
    :return: *onnx* model
    """
    from sklearn.base import BaseEstimator
    if model.__class__.__name__.startswith("LGBM"):
        from onnxmltools.convert import convert_lightgbm
        model, prefix = convert_lightgbm(
            model, name, input_types,
            without_onnx_ml=without_onnx_ml), "LightGbm"
    elif model.__class__.__name__.startswith("XGB"):
        from onnxmltools.convert import convert_xgboost
        model, prefix = convert_xgboost(model, name, input_types), "XGB"
    elif model.__class__.__name__ == 'Booster':
        import lightgbm
        if isinstance(model, lightgbm.Booster):
            from onnxmltools.convert import convert_lightgbm
            model, prefix = convert_lightgbm(
                model, name, input_types,
                without_onnx_ml=without_onnx_ml), "LightGbm"
        else:
            raise RuntimeError("Unable to convert model of type '{0}'.".format(
                type(model)))
    elif model.__class__.__name__.startswith("CatBoost"):
        from onnxmltools.convert import convert_catboost
        model, prefix = convert_catboost(model, name, input_types), "CatBoost"
    elif isinstance(model, BaseEstimator):
        from onnxmltools.convert import convert_sklearn
        model, prefix = convert_sklearn(model, name, input_types), "Sklearn"
    else:
        from onnxmltools.convert import convert_coreml
        model, prefix = convert_coreml(model, name, input_types), "Cml"
    if model is None:
        raise RuntimeError("Unable to convert model of type '{0}'.".format(
            type(model)))
    return model, prefix
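
# Hedged usage sketch (not part of the original module): dispatching an
# XGBoost classifier through convert_model above. Assumes scikit-learn,
# xgboost and onnxconverter_common are installed; shapes are illustrative.
from sklearn.datasets import load_iris
from xgboost import XGBClassifier
from onnxconverter_common.data_types import FloatTensorType

X, y = load_iris(return_X_y=True)
clf = XGBClassifier(n_estimators=3).fit(X, y)
onnx_model, prefix = convert_model(
    clf, "xgb_example", [("input", FloatTensorType([None, X.shape[1]]))])
print(prefix)  # -> "XGB"
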
    def test_xgboost_10(self):
        this = os.path.abspath(os.path.dirname(__file__))
        train = os.path.join(this, "input_fail_train.csv")
        test = os.path.join(this, "input_fail_test.csv")

        param_distributions = {
            "colsample_bytree": 0.5,
            "gamma": 0.2,
            'learning_rate': 0.3,
            'max_depth': 2,
            'min_child_weight': 1.,
            'n_estimators': 1,
            'missing': np.nan,
        }

        train_df = pandas.read_csv(train)
        X_train, y_train = train_df.drop(
            'label', axis=1).values, train_df['label'].values
        test_df = pandas.read_csv(test)
        X_test, y_test = test_df.drop('label',
                                      axis=1).values, test_df['label'].values

        regressor = XGBRegressor(verbose=0,
                                 objective='reg:squarederror',
                                 **param_distributions)
        regressor.fit(X_train, y_train)

        model_onnx = convert_xgboost(
            regressor, 'bug',
            [('input', FloatTensorType([None, X_train.shape[1]]))])

        dump_data_and_model(
            X_test.astype(np.float32),
            regressor,
            model_onnx,
            allow_failure=
            "StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
            basename="XGBBoosterRegBug")

import numpy
import onnxruntime as rt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools.convert import convert_xgboost

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = XGBClassifier()
clr.fit(X_train, y_train)
print(clr)

###########################
# Convert a model into ONNX
# +++++++++++++++++++++++++

initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_xgboost(clr, initial_types=initial_type)

###################################
# Compute the predictions with onnxruntime
# ++++++++++++++++++++++++++++++++++++++++

sess = rt.InferenceSession(onx.SerializeToString())
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run([label_name],
                    {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)

###############################################
# With DMatrix
# ++++++++++++
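# The original snippet stops at the header above. Below is a hedged sketch
# (not from the source) of how the DMatrix variant could look:
# convert_xgboost also accepts a Booster trained with xgboost.train.
# Parameter values here are illustrative assumptions.
from xgboost import DMatrix, train as train_xgboost

dtrain = DMatrix(X_train, label=y_train)
params = {'objective': 'multi:softmax', 'num_class': 3}
booster = train_xgboost(params, dtrain, num_boost_round=10)

onx_booster = convert_xgboost(
    booster, initial_types=[('float_input', FloatTensorType([None, 4]))])
sess_booster = rt.InferenceSession(onx_booster.SerializeToString())
pred_booster = sess_booster.run(
    None, {sess_booster.get_inputs()[0].name: X_test.astype(numpy.float32)})[0]
print(pred_booster)
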
Example no. 9
X_test.head()


#%%
# Make a CSV backup of our test data without column names or indexes
test_data = X_test.copy()
test_data['target'] = y_test
test_data.to_csv("test_data.csv", header=False, index=False)


#%%
test_data.head()


#%%
model = xgbr()  # xgbr: presumably xgboost.XGBRegressor, imported earlier in the notebook
model.fit(X_train, y_train, eval_set=[(X_test, y_test)])


#%%
conv_model = convert_xgboost(model, initial_types=[('float_input', FloatTensorType(shape=[1, 4]))])
assert(conv_model is not None)

save_model(conv_model, 'model.onnx')
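
#%%
# Hedged sanity check (not part of the original notebook): load the saved ONNX
# file with onnxruntime and compare a single prediction against the native
# model. A single row is used because the graph was exported with a fixed
# [1, 4] input shape. Assumes onnxruntime and numpy are importable here.
import numpy as np
import onnxruntime as rt

sess = rt.InferenceSession('model.onnx')
row = X_test.values[:1].astype(np.float32)
onnx_pred = sess.run(None, {sess.get_inputs()[0].name: row})[0]
print(onnx_pred.ravel(), model.predict(X_test.iloc[:1]))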


Example no. 10
ONNX_TRANS_PATH = env.str('TRANS_PATH', './outputs/trans.onnx')
ONNX_MODEl_PATH_XGB = env.str('ONNX_MODEl_PATH_XGB', './outputs/xgb.onnx')
ONNX_MODEl_PATH_LGB = env.str('ONNX_MODEl_PATH_LGB', './outputs/lgb.onnx')
ONNX_MODEl_PATH_DCN = env.str('ONNX_MODEl_PATH_DCN', './outputs/dcn.onnx')

trans_initial_type = [('num_feat', FloatTensorType([None, 13])),
                      ('cat_feat', StringTensorType([None, 26]))]
model_initial_type = [('num_feat', FloatTensorType([None, 39]))]

print('convert sklearn transformer')
trans = joblib.load(TRANS_PATH)
onx = convert_sklearn(trans, initial_types=trans_initial_type)
onnx.save(onx, ONNX_TRANS_PATH)

print('convert XGBoost model')
model = xgb.XGBClassifier()
model.load_model(MODEL_PATH_XGB)
onx = convert_xgboost(model, initial_types=model_initial_type)
onnx.save(onx, ONNX_MODEl_PATH_XGB)

print('convert LightGBM model')
model = lgb.Booster(model_file=MODEL_PATH_LGB)
onx = convert_lightgbm(model, initial_types=model_initial_type)
onnx.save(onx, ONNX_MODEl_PATH_LGB)

print('convert DCN model')
graph_def, inputs, outputs = from_saved_model(MODEL_PATH_DCN, None, None)
tf.compat.v1.disable_eager_execution()
onx = convert_tensorflow(graph_def, input_names=inputs, output_names=outputs)
onnx.save(onx, ONNX_MODEl_PATH_DCN)
Example no. 11
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.8,
                                                    random_state=42)

import xgboost as xgb

xgb_model = xgb.XGBClassifier()
xgb_model = xgb_model.fit(X_train, y_train)

print("Test data accuracy of the xgb classifier is {:.2f}".format(
    xgb_model.score(X_test, y_test)))

from onnxmltools.convert import convert_xgboost, convert_lightgbm
from onnxconverter_common.data_types import FloatTensorType

onnx_model = convert_xgboost(xgb_model,
                             initial_types=[("input", FloatTensorType([1,
                                                                       4]))])

with open("gbtree.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())
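
# Note: the initial type above fixes the batch dimension to 1, so the exported
# graph accepts a single row per run. A hedged sketch (not from the source) of
# a dynamic-batch export using the same xgb_model:
onnx_model_dynamic = convert_xgboost(
    xgb_model, initial_types=[("input", FloatTensorType([None, 4]))])
with open("gbtree_dynamic.onnx", "wb") as f:
    f.write(onnx_model_dynamic.SerializeToString())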

if __name__ == '__main__':
    from inference import Inference

    infer = Inference("gbtree.onnx")
    print(infer.run(X[:1]))
Example no. 12
    def do_training(self, X, Y, create_onnx=True):
        START_DATE = datetime.datetime.strptime('2017-11-30', '%Y-%m-%d')
        month = X['TransactionDT'].apply(
            lambda x: START_DATE + datetime.timedelta(seconds=x))
        month = (month.dt.year-2017)*12 + month.dt.month

        print('Pre-processing ...')
        X = self.transform(X, do_fit=True)

        new_columns = {}
        for i, c in enumerate(X.columns):
            new_columns[c] = i
        X = X.rename(columns=new_columns)

        print('Training ...')
        self.clfs = []
        skf = GroupKFold(n_splits=5)
        for i, (idxT, idxV) in enumerate(skf.split(X, Y, groups=month)):
            m = month.iloc[idxV].iloc[0]
            print('Fold', i, 'withholding month', m)
            print(' rows of train =', len(idxT), 'rows of holdout =', len(idxV))
            clf = xgb.XGBClassifier(
                n_estimators=5000,
                max_depth=12,
                learning_rate=0.02,
                subsample=0.8,
                colsample_bytree=0.4,
                # missing=-1,
                eval_metric='auc',
                # USE CPU
                nthread=32,
                tree_method='hist'
                # USE GPU
                #tree_method='gpu_hist' 
            )
            h = clf.fit(X.iloc[idxT], Y.iloc[idxT], 
                        eval_set=[(X.iloc[idxV],Y.iloc[idxV])],
                        verbose=100, early_stopping_rounds=200)
            self.clfs.append(clf)
        
        # idxT = X.index[:3*len(X)//4]
        # idxV = X.index[3*len(X)//4:]

        # oof = np.zeros(len(idxV))
        # clf = xgb.XGBClassifier(
        #     n_estimators=5000,
        #     max_depth=12,
        #     learning_rate=0.02,
        #     subsample=0.8,
        #     colsample_bytree=0.4,
        #     # missing=-1,
        #     eval_metric='auc',
        #     # USE CPU
        #     nthread=32,
        #     tree_method='hist'
        #     # USE GPU
        #     #tree_method='gpu_hist' 
        # )
        # h = clf.fit(X.loc[idxT], Y.loc[idxT], 
        #             eval_set=[(X.loc[idxV],Y.loc[idxV])],
        #                  verbose=100, early_stopping_rounds=200)
        # self.clfs.append(clf)

        if create_onnx:
            print('Converting models into ONNX ...')
            onnx_ml_models = []
            for i, clf in enumerate(self.clfs):
                initial_type = [('dense_input', FloatTensorType([None, len(self.pipeline.output_columns)]))]
                onnx_ml_models.append(convert_xgboost(clf, initial_types=initial_type))

            self.create_onnx('fraud-detection', onnx_ml_models)
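
            # self.create_onnx is project-specific and not shown here. A
            # hedged, minimal alternative would simply write each fold's
            # converted model to disk, e.g. (assuming onnxmltools is
            # installed):
            #     from onnxmltools.utils import save_model
            #     for i, onnx_ml_model in enumerate(onnx_ml_models):
            #         save_model(onnx_ml_model,
            #                    'fraud-detection-fold-{}.onnx'.format(i))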