def test_xgb_classifier_reglog(self):
    """Binary XGBClassifier trained with the reg:logistic objective
    must convert to ONNX and match the sklearn predictions."""
    model, test_x = _fit_classification_model(
        XGBClassifier(objective='reg:logistic'), 2)
    onnx_model = convert_xgboost(
        model,
        initial_types=[('input', FloatTensorType(shape=[None, None]))],
    )
    self.assertIsNotNone(onnx_model)
    dump_data_and_model(
        test_x,
        model,
        onnx_model,
        basename="SklearnXGBClassifierRegLog",
        allow_failure="StrictVersion(onnx.__version__)"
                      "< StrictVersion('1.3.0')",
    )
def test_xgb_classifier_multi_reglog(self):
    """Multiclass XGBClassifier (iris, 2 features) with reg:logistic
    must convert to ONNX and match the sklearn predictions."""
    data = load_iris()
    features = data.data[:, :2]
    labels = data.target
    model = XGBClassifier(objective='reg:logistic')
    model.fit(features, labels)
    onnx_model = convert_xgboost(
        model,
        initial_types=[('input', FloatTensorType(shape=[1, 2]))],
    )
    self.assertIsNotNone(onnx_model)
    dump_multiple_classification(
        model,
        suffix="RegLog",
        allow_failure="StrictVersion(onnx.__version__) "
                      "< StrictVersion('1.3.0')")
def test_xgb_classifier_multi_str_labels(self):
    """5-class XGBClassifier trained on string labels must convert to
    ONNX and match the sklearn predictions."""
    model, test_x = _fit_classification_model(
        XGBClassifier(n_estimators=4), 5, is_str=True)
    onnx_model = convert_xgboost(
        model,
        initial_types=[('input', FloatTensorType(shape=[None, None]))],
    )
    self.assertIsNotNone(onnx_model)
    dump_data_and_model(
        test_x,
        model,
        onnx_model,
        basename="SklearnXGBClassifierMultiStrLabels",
        allow_failure="StrictVersion(onnx.__version__)"
                      "< StrictVersion('1.3.0')",
    )
def convert(self, model, data, args, model_name):
    """Convert *model* to ONNX according to self.params["operator"].

    :param model: fitted model (xgboost, lightgbm, or sklearn RF)
    :param data: dataset wrapper; only data.X_test is read here
    :param args: benchmark arguments forwarded to self.configure
    :param model_name: unused here; kept for interface compatibility
    :return: wall-clock conversion time in seconds (t.interval)
    :raises ValueError: if params["operator"] is not one of
        "xgb" / "lgbm" / "rf" (previously this fell through to a
        NameError on the unbound ``converter`` variable)
    """
    from onnxmltools.convert import convert_xgboost
    from onnxmltools.convert import convert_lightgbm
    from skl2onnx import convert_sklearn
    from onnxmltools.convert.common.data_types import FloatTensorType

    self.configure(data, model, args)
    with Timer() as t:
        operator = self.params["operator"]
        input_size = self.params["input_size"]
        if operator == "xgb":
            initial_type = [
                ("input", FloatTensorType([1, input_size]))
            ]
            # The XGBoost converter needs plain string feature names;
            # replace them with their positional indices as strings.
            model._Booster.feature_names = [
                str(i) for i in range(len(model._Booster.feature_names))
            ]
            self.model = convert_xgboost(
                model, initial_types=initial_type, target_opset=11)
        else:
            batch = min(len(data.X_test), self.params["batch_size"])
            remainder = len(data.X_test) % batch
            initial_type = [
                ("input", FloatTensorType([batch, input_size]))
            ]
            if operator == "lgbm":
                converter = convert_lightgbm
            elif operator == "rf":
                converter = convert_sklearn
            else:
                # Fail loudly instead of the NameError the original
                # code produced for an unrecognized operator.
                raise ValueError(
                    "Unsupported operator %r; expected 'xgb', 'lgbm' "
                    "or 'rf'" % operator)
            self.model = converter(model, initial_types=initial_type)
            # The last (partial) batch needs a model with a matching
            # fixed batch dimension.
            if remainder > 0:
                initial_type = [
                    ("input", FloatTensorType([remainder, input_size]))
                ]
                self.remainder_model = converter(
                    model, initial_types=initial_type, target_opset=11)
    return t.interval
def test_xgb_regressor(self):
    """XGBRegressor trained on the diabetes data must convert to ONNX
    and match the sklearn predictions (to 4 decimals).

    Fix: the initial type used the *string* ``'None'`` for both input
    dimensions; a dynamic dimension is the value ``None`` (a string is
    interpreted as a named symbolic dimension, not "unknown").
    """
    dataset = load_diabetes()
    x = dataset.data
    y = dataset.target
    x_train, x_test, y_train, _ = train_test_split(x, y,
                                                   test_size=0.5,
                                                   random_state=42)
    xgb = XGBRegressor()
    xgb.fit(x_train, y_train)
    conv_model = convert_xgboost(
        xgb,
        # None (not 'None') marks a dynamic dimension.
        initial_types=[('input', FloatTensorType(shape=[None, None]))])
    self.assertTrue(conv_model is not None)
    dump_data_and_model(
        x_test.astype("float32"),
        xgb,
        conv_model,
        basename="SklearnXGBRegressor-Dec4",
        allow_failure="StrictVersion("
                      "onnx.__version__)"
                      "< StrictVersion('1.3.0')",
    )
def convert_model(model, name, input_types, without_onnx_ml=False):
    """
    Runs the appropriate conversion method.

    :param model: model
    :return: *onnx* model
    """
    from sklearn.base import BaseEstimator
    cls_name = model.__class__.__name__
    if cls_name.startswith("LGBM"):
        from onnxmltools.convert import convert_lightgbm
        prefix = "LightGbm"
        onnx_model = convert_lightgbm(
            model, name, input_types, without_onnx_ml=without_onnx_ml)
    elif cls_name.startswith("XGB"):
        from onnxmltools.convert import convert_xgboost
        prefix = "XGB"
        onnx_model = convert_xgboost(model, name, input_types)
    elif cls_name == 'Booster':
        import lightgbm
        if not isinstance(model, lightgbm.Booster):
            raise RuntimeError("Unable to convert model of type '{0}'.".format(
                type(model)))
        from onnxmltools.convert import convert_lightgbm
        prefix = "LightGbm"
        onnx_model = convert_lightgbm(
            model, name, input_types, without_onnx_ml=without_onnx_ml)
    elif cls_name.startswith("CatBoost"):
        from onnxmltools.convert import convert_catboost
        prefix = "CatBoost"
        onnx_model = convert_catboost(model, name, input_types)
    elif isinstance(model, BaseEstimator):
        from onnxmltools.convert import convert_sklearn
        prefix = "Sklearn"
        onnx_model = convert_sklearn(model, name, input_types)
    else:
        from onnxmltools.convert import convert_coreml
        prefix = "Cml"
        onnx_model = convert_coreml(model, name, input_types)
    if onnx_model is None:
        raise RuntimeError("Unable to convert model of type '{0}'.".format(
            type(model)))
    return onnx_model, prefix
def test_xgboost_10(self):
    """Regression test for a conversion failure reproduced by the
    bundled input_fail_{train,test}.csv fixtures."""
    here = os.path.abspath(os.path.dirname(__file__))
    train_path = os.path.join(here, "input_fail_train.csv")
    test_path = os.path.join(here, "input_fail_test.csv")

    hyperparams = {
        "colsample_bytree": 0.5,
        "gamma": 0.2,
        'learning_rate': 0.3,
        'max_depth': 2,
        'min_child_weight': 1.,
        'n_estimators': 1,
        'missing': np.nan,
    }

    train_df = pandas.read_csv(train_path)
    X_train = train_df.drop('label', axis=1).values
    y_train = train_df['label'].values
    test_df = pandas.read_csv(test_path)
    X_test = test_df.drop('label', axis=1).values
    y_test = test_df['label'].values

    regressor = XGBRegressor(verbose=0, objective='reg:squarederror',
                             **hyperparams)
    regressor.fit(X_train, y_train)

    model_onnx = convert_xgboost(
        regressor, 'bug',
        [('input', FloatTensorType([None, X_train.shape[1]]))])
    dump_data_and_model(
        X_test.astype(np.float32),
        regressor,
        model_onnx,
        allow_failure="StrictVersion(onnx.__version__) "
                      "< StrictVersion('1.3.0')",
        basename="XGBBoosterRegBug")
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools.convert import convert_xgboost

# Train a plain XGBoost classifier on the iris data.
# NOTE(review): load_iris, train_test_split, XGBClassifier, rt (onnxruntime)
# and numpy are assumed to be imported earlier in this script — confirm.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = XGBClassifier()
clr.fit(X_train, y_train)
print(clr)

###########################
# Convert a model into ONNX
# +++++++++++++++++++++++++

# Iris has 4 features; the batch dimension is left dynamic (None).
initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_xgboost(clr, initial_types=initial_type)

###################################
# Compute the predictions with onnxruntime
# ++++++++++++++++++++++++++++++++++++++++

sess = rt.InferenceSession(onx.SerializeToString())
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
# The runtime expects float32 input; the first output is the predicted label.
pred_onx = sess.run([label_name],
                    {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)

###############################################
# With DMatrix
# ++++++++++++
X_test.head() #%% # Make a CSV backup of our test data without column names or indexes test_data = X_test.copy() test_data['target'] = y_test test_data.to_csv("test_data.csv", header=False, index=False) #%% test_data.head() #%% model = xgbr() model.fit(X_train, y_train, eval_set=[(X_test, y_test)]) #%% conv_model = convert_xgboost(model, initial_types=[('float_input', FloatTensorType(shape=[1, 4]))]) assert(conv_model is not None) save_model(conv_model, 'model.onnx') #%%
# Output paths for each converted ONNX artifact (overridable via env vars).
ONNX_TRANS_PATH = env.str('TRANS_PATH', './outputs/trans.onnx')
ONNX_MODEl_PATH_XGB = env.str('ONNX_MODEl_PATH_XGB', './outputs/xgb.onnx')
ONNX_MODEl_PATH_LGB = env.str('ONNX_MODEl_PATH_LGB', './outputs/lgb.onnx')
ONNX_MODEl_PATH_DCN = env.str('ONNX_MODEl_PATH_DCN', './outputs/dcn.onnx')

# Transformer input: 13 numeric + 26 categorical columns; downstream models
# consume the 39 transformed numeric features. Batch dimension is dynamic.
trans_initial_type = [('num_feat', FloatTensorType([None, 13])),
                      ('cat_feat', StringTensorType([None, 26]))]
model_initial_type = [('num_feat', FloatTensorType([None, 39]))]

print('convert sklearn transformer')
trans = joblib.load(TRANS_PATH)
onx = convert_sklearn(trans, initial_types=trans_initial_type)
onnx.save(onx, ONNX_TRANS_PATH)

print('convert XGBoost model')
model = xgb.XGBClassifier()
model.load_model(MODEL_PATH_XGB)
onx = convert_xgboost(model, initial_types=model_initial_type)
onnx.save(onx, ONNX_MODEl_PATH_XGB)

print('convert LightGBM model')
model = lgb.Booster(model_file=MODEL_PATH_LGB)
onx = convert_lightgbm(model, initial_types=model_initial_type)
onnx.save(onx, ONNX_MODEl_PATH_LGB)

print('convert DCN model')
# NOTE(review): from_saved_model is called before eager execution is
# disabled — confirm this ordering is intended for the tf2onnx-style API.
graph_def, inputs, outputs = from_saved_model(MODEL_PATH_DCN, None, None)
tf.compat.v1.disable_eager_execution()
onx = convert_tensorflow(graph_def, input_names=inputs, output_names=outputs)
onnx.save(onx, ONNX_MODEl_PATH_DCN)
# Train a small XGBoost classifier on iris; 80% of the data is held out
# for the accuracy report below.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8,
                                                    random_state=42)

import xgboost as xgb
xgb_model = xgb.XGBClassifier()
xgb_model = xgb_model.fit(X_train, y_train)
print("Test data accuracy of the xgb classifier is {:.2f}".format(
    xgb_model.score(X_test, y_test)))

from onnxmltools.convert import convert_xgboost, convert_lightgbm
from onnxconverter_common.data_types import FloatTensorType

# Convert with a fixed batch of 1 sample and 4 iris features, then
# serialize the model to gbtree.onnx.
onnx_model = convert_xgboost(
    xgb_model, initial_types=[("input", FloatTensorType([1, 4]))])
with open("gbtree.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())

if __name__ == '__main__':
    # Smoke-test: push one sample through the exported model.
    from inference import Inference
    infer = Inference("gbtree.onnx")
    print(infer.run(X[:1]))
def do_training(self, X, Y, create_onnx = True):
    """Fit 5 XGBoost classifiers with month-grouped CV; optionally export ONNX.

    :param X: raw feature frame; must contain a 'TransactionDT' column
        holding a seconds offset from the reference start date
    :param Y: target labels aligned with X
    :param create_onnx: when True, convert every fold's model to ONNX and
        bundle them via self.create_onnx
    """
    # Derive a month index (months since Dec 2017) from the transaction
    # timestamp; used as the GroupKFold grouping key so each fold holds
    # out whole months.
    START_DATE = datetime.datetime.strptime('2017-11-30', '%Y-%m-%d')
    month = X['TransactionDT'].apply(
        lambda x: (START_DATE + datetime.timedelta(seconds = x)))
    month = (month.dt.year-2017)*12 + month.dt.month

    print('Pre-processing ...')
    X = self.transform(X, do_fit=True)

    # Rename columns to their positional integer index (0..n-1);
    # presumably so feature names are uniform for ONNX — TODO confirm.
    new_columns = {}
    for i, c in enumerate(X.columns):
        new_columns[c] = i
    X = X.rename(columns=new_columns)

    print('Training ...')
    self.clfs = []  # one fitted classifier per CV fold
    skf = GroupKFold(n_splits=5)
    for i, (idxT, idxV) in enumerate( skf.split(X, Y, groups=month) ):
        m = month.iloc[idxV].iloc[0]
        print('Fold',i,'withholding month',m)
        print(' rows of train =',len(idxT),'rows of holdout =',len(idxV))
        clf = xgb.XGBClassifier(
            n_estimators=5000,
            max_depth=12,
            learning_rate=0.02,
            subsample=0.8,
            colsample_bytree=0.4,
            # [missing=-1,
            eval_metric='auc',
            # USE CPU
            nthread=32,
            tree_method='hist'
            # USE GPU
            #tree_method='gpu_hist'
        )
        # Early stopping on the withheld month; keep the fold's model.
        h = clf.fit(X.iloc[idxT], Y.iloc[idxT],
            eval_set=[(X.iloc[idxV],Y.iloc[idxV])],
            verbose=100, early_stopping_rounds=200)
        self.clfs.append(clf)

    # idxT = X.index[:3*len(X)//4]
    # idxV = X.index[3*len(X)//4:]
    # oof = np.zeros(len(idxV))
    # clf = xgb.XGBClassifier(
    #     n_estimators=5000,
    #     max_depth=12,
    #     learning_rate=0.02,
    #     subsample=0.8,
    #     colsample_bytree=0.4,
    #     # missing=-1,
    #     eval_metric='auc',
    #     # USE CPU
    #     nthread=32,
    #     tree_method='hist'
    #     # USE GPU
    #     #tree_method='gpu_hist'
    # )
    # h = clf.fit(X.loc[idxT], Y.loc[idxT],
    #     eval_set=[(X.loc[idxV],Y.loc[idxV])],
    #     verbose=100, early_stopping_rounds=200)
    # self.clfs.append(clf)

    if create_onnx:
        print('Converting models into ONNX ...')
        onnx_ml_models = []
        for i, clf in enumerate(self.clfs):
            # Input width must match the pipeline's output feature count.
            initial_type = [('dense_input',
                             FloatTensorType([None, len(self.pipeline.output_columns)]))]
            onnx_ml_models.append(convert_xgboost(clf, initial_types=initial_type))
        self.create_onnx('fraud-detection', onnx_ml_models)