def test_rfr(self):
    """Checks the statistics analyze_model extracts from a fitted
    RandomForestRegressor (forest size and per-tree aggregates)."""
    est = self.fit(RandomForestRegressor())
    stats = analyze_model(est)
    self.assertGreater(stats['estimators_.size'], 10)
    self.assertGreater(stats['estimators_.sum|tree_.node_count'], 100)
    self.assertGreater(stats['estimators_.sum|tree_.leave_count'], 100)
    self.assertGreater(stats['estimators_.max|tree_.max_depth'], 3)
def test_logreg(self):
    """Checks shapes reported by analyze_model for a fitted
    LogisticRegression (3 classes, 4 features)."""
    est = self.fit(LogisticRegression(solver='liblinear'))
    stats = analyze_model(est)
    self.assertIn('classes_.shape', stats)
    self.assertEqual(stats['classes_.shape'], 3)
    self.assertEqual(stats['coef_.shape'], (3, 4))
    self.assertEqual(stats['intercept_.shape'], 3)
def __init__(self, model, dataset, norm):
    """Fits *model* on *dataset*, converts the result to ONNX and
    instantiates three runtimes (onnxruntime, python, python_compiled)
    for the benchmark.

    :param model: model name, also encodes options such as ``-cdist``
    :param dataset: key into ``common_datasets``
    :param norm: if True, prepend a scaler to the pipeline
    """
    BenchPerfTest.__init__(self)
    self.model_name = model
    self.dataset_name = dataset
    self.datas = common_datasets[dataset]
    base = get_model(model)
    if not norm:
        self.model = base
    elif 'NB' in model:
        # Naive Bayes estimators require non-negative inputs.
        self.model = make_pipeline(MinMaxScaler(), base)
    else:
        self.model = make_pipeline(StandardScaler(), base)
    self.model.fit(self.datas[0], self.datas[2])
    self.data_test = self.datas[1]
    options = ({id(base): {'optim': 'cdist'}}
               if '-cdist' in model else None)
    self.onx = to_onnx(self.model, self.datas[0].astype(numpy.float32),
                       options=options,
                       target_opset=__max_supported_opset__)
    self.onx.ir_version = get_ir_version(__max_supported_opset__)
    # Silence skl2onnx logging during the benchmark.
    logger = getLogger("skl2onnx")
    logger.propagate = False
    logger.disabled = True
    self.ort = InferenceSession(self.onx.SerializeToString())
    self.oinf = OnnxInference(self.onx, runtime='python')
    self.oinfc = OnnxInference(self.onx, runtime='python_compiled')
    self.output_name = self.oinf.output_names[-1]
    self.input_name = self.ort.get_inputs()[0].name
    self.model_info = analyze_model(self.model)
def test_hgbr(self):
    """Checks analyze_model statistics for a fitted
    HistGradientBoostingRegressor (predictor counts and depths)."""
    est = self.fit(HistGradientBoostingRegressor())
    stats = analyze_model(est)
    self.assertGreater(stats['_predictors.size'], 10)
    self.assertGreater(stats['_predictors.sum|tree_.node_count'], 100)
    self.assertGreater(stats['_predictors.sum|tree_.leave_count'], 100)
    self.assertGreater(stats['_predictors.max|tree_.max_depth'], 3)
def test_gbc(self):
    """Checks analyze_model statistics for a fitted
    GradientBoostingClassifier (doubly-aggregated tree counters)."""
    est = self.fit(GradientBoostingClassifier())
    stats = analyze_model(est)
    self.assertIn('classes_.shape', stats)
    self.assertEqual(stats['classes_.shape'], 3)
    self.assertGreater(stats['estimators_.sum|.sum|tree_.node_count'], 15)
    self.assertGreater(stats['estimators_.sum|.sum|tree_.leave_count'], 8)
    self.assertGreater(stats['estimators_.max|.max|tree_.max_depth'], 3)
def test_dtc(self):
    """Checks analyze_model statistics for a fitted
    DecisionTreeClassifier (single-tree counters)."""
    est = self.fit(DecisionTreeClassifier())
    stats = analyze_model(est)
    self.assertIn('classes_.shape', stats)
    self.assertEqual(stats['classes_.shape'], 3)
    self.assertGreater(stats['tree_.node_count'], 15)
    self.assertGreater(stats['tree_.leave_count'], 8)
    self.assertGreater(stats['tree_.max_depth'], 3)
def test_hgbc(self):
    """Checks analyze_model statistics for a fitted
    HistGradientBoostingClassifier (classes and predictor counters)."""
    est = self.fit(HistGradientBoostingClassifier())
    stats = analyze_model(est)
    self.assertIn('classes_.shape', stats)
    self.assertEqual(stats['classes_.shape'], 3)
    self.assertGreater(stats['_predictors.size'], 10)
    self.assertGreater(stats['_predictors.sum|tree_.node_count'], 100)
    self.assertGreater(stats['_predictors.sum|tree_.leave_count'], 100)
    self.assertGreater(stats['_predictors.max|tree_.max_depth'], 3)
def test_xgbr(self):
    """Checks analyze_model statistics for a fitted XGBRegressor.

    The objective string differs between xgboost versions, hence the
    tuple membership test.
    """
    est = self.fit(XGBRegressor())
    stats = analyze_model(est)
    self.assertGreater(stats['ntrees'], 10)
    self.assertIn(stats['objective'], ('reg:linear', 'reg:squarederror'))
    self.assertGreater(stats['estimators_.size'], 10)
    self.assertGreater(stats['leave_count'], 100)
    self.assertGreater(stats['mode_count'], 2)
    self.assertGreater(stats['node_count'], 100)
def test_lgbmr(self):
    """Checks analyze_model statistics for a fitted LGBMRegressor."""
    est = self.fit(LGBMRegressor())
    stats = analyze_model(est)
    self.assertGreater(stats['ntrees'], 10)
    self.assertEqual(stats['objective'], 'regression')
    self.assertGreater(stats['estimators_.size'], 10)
    self.assertGreater(stats['leave_count'], 100)
    self.assertGreater(stats['mode_count'], 2)
    self.assertGreater(stats['node_count'], 100)
def test_rfc(self):
    """Checks analyze_model statistics for a fitted
    RandomForestClassifier (classes and per-tree aggregates)."""
    est = self.fit(RandomForestClassifier())
    stats = analyze_model(est)
    self.assertIn('classes_.shape', stats)
    self.assertEqual(stats['classes_.shape'], 3)
    self.assertEqual(stats['estimators_.classes_.shape'], 3)
    self.assertGreater(stats['estimators_.size'], 10)
    self.assertGreater(stats['estimators_.sum|tree_.node_count'], 100)
    self.assertGreater(stats['estimators_.sum|tree_.leave_count'], 100)
    self.assertGreater(stats['estimators_.max|tree_.max_depth'], 3)
def test_lgbmr(self):
    """Checks analyze_model statistics for a fitted LGBMRegressor.

    lightgbm is imported locally so the module loads even when the
    package is absent.
    """
    from lightgbm import LGBMRegressor  # pylint: disable=C0411
    est = self.fit(LGBMRegressor())
    stats = analyze_model(est)
    self.assertGreater(stats['ntrees'], 10)
    self.assertEqual(stats['objective'], 'regression')
    self.assertGreater(stats['estimators_.size'], 10)
    self.assertGreater(stats['leave_count'], 100)
    self.assertGreater(stats['mode_count'], 2)
    self.assertGreater(stats['node_count'], 100)
def test_lgbmc(self):
    """Checks analyze_model statistics for a fitted LGBMClassifier
    trained on a 3-class problem."""
    est = self.fit(LGBMClassifier())
    stats = analyze_model(est)
    self.assertEqual(stats['n_classes'], 3)
    self.assertGreater(stats['ntrees'], 10)
    self.assertEqual(stats['objective'], 'multiclass num_class:3')
    self.assertGreater(stats['estimators_.size'], 10)
    self.assertGreater(stats['leave_count'], 100)
    self.assertGreater(stats['mode_count'], 2)
    self.assertGreater(stats['node_count'], 100)
def test_xgbc(self):
    """Checks analyze_model statistics for a fitted XGBClassifier
    trained on a 3-class problem."""
    est = self.fit(XGBClassifier())
    stats = analyze_model(est)
    self.assertEqual(stats['classes_.shape'], 3)
    self.assertGreater(stats['ntrees'], 10)
    self.assertEqual(stats['objective'], 'multi:softprob')
    self.assertGreater(stats['estimators_.size'], 10)
    self.assertGreater(stats['leave_count'], 100)
    self.assertGreater(stats['mode_count'], 2)
    self.assertGreater(stats['node_count'], 100)
def __init__(self, model, dataset, norm):
    """Fits *model* on *dataset*, converts it to ONNX and prepares
    onnxruntime and mlprodict runtimes, wrapping conversion and
    loading failures into informative RuntimeErrors.

    :param model: model name; suffixes ``-cdist`` / ``-ZM`` select
        converter options
    :param dataset: key into ``common_datasets``
    :param norm: if True, prepend a scaler to the pipeline
    """
    BenchPerfTest.__init__(self)
    self.model_name = model
    self.dataset_name = dataset
    self.datas = common_datasets[dataset]
    base = get_model(model)
    if not norm:
        self.model = base
    elif 'NB' in model:
        # Naive Bayes estimators require non-negative inputs.
        self.model = make_pipeline(MinMaxScaler(), base)
    else:
        self.model = make_pipeline(StandardScaler(), base)
    self.model.fit(self.datas[0], self.datas[2])
    self.data_test = self.datas[1]
    if '-cdist' in model:
        options = {id(base): {'optim': 'cdist'}}
    elif "-ZM" in model:
        options = {id(base): {'zipmap': False}}
    else:
        options = None
    try:
        # NOTE(review): target_opset reads __max_supported_opsets__
        # (plural) while get_ir_version uses the singular constant —
        # confirm both names are intended here.
        self.onx = to_onnx(self.model,
                           self.datas[0].astype(numpy.float32),
                           options=options,
                           target_opset=__max_supported_opsets__)
        self.onx.ir_version = get_ir_version(__max_supported_opset__)
    except (RuntimeError, NameError) as e:
        raise RuntimeError("Unable to convert model {}.".format(
            self.model)) from e
    # Silence skl2onnx logging during the benchmark.
    logger = getLogger("skl2onnx")
    logger.propagate = False
    logger.disabled = True
    self.oinf = OnnxInference(self.onx, runtime='python')
    self.oinfc = OnnxInference(self.onx, runtime='python_compiled')
    try:
        self.ort = InferenceSession(self.onx.SerializeToString())
    except OrtFail as e:
        raise RuntimeError(
            "Unable to load model {}\n--SUMMARY--\n{}".format(
                self.model, self.oinfc)) from e
    self.output_name = self.oinf.output_names[-1]
    self.input_name = self.ort.get_inputs()[0].name
    self.model_info = analyze_model(self.model)
def __init__(self, lib, dataset):
    """Fits the model registered for *lib* on *dataset* and, when the
    ONNX conversion succeeds, prepares runtimes keyed by library name.

    :param lib: library/model identifier for ``get_model``
    :param dataset: key into ``common_datasets``
    """
    BenchPerfTest.__init__(self)
    # Silence skl2onnx logging during the benchmark.
    logger = getLogger("skl2onnx")
    logger.propagate = False
    logger.disabled = True
    self.dataset_name = dataset
    self.lib_name = lib
    # One dictionary entry per library keeps the interface uniform
    # even though a single library is handled here.
    self.models = {}
    self.datas = {}
    self.onxs = {}
    self.orts = {}
    self.oinfcs = {}
    self.model_info = {}
    self.output_name = {}
    self.input_name = {}
    self.models[lib] = get_model(lib)
    self.datas[lib] = common_datasets[dataset]
    features, labels = self.datas[lib][0], self.datas[lib][2]
    self.models[lib].fit(features, labels)
    self.model_info[lib] = analyze_model(self.models[lib])
    try:
        self.onxs[lib] = to_onnx(
            self.models[lib],
            self.datas[lib][0][:1].astype(numpy.float32),
            options=None)
    except RuntimeError:
        # Conversion is best effort: unconvertible models are skipped.
        pass
    if lib in self.onxs:
        self.orts[lib] = InferenceSession(
            self.onxs[lib].SerializeToString())
        self.oinfcs[lib] = OnnxInference(
            self.onxs[lib], runtime='python_compiled')
        self.output_name[lib] = self.oinfcs[lib].output_names[-1]
        self.input_name[lib] = self.orts[lib].get_inputs()[0].name
def test_knnc_onnx(self):
    """Checks analyze_model applied to an ONNX graph (operator
    counts) converted from a KNeighborsClassifier."""
    est = self.fit(KNeighborsClassifier())
    onx = to_onnx(est, numpy.zeros((3, 4), dtype=numpy.float32))
    stats = analyze_model(onx)
    self.assertIn('op_Identity', stats)
    self.assertEqual(stats['op_Identity'], 2)
def test_knnc(self):
    """Checks analyze_model statistics for a fitted
    KNeighborsClassifier (classes and stored training data shape)."""
    est = self.fit(KNeighborsClassifier())
    stats = analyze_model(est)
    self.assertIn('classes_.shape', stats)
    self.assertEqual(stats['classes_.shape'], 3)
    self.assertEqual(stats['_fit_X.shape'], (150, 4))
########################################
# Training and converting a model
# +++++++++++++++++++++++++++++++

data = make_regression(100000, 20)
X, y = data
X_train, X_test, y_train, y_test = train_test_split(X, y)

hgb = HistGradientBoostingRegressor(max_iter=100, max_depth=6)
hgb.fit(X_train, y_train)
print(hgb)

########################################
# Let's get more statistics about the model itself.

pprint(analyze_model(hgb))

#################################
# And let's convert it.

register_rewritten_operators()
onx = to_onnx(hgb, X_train[:1].astype(numpy.float32))
oinf = OnnxInference(onx, runtime='python_compiled')
print(oinf)

################################
# The runtime of the forest is in the following object.

print(oinf.sequence_[0].ops_)
print(oinf.sequence_[0].ops_.rt_)
def test_dtr(self):
    """Checks analyze_model statistics for a fitted
    DecisionTreeRegressor (single-tree counters)."""
    est = self.fit(DecisionTreeRegressor())
    stats = analyze_model(est)
    self.assertGreater(stats['tree_.node_count'], 15)
    self.assertGreater(stats['tree_.leave_count'], 8)
    self.assertGreater(stats['tree_.max_depth'], 3)
def test_linreg(self):
    """Checks analyze_model shapes for a fitted LinearRegression."""
    est = self.fit(LinearRegression())
    stats = analyze_model(est)
    self.assertEqual(stats['coef_.shape'], 4)
    self.assertEqual(stats['intercept_.shape'], 1)