def test_onnxrt_python_GradientBoostingRegressor64(self):
    """GradientBoostingRegressor converted twice, with float64 and float32:
    the double model must store its initializers as 'double_data', be
    larger once serialized, and both runtimes must agree with
    scikit-learn and with each other within 1e-5."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(
        X, y, random_state=11)  # pylint: disable=W0612
    clr = GradientBoostingRegressor(n_estimators=20)
    clr.fit(X_train, y_train)
    lexp = clr.predict(X_test)
    # float64 conversion: initializers must be stored as 'double_data'.
    model_def64 = to_onnx(clr, X_train.astype(numpy.float64),
                          dtype=numpy.float64, rewrite_ops=True)
    oinf64 = OnnxInference(model_def64)
    text = "\n".join(map(lambda x: str(x.ops_), oinf64.sequence_))
    self.assertIn("TreeEnsembleRegressor", text)
    # self.assertIn("TreeEnsembleRegressorDouble", text)
    smodel_def64 = str(model_def64)
    self.assertIn('double_data', smodel_def64)
    self.assertNotIn('floats', smodel_def64)
    y64 = oinf64.run({'X': X_test.astype(numpy.float64)})
    self.assertEqual(list(sorted(y64)), ['variable'])
    self.assertEqual(lexp.shape, y64['variable'].shape)
    self.assertEqualArray(lexp, y64['variable'])
    # float32 conversion: initializers must be stored as 'floats'.
    model_def32 = to_onnx(clr, X_train.astype(numpy.float32),
                          dtype=numpy.float32, rewrite_ops=True)
    oinf32 = OnnxInference(model_def32)
    text = "\n".join(map(lambda x: str(x.ops_), oinf32.sequence_))
    self.assertIn("TreeEnsembleRegressor", text)
    self.assertNotIn("TreeEnsembleRegressorDouble", text)
    smodel_def32 = str(model_def32)
    self.assertNotIn('doubles', smodel_def32)
    self.assertNotIn('double_data', smodel_def32)
    self.assertIn('floats', smodel_def32)
    y32 = oinf32.run({'X': X_test.astype(numpy.float32)})
    self.assertEqual(list(sorted(y32)), ['variable'])
    self.assertEqual(lexp.shape, y32['variable'].shape)
    self.assertEqualArray(lexp, y32['variable'])
    # The serialized double model must be significantly bigger than
    # the float one, and the output dtypes must differ.
    onx32 = model_def32.SerializeToString()
    onx64 = model_def64.SerializeToString()
    s32 = len(onx32)
    s64 = len(onx64)
    self.assertGreater(s64, s32 + 100)
    self.assertNotEqual(y32['variable'].dtype, y64['variable'].dtype)
    # Both runtimes must agree with each other within 1e-5.
    diff = numpy.max(
        numpy.abs(y32['variable'].astype(numpy.float64) -
                  y64['variable'].astype(numpy.float64)))
    self.assertLesser(diff, 1e-5)
def test_onnxt_lrc_iris_run_node_time(self):
    """Checks that ``node_time=True`` returns one timing dict per node,
    with and without verbose logging; also verifies that the verbose
    run actually emitted log messages through ``fLOG``."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    clr = LogisticRegression(solver="liblinear")
    clr.fit(X_train, y_train)
    model_def = to_onnx(clr, X_train.astype(numpy.float32))
    oinf = OnnxInference(model_def)
    # Silent run: the second returned value is the per-node timing.
    _, mt = oinf.run({'X': X_test}, node_time=True)
    self.assertIsInstance(mt, list)
    self.assertGreater(len(mt), 1)
    self.assertIsInstance(mt[0], dict)
    rows = []

    def myprint(*args):
        rows.append(' '.join(map(str, args)))

    # Verbose run: timings must still be returned.
    _, mt = oinf.run({'X': X_test}, node_time=True,
                     verbose=1, fLOG=myprint)
    self.assertIsInstance(mt, list)
    self.assertGreater(len(mt), 1)
    self.assertIsInstance(mt[0], dict)
    # Fix: the captured verbose output was collected but never checked;
    # assert it is non-trivial (consistent with
    # test_onnxrt_python_KMeans_verbose which asserts on its rows).
    self.assertGreater(len(rows), 1)
def test_onnxrt_python_DecisionTreeClassifier_mlabel(self):
    """Multi-label DecisionTreeClassifier: the conversion may be
    unsupported; when it succeeds, labels and probabilities must
    match scikit-learn."""
    iris = load_iris()
    X, y_ = iris.data, iris.target
    # Build a one-hot multi-label target from the class indices.
    labels = numpy.zeros((y_.shape[0], 3), dtype=int)
    labels[y_ == 0, 0] = 1
    labels[y_ == 1, 1] = 1
    labels[y_ == 2, 2] = 1
    X_train, X_test, y_train, _ = train_test_split(
        X, labels, random_state=11)
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)
    try:
        onx = to_onnx(model, X_train.astype(numpy.float32))
    except NotImplementedError:
        # multi-label is not supported yet
        return
    sess = OnnxInference(onx)
    text = "\n".join(str(node.ops_) for node in sess.sequence_)
    self.assertIn("TreeEnsembleClassifier", text)
    got = sess.run({'X': X_test.astype(numpy.float32)})
    self.assertEqual(sorted(got), ['output_label', 'output_probability'])
    exp = model.predict_proba(X_test)
    proba = pandas.DataFrame(list(got['output_probability'])).values
    self.assertEqualArray(exp, proba, decimal=5)
    lexp = model.predict(X_test)
    self.assertEqualArray(lexp, got['output_label'])
def test_validate_GradientBoostingClassifier_custom(self):
    """Compares predict_proba of a GradientBoostingClassifier with the
    ONNX python runtime on a multi-class problem; when the maximum
    absolute difference exceeds the tolerance, checks that only a few
    rows disagree before falling back to a relative comparison."""
    mcl = _problems['m-cl']()
    (X, y, init_types, _, __, ___) = mcl
    X_train, X_test, y_train, _ = train_test_split(
        X, y, shuffle=True, random_state=2)
    cl = GradientBoostingClassifier(n_estimators=20)
    cl.fit(X_train, y_train)
    pred_skl = cl.predict_proba(X_test)
    model_onnx = to_onnx(cl, init_types[0][1])
    oinf = OnnxInference(model_onnx, runtime='python')
    pred_onx = oinf.run({'X': X_test.astype(numpy.float32)})
    diff = numpy.max(
        numpy.abs(pred_skl -
                  pred_onx['output_probability'].values).ravel())
    if diff >= 1e-5:
        # Rank rows by disagreement (largest first): at most a couple
        # of rows may differ noticeably, the fourth largest difference
        # must already be below the tolerance.
        dd = [(numpy.max(numpy.abs(a - b)), i)
              for i, (a, b) in enumerate(
                  zip(pred_skl, pred_onx['output_probability'].values))]
        dd.sort(reverse=True)
        diff1 = dd[0][0]
        diff2 = dd[3][0]
        self.assertGreater(diff1, diff2)
        self.assertLesser(diff2, 1e-5)
    diff = measure_relative_difference(
        pred_skl, pred_onx['output_probability'])
    self.assertLesser(diff, 1e-5)
def test_onnxt_knn_iris_dot(self):
    """The dot export of a converted KNN classifier must not contain a
    dangling edge for 'class_labels_0'."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, __, y_train, _ = train_test_split(X, y, random_state=11)
    model = KNeighborsClassifier()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx, skip_run=True)
    dot = sess.to_dot()
    self.assertNotIn("class_labels_0 -> ;", dot)
def test_onnxt_lrc_iris(self):
    """The dot export of a converted logistic regression contains both
    the ZipMap and the LinearClassifier nodes."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, _, y_train, __ = train_test_split(X, y, random_state=11)
    model = LogisticRegression(solver="liblinear")
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    dot = OnnxInference(onx).to_dot()
    self.assertIn('ZipMap', dot)
    self.assertIn('LinearClassifier', dot)
def test_partial_float64(self):
    """GaussianProcessRegressor converted with the 'float64' option
    still returns float32 predictions for float32 inputs."""
    data = load_boston()
    X, y = data.data, data.target
    X_train, X_test, y_train, _ = train_test_split(X, y)
    model = GaussianProcessRegressor(alpha=10, kernel=DotProduct())
    model.fit(X_train, y_train)
    onnxgau48 = to_onnx(
        model, X_train.astype(numpy.float32), dtype=numpy.float32,
        options={GaussianProcessRegressor: {'float64': True}})
    sess = OnnxInference(onnxgau48, runtime="python")
    out = sess.run({'X': X_test.astype(numpy.float32)})
    pred = out['GPmean']
    self.assertEqual(pred.dtype, numpy.float32)
def test_onnxt_lrreg_iris_run(self):
    """LinearRegression predictions must match the python runtime."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = LinearRegression()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    got = sess.run({'X': X_test})
    exp = model.predict(X_test)
    self.assertEqual(sorted(got), ['variable'])
    self.assertEqualArray(exp, got['variable'].ravel(), decimal=6)
def test_onnxrt_python_Binarizer(self):
    """Binarizer.transform must match the ONNX runtime output."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = Binarizer()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    got = sess.run({'X': X_test})
    self.assertEqual(sorted(got), ['variable'])
    exp = model.transform(X_test)
    self.assertEqualArray(exp, got['variable'], decimal=6)
def test_onnxrt_python_KMeans(self):
    """KMeans produces a label vector and a score matrix matching
    predict and transform respectively."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, __, _ = train_test_split(X, y, random_state=11)
    model = KMeans()
    model.fit(X_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    got = sess.run({'X': X_test.astype(numpy.float32)})
    self.assertEqual(sorted(got), ['label', 'scores'])
    exp = model.predict(X_test)
    self.assertEqualArray(exp, got['label'])
    exp = model.transform(X_test)
    self.assertEqualArray(exp, got['scores'], decimal=4)
def test_onnxrt_python_DecisionTreeRegressor(self):
    """DecisionTreeRegressor maps to a TreeEnsembleRegressor node and
    reproduces scikit-learn predictions."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = DecisionTreeRegressor()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    text = "\n".join(str(node.ops_) for node in sess.sequence_)
    self.assertIn("TreeEnsembleRegressor", text)
    got = sess.run({'X': X_test.astype(numpy.float32)})
    self.assertEqual(sorted(got), ['variable'])
    lexp = model.predict(X_test)
    self.assertEqual(lexp.shape, got['variable'].shape)
    self.assertEqualArray(lexp, got['variable'])
def test_getitem(self):
    """__getitem__ on OnnxInference retrieves nodes by operator name
    and node attributes by (name, attribute) pairs."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, __, y_train, _ = train_test_split(X, y, random_state=11)
    model = KNeighborsClassifier()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx, skip_run=True)
    node = sess['ArrayFeatureExtractor']
    self.assertIn('ArrayFeatureExtractor', str(node))
    node = sess['ZipMap']
    self.assertIn('ZipMap', str(node))
    att = sess['ZipMap', 'classlabels_int64s']
    self.assertIn('classlabels_int64s', str(att))
def test_onnxrt_python_SimpleImputer(self):
    """SimpleImputer on data with injected NaN values; feeding a 1D
    input must raise RuntimeError."""
    iris = load_iris()
    X, y = iris.data, iris.target
    # Inject NaN values in every column on a regular pattern.
    for col in range(X.shape[1]):
        X[col::10, col] = numpy.nan
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = SimpleImputer()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    got = sess.run({'X': X_test})
    self.assertEqual(sorted(got), ['variable'])
    exp = model.transform(X_test)
    self.assertEqualArray(exp, got['variable'], decimal=6)
    self.assertRaise(lambda: sess.run({'X': X_test[0]}), RuntimeError)
def test_rt_MLPRegressor_simple_test(self):
    """MLPRegressor runs with the python runtime and returns a single
    column of predictions."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    logger = getLogger('skl2onnx')
    logger.disabled = True
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y)
    model = MLPRegressor()
    model.fit(X_train, y_train)
    xt32 = X_test.astype(numpy.float32)
    onx = to_onnx(model, xt32)
    pyrun = OnnxInference(onx, runtime="python")
    res = pyrun.run({'X': xt32})
    self.assertIn('variable', res)
    self.assertEqual(res['variable'].shape, (38, 1))
def test_onnxrt_python_xgbregressor(self):
    """Loops over every supported XGBoost objective and a couple of
    n_estimators values, converts each model and compares predictions
    (or probabilities for classifiers) against the ONNX runtime."""
    nb_tests = 0
    for objective in obj_classes:
        for n_estimators in [1, 2]:
            probs = []
            # cl: estimator class, fct: target transform,
            # prob: dataset attached to this objective.
            cl, fct, prob = obj_classes[objective]
            # First dataset: iris with a transformed target.
            iris = load_iris()
            X, y = iris.data, iris.target
            y = fct(y)
            X_train, X_test, y_train, _ = train_test_split(
                X, y, random_state=11)
            probs.append((X_train, X_test, y_train))
            # Second dataset: the problem attached to the objective.
            X_train, X_test, y_train, _ = train_test_split(
                *prob, random_state=11)
            probs.append((X_train, X_test, y_train))
            for X_train, X_test, y_train in probs:
                clr = cl(objective=objective,
                         n_estimators=n_estimators)
                clr.fit(X_train, y_train)
                model_def = to_onnx(clr, X_train.astype(numpy.float32))
                oinf = OnnxInference(model_def)
                y = oinf.run({'X': X_test.astype(numpy.float32)})
                if cl == XGBRegressor:
                    exp = clr.predict(X_test)
                    self.assertEqual(list(sorted(y)), ['variable'])
                    self.assertEqualArray(
                        exp, y['variable'].ravel(), decimal=6)
                else:
                    # Classifier: compare probabilities instead.
                    exp = clr.predict_proba(X_test)
                    self.assertEqual(
                        list(sorted(y)), ['label', 'probabilities'])
                    self.assertEqualArray(
                        exp, y['probabilities'], decimal=6)
                nb_tests += 1
    self.assertGreater(nb_tests, 20)
def test_onnxrt_python_LinearRegression(self):
    """LinearRegression converts to a LinearRegressor node with no
    post transform and matching predictions."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = LinearRegression()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    got = sess.run({'X': X_test})
    exp = model.predict(X_test)
    self.assertEqual(sorted(got), ['variable'])
    self.assertEqualArray(exp, got['variable'].ravel(), decimal=6)
    text = "\n".join(str(node.ops_) for node in sess.sequence_)
    self.assertIn('op_type=LinearRegressor', text)
    self.assertIn("post_transform=b'NONE'", text)
def test_onnxt_lrc_iris_run(self):
    """LogisticRegression labels and probabilities must match
    scikit-learn through the python runtime."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = LogisticRegression(solver="liblinear")
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    got = sess.run({'X': X_test})
    self.assertEqual(sorted(got), ['output_label', 'output_probability'])
    lexp = model.predict(X_test)
    self.assertEqualArray(lexp, got['output_label'])
    exp = model.predict_proba(X_test)
    proba = pandas.DataFrame(list(got['output_probability'])).values
    self.assertEqualArray(exp, proba, decimal=5)
def test_onnxt_lrc_iris_json(self):
    """The JSON export must mention the producer, the outputs and every
    node of the converted logistic regression."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, _, y_train, __ = train_test_split(X, y, random_state=11)
    model = LogisticRegression(solver="liblinear")
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    js = OnnxInference(onx).to_json()
    expected = ['"producer_name": "skl2onnx",',
                '"name": "output_label",',
                '"name": "output_probability",',
                '"name": "LinearClassifier",',
                '"coefficients": {',
                '"name": "Normalizer",',
                '"name": "Cast",',
                '"name": "ZipMap",']
    for sub in expected:
        self.assertIn(sub, js)
def test_onnxrt_python_KNeighborsRegressor(self):
    """KNN regressor checked row by row on the first five test samples;
    the graph must rely on a TopK node."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = KNeighborsRegressor()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    for i in range(0, 5):
        row = X_test[i:i + 1]
        got = sess.run({'X': row.astype(numpy.float32)})
        text = "\n".join(str(node.ops_) for node in sess.sequence_)
        self.assertIn('op_type=TopK', text)
        exp = model.predict(row).reshape((1, 1))
        self.assertEqual(sorted(got), ['variable'])
        self.assertEqualArray(exp, got['variable'], decimal=6)
def test_onnxrt_python_KNeighborsClassifier(self):
    """KNN classifier checked sample by sample against scikit-learn."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = KNeighborsClassifier()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    for i in range(0, X_test.shape[0]):
        row = X_test[i:i + 1]
        got = sess.run({'X': row.astype(numpy.float32)})
        self.assertEqual(sorted(got),
                         ['output_label', 'output_probability'])
        lexp = model.predict(row)
        self.assertEqualArray(lexp, got['output_label'])
        exp = model.predict_proba(row)
        proba = pandas.DataFrame(list(got['output_probability'])).values
        self.assertEqualArray(exp, proba, decimal=5)
def test_onnxrt_python_KNeighborsRegressor_simple_k1(self):
    """KNN regressor with a single neighbour on a tiny handcrafted
    dataset; every sample must be predicted exactly."""
    X = numpy.array([[0, 1], [0.2, 1.2], [1, 2], [1.2, 2.2]],
                    dtype=numpy.float32)
    y = numpy.array([1, 2, 3, 4], dtype=numpy.float32)
    model = KNeighborsRegressor(n_neighbors=1)
    model.fit(X, y)
    onx = to_onnx(model, X.astype(numpy.float32))
    sess = OnnxInference(onx)
    for i in range(0, X.shape[0]):
        row = X[i:i + 1]
        got = sess.run({'X': row})
        text = "\n".join(str(node.ops_) for node in sess.sequence_)
        self.assertIn('op_type=TopK', text)
        exp = model.predict(row).reshape((1, 1))
        self.assertEqual(sorted(got), ['variable'])
        self.assertEqualArray(exp, got['variable'], decimal=6)
def test_onnxrt_python_SVC_proba_linear(self):
    """LinearSVC maps to a LinearClassifier whose scores match
    decision_function."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = LinearSVC()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    text = "\n".join(str(node.ops_) for node in sess.sequence_)
    self.assertIn("LinearClassifier", text)
    got = sess.run({'X': X_test.astype(numpy.float32)})
    self.assertEqual(sorted(got), ['label', 'probabilities'])
    lexp = model.predict(X_test)
    lprob = model.decision_function(X_test)
    self.assertEqual(lexp.shape, got['label'].shape)
    self.assertEqual(lprob.shape, got['probabilities'].shape)
    self.assertEqualArray(lexp, got['label'], decimal=5)
    self.assertEqualArray(lprob, got['probabilities'], decimal=5)
def test_onnxrt_python_GradientBoostingClassifier3(self):
    """Three-class GradientBoostingClassifier converts to a
    TreeEnsembleClassifier with matching labels and probabilities."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = GradientBoostingClassifier()
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    text = "\n".join(str(node.ops_) for node in sess.sequence_)
    self.assertIn("TreeEnsembleClassifier", text)
    got = sess.run({'X': X_test.astype(numpy.float32)})
    self.assertEqual(sorted(got), ['output_label', 'output_probability'])
    lexp = model.predict(X_test)
    self.assertEqualArray(lexp, got['output_label'])
    exp = model.predict_proba(X_test)
    proba = pandas.DataFrame(list(got['output_probability'])).values
    self.assertEqualArray(exp, proba, decimal=3)
def test_onnxrt_python_SVC_proba(self):
    """SVC with probability=True maps to an SVMClassifier node; labels
    and probabilities match scikit-learn."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    model = SVC(probability=True)
    model.fit(X_train, y_train)
    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    text = "\n".join(str(node.ops_) for node in sess.sequence_)
    self.assertIn("SVMClassifier", text)
    got = sess.run({'X': X_test.astype(numpy.float32)})
    self.assertEqual(sorted(got), ['output_label', 'output_probability'])
    lexp = model.predict(X_test)
    lprob = model.predict_proba(X_test)
    proba = got['output_probability'].values
    self.assertEqual(lexp.shape, got['output_label'].shape)
    self.assertEqual(lprob.shape, proba.shape)
    self.assertEqualArray(lexp, got['output_label'], decimal=5)
    self.assertEqualArray(lprob, proba, decimal=5)
def test_onnxt_iris_gaussian_process_dot_product(self):
    """GaussianProcessRegressor with the 'return_std' option: checks
    float32 predictions, then switches the initializers (and the
    fitted estimator) to float64 and verifies the double run is
    closer to scikit-learn than the float run."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    clr = GaussianProcessRegressor(
        kernel=DotProduct(), alpha=100)
    clr.fit(X_train, y_train)
    ym, std = clr.predict(X_test, return_std=True)
    model_def = to_onnx(
        clr, X_train.astype(numpy.float32),
        options={GaussianProcessRegressor: {'return_std': True}})
    oinf = OnnxInference(model_def, runtime='python')
    # float32 run: mean and standard deviation must match sklearn.
    res = oinf.run({'X': X_test.astype(numpy.float32)})
    ym2, std2 = res['GPmean'], res['GPcovstd']
    self.assertEqualArray(
        numpy.squeeze(ym), numpy.squeeze(ym2), decimal=5)
    self.assertEqualArray(std, std2, decimal=4)
    # Switch initializers to double: the last recorded step of the
    # conversion must be 'pass2'.
    res = oinf.switch_initializers_dtype(clr)
    last = res[-1]
    self.assertEqual(last[0], 'pass2')
    # Exactly one fitted array of the estimator should have '_K_inv'
    # in its path.
    _linv = 0
    for a in enumerate_fitted_arrays(clr):
        if "_K_inv" in a[-2]:
            _linv += 1
    self.assertEqual(_linv, 1)
    # float64 run after the switch.
    res = oinf.run({'X': X_test})
    ym3, std3 = res['GPmean'], res['GPcovstd']
    self.assertEqualArray(ym3, ym2, decimal=5)
    self.assertEqualArray(std3, std2, decimal=4)
    # The double run (ym3) must be closer to sklearn (ym) than both
    # the float run (ym2) and the float/double gap, by a factor of 2.
    d1 = numpy.sum(numpy.abs(ym.ravel() - ym2.ravel()))
    d2 = numpy.sum(numpy.abs(ym.ravel() - ym3.ravel()))
    d3 = numpy.sum(numpy.abs(ym2.ravel() - ym3.ravel()))
    self.assertLess(d2, min(d1, d3) / 2)
    # Same check on the standard deviations.
    d1 = numpy.sum(numpy.abs(std.ravel() - std2.ravel()))
    d2 = numpy.sum(numpy.abs(std.ravel() - std3.ravel()))
    d3 = numpy.sum(numpy.abs(std2.ravel() - std3.ravel()))
    self.assertLess(d2, min(d1, d3) / 2)
def test_onnx_shaker(self):
    """Estimates the numerical uncertainty of a converted
    GradientBoostingClassifier with onnx_shaker: the observed
    deviation from scikit-learn must stay within twice the amplitude
    obtained by shaking the inputs."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(
        X, y, random_state=1, shuffle=True)
    clr = GradientBoostingClassifier(n_estimators=20)
    clr.fit(X_train, y_train)
    exp = clr.predict_proba(X_test)[:, 2]

    def output_fct(res):
        # Extracts the probability of the third class.
        val = res['output_probability'].values
        return val[:, 2]

    model_def = to_onnx(clr, X_train.astype(numpy.float32))
    oinf = OnnxInference(model_def)
    inputs = {'X': X_test}
    res1 = output_fct(oinf.run({'X': X_test.astype(numpy.float32)}))
    shaked = onnx_shaker(oinf, inputs, dtype=numpy.float32, n=100,
                         output_fct=output_fct, force=2)
    # Maximum amplitude observed across the shaken runs.
    delta1 = numpy.max(shaked.max(axis=1) - shaked.min(axis=1))
    # Deviation between the ONNX runtime and scikit-learn.
    deltae = numpy.max(numpy.abs(res1 - exp))
    self.assertLesser(deltae, delta1 * 2)
def test_onnxrt_python_KMeans_verbose(self):
    """Verbose mode must emit intermediate messages through fLOG while
    still producing correct outputs."""
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, __, _ = train_test_split(X, y, random_state=11)
    model = KMeans()
    model.fit(X_train)
    messages = []

    def myprint(*args, **kwargs):
        messages.extend(args)

    onx = to_onnx(model, X_train.astype(numpy.float32))
    sess = OnnxInference(onx)
    got = sess.run({'X': X_test.astype(numpy.float32)},
                   verbose=2, fLOG=myprint)
    self.assertEqual(sorted(got), ['label', 'scores'])
    exp = model.predict(X_test)
    self.assertEqualArray(exp, got['label'])
    exp = model.transform(X_test)
    self.assertEqualArray(exp, got['scores'], decimal=4)
    self.assertGreater(len(messages), 2)