def test_onnx_example_cdist_in_euclidean(self):
    """Count Identity nodes before and after identity removal on a cdist graph.

    The graph is ``Add(input, input)`` fed to ``onnx_cdist`` (euclidean
    metric) against a fixed 4x2 float32 matrix, wrapped in an Identity
    node named ``cdist``. ``onnx_statistics`` must report 3 Identity
    nodes for the converted model and 1 after
    ``onnx_remove_node_identity``.
    """
    # Every node of the graph is built with the same opset number.
    opset = get_opset_number_from_onnx()

    reference = numpy.array(
        [1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0]
    ).astype(numpy.float32).reshape((4, 2))

    doubled = OnnxAdd('input', 'input', op_version=opset)
    distances = onnx_cdist(
        doubled, reference, dtype=numpy.float32, metric='euclidean',
        op_version=opset)
    named_output = OnnxIdentity(
        distances, output_names=['cdist'], op_version=opset)

    model_def = named_output.to_onnx(
        inputs=[('input', FloatTensorType([None, None]))],
        outputs=[('cdist', FloatTensorType())],
        target_opset=opset)
    cleaned = onnx_remove_node_identity(model_def)

    before = onnx_statistics(model_def, optim=False)
    after = onnx_statistics(cleaned, optim=False)
    self.assertEqual(before.get('op_Identity', 0), 3)
    self.assertEqual(after.get('op_Identity', 0), 1)
def test_onnx_remove_redundant_subgraphs_full(self):
    """Check that ``onnx_optimisations`` shrinks a graph with duplicated work.

    The same ``Add(Identity(input), input)`` node feeds two separate
    ``onnx_squareform_pdist`` calls whose results are summed into the
    ``cdist`` output. After ``onnx_optimisations`` the statistics
    ``size``, ``nnodes`` and ``op_Identity`` must all be strictly
    smaller than for the converted model.
    """
    from skl2onnx.algebra.complex_functions import onnx_squareform_pdist

    # Every node of the graph is built with the same opset number.
    opset = get_opset_number_from_onnx()

    shared = OnnxAdd(
        OnnxIdentity('input', op_version=opset), 'input', op_version=opset)
    first = onnx_squareform_pdist(
        shared, dtype=numpy.float32, op_version=opset)
    second = onnx_squareform_pdist(
        shared, dtype=numpy.float32, op_version=opset)
    total = OnnxAdd(
        first, second, output_names=['cdist'], op_version=opset)

    model_def = total.to_onnx(
        {'input': FloatTensorType()},
        outputs=[('cdist', FloatTensorType())],
        target_opset=opset)

    before = onnx_statistics(model_def, optim=False)
    optimised = onnx_optimisations(model_def)
    after = onnx_statistics(optimised, optim=False)

    # Checked in this order: size, node count, Identity count.
    for key in ('size', 'nnodes', 'op_Identity'):
        self.assertLess(after[key], before[key])
def test_onnx_remove_identities2(self):
    """Check identity removal on a graph containing subgraphs.

    The model is ``Identity(input)`` fed to ``onnx_squareform_pdist``
    and wrapped in an Identity node named ``cdist``. The test verifies
    that ``onnx_remove_node_identity`` keeps the number of subgraphs,
    lowers the Identity count, and that both models produce the same
    ``cdist`` output on a 3x2 float32 input.
    """
    from skl2onnx.algebra.complex_functions import onnx_squareform_pdist

    # Every node of the graph is built with the same opset number.
    opset = get_opset_number_from_onnx()
    x = numpy.array([1, 2, 4, 5, 5, 4]).astype(
        numpy.float32).reshape((3, 2))

    entry = OnnxIdentity('input', op_version=opset)
    distances = onnx_squareform_pdist(
        entry, dtype=numpy.float32, op_version=opset)
    named_output = OnnxIdentity(
        distances, output_names=['cdist'], op_version=opset)
    model_def = named_output.to_onnx(
        {'input': FloatTensorType()},
        outputs=[('cdist', FloatTensorType())],
        target_opset=opset)

    # Statistics of the raw model.
    stats = onnx_statistics(model_def, optim=False)
    self.assertIn('subgraphs', stats)
    self.assertGreater(stats['subgraphs'], 1)
    self.assertGreater(stats['op_Identity'], 2)

    # Statistics once Identity nodes have been removed.
    new_model = onnx_remove_node_identity(model_def)
    stats2 = onnx_statistics(new_model, optim=False)
    self.assertEqual(stats['subgraphs'], stats2['subgraphs'])
    # NOTE(review): assertLesser is not a stock unittest method; it is
    # presumably provided by the test base class -- confirm its exact
    # (strict / non-strict) semantics there.
    self.assertLesser(stats2['op_Identity'], 2)

    # Both models must compute the same distances.
    outputs = [
        OnnxInference(model).run({'input': x})['cdist']
        for model in (model_def, new_model)
    ]
    self.assertEqualArray(outputs[0], outputs[1])
    self.assertLesser(stats2['op_Identity'], 1)
def test_statistics_pipeline_rf(self):
    """Check model and ONNX statistics for a scaler + random forest pipeline.

    A ``StandardScaler`` followed by a 10-tree, depth-4
    ``RandomForestRegressor`` is fitted on the iris training split.
    ``inspect_sklearn_model`` must report ``max_depth`` 4, ``ntrees`` 10
    and ``nop`` 11, and ``onnx_statistics`` on the converted model must
    match a fixed set of key/value pairs.
    """
    iris = load_iris()
    features, target = iris.data, iris.target
    train_x, _, train_y, _ = train_test_split(
        features, target, random_state=11)

    forest = RandomForestRegressor(n_estimators=10, n_jobs=1, max_depth=4)
    pipeline = Pipeline([('scaler1', StandardScaler()), ('rf', forest)])
    pipeline.fit(train_x, train_y)

    summary = inspect_sklearn_model(pipeline)
    self.assertEqual(summary['max_depth'], 4)
    self.assertEqual(summary['ntrees'], 10)
    self.assertEqual(summary['nop'], 11)

    # A single float32 row is enough to declare the input type.
    onx = to_onnx(pipeline, train_x[:1].astype(numpy.float32))
    ostats = onnx_statistics(onx)
    expected = {
        'nnodes': 2,
        'doc_string': '',
        'domain': 'ai.onnx',
        'model_version': 0,
        'producer_name': 'skl2onnx',
        'ai.onnx.ml': 1,
    }
    for key, value in expected.items():
        self.assertEqual(ostats[key], value)
def test_onnx_remove_two_outputs(self):
    """Check ``onnx_remove_node_redundant`` on a graph with two outputs.

    The graph computes ``final = (X + 1) * (X + 2) - keep`` where
    ``keep = X + 1`` is itself a declared output. The test checks the
    number of graph outputs, the ``ninits`` / ``nnodes`` statistics of
    the original and processed models, and that both models return the
    same ``final`` and ``keep`` arrays.
    """
    dtype = numpy.float32
    # Resolved once; the nodes below that pass op_version all share it.
    opset = get_opset_number_from_onnx()
    x = numpy.array([1, 2, 4, 5, 5, 4]).astype(
        numpy.float32).reshape((3, 2))

    plus_one = OnnxAdd('X', numpy.array([1], dtype=dtype), op_version=opset)
    kept = OnnxAdd(
        'X', numpy.array([1], dtype=dtype),
        output_names=['keep'], op_version=opset)
    plus_two = OnnxAdd('X', numpy.array([2], dtype=dtype), op_version=opset)
    product = OnnxMul(plus_one, plus_two, op_version=opset)
    final = OnnxSub(
        product, kept, output_names=['final'], op_version=opset)

    model_def = final.to_onnx(
        {'X': x},
        outputs=[('keep', FloatTensorType([None, 2])),
                 ('final', FloatTensorType([None, 2]))])

    # NOTE(review): both snapshots are taken before the optimisation
    # below runs, so this comparison does not cover
    # onnx_remove_node_redundant -- confirm this is intended.
    snapshot_before = model_def.SerializeToString()
    self.assertEqual(len(model_def.graph.output), 2)
    snapshot_after = model_def.SerializeToString()
    self.assertEqual(snapshot_before, snapshot_after)

    stats = onnx_statistics(model_def, optim=True)
    new_model = onnx_remove_node_redundant(model_def, max_hash_size=10)
    stats2 = onnx_statistics(model_def, optim=True)
    stats3 = onnx_statistics(new_model, optim=False)

    self.assertEqual(stats['ninits'], 2)
    self.assertEqual(stats2['ninits'], 2)
    self.assertEqual(stats3['ninits'], 2)
    self.assertEqual(stats2['nnodes'], 6)
    self.assertEqual(stats3['nnodes'], 6)

    # Original model first, then the processed one.
    feeds = {'X': x}
    y1 = OnnxInference(model_def).run(feeds)
    y2 = OnnxInference(new_model).run(feeds)
    for name in ('final', 'keep'):
        self.assertEqualArray(y1[name], y2[name])
def onnx_test_knn_single_regressor(self, dtype, n_targets=1, debug=False,
                                   add_noise=False, runtime='python',
                                   target_opset=None, expected=None,
                                   **kwargs):
    """Convert a ``KNeighborsRegressor`` and check its Identity statistics.

    A regressor built with ``**kwargs`` is fitted on the iris training
    split and converted with ``to_onnx(..., rewrite_ops=True)``. The
    helper then checks that ``onnx_remove_node_identity`` leaves the
    input model byte-identical, and compares the ``op_Identity``
    counters reported by ``onnx_statistics`` with *expected*.

    :param dtype: numpy dtype used for the targets and the ONNX input
    :param n_targets: number of regression targets; when different
        from 1, column ``i`` of the target matrix is ``y + i``
    :param debug: not used by this helper
    :param add_noise: add gaussian noise (scaled by 10) to the features
    :param runtime: not used by this helper
    :param target_opset: forwarded to ``to_onnx``
    :param expected: pair ``(before, after)``: expected number of
        Identity nodes in the converted model and in the model
        without identities; required despite the ``None`` default
    :param kwargs: forwarded to ``KNeighborsRegressor``
    :raises TypeError: if *expected* is not provided
    """
    # Fail fast with a clear message instead of an opaque
    # "'NoneType' object is not subscriptable" after the whole model
    # has been trained and converted.
    if expected is None:
        raise TypeError(
            "'expected' must be a pair (identity nodes before removal, "
            "identity nodes after removal), not None.")

    iris = load_iris()
    X, y = iris.data, iris.target
    if add_noise:
        X += numpy.random.randn(X.shape[0], X.shape[1]) * 10
    y = y.astype(dtype)
    if n_targets != 1:
        yn = numpy.empty((y.shape[0], n_targets), dtype=dtype)
        for i in range(n_targets):
            yn[:, i] = y + i
        y = yn
    # Only the training split is needed here.
    X_train, _, y_train, _ = train_test_split(X, y, random_state=11)
    clr = KNeighborsRegressor(**kwargs)
    clr.fit(X_train, y_train)

    model_def = to_onnx(clr, X_train.astype(dtype),
                        rewrite_ops=True, target_opset=target_opset)

    # Removing identities must not modify the input model.
    c1 = model_def.SerializeToString()
    new_model = onnx_remove_node_identity(model_def)
    c2 = model_def.SerializeToString()
    self.assertEqual(c1, c2)

    stats = onnx_statistics(model_def, optim=True)
    stats2 = onnx_statistics(new_model, optim=False)
    self.assertEqual(stats.get('op_Identity', 0), expected[0])
    self.assertEqual(stats2.get('op_Identity', 0), expected[1])
    self.assertEqual(stats.get('op_Identity_optim', 0), expected[1])
    for key in ('nnodes_optim', 'ninits_optim',
                'size_optim', 'subgraphs_optim'):
        self.assertIn(key, stats)
def test_onnx_stat_recursive(self):
    """Check that ``onnx_statistics`` reports subgraphs and Identity nodes.

    The model is ``Add(Identity(input), input)`` fed to
    ``onnx_squareform_pdist`` and wrapped in an Identity node named
    ``cdist``. The statistics must contain a ``subgraphs`` entry, and
    the ``subgraphs`` and ``op_Identity`` values are compared with 1
    and 2 through ``assertGreater``.
    """
    from skl2onnx.algebra.complex_functions import onnx_squareform_pdist

    # Every node of the graph is built with the same opset number.
    opset = get_opset_number_from_onnx()

    entry = OnnxIdentity('input', op_version=opset)
    doubled = OnnxAdd(entry, 'input', op_version=opset)
    distances = onnx_squareform_pdist(
        doubled, dtype=numpy.float32, op_version=opset)
    named_output = OnnxIdentity(
        distances, output_names=['cdist'], op_version=opset)

    model_def = named_output.to_onnx(
        {'input': FloatTensorType()},
        outputs=[('cdist', FloatTensorType())],
        target_opset=opset)

    stats = onnx_statistics(model_def)
    self.assertIn('subgraphs', stats)
    self.assertGreater(stats['subgraphs'], 1)
    self.assertGreater(stats['op_Identity'], 2)
def test_statistics_pipeline_sgd(self):
    """Check ``onnx_statistics`` on a converted ``SGDClassifier``.

    The classifier is fitted on the iris training split and converted
    with ``to_onnx``. A fixed set of statistics must match; a mismatch
    is re-raised with the offending key and expected value in the
    message. The statistics must also contain the ``''`` and
    ``op_Cast`` keys.
    """
    iris = load_iris()
    features, target = iris.data, iris.target
    train_x, _, train_y, _ = train_test_split(
        features, target, random_state=11)

    classifier = SGDClassifier()
    classifier.fit(train_x, train_y)

    # A single float32 row is enough to declare the input type.
    onx = to_onnx(classifier, train_x[:1].astype(numpy.float32))
    ostats = onnx_statistics(onx)

    expected = {
        'nnodes': 8,
        'doc_string': '',
        'domain': 'ai.onnx',
        'model_version': 0,
        'producer_name': 'skl2onnx',
        'ai.onnx.ml': 1,
    }
    for key, value in expected.items():
        try:
            self.assertEqual(ostats[key], value)
        except AssertionError as exc:
            # Name the failing statistic in the error message.
            raise AssertionError(
                "Issue with '{}' -> {}.".format(key, value)) from exc

    # NOTE(review): the empty-string key is presumably the default
    # ONNX opset domain -- confirm against onnx_statistics.
    self.assertIn('', ostats)
    self.assertIn("op_Cast", ostats)