def test_pipeline_add(self):
    """Check a pipeline whose first step is an ONNX ``Add`` node.

    An ``OnnxAdd`` node adding a row of ones to input ``X`` is converted
    to ONNX, wrapped in an ``OnnxTransformer`` and chained with a
    ``LogisticRegression``. The fitted pipeline is converted with
    ``to_onnx`` and run through ``OnnxInference``; its labels and
    probabilities must match the scikit-learn pipeline.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    n_features = X.shape[1]
    # The float32 copy is used both for the conversion and for inference.
    X32 = X.astype(numpy.float32)

    add = OnnxAdd(
        'X', numpy.full((1, n_features), 1, dtype=numpy.float32),
        output_names=['Yadd'])
    onx = add.to_onnx(
        inputs=[('X', FloatTensorType((None, n_features)))],
        outputs=[('Yadd', FloatTensorType((None, n_features)))])

    tr = OnnxTransformer(onx)
    tr.fit()
    pipe = make_pipeline(tr, LogisticRegression())
    pipe.fit(X, y)

    expected_labels = pipe.predict(X)
    self.assertEqual(expected_labels.shape, (150, ))

    model_onnx = to_onnx(pipe, X32)
    oinf = OnnxInference(model_onnx)
    got = oinf.run({'X': X32})
    self.assertEqual(list(got), ['output_label', 'output_probability'])
    self.assertEqualArray(expected_labels, got['output_label'])

    expected_probas = pipe.predict_proba(X)
    probas = DataFrame(list(got['output_probability'])).values
    self.assertEqualArray(expected_probas, probas, decimal=5)
def test_transform_dict(self):
    """Transform a dictionary input with the model from ``get_onnx_mul``.

    The input is passed as ``{'X': array}``; the expected values are the
    element-wise squares of the input.
    """
    transformer = OnnxTransformer(self.get_onnx_mul())
    transformer.fit()

    inputs = {'X': np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])}
    got = transformer.transform(inputs)

    expected = np.array(
        [[1., 4.], [9., 16.], [25., 36.]], dtype=np.float32)
    got_values = list(got.ravel())
    expected_values = list(expected.ravel())
    self.assertEqual(got_values, expected_values)
def test_transform_numpy(self):
    """Transform a float32 array and check both the values and ``repr``.

    The expected values are the element-wise squares of the input. The
    representation of the transformer must start with its ``onnx_bytes``
    argument and end with a closing quote and parenthesis.
    """
    transformer = OnnxTransformer(self.get_onnx_mul())
    transformer.fit()

    data = np.array(
        [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
    got = transformer.transform(data)
    expected = np.array(
        [[1., 4.], [9., 16.], [25., 36.]], dtype=np.float32)
    self.assertEqual(list(got.ravel()), list(expected.ravel()))

    text = repr(transformer)
    self.assertStartsWith("OnnxTransformer(onnx_bytes=b'\\", text)
    self.assertEndsWith("')", text)
def test_pipeline_iris_intermediate(self):
    """Ask an ``OnnxTransformer`` for the ``probabilities`` output.

    A PCA + logistic regression pipeline fitted on iris is converted to
    ONNX. The transformer is created with ``output_name="probabilities"``
    and ``reshape=True``; transforming two rows must return a
    two-dimensional result with two rows.
    """
    iris = load_iris()
    features, target = iris.data, iris.target

    pipe = make_pipeline(PCA(n_components=2), LogisticRegression())
    pipe.fit(features, target)
    input_type = FloatTensorType((None, features.shape[1]))
    onx = convert_sklearn(pipe, initial_types=[('input', input_type)])

    transformer = OnnxTransformer(
        onx, output_name="probabilities", reshape=True)
    transformer.fit(features)
    transformed = transformer.transform(features[:2])

    self.assertEqual(len(transformed.shape), 2)
    self.assertEqual(transformed.shape[0], 2)
def test_multiple_transform(self):
    """Check every transformer from ``enumerate_create`` rejects a DataFrame.

    The DataFrame has columns ``X1`` and ``X2``; each transformer yielded
    by ``OnnxTransformer.enumerate_create`` must raise ``RuntimeError``
    when transforming it.
    """
    frame = pandas.DataFrame(
        data=[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
        columns="X1 X2".split())
    onnx_model = self.get_onnx_mul()

    candidates = list(OnnxTransformer.enumerate_create(onnx_model))
    self.assertNotEmpty(candidates)
    for _, step in candidates:
        step.fit()
        # Bind the current transformer as a default argument so the
        # callable does not depend on the loop variable.
        self.assertRaise(
            lambda step=step: step.transform(frame), RuntimeError)
def test_transform_dataframe(self):
    """Call ``transform`` on a DataFrame with columns ``X1`` and ``X2``.

    A ``RuntimeError`` raised by ``transform`` is tolerated; the test
    makes no assertion about the returned value.
    """
    frame = pandas.DataFrame(
        data=[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
        columns="X1 X2".split())
    transformer = OnnxTransformer(self.get_onnx_mul())
    transformer.fit()
    try:
        transformer.transform(frame)
    except RuntimeError:
        # Accepted outcome: only other exception types fail this test.
        pass
def test_pipeline(self):
    """Use a serialized ONNX PCA as the first step of a pipeline.

    The PCA is fitted on iris, converted to ONNX and passed as bytes to
    ``OnnxTransformer``. The pipeline must predict one label per sample
    and its first step must reproduce ``pca.transform`` up to 5 decimals.
    """
    iris = load_iris()
    features, target = iris.data, iris.target

    pca = PCA(n_components=2)
    pca.fit(features)
    input_type = FloatTensorType((None, features.shape[1]))
    serialized = convert_sklearn(
        pca, initial_types=[('input', input_type)]).SerializeToString()

    pipe = make_pipeline(OnnxTransformer(serialized), LogisticRegression())
    pipe.fit(features, target)
    self.assertEqual(pipe.predict(features).shape, (150, ))

    expected = pca.transform(features)
    _, onnx_step = pipe.steps[0]
    got = onnx_step.transform(features)
    self.assertEqualArray(expected, got, decimal=5)
def test_pipeline_iris_change_dim(self):
    """Check ``change_batch_size=2`` with the ``onnxruntime1`` runtime.

    Transforming the whole dataset must raise ``OrtInvalidArgument``,
    while transforming exactly two rows must return a two-dimensional
    result with two rows.
    """
    iris = load_iris()
    features, target = iris.data, iris.target

    pipe = make_pipeline(PCA(n_components=2), LogisticRegression())
    pipe.fit(features, target)
    input_type = FloatTensorType((None, features.shape[1]))
    onx = convert_sklearn(pipe, initial_types=[('input', input_type)])

    transformer = OnnxTransformer(
        onx, change_batch_size=2, runtime='onnxruntime1')
    transformer.fit(features)

    self.assertRaise(
        lambda: transformer.transform(features), OrtInvalidArgument)

    two_rows = transformer.transform(features[:2])
    self.assertEqual(len(two_rows.shape), 2)
    self.assertEqual(two_rows.shape[0], 2)
def test_pipeline_iris(self):
    """Run every transformer that ``enumerate_create`` yields for a pipeline.

    Each transformer must return one row per input sample, the yielded
    names must be unique, and the set of output shapes must be exactly
    ``{(150, 3), (150, 4), (150, 2), (150,)}``.
    """
    iris = load_iris()
    features, target = iris.data, iris.target

    pipe = make_pipeline(PCA(n_components=2), LogisticRegression())
    pipe.fit(features, target)
    input_type = FloatTensorType((None, features.shape[1]))
    serialized = convert_sklearn(
        pipe, initial_types=[('input', input_type)]).SerializeToString()

    candidates = list(OnnxTransformer.enumerate_create(serialized))
    names = []
    seen_shapes = set()
    for name, transformer in candidates:
        names.append(name)
        transformer.fit()
        transformed = transformer.transform(features)
        self.assertEqual(transformed.shape[0], features.shape[0])
        seen_shapes.add(transformed.shape)

    # No output name may be yielded twice.
    self.assertEqual(len(set(names)), len(names))
    self.assertEqual(
        seen_shapes, {(150, 3), (150, 4), (150, 2), (150,)})
def test_grid_search_onnx(self):
    """Grid-search over serialized ONNX models as a pipeline parameter.

    Two PCA models (2 and 3 components) are fitted on the training split
    and serialized to ONNX. ``GridSearchCV`` picks one of them through the
    ``onnxtransformer__onnx_bytes`` parameter; the best pipeline must score
    above 0.70 on the test split.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    def pca_to_bytes(n_components):
        # Fit a PCA on the training split and return its serialized ONNX.
        model = PCA(n_components=n_components)
        model.fit(X_train)
        converted = convert_sklearn(
            model,
            initial_types=[('input', FloatTensorType((1, X.shape[1])))])
        return converted.SerializeToString()

    onx_bytes2 = pca_to_bytes(2)
    onx_bytes3 = pca_to_bytes(3)

    pipe = make_pipeline(OnnxTransformer(onx_bytes2), LogisticRegression())
    param_grid = [{
        'onnxtransformer__onnx_bytes': [onx_bytes2, onx_bytes3],
    }]
    search = GridSearchCV(pipe, param_grid, cv=3)
    search.fit(X_train, y_train)
    self.assertIn("onnxtransformer__onnx_bytes", search.best_params_)

    report = classification_report(y_test, search.predict(X_test))
    self.assertIn('precision', report)
    self.assertGreater(search.score(X_test, y_test), 0.70)
def __init__(self, fixed_dim=None, skl_model=None, model_onnx=None,
             **kwargs):
    """Store the models and build one fitted transformer per node output.

    :param fixed_dim: stored as ``self.fixed_dim``, must not be None
    :param skl_model: stored as ``self.skl_model``, must not be None
    :param model_onnx: ONNX model, stored as ``self.model_onnx`` and
        serialized into ``self.onnx_bytes``, must not be None
    :param kwargs: forwarded to ``BenchPerfTest.__init__``
    :raises RuntimeError: if one of the three named arguments is None

    ``self.onnx_models`` maps every name returned by
    ``enumerate_model_node_outputs`` to a fitted ``OnnxTransformer``
    created for that output.
    """
    BenchPerfTest.__init__(self, **kwargs)

    # Guard clauses: all three arguments are mandatory despite defaults.
    if fixed_dim is None:
        raise RuntimeError("fixed_dim cannot be None.")
    if skl_model is None:
        raise RuntimeError("skl_model cannot be None.")
    if model_onnx is None:
        raise RuntimeError("model_onnx cannot be None.")

    self.fixed_dim = fixed_dim
    self.skl_model = skl_model
    self.model_onnx = model_onnx

    node_outputs = list(enumerate_model_node_outputs(model_onnx))
    self.onnx_bytes = model_onnx.SerializeToString()

    transformers = OrderedDict()
    for output_name in node_outputs:
        transformer = OnnxTransformer(self.onnx_bytes, output_name)
        transformers[output_name] = transformer
        transformer.fit()
    self.onnx_models = transformers
def test_grid_search(self):
    """Grid-search the classifier penalty behind an ONNX PCA step.

    The best penalty must be ``l1`` or ``l2``, the ONNX step of the best
    estimator must transform the test split like a freshly fitted
    ``OnnxTransformer`` built from the same bytes, and the test score
    must exceed 0.70.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    pca = PCA(n_components=2)
    pca.fit(X_train)
    input_type = FloatTensorType((1, X.shape[1]))
    onx_bytes = convert_sklearn(
        pca, initial_types=[('input', input_type)]).SerializeToString()

    pipe = make_pipeline(
        OnnxTransformer(onx_bytes), LogisticRegression(solver='liblinear'))
    param_grid = [{'logisticregression__penalty': ['l2', 'l1']}]
    search = GridSearchCV(pipe, param_grid, cv=3)
    search.fit(X_train, y_train)

    accepted = (
        {'logisticregression__penalty': 'l1'},
        {'logisticregression__penalty': 'l2'},
    )
    self.assertIn(search.best_params_, accepted)

    reference = OnnxTransformer(onx_bytes)
    reference.fit()
    expected = reference.transform(X_test)
    _, best_step = search.best_estimator_.steps[0]
    self.assertEqualArray(expected, best_step.transform(X_test))

    report = classification_report(y_test, search.predict(X_test))
    self.assertIn('precision', report)
    self.assertGreater(search.score(X_test, y_test), 0.70)
[img[1][0][1][:15] for img in climgs]) ######################################### # Transfer learning in a pipeline # +++++++++++++++++++++++++++++++ # # The proposed transfer learning consists in # using a PCA to project the probabilities # on a graph. with open(model_name, 'rb') as f: model_bytes = f.read() pipe = Pipeline(steps=[( 'deep', OnnxTransformer(model_bytes, runtime='onnxruntime1', change_batch_size=0) ), ('pca', PCA(2))]) X_train = numpy.vstack([im2array(img) for _, img in imgs]).astype(numpy.float32) pipe.fit(X_train) proj = pipe.transform(X_train) print(proj) ########################################### # Graph for the PCA # ----------------- fig, ax = plt.subplots(1, 1, figsize=(5, 5)) ax.plot(proj[:, 0], proj[:, 1], 'o')
# Fit every candidate model on the training data and keep its
# serialized ONNX graph.
onx_bytes = []
for model in dec_models:
    model.fit(X_train)
    onx = convert_sklearn(
        model,
        initial_types=[('X', FloatTensorType((None, X.shape[1])))],
    )
    onx_bytes.append(onx.SerializeToString())

##############################
# Pipeline with OnnxTransformer
# +++++++++++++++++++++++++++++++

pipe = make_pipeline(
    OnnxTransformer(onx_bytes[0]),
    LogisticRegression(multi_class='ovr'),
)

################################
# Grid Search
# +++++++++++
#
# The serialized models are now used as a parameter
# in the grid search.

param_grid = [
    {
        'onnxtransformer__onnx_bytes': onx_bytes,
        'logisticregression__penalty': ['l2', 'l1'],
        'logisticregression__solver': ['liblinear', 'saga'],
    },
]