def test_custom_pipeline_scaler(self):
    """Convert a pipeline whose last step is a custom classifier.

    The conversion is attempted in several ways:

    * without any registered converter: a ``RuntimeError`` mentioning the
      missing shape calculator is tolerated,
    * with converters passed to ``convert_sklearn`` keyed by class name:
      a ``TypeError`` asking for type keys is tolerated,
    * with converters passed to ``convert_sklearn`` keyed by type,
    * with a converter registered through ``update_registered_converter``.
    """
    data = load_iris()
    X = data.data[:, :2]
    y = data.target
    model = MyCustomClassifier()
    pipe = Pipeline([("scaler", StandardScaler()), ("lgbm", model)])
    pipe.fit(X, y)

    # Nothing is registered yet: only the two "missing shape calculator"
    # messages are acceptable failures, anything else is re-raised as is.
    try:
        model_onnx = convert_sklearn(
            pipe, "pipeline", [("input", FloatTensorType([1, 2]))])
    except RuntimeError as e:
        if ("No proper shape calculator found for" not in str(e)
                and "Unable to find a shape calculator for type"
                not in str(e)):
            raise

    # Keys given as strings: only the "must be types" TypeError is tolerated.
    try:
        model_onnx = convert_sklearn(
            pipe,
            "pipeline",
            [("input", FloatTensorType([1, 2]))],
            custom_conversion_functions={
                "MyCustomClassifier": my_custom_converter
            },
            custom_shape_calculators={
                "MyCustomClassifier": my_custom_shape_extractor
            },
        )
    except TypeError as e:
        if "Keys in custom_conversion_functions must be types" not in str(e):
            raise

    # Keys given as types.
    model_onnx = convert_sklearn(
        pipe,
        "pipeline",
        [("input", FloatTensorType([1, 2]))],
        custom_conversion_functions={
            MyCustomClassifier: my_custom_converter
        },
        custom_shape_calculators={
            MyCustomClassifier: my_custom_shape_extractor
        },
    )
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(numpy.float32),
        pipe,
        model_onnx,
        basename="SklearnPipelineScalerCustomClassifier",
    )

    # Converter registered once; no custom argument is needed afterwards.
    update_registered_converter(
        MyCustomClassifier,
        "MyCustomClassifier",
        my_custom_shape_extractor,
        my_custom_converter,
    )
    model_onnx = convert_sklearn(
        pipe, "pipeline", [("input", FloatTensorType([1, 2]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(numpy.float32),
        pipe,
        model_onnx,
        basename="SklearnPipelineScalerCustomClassifier2",
    )
def test_gpr_rbf_unfitted(self):
    """Convert an unfitted GaussianProcessRegressor using a sum of RBF kernels.

    The regressor is never fitted. The conversion is run with no option,
    with ``return_std`` and ``return_cov`` together (a ``RuntimeError`` is
    tolerated as long as it carries the expected message), with
    ``return_std`` only and with ``return_cov`` only.
    """
    se = (C(1.0, (1e-3, 1e3)) *
          RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3)))
    kernel = (Sum(se, C(0.1, (1e-3, 1e3)) *
                  RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3))))
    gp = GaussianProcessRegressor(alpha=1e-7,
                                  kernel=kernel,
                                  n_restarts_optimizer=15,
                                  normalize_y=True)

    # return_cov=False, return_std=False
    model_onnx = to_onnx(gp, initial_types=[('X', FloatTensorType([]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(Xtest_.astype(np.float32), gp, model_onnx,
                        verbose=False,
                        basename="SklearnGaussianProcessRBFUnfitted",
                        backend=TestSklearnGaussianProcess.backend)

    # return_cov=True, return_std=True
    options = {
        GaussianProcessRegressor: {
            "return_std": True,
            "return_cov": True
        }
    }
    try:
        to_onnx(gp, Xtrain_.astype(np.float32), options=options)
    except RuntimeError as e:
        # assertIn is used instead of a bare assert, which is removed
        # when Python runs with -O.
        self.assertIn("Not returning standard deviation", str(e))

    # return_std=True
    options = {GaussianProcessRegressor: {"return_std": True}}
    model_onnx = to_onnx(
        gp, options=options,
        initial_types=[('X', FloatTensorType([None, None]))])
    self.assertIsNotNone(model_onnx)
    self.check_outputs(
        gp, model_onnx, Xtest_.astype(np.float32),
        predict_attributes=options[GaussianProcessRegressor])

    # return_cov=True
    options = {GaussianProcessRegressor: {"return_cov": True}}
    model_onnx = to_onnx(
        gp, options=options,
        initial_types=[('X', FloatTensorType([None, None]))])
    self.assertIsNotNone(model_onnx)
    self.check_outputs(
        gp, model_onnx, Xtest_.astype(np.float32),
        predict_attributes=options[GaussianProcessRegressor])
def test_pipeline_pca_pipeline_multinomial(self):
    """Convert a PCA followed by a nested MinMaxScaler + MultinomialNB pipeline."""
    reducer = PCA(
        copy=True,
        iterated_power="auto",
        n_components=2,
        random_state=None,
        svd_solver="auto",
        tol=0.0,
        whiten=False,
    )
    scaler = MinMaxScaler(copy=True, feature_range=(0, 3.7209871159509307))
    naive_bayes = MultinomialNB(
        alpha=0.7368421052631579, class_prior=None, fit_prior=True)
    # The classifier lives in its own pipeline, itself a step of the outer one.
    inner = Pipeline(
        memory=None,
        steps=[("MinMax scaler", scaler), ("MultinomialNB", naive_bayes)],
    )
    model = Pipeline(
        memory=None,
        steps=[("PCA", reducer), ("Pipeline", inner)],
    )

    rows = [[0, 0, 0], [0, 0, 0.1], [1, 1, 1.1], [1, 1.1, 1]]
    data = np.array(rows, dtype=np.float32)
    y = [0, 0, 1, 1]
    model.fit(data, y)

    input_types = [("input", FloatTensorType(data.shape))]
    model_onnx = convert_sklearn(model, "pipelinewithinpipeline",
                                 input_types, target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        data,
        model,
        model_onnx,
        basename="SklearnPipelinePcaPipelineMinMaxNB2",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')",
    )
def test_kernel_ker2_def_ort1(self):
    """Run a sum of two constant * RBF kernels on several runtimes.

    The kernel is converted to ONNX, executed with runtime
    ``onnxruntime1`` while capturing the verbose output, then compared
    side by side with the default runtime and with ``onnxruntime2``.
    """
    first = CK(0.1, (1e-3, 1e3)) * RBF(
        length_scale=10, length_scale_bounds=(1e-3, 1e3))
    second = CK(0.1, (1e-3, 1e3)) * RBF(
        length_scale=1, length_scale_bounds=(1e-3, 1e3))
    ker = Sum(first, second)
    onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=numpy.float32,
                         op_version=get_opset_number_from_onnx())
    model_onnx = onx.to_onnx(
        inputs=[('X', FloatTensorType([None, None]))],
        outputs=[('Y', FloatTensorType([None, None]))],
        target_opset=get_opset_number_from_onnx())
    model_onnx.ir_version = get_ir_version_from_onnx()
    sess = OnnxInference(model_onnx.SerializeToString(),
                         runtime="onnxruntime1")

    # Every line logged by the runtime is collected in `logged`.
    logged = []

    def myprint(*args, **kwargs):
        logged.append(" ".join(map(str, args)))

    def run_verbose():
        return sess.run({'X': Xtest_.astype(numpy.float32)},
                        intermediate=True, verbose=1, fLOG=myprint)

    res = _capture_output(run_verbose, 'c')[0]
    self.assertGreater(len(logged), 2)
    m1 = res['Y']
    self.assertNotEmpty(m1)
    self.assertGreater(len(res), 2)
    # m2 = ker(Xtest_)
    # self.assertEqualArray(m1, m2, decimal=5)

    cpu = OnnxInference(model_onnx.SerializeToString())
    sbs = side_by_side_by_values(
        [cpu, sess], inputs={'X': Xtest_.astype(numpy.float32)})
    self.assertGreater(len(sbs), 2)
    self.assertIsInstance(sbs, list)
    self.assertIsInstance(sbs[0], dict)
    for key in ('step', 'metric', 'cmp'):
        self.assertIn(key, sbs[0])
        self.assertIn(key, sbs[1])

    def load_ort2():
        return OnnxInference(model_onnx.SerializeToString(),
                             runtime="onnxruntime2")

    sess3 = _capture_output(load_ort2, 'c')[0]
    try:
        sbs = side_by_side_by_values(
            [cpu, sess, sess3],
            inputs={'X': Xtest_.astype(numpy.float32)})
    except RuntimeError as e:
        if "Got invalid dimensions for input" not in str(e):
            raise e
        # probable bug somewhere
        return
    self.assertNotEmpty(sbs)

    inputs = {'X': Xtest_.astype(numpy.float32)}
    sbs = side_by_side_by_values(
        [(cpu, inputs), (sess, inputs), (sess3, inputs)])
    self.assertNotEmpty(sbs)
def test_custom_pipeline_scaler(self):
    """Convert a custom PredictableTSNE transformer, with and without a custom parser.

    The converter is registered with ``update_registered_converter``.
    The parser ``my_parser`` records every model it receives, first when
    passed through ``custom_parsers`` and then once registered with
    ``update_registered_parser``; the test checks it is called exactly
    once per conversion.
    """
    digits = datasets.load_digits(n_class=6)
    Xd = digits.data[:20]
    yd = digits.target[:20]

    ptsne_knn = PredictableTSNE()
    ptsne_knn.fit(Xd, yd)

    update_registered_converter(
        PredictableTSNE,
        "CustomPredictableTSNE",
        predictable_tsne_shape_calculator,
        predictable_tsne_converter,
    )

    model_onnx = convert_sklearn(
        ptsne_knn,
        "predictable_tsne",
        [("input", FloatTensorType([None, Xd.shape[1]]))],
        target_opset=TARGET_OPSET)

    dump_data_and_model(
        Xd.astype(numpy.float32)[:7],
        ptsne_knn,
        model_onnx,
        basename="CustomTransformerTSNEkNN-OneOffArray",
        allow_failure="StrictVersion(onnx.__version__) "
        "<= StrictVersion('1.5')")

    # Models seen by the custom parser, one entry per call.
    trace_line = []

    def my_parser(scope, model, inputs, custom_parsers=None):
        trace_line.append(model)
        return _parse_sklearn_simple_model(scope, model, inputs,
                                           custom_parsers)

    # Parser given for this conversion only.
    model_onnx = convert_sklearn(
        ptsne_knn,
        "predictable_tsne",
        [("input", FloatTensorType([None, Xd.shape[1]]))],
        custom_parsers={PredictableTSNE: my_parser},
        target_opset=TARGET_OPSET)
    self.assertEqual(len(trace_line), 1)

    dump_data_and_model(
        Xd.astype(numpy.float32)[:7],
        ptsne_knn,
        model_onnx,
        basename="CustomTransformerTSNEkNNCustomParser-OneOffArray",
        allow_failure="StrictVersion(onnx.__version__) "
        "<= StrictVersion('1.5')",
    )

    # Parser registered: it must be picked up without custom_parsers.
    update_registered_parser(PredictableTSNE, my_parser)
    model_onnx = convert_sklearn(
        ptsne_knn,
        "predictable_tsne",
        [("input", FloatTensorType([None, Xd.shape[1]]))],
        target_opset=TARGET_OPSET)

    self.assertEqual(len(trace_line), 2)
def save_as_onnx(model_to_save, filename):
    """Convert *model_to_save* to ONNX and write the serialized bytes to *filename*.

    The model is converted with a single float input named
    ``float_input`` of shape ``[1, 4]``.
    """
    input_spec = ('float_input', FloatTensorType([1, 4]))
    converted = convert_sklearn(model_to_save, initial_types=[input_spec])
    with open(filename, "wb") as stream:
        stream.write(converted.SerializeToString())
def test_pipeline_dataframe(self):
    """Convert a ColumnTransformer fed with one ONNX input per dataframe column.

    The ``color`` column is one-hot encoded and only its first encoded
    column is kept; every other column is passed through. The ONNX
    output is compared with ``pipe.transform``.
    """
    # Spaces (indentation included) are stripped before parsing the CSV.
    text = """
        fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,color
        7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red
        7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red
        7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red
        11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red
        """.replace(" ", "")
    X_train = pandas.read_csv(StringIO(text))
    numeric_features = [c for c in X_train if c != 'color']
    for name in numeric_features:
        X_train[name] = X_train[name].astype(numpy.float32)

    # One-hot encoding of 'color', then selection of its first column.
    color_branch = Pipeline([
        ('one', OneHotEncoder()),
        ('select', ColumnTransformer([('sel1', 'passthrough', [0])])),
    ])
    prep = ColumnTransformer([
        ("color", color_branch, ['color']),
        ("others", "passthrough", numeric_features),
    ])
    pipe = Pipeline([("prep", prep)])

    # One column input per feature, in the order of the CSV header.
    init_types = [(name, FloatTensorType(shape=[None, 1]))
                  for name in numeric_features]
    init_types.append(('color', StringTensorType(shape=[None, 1])))

    pipe.fit(X_train)
    model_onnx = convert_sklearn(pipe, initial_types=init_types)
    oinf = InferenceSession(model_onnx.SerializeToString())

    pred = pipe.transform(X_train)
    inputs = {}
    for c in X_train.columns:
        values = X_train[c].values
        inputs[c] = values.reshape((values.shape[0], 1))
    onxp = oinf.run(None, inputs)
    got = onxp[0]
    assert_almost_equal(pred, got)
# NOTE(review): `iris` is not defined in the visible lines -- presumably
# loaded earlier in the script; confirm.
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Candidate preprocessing models, each converted to ONNX below.
dec_models = [
    PCA(n_components=1),
    PCA(n_components=2),
    StandardScaler(),
]

# Serialized ONNX model of every entry of dec_models, in the same order.
onx_bytes = []

for model in dec_models:
    model.fit(X_train)
    # A single float input 'X' with as many columns as X; the first
    # dimension is left undefined (None).
    onx = convert_sklearn(model, initial_types=[('X', FloatTensorType(
        (None, X.shape[1])))])
    onx_bytes.append(onx.SerializeToString())

##############################
# Pipeline with OnnxTransformer
# +++++++++++++++++++++++++++++++

# The first serialized model (PCA with one component) feeds the classifier.
pipe = make_pipeline(OnnxTransformer(onx_bytes[0]),
                     LogisticRegression(multi_class='ovr'))

################################
# Grid Search
# +++++++++++
#
# The serialized models are now used as a parameter
# in the grid search.
# NOTE(review): `data1` is not defined in the visible lines -- presumably
# built earlier in the script; confirm.
df1 = pd.DataFrame(data1, columns=['First', 'Second'])
# Dummy encoding of the dataframe, then scaling with MinMaxScaler.
dumdf1 = pd.get_dummies(df1)
scaler = MinMaxScaler()
scaler.partial_fit(dumdf1)
sc_data = scaler.transform(dumdf1)
# The random state is fixed to 42.
model1 = IForest(n_estimators=10, bootstrap=True, behaviour='new',
                 contamination=0.1,
                 random_state=np.random.RandomState(42),
                 verbose=1, n_jobs=-1).fit(sc_data)
feature_names2 = dumdf1.columns

# One float input with one column per dummy feature.
initial_type = [('float_input',
                 FloatTensorType([None, len(feature_names2)]))]

#############################################
# We check that the conversion fails as expected.

if IForest is not None:
    try:
        to_onnx(model1, initial_types=initial_type)
    except Exception as e:
        # Any exception is only printed, the script goes on.
        print(e)

####################################################
# Custom converter
# ++++++++++++++++
#
# First the parser and the shape calculator.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Candidate preprocessing models, each converted to ONNX below.
dec_models = [
    PCA(n_components=1),
    PCA(n_components=2),
    StandardScaler(),
]

# Serialized ONNX model of every entry of dec_models, in the same order.
onx_bytes = []

for model in dec_models:
    model.fit(X_train)
    # A single float input 'X' with as many columns as X; the first
    # dimension is left undefined (None).
    onx = convert_sklearn(
        model, initial_types=[('X', FloatTensorType((None, X.shape[1])))])
    onx_bytes.append(onx.SerializeToString())

##############################
# Pipeline with OnnxTransformer
# +++++++++++++++++++++++++++++++

# The first serialized model (PCA with one component) feeds the classifier.
pipe = make_pipeline(OnnxTransformer(onx_bytes[0]),
                     LogisticRegression(multi_class='ovr'))

################################
# Grid Search
# +++++++++++
#
# The serialized models are now used as a parameter
# in the grid search.
# NOTE(review): `FloatTensorType`, `rt` and `numpy` are used below but not
# imported in the visible lines -- presumably imported earlier; confirm.
from skl2onnx import convert_sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Train a random forest on a split of the iris dataset.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = RandomForestClassifier()
clr.fit(X_train, y_train)
print(clr)

###########################
# Convert a model into ONNX
# +++++++++++++++++++++++++

# One float input with four columns and an undefined number of rows.
initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type,
                      target_opset=12)

# The serialized model is stored in the current working directory.
with open("rf_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())

###################################
# Compute the prediction with ONNX Runtime
# ++++++++++++++++++++++++++++++++++++++++

sess = rt.InferenceSession("rf_iris.onnx")
# Names of the first input and of the first output of the ONNX graph.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
# The test data is cast to float32 before being fed to the session;
# only the first requested output is kept.
pred_onx = sess.run([label_name],
                    {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)
def dummy_shape_calculator(operator):
    """Set the first output type to a float tensor shaped like the first input."""
    input_shape = operator.inputs[0].type.shape
    output_type = FloatTensorType(input_shape)
    operator.outputs[0].type = output_type
def dump_multiple_classification(model, suffix="", folder=None,
                                 allow_failure=None, verbose=False,
                                 label_string=False, label_uint8=False,
                                 first_class=0, comparable_outputs=None,
                                 target_opset=None):
    """
    Trains and dumps a model for a multi-class classification problem.
    The function trains a model and calls
    :func:`dump_data_and_model`.

    The model is trained and dumped twice: first on a small hard-coded
    dataset with three classes (basename tag ``Mcl``), then on a dataset
    produced by ``make_classification`` restricted to its first two
    features (basename tag ``RndMcl``, only the first ten rows are dumped).

    :param model: model to train, it is fitted in place
    :param suffix: appended to the basename of every created file
    :param folder: given to :func:`dump_data_and_model`
    :param allow_failure: given to :func:`dump_data_and_model`
    :param verbose: prints the progress
    :param label_string: converts the labels of the hard-coded dataset
        into strings ``'l<i>'``, adds ``String`` to the suffix
    :param label_uint8: converts the labels of the hard-coded dataset
        into ``uint8``, adds ``Uint8`` to the suffix
    :param first_class: value added to every label of the hard-coded dataset
    :param comparable_outputs: given to :func:`dump_data_and_model`
    :param target_opset: given to ``convert_model``
    :raises AssertionError: if *label_string* and *label_uint8*
        are both True

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
    X = numpy.array(X, dtype=numpy.float32)
    y = [0, 1, 2, 1, 1, 2]
    y = [i + first_class for i in y]
    if label_string:
        if label_uint8:
            raise AssertionError(
                "label_string and label_uint8 cannot be both True")
        y = ["l%d" % i for i in y]
        suffix += 'String'
    elif label_uint8:
        y = numpy.array(y).astype(numpy.uint8)
        suffix += 'Uint8'

    def _fit_convert_dump(features, labels, dumped, tag):
        # Fits on (features, labels), converts, then dumps `dumped`
        # under a basename containing `tag`.
        model.fit(features, labels)
        if verbose:
            print("[dump_multiple_classification] model '{}'".format(
                model.__class__.__name__))
        model_onnx, prefix = convert_model(
            model, "multi-class classifier",
            [("input", FloatTensorType([None, 2]))],
            target_opset=target_opset)
        if verbose:
            print("[dump_multiple_classification] model was converted")
        dump_data_and_model(
            dumped.astype(numpy.float32),
            model,
            model_onnx,
            folder=folder,
            allow_failure=allow_failure,
            basename=prefix + tag + model.__class__.__name__ + suffix,
            verbose=verbose,
            comparable_outputs=comparable_outputs,
        )

    _fit_convert_dump(X, y, X, "Mcl")

    X, y = make_classification(40, n_features=4, random_state=42,
                               n_classes=3, n_clusters_per_class=1)
    X = X[:, :2]
    _fit_convert_dump(X, y, X[:10], "RndMcl")
y += a return y def taylor_sigmoid(x, degre=50): den = one + taylor_approximation_exp(-x, degre) return one / (den) opset = get_max_opset() N = 300 min_values = [-20 + float(i) * 10 / N for i in range(N)] data = numpy.array([0], dtype=numpy.float32) node = OnnxSigmoid('X', op_version=opset, output_names=['Y']) onx = node.to_onnx({'X': FloatTensorType()}, {'Y': FloatTensorType()}, target_opset=opset) rts = ['numpy', 'python', 'onnxruntime', 'taylor20', 'taylor40'] oinf = OnnxInference(onx) sess = InferenceSession(onx.SerializeToString()) graph = [] for mv in tqdm(min_values): data[0] = mv for rt in rts: lab = "" if rt == 'numpy': y = expit(data) elif rt == 'python': y = oinf.run({'X': data})['Y']
def test_onnx_ml(self):
    """Build Add -> LinearRegressor -> Identity for several opsets and run it.

    For every target opset (one given as a dictionary, the others as
    integers), the test checks no imported opset exceeds the request,
    loads the graph with ``InferenceSession``, checks the output shape
    and the types returned by ``get_output_type_inference``.
    """
    def generate_onnx_graph(opv):
        node = OnnxAdd(('X1', FloatTensorType()),
                       np.array([0.1], dtype=np.float32),
                       op_version=opv)
        out = OnnxLinearRegressor(
            node, coefficients=[0.3, 0.3, 0.4, 0.5, 0.6],
            intercepts=[-50.], op_version=1)
        last = OnnxIdentity(out, output_names=['Y'], op_version=opv)
        onx = last.to_onnx([('X1', FloatTensorType((None, 5)))],
                           outputs=[('Y', FloatTensorType())],
                           target_opset=opv)
        return onx, (node, out, last)

    for opv in [{'': 10}] + list(range(9, TARGET_OPSET + 1)):
        with self.subTest(opv=opv):
            # Opset of the main domain, whatever the form of opv.
            main_opset = opv[''] if isinstance(opv, dict) else opv
            if main_opset > get_latest_tested_opset_version():
                continue
            for nbnode in (1, 2, 3, 100):
                onx, nodes = generate_onnx_graph(opv=opv)
                for im in onx.opset_import:
                    if im.version > main_opset:
                        raise AssertionError(
                            "Wrong final opset\nopv={}\n{}".format(
                                opv, onx))
                as_string = onx.SerializeToString()
                try:
                    ort = InferenceSession(as_string)
                except (InvalidGraph, InvalidArgument) as e:
                    # A failure is tolerated when the requested opset is
                    # not below the one of the installed onnx package.
                    if main_opset >= onnx_opset_version():
                        continue
                    raise AssertionError(
                        "Unable to load opv={}\n---\n{}\n---".format(
                            opv, onx)) from e
                X = (np.ones((1, 5)) * nbnode).astype(np.float32)
                res_out = ort.run(None, {'X1': X})
                self.assertEqual(len(res_out), 1)
                res = res_out[0]
                self.assertEqual(res.shape, (1, 1))
                # Type inference is chained: the output of one node is
                # the input of the next one.
                inputs = None
                expected = [[('Ad_C0', FloatTensorType(shape=[]))],
                            [('Li_Y0', FloatTensorType(shape=[]))],
                            [('Y', FloatTensorType(shape=[]))]]
                for expected_types, node in zip(expected, nodes):
                    shape = node.get_output_type_inference(inputs)
                    self.assertEqual(len(shape), 1)
                    if isinstance(shape[0], tuple):
                        self.assertEqual(str(expected_types), str(shape))
                    else:
                        self.assertEqual(
                            str(expected_types),
                            str([(shape[0].onnx_name, shape[0].type)]))
                    inputs = shape
def custom_transform_shape_calculator(operator):
    """Declare the first output of *operator* as a float tensor of shape ``[3, 2]``."""
    fixed_type = FloatTensorType([3, 2])
    operator.outputs[0].type = fixed_type
def _init(self, variables=None):
    """
    Initializes the node.

    :param variables: registered variables created by previous operators
    :raises RuntimeError: if the single-node graph has a null dimension,
        if *session_options* and *disable_optimisation* are both
        specified, or if *onnxruntime* cannot load the node

    The method picks the class implementing the operator, builds an ONNX
    graph containing this single node and loads it in an
    ``InferenceSession`` stored in ``self.sess_``. The types of the
    outputs are stored in ``self.typed_outputs_``.

    The current implementation for operator *Scan*
    only works for matrices.
    """
    custom_nodes = self.options.get('nodes', None)
    if (custom_nodes is not None and
            self.onnx_node.op_type in custom_nodes):
        self.alg_class = custom_nodes[self.onnx_node.op_type]
    else:
        # The operator class is searched in alg2, then alg, then alg3.
        try:
            self.alg_class = getattr(alg2, 'Onnx' + self.onnx_node.op_type)
        except AttributeError:
            try:
                self.alg_class = getattr(
                    alg, 'Onnx' + self.onnx_node.op_type)
            except AttributeError:
                self.alg_class = getattr(
                    alg3, 'Onnx' + self.onnx_node.op_type)

    inputs = list(self.onnx_node.input)
    self.mapping, self.inputs = self._name_mapping(inputs)
    self.outputs = list(self.onnx_node.output)

    # Options consumed here are removed, the remaining ones are given
    # to the operator class.
    options = self.options.copy()
    options.pop('nodes', None)
    target_opset = options.pop('target_opset', None)
    domain = options.pop('domain', None)
    disable_optimisation = options.pop('disable_optimisation', False)
    # Any falsy value (missing key, None, False) means the caller did
    # not provide session options.
    session_options = options.pop('session_options', None) or None
    ir_version = options.pop('ir_version', None)

    # target_opset should be >= 9 not {} for main domain.
    # We assume it was the case when the graph was created.
    # (No check is done: comparing target_opset to an integer fails
    # when it is None or a dictionary.)

    if self.onnx_node.op_type == 'ZipMap':
        self.inst_ = self.alg_class(*self.inputs, output_names=self.outputs,
                                    op_version=target_opset, **options)
        inputs = get_defined_inputs(
            self.inputs, variables, dtype=self.dtype)
        name = (self.outputs[0] if len(self.outputs) == 1
                else self.inst_.expected_outputs[0][0])
        otype = (Int64TensorType if 'classlabels_int64s' in options
                 else StringTensorType)
        outvar = [(name, DictionaryType(otype([1]), FloatTensorType([1])))]
        self.onnx_ = self.inst_.to_onnx(inputs, outputs=outvar)
        forced = True
    elif self.onnx_node.op_type == 'ConstantOfShape':
        # numpy arrays are converted into tensors before being used
        # as attributes.
        for k in options:
            v = options[k]
            if isinstance(v, numpy.ndarray):
                options[k] = make_tensor(
                    k, self._guess_proto_type(v.dtype),
                    v.shape, v.tolist())

        self.inst_ = self.alg_class(*self.inputs, output_names=self.outputs,
                                    op_version=target_opset, **options)
        inputs = get_defined_inputs(
            self.inputs, variables, dtype=self.dtype)
        try:
            self.onnx_ = self.inst_.to_onnx(inputs, target_opset=target_opset,
                                            domain=domain)
            if "dim_value: 0" in str(self.onnx_):
                raise RuntimeError(  # pragma: no cover
                    "Probable issue as one dimension is null.\n--\n{}".format(
                        self.onnx_))
        except AttributeError as e:  # pragma: no cover
            # older version of skl2onnx
            self.onnx_ = self.inst_.to_onnx(inputs)
            if "dim_value: 0" in str(self.onnx_):
                raise RuntimeError(
                    "Probable issue as one dimension is null.\n--\n{}".format(
                        self.onnx_)) from e
        forced = False
    elif self.onnx_node.op_type == 'Scan':
        self.inst_ = self.alg_class(
            *self.inputs, output_names=self.outputs,
            op_version=target_opset, **options)
        inputs = get_defined_inputs(
            self.inputs, variables, dtype=self.dtype)
        outputs = get_defined_outputs(
            self.outputs, self.onnx_node, inputs, variables,
            dtype=self.dtype)
        # Every input and output is declared as a matrix with
        # undefined dimensions.
        inputs = [(name, cl.__class__([None, None]))
                  for (name, cl) in inputs]
        outputs = [(name, cl.__class__([None, None]))
                   for (name, cl) in outputs]
        self.onnx_ = self.inst_.to_onnx(inputs, outputs=outputs,
                                        target_opset=target_opset,
                                        domain=domain)
        if "dim_value: 0" in str(self.onnx_):
            raise RuntimeError(  # pragma: no cover
                "Probable issue as one dimension is null.\n--\n{}".format(
                    self.onnx_))
        forced = True
    else:
        self.inst_ = self.alg_class(*self.inputs, output_names=self.outputs,
                                    op_version=target_opset, domain=domain,
                                    **options)
        inputs = get_defined_inputs(
            self.inputs, variables, dtype=self.dtype,
            schema=self.alg_class.expected_inputs)

        try:
            self.onnx_ = self.inst_.to_onnx(
                inputs, target_opset=target_opset, domain=domain)
            if "dim_value: 0" in str(self.onnx_):
                raise RuntimeError(  # pragma: no cover
                    "Probable issue as one dimension is null.\n--\n{}\n---\n{}".format(
                        self.onnx_, inputs))
            forced = False
        except (RuntimeError, ValueError, InferenceError) as eo:
            # Let's try again by forcing output types.
            forced = True
            outputs = get_defined_outputs(
                self.outputs, self.onnx_node, inputs, variables,
                dtype=self.dtype, schema=self.alg_class.expected_outputs,
                schema_inputs=self.alg_class.expected_inputs)
            try:
                self.onnx_ = self.inst_.to_onnx(inputs, outputs=outputs,
                                                target_opset=target_opset,
                                                domain=domain)
            except NotImplementedError as e:  # pragma: no cover
                raise NotImplementedError(
                    "Unable to instantiate node {} inputs={} "
                    "self.inputs={} outputs={} variables={} "
                    "dtype={} e={} eo={}".format(
                        self.alg_class, inputs, self.inputs,
                        outputs, variables, self.dtype, e, eo)) from e
            if "dim_value: 0" in str(self.onnx_):
                # The cause is the first failure (eo); the name bound by
                # the inner except clause no longer exists here.
                raise RuntimeError(  # pragma: no cover
                    "Probable issue as one dimension is null.\n--\n{}".format(
                        self.onnx_)) from eo

    if len(self.onnx_.graph.output) != len(self.outputs):  # pragma: no cover
        # Something is wrong, falls back to default plan.
        forced = True
        outputs = get_defined_outputs(
            self.outputs, self.onnx_node, inputs, variables,
            dtype=self.dtype, schema=self.alg_class.expected_outputs)
        self.onnx_ = self.inst_.to_onnx(inputs, outputs=outputs,
                                        target_opset=target_opset,
                                        domain=domain)
        if "dim_value: 0" in str(self.onnx_):
            raise RuntimeError(  # pragma: no cover
                "Probable issue as one dimension is null.\n--\n{}".format(
                    self.onnx_))
    else:
        lo = list(self.onnx_.graph.output)
        outputs = proto2vars(lo)

    sess_options = (SessionOptions() if session_options is None
                    else session_options)
    self.run_options = RunOptions()

    if session_options is None:
        # Default session options: only severe messages are logged.
        try:
            sess_options.session_log_severity_level = 3
            # sess_options.sessions_log_verbosity_level = 0
        except AttributeError:  # pragma: no cover
            # onnxruntime not recent enough.
            pass
        try:
            self.run_options.run_log_severity_level = 3
            # self.run_options.run_log_verbosity_level = 0
        except AttributeError:  # pragma: no cover
            # onnxruntime not recent enough.
            pass
        if disable_optimisation:
            sess_options.graph_optimization_level = (  # pragma: no cover
                GraphOptimizationLevel.ORT_DISABLE_ALL)
    elif disable_optimisation:
        raise RuntimeError(  # pragma: no cover
            "session_options and disable_optimisation cannot be defined "
            "at the same time.")

    if ir_version is not None:
        self.onnx_.ir_version = ir_version
    try:
        self.sess_ = InferenceSession(
            self.onnx_.SerializeToString(), sess_options=sess_options)
    except (RuntimeError, OrtNotImplemented, OrtInvalidGraph, OrtFail) as e:
        raise RuntimeError(
            "Unable to load node '{}' (output type was {}) inputs={} "
            "self.inputs={} self.onnx_node.input={} "
            "variables={} mapping={} "
            "expected_inputs={}\n{}".format(
                self.onnx_node.op_type,
                "guessed" if forced else "inferred",
                inputs, self.inputs, self.onnx_node.input,
                variables, self.mapping,
                self.alg_class.expected_inputs,
                self.onnx_)) from e
    self.typed_outputs_ = outputs
def test_convert_nusvr_default(self):
    """Convert a NuSVR with default parameters and dump the model and its data."""
    model, X = self._fit_binary_classification(NuSVR())
    n_columns = X.shape[1]
    input_types = [("input", FloatTensorType([None, n_columns]))]
    model_onnx = convert_sklearn(model, "SVR", input_types)
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(X, model, model_onnx, basename="SklearnRegNuSVR2")
def test_pipeline_column_transformer(self):
    """Convert a ColumnTransformer pipeline mixing numerical and string columns.

    Numerical columns are imputed and scaled, categorical columns are
    one-hot encoded and reduced with TruncatedSVD, and a logistic
    regression is trained on top. The ONNX graph takes one float input
    and one string input.
    """
    iris = datasets.load_iris()
    X = iris.data[:, :3]
    y = iris.target
    X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
    # Two string columns derived from the numerical ones.
    X_train["vcat"] = X_train["vA"].apply(
        lambda x: "cat1" if x > 0.5 else "cat2")
    X_train["vcat2"] = X_train["vB"].apply(
        lambda x: "cat3" if x > 0.5 else "cat4")
    y_train = y % 2
    numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
    categorical_features = [3, 4]  # ["vcat", "vcat2"]

    numeric_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ])
    encoder = OneHotEncoder(sparse=True, handle_unknown="ignore")
    svd = TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4)
    categorical_transformer = Pipeline(steps=[
        ("onehot", encoder),
        ("tsvd", svd),
    ])
    preprocessor = ColumnTransformer(transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ])

    classifier = LogisticRegression(
        C=0.01,
        class_weight={False: 0.2, True: 0.8},
        n_jobs=1,
        max_iter=10,
        solver="lbfgs",
        tol=1e-3,
    )
    model = Pipeline(steps=[("precprocessor", preprocessor),
                            ("classifier", classifier)])
    model.fit(X_train, y_train)

    initial_type = [
        ("numfeat", FloatTensorType([None, 3])),
        ("strfeat", StringTensorType([None, 2])),
    ]

    # Only the first eleven rows are dumped and compared.
    X_train = X_train[:11]
    model_onnx = convert_sklearn(model, initial_types=initial_type)

    dump_data_and_model(
        X_train,
        model,
        model_onnx,
        basename="SklearnPipelineColumnTransformerPipeliner",
        allow_failure="StrictVersion(onnx.__version__)"
        " < StrictVersion('1.3') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.4.0')",
    )

    if __name__ == "__main__":
        # Draws the graph when the file is run as a script.
        from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer

        pydot_graph = GetPydotGraph(
            model_onnx.graph,
            name=model_onnx.graph.name,
            rankdir="TP",
            node_producer=GetOpNodeProducer("docstring"),
        )
        pydot_graph.write_dot("graph.dot")

        import os

        os.system("dot -O -G=300 -Tpng graph.dot")
########################### # The pipeline. data = load_iris() X = data.data pipe = Pipeline(steps=[('std', StandardScaler()), ('km', KMeans(3))]) pipe.fit(X) ################################# # The function goes through every step, # overloads the methods *transform* and # returns an ONNX graph for every step. steps = collect_intermediate_steps( pipe, "pipeline", [("X", FloatTensorType([None, X.shape[1]]))]) ##################################### # We call method transform to population the # cache the overloaded methods *transform* keeps. pipe.transform(X) ####################################### # We compute every step and compare # ONNX and scikit-learn outputs. for step in steps: print('----------------------------') print(step['model']) onnx_step = step['onnx_step'] sess = InferenceSession(onnx_step.SerializeToString())
def test_model_knn_regressor(self):
    """Check a KNeighborsRegressor with two neighbours can be converted."""
    regressor = KNeighborsRegressor(n_neighbors=2)
    model, X = self._fit_model(regressor)
    input_types = [('input', FloatTensorType([1, 4]))]
    model_onnx = convert_sklearn(model, 'KNN regressor', input_types)
    self.assertIsNotNone(model_onnx)
def dump_data_and_model(data, model, onnx=None, basename="model", folder=None,
                        inputs=None, backend="onnxruntime", context=None,
                        allow_failure=None, methods=None, dump_error_log=None,
                        benchmark=None, comparable_outputs=None,
                        intermediate_steps=False,
                        fail_evenif_notimplemented=False,
                        verbose=False, classes=None):
    """
    Saves data with pickle, saves the model with pickle and *onnx*,
    runs and saves the predictions for the given model.
    This function is used to test a backend (runtime) for *onnx*.

    :param data: any kind of data
    :param model: any model
    :param onnx: *onnx* model or *None* to use an onnx converter
        to convert it only if the model accepts one float vector
    :param basename: three files are written ``<basename>.data.pkl``,
        ``<basename>.model.pkl``, ``<basename>.model.onnx``
    :param folder: files are written in this folder,
        it is created if it does not exist, if *folder* is None,
        it looks first in environment variable ``ONNXTESTDUMP``,
        otherwise, it is placed into ``'tests_dump'``.
    :param inputs: standard type or specific one if specified, only used if
        parameter *onnx* is None
    :param backend: backend used to compare expected output and runtime
        output. Two options are currently supported: None for no test,
        `'onnxruntime'` to use module *onnxruntime*.
    :param context: used if the model contains a custom operator such
        as a custom Keras function...
    :param allow_failure: None to raise an exception if comparison fails
        for the backends, otherwise a string which is then evaluated to
        check whether or not the test can fail, example:
        ``"StrictVersion(onnx.__version__) < StrictVersion('1.3.0')"``
    :param methods: ONNX may produce one or several results, each of them
        is equivalent to the output of a method from the model class,
        this parameter defines which methods are equivalent to ONNX
        outputs. If not specified, it falls back into a default behaviour
        implemented for classifiers, regressors, clustering.
    :param dump_error_log: if True, dumps any error message in a file
        ``<basename>.err``, if it is None, it checks the environment
        variable ``ONNXTESTDUMPERROR``
    :param benchmark: if True, runs a benchmark and stores the results
        into a file ``<basename>.bench``, if None, it checks the
        environment variable ``ONNXTESTBENCHMARK``
    :param comparable_outputs: compares only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param fail_evenif_notimplemented: the test is considered as failing
        even if the error is due to onnxruntime missing the
        implementation of a new operator defined in ONNX.
    :param verbose: additional information
    :param classes: classes names
        (only for classifier, mandatory if option 'nocl' is used)
    :return: the created files

    Some convention for the name,
    *Bin* for a binary classifier, *Mcl* for a multiclass
    classifier, *Reg* for a regressor, *MRg* for a multi-regressor.
    The name can contain some flags. Expected outputs refer to the
    outputs computed with the original library, computed outputs
    refer to the outputs computed with an ONNX runtime.

    * ``-CannotLoad``: the model can be converted but the runtime
      cannot load it
    * ``-Dec3``: compares expected and computed outputs up to
      3 decimals (5 by default)
    * ``-Dec4``: compares expected and computed outputs up to
      4 decimals (5 by default)
    * ``-NoProb``: The original models computed probabilities for two
      classes *size=(N, 2)* but the runtime produces a vector of size *N*,
      the test will compare the second column to the column
    * ``-OneOff``: the ONNX runtime cannot compute the prediction for
      several inputs, it must be called for each of them.
    * ``-OneOffArray``: same as ``-OneOff`` but input is still a 2D array
      with one observation
    * ``-Out0``: only compares the first output on both sides
    * ``-Reshape``: merges all outputs into one single vector and resizes
      it before comparing
    * ``-SkipDim1``: before comparing expected and computed output,
      arrays with a shape like *(2, 1, 2)* becomes *(2, 2)*
    * ``-SklCol``: *scikit-learn* operator applies on a column and not a
      matrix

    If the *backend* is not None, the function either raises an exception
    if the comparison between the expected outputs and the backend outputs
    fails or it saves the backend output and adds it to the results.
    """
    runtime_test = dict(model=model, data=data)
    enabled_values = ("1", 1, "True", "true", True)

    if folder is None:
        folder = os.environ.get("ONNXTESTDUMP", "tests_dump")
    if dump_error_log is None:
        dump_error_log = os.environ.get(
            "ONNXTESTDUMPERROR", "0") in enabled_values
    if benchmark is None:
        benchmark = os.environ.get(
            "ONNXTESTBENCHMARK", "0") in enabled_values
    # exist_ok avoids a failure if the folder is created by another
    # process between a check and the creation.
    os.makedirs(folder, exist_ok=True)

    # lambda_original computes the predictions of the original model
    # for one observation, it is only used by the benchmark.
    lambda_original = None
    if isinstance(data, (numpy.ndarray, pandas.DataFrame)):
        dataone = data[:1].copy()
    else:
        dataone = data

    def _raw_score_binary_classification(model, X):
        scores = model.decision_function(X)
        if len(scores.shape) == 1:
            scores = scores.reshape(-1, 1)
        if len(scores.shape) != 2 or scores.shape[1] != 1:
            raise RuntimeError(
                "Unexpected shape {} for a binary classifiation".format(
                    scores.shape))
        return numpy.hstack([-scores, scores])

    if methods is not None:
        prediction = []
        for method in methods:
            # Loop variables are bound as default values so that every
            # lambda keeps the value of its own iteration.
            if callable(method):
                call = (lambda X, model=model, method=method:
                        method(model, X))
            else:
                try:
                    call = getattr(model, method)
                except AttributeError:
                    if method == 'decision_function_binary':
                        call = (lambda X, model=model:
                                _raw_score_binary_classification(model, X))
                    else:
                        raise
            if callable(call):
                prediction.append(call(data))
                # we only take the last one for benchmark
                lambda_original = lambda call=call: call(dataone)  # noqa
            else:
                raise RuntimeError(
                    "Method '{0}' is not callable.".format(method))
    else:
        if hasattr(model, "predict"):
            if _has_predict_proba(model):
                # Classifier
                prediction = [model.predict(data), model.predict_proba(data)]
                lambda_original = lambda: model.predict_proba(dataone)  # noqa
            elif _has_decision_function(model):
                # Classifier without probabilities
                prediction = [
                    model.predict(data),
                    model.decision_function(data),
                ]
                lambda_original = (
                    lambda: model.decision_function(dataone))  # noqa
            elif _has_transform_model(model):
                # clustering
                prediction = [model.predict(data), model.transform(data)]
                lambda_original = lambda: model.transform(dataone)  # noqa
            else:
                # Regressor or VotingClassifier
                prediction = [model.predict(data)]
                lambda_original = lambda: model.predict(dataone)  # noqa

        elif hasattr(model, "transform"):
            options = extract_options(basename)
            SklCol = options.get("SklCol", False)
            if SklCol:
                prediction = model.transform(data.ravel())
                lambda_original = (
                    lambda: model.transform(dataone.ravel()))  # noqa
            else:
                prediction = model.transform(data)
                lambda_original = lambda: model.transform(dataone)  # noqa
        else:
            raise TypeError(
                "Model has no predict or transform method: {0}".format(
                    type(model)))

    runtime_test["expected"] = prediction

    names = []
    dest = os.path.join(folder, basename + ".expected.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(prediction, f)

    dest = os.path.join(folder, basename + ".data.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(data, f)

    if hasattr(model, "save"):
        # The model knows how to save itself.
        dest = os.path.join(folder, basename + ".model.keras")
        names.append(dest)
        model.save(dest)
    else:
        dest = os.path.join(folder, basename + ".model.pkl")
        names.append(dest)
        with open(dest, "wb") as f:
            try:
                pickle.dump(model, f)
            except AttributeError as e:
                # Best effort: a model which cannot be pickled
                # does not stop the function.
                print("[dump_data_and_model] cannot pickle model '{}'"
                      " due to {}.".format(dest, e))

    # Only used when dump_error_log is true.
    error_dump = (os.path.join(folder, basename + ".err")
                  if dump_error_log else None)

    if onnx is None:
        array = numpy.array(data)
        if inputs is None:
            inputs = [("input", FloatTensorType(list(array.shape)))]
        onnx, _ = convert_model(model, basename, inputs)

    dest = os.path.join(folder, basename + ".model.onnx")
    names.append(dest)
    with open(dest, "wb") as f:
        f.write(onnx.SerializeToString())
    if verbose:
        print("[dump_data_and_model] created '{}'.".format(dest))

    runtime_test["onnx"] = dest

    # backend
    if backend is not None:
        if not isinstance(backend, list):
            backend = [backend]
        for b in backend:
            if not is_backend_enabled(b):
                continue
            if isinstance(allow_failure, str):
                allow = evaluate_condition(b, allow_failure)
            else:
                allow = allow_failure
            if allow is None:
                # No failure is allowed, exceptions go through.
                # classes is given as in the other branch.
                output, lambda_onnx = compare_backend(
                    b,
                    runtime_test,
                    options=extract_options(basename),
                    context=context,
                    verbose=verbose,
                    comparable_outputs=comparable_outputs,
                    intermediate_steps=intermediate_steps,
                    classes=classes)
            else:
                try:
                    output, lambda_onnx = compare_backend(
                        b, runtime_test,
                        options=extract_options(basename),
                        context=context,
                        verbose=verbose,
                        comparable_outputs=comparable_outputs,
                        intermediate_steps=intermediate_steps,
                        classes=classes)
                except OnnxRuntimeMissingNewOnnxOperatorException as e:
                    if fail_evenif_notimplemented:
                        raise
                    warnings.warn(str(e))
                    continue
                except AssertionError as e:
                    if dump_error_log:
                        with open(error_dump, "w", encoding="utf-8") as f:
                            f.write(str(e) + "\n--------------\n")
                            traceback.print_exc(file=f)
                    if isinstance(allow, bool) and allow:
                        warnings.warn("Issue with '{0}' due to {1}".format(
                            basename,
                            str(e).replace("\n", " -- ")))
                        continue
                    raise

            if output is not None:
                dest = os.path.join(folder,
                                    basename + ".backend.{0}.pkl".format(b))
                names.append(dest)
                with open(dest, "wb") as f:
                    pickle.dump(output, f)
                if (benchmark and lambda_onnx is not None
                        and lambda_original is not None):
                    # run a benchmark
                    obs = compute_benchmark({
                        "onnxrt": lambda_onnx,
                        "original": lambda_original
                    })
                    df = pandas.DataFrame(obs)
                    df["input_size"] = sys.getsizeof(dataone)
                    dest = os.path.join(folder, basename + ".bench")
                    df.to_csv(dest, index=False)

    return names
def predictable_tsne_shape_calculator(operator):
    """
    Shape calculator: declares the type of the operator's first output.

    The output is a float tensor with two dimensions: the first one is
    copied from the first dimension of the operator's first input, the
    second one is the second dimension of ``estimator_._y`` read from the
    wrapped (raw) operator.

    :param operator: container exposing ``inputs``, ``outputs`` and
        ``raw_operator``; its first output type is overwritten in place
    """
    first_input = operator.inputs[0]
    wrapped_model = operator.raw_operator
    n_rows = first_input.type.shape[0]
    n_columns = wrapped_model.estimator_._y.shape[1]
    output_shape = [n_rows, n_columns]
    operator.outputs[0].type = FloatTensorType(output_shape)
def dump_multilabel_classification(model, suffix="", folder=None,
                                   allow_failure=None, verbose=False,
                                   label_string=False, first_class=0,
                                   comparable_outputs=None,
                                   target_opset=None):
    """
    Trains and dumps a model for a multilabel classification problem.

    The model is fitted, converted and dumped twice: first on a small
    hand-written dataset (basename tag ``Mcl``), then on a dataset produced
    by ``make_multilabel_classification`` restricted to its first two
    features (basename tag ``RndMla``). Each time the function calls
    :func:`dump_data_and_model`.
    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.

    :param model: estimator to train; it is refitted in place twice
    :param suffix: text appended to the basename of every dumped file
    :param folder: forwarded to :func:`dump_data_and_model`
    :param allow_failure: forwarded to :func:`dump_data_and_model`
    :param verbose: prints progress and is forwarded to
        :func:`dump_data_and_model`
    :param label_string: use string labels (``"l0"``, ``"l1"``, ``"l2"``)
        instead of integers for the hand-written dataset
    :param first_class: offset added to the integer labels of the
        hand-written dataset (ignored when *label_string* is True)
    :param comparable_outputs: forwarded to :func:`dump_data_and_model`
    :param target_opset: forwarded to ``convert_model`` for *both*
        conversions
    """
    def _convert_and_dump(features, task):
        # Shared convert-then-dump step so that both datasets are handled
        # with exactly the same options (including target_opset).
        if verbose:
            print("[make_multilabel_classification] model '{}'".format(
                model.__class__.__name__))
        model_onnx, prefix = convert_model(
            model, "multi-class classifier",
            [("input", FloatTensorType([None, 2]))],
            target_opset=target_opset)
        if verbose:
            print("[make_multilabel_classification] model was converted")
        dump_data_and_model(
            features.astype(numpy.float32),
            model,
            model_onnx,
            folder=folder,
            allow_failure=allow_failure,
            basename=prefix + task + model.__class__.__name__ + suffix,
            verbose=verbose,
            comparable_outputs=comparable_outputs,
        )

    # First pass: tiny hand-written dataset with three labels.
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    if label_string:
        y = [["l0"], ["l1"], ["l2"], ["l0", "l1"], ["l1"], ["l2"]]
    else:
        y = [[label + first_class for label in labels]
             for labels in ([0], [1], [2], [0, 1], [1], [2])]
    y = MultiLabelBinarizer().fit_transform(y)
    model.fit(X, y)
    _convert_and_dump(X, "Mcl")

    # Second pass: generated dataset, only the first two features are kept
    # so that the declared input shape [None, 2] still applies.
    X, y = make_multilabel_classification(40, n_features=4,
                                          random_state=42, n_classes=3)
    X = X[:, :2]
    model.fit(X, y)
    # Only the first ten rows are dumped and compared.
    _convert_and_dump(X[:10], "RndMla")
# Register the LightGBM converter together with the options it accepts.
lightgbm_converter_options = {
    'nocl': [True, False],
    'zipmap': [True, False],
}
update_registered_converter(
    LGBMClassifier,
    'LightGbmLGBMClassifier',
    calculate_linear_classifier_output_shapes,
    convert_lightgbm,
    options=lightgbm_converter_options,
)

##################################
# Convert again
# +++++++++++++
#
# The pipeline takes a float input with two columns and any number of rows.

pipeline_inputs = [('input', FloatTensorType([None, 2]))]
model_onnx = convert_sklearn(
    pipe,
    'pipeline_lightgbm',
    pipeline_inputs,
    target_opset=12,
)

# And save.
with open("pipeline_lightgbm.onnx", "wb") as f:
    serialized_model = model_onnx.SerializeToString()
    f.write(serialized_model)

###########################
# Compare the predictions
# +++++++++++++++++++++++
#
# Predictions with LightGbm.

predicted_labels = pipe.predict(X[:5])
print("predict", predicted_labels)
predicted_probabilities = pipe.predict_proba(X[:1])
print("predict_proba", predicted_probabilities)
def test_sub_graph_tuple(self):
    """Run the shared sub-graph check with a ``(name, type)`` tuple input
    and ``LinearRegression``."""
    input_spec = ('X1', FloatTensorType())
    self.common_test_sub_graph(input_spec, LinearRegression)
# Train a random forest on the already prepared features X and labels Y.
classifier = RandomForestClassifier()
classifier.fit(X, Y)

# Vectorize one sample sentence and predict it with the scikit-learn model.
sample_text = "orange is present"
print(sample_text)
sample = vectorizer.transform(
    [sample_text]).toarray()
pred = classifier.predict(sample)
print("rfc model prediction...")
print(pred)

# Convert the classifier to ONNX and write it to disk.
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
# The declared input is a single float row with 10 columns
# (assumes the vectorizer produces 10 features -- TODO confirm).
initial_type = [('float_input', FloatTensorType([1, 10]))]
onx = convert_sklearn(classifier, initial_types=initial_type)
with open('rfc_onx.onnx', 'wb') as f:
    f.write(onx.SerializeToString())

# Run the same sample through onnxruntime; only the first output
# (read here as the label) is requested.
import onnxruntime as rt
sess = rt.InferenceSession('rfc_onx.onnx')
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run(
    [label_name], {input_name: sample.astype(np.float32)})[0]
print('onnx prediction...')
print(pred_onx)

# Connect to Redis on localhost:6379 (database 0) and re-open the
# serialized model in binary mode.
conn = redis.Redis(host='localhost', port=6379, db=0)
with open("rfc_onx.onnx", "rb") as f:
def test_sub_graph_tuple_cls(self):
    """Run the shared sub-graph check with a ``(name, type)`` tuple input
    and ``LogisticRegression``, passing ``{'zipmap': False}`` as the third
    argument."""
    input_spec = ('X1', FloatTensorType())
    extra_argument = {'zipmap': False}
    self.common_test_sub_graph(input_spec, LogisticRegression,
                               extra_argument)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2) # Create the model ############################################################################################ mlp = MLPClassifier(hidden_layer_sizes=(50, 50)) # Train the model ############################################################################################ mlp.fit(X_train, y_train) score = mlp.score(X_test, y_test) print('Test Accuracy: ', score) # Save ONNX model ########################################################################################### initial_type = [('float_input', FloatTensorType([None, 4]))] onnx_model = convert_sklearn(mlp, initial_types=initial_type) onnx.save_model(onnx_model, 'sklearn_iris_model_before_pruning.onnx') # Prune the ONNX model ########################################################################################### onnx_model = onnx.load('sklearn_iris_model_before_pruning.onnx') graph = onnx_model.graph # Print a model overview # print('The model is:\n{}'.format(onnx_model)) # Just remove all nodes after Identity remove_list = [] end_reached = False for x in graph.node: # print(x.name)
def save_onnx(model, filepath: str, n_features: int = 4) -> None:
    """
    Convert a scikit-learn model to ONNX and write it to *filepath*.

    The converted model declares a single input named ``float_input``,
    a float tensor with *n_features* columns and any number of rows.

    :param model: scikit-learn model handed to ``convert_sklearn``
    :param filepath: destination file; it is opened in binary write mode
        and overwritten if it already exists
    :param n_features: number of columns of the declared input tensor,
        defaults to 4 (the value that used to be hard-coded)
    """
    initial_type = [('float_input', FloatTensorType([None, n_features]))]
    onx = convert_sklearn(model, initial_types=initial_type)
    with open(filepath, 'wb') as f:
        f.write(onx.SerializeToString())