def test_model_one_hot_encoder(self):
    """Convert a default ``OneHotEncoder`` fitted on int64 data and dump it."""
    # categorical_features will be removed in 0.22 (this test
    # will fail by then). FutureWarning: The handling of integer
    # data will change in version 0.22. Currently, the categories
    # are determined based on the range [0, max(values)], while
    # in the future they will be determined based on the unique values.
    # If you want the future behaviour and silence this warning,
    # you can specify "categories='auto'".
    rows = [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]]
    data = numpy.array(rows, dtype=numpy.int64)

    model = OneHotEncoder()
    model.fit(data)

    initial_types = [("input", Int64TensorType([1, 3]))]
    model_onnx = convert_sklearn(
        model, "scikit-learn one-hot encoder", initial_types)
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        data, model, model_onnx,
        basename="SklearnOneHotEncoderInt64-SkipDim1")
def test_select_from_model_int(self):
    """Convert a ``SelectFromModel`` (linear SVR) fitted on int64 data."""
    features = [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]]
    X = np.array(features, dtype=np.int64)
    y = np.array([0, 1, 0, 1])

    model = SelectFromModel(estimator=SVR(kernel="linear"))
    model.fit(X, y)

    input_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "select from model", input_types)
    self.assertIsNotNone(model_onnx)

    # Condition string handed to dump_data_and_model as allow_failure.
    failure_condition = (
        "StrictVersion(onnx.__version__) < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X, model, model_onnx,
        basename="SklearnSelectFromModel",
        allow_failure=failure_condition)
def test_combine_inputs_floats_ints(self):
    """Convert a two-scaler pipeline fed by one int64 and one float input.

    The pipeline is declared with two separate ONNX inputs (``input1`` as
    int64, ``input2`` as float); the converted graph is expected to end
    with a node producing a single output.
    """
    raw = [[0, 0.0], [0, 0.0], [1, 1.0], [1, 1.0]]
    scaler = StandardScaler()
    scaler.fit(raw)
    # The same fitted scaler instance is used for both pipeline steps.
    model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

    model_onnx = convert_sklearn(
        model, "pipeline",
        [
            ("input1", Int64TensorType([None, 1])),
            ("input2", FloatTensorType([None, 1])),
        ],
        target_opset=TARGET_OPSET)

    # Check for None first so that a failed conversion is reported as an
    # assertion failure rather than an AttributeError on ``.graph``.
    self.assertIsNotNone(model_onnx)
    self.assertEqual(len(model_onnx.graph.node[-1].output), 1)

    # Split the columns into the two named inputs declared above.
    array = numpy.array(raw)
    inputs = {
        "input1": array[:, 0].reshape((-1, 1)).astype(numpy.int64),
        "input2": array[:, 1].reshape((-1, 1)).astype(numpy.float32),
    }
    dump_data_and_model(
        inputs, PipeConcatenateInput(model), model_onnx,
        basename="SklearnPipelineScalerMixed")
def test_variance_threshold_int(self):
    """Convert a ``VarianceThreshold`` fitted on int64 data and dump it."""
    features = [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]]
    X = np.array(features, dtype=np.int64)
    y = np.array([0, 1, 0, 1])

    model = VarianceThreshold()
    model.fit(X, y)

    input_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "variance threshold", input_types)
    self.assertIsNotNone(model_onnx)

    # Condition string handed to dump_data_and_model as allow_failure.
    failure_condition = (
        "StrictVersion(onnx.__version__) < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X, model, model_onnx,
        basename="SklearnVarianceThreshold",
        allow_failure=failure_condition)
def test_onnxrt_tfidf_vectorizer_bi_skip0(self):
    """Run ``TfIdfVectorizer`` in TF mode on bigrams with no skip."""
    tokens = numpy.array(
        [[1, 1, 3, 3, 3, 7, 8, 6, 7, 5, 6, 8]], dtype=numpy.int64)
    expected = numpy.array(
        [[0., 0., 0., 0., 1., 1., 1.]], dtype=numpy.float32)

    ngram_counts = numpy.array([0, 4], dtype=numpy.int64)
    ngram_indexes = numpy.array([0, 1, 2, 3, 4, 5, 6], dtype=numpy.int64)
    unigrams = [2, 3, 5, 4]
    bigrams = [5, 6, 7, 8, 6, 7]
    pool_int64s = numpy.array(unigrams + bigrams, dtype=numpy.int64)

    node = OnnxTfIdfVectorizer(
        'tokens', op_version=TARGET_OPSET,
        mode='TF', min_gram_length=2, max_gram_length=2,
        max_skip_count=0,
        ngram_counts=ngram_counts, ngram_indexes=ngram_indexes,
        pool_int64s=pool_int64s,
        output_names=['out'])
    onx = node.to_onnx(
        inputs=[('tokens', Int64TensorType())],
        outputs=[('out', FloatTensorType())])

    results = OnnxInference(onx).run({'tokens': tokens})
    self.assertEqual(expected.tolist(), results['out'].tolist())
def test_model_polynomial_features_int_degree_3(self):
    """Convert degree-3 ``PolynomialFeatures`` declared with an int64 input."""
    rows = [
        [1, 3, 33],
        [4, 1, -11],
        [3, 7, -3],
        [3, 5, 4],
        [1, 0, 3],
        [5, 4, 9],
    ]
    X = np.array(rows)
    poly = PolynomialFeatures(degree=3)
    model = poly.fit(X)

    input_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model, "scikit-learn polynomial features", input_types)
    self.assertIsNotNone(model_onnx)

    # The data is cast to int64 to match the declared ONNX input type.
    dump_data_and_model(
        X.astype(np.int64), model, model_onnx,
        basename="SklearnPolynomialFeaturesIntDegree3",
        allow_failure=(
            "StrictVersion(onnxruntime.__version__) "
            "<= StrictVersion('0.2.1')"))
def shape_calculator(operator):
    """Assign a tensor type to every output of *operator*.

    The types are read from the outputs of the ONNX graph held by
    ``self.onnxrt_`` (``self`` is captured from the enclosing scope,
    which is not visible in this excerpt).

    :param operator: operator whose ``outputs`` receive a ``type``
    :raises RuntimeError: if the operator and the wrapped runtime do not
        declare the same number of outputs
    :raises NotImplementedError: if an output is not a tensor or its
        element type is not ``float``, ``int64`` or ``double``
    """
    cout = self.onnxrt_.output_names
    if len(operator.outputs) != len(cout):
        raise RuntimeError(  # pragma: no cover
            "Mismatched number of outputs: {} != {}."
            "".format(len(operator.outputs), len(cout)))
    for out_op, out in zip(operator.outputs,
                           self.onnxrt_.obj.graph.output):
        var = _var_as_dict(out)
        if var['type']['kind'] != 'tensor':
            raise NotImplementedError(  # pragma: no cover
                "Not yet implemented for output:\n{}".format(out))
        shape = var['type']['shape']
        # A leading dimension of 0 is replaced by None. The length check
        # keeps an empty shape from raising IndexError.
        if len(shape) > 0 and shape[0] == 0:
            shape = (None,) + tuple(shape[1:])
        elem = var['type']['elem']
        if elem == 'float':
            out_op.type = FloatTensorType(shape=shape)
        elif elem == 'int64':
            out_op.type = Int64TensorType(shape=shape)
        elif elem == 'double':
            out_op.type = DoubleTensorType(shape=shape)
        else:
            raise NotImplementedError(  # pragma: no cover
                "Not yet implemented for elem_type:\n{}".format(elem))
def custom_parser(scope, model, inputs, custom_parsers=None):
    """Declare the label and probability outputs of a ``LgbmClassifier``.

    If *custom_parsers* has an entry for *model*, parsing is delegated
    to it. Otherwise an ``LgbmClassifier`` operator is declared with two
    outputs: a label tensor (int64 when every entry of
    ``model.classes_`` is a real number or boolean, string otherwise)
    and a sequence of dictionaries mapping labels to probabilities.

    :param scope: scope used to declare variables and operators
    :param model: model to parse; must expose ``classes_``
    :param inputs: variables assigned as the operator inputs
    :param custom_parsers: optional mapping from model to parser
    :return: the outputs of the declared operator
    """
    if custom_parsers is not None and model in custom_parsers:
        delegate = custom_parsers[model]
        return delegate(scope, model, inputs, custom_parsers=custom_parsers)

    numeric_kinds = (numbers.Real, bool, np.bool_)
    numeric_labels = all(
        isinstance(label, numeric_kinds) for label in model.classes_)
    label_type = Int64TensorType() if numeric_labels else StringTensorType()

    # Declaration order is kept: label, operator, then probabilities.
    output_label = scope.declare_local_variable('output_label', label_type)
    this_operator = scope.declare_local_operator('LgbmClassifier', model)
    this_operator.inputs = inputs
    probability_type = SequenceType(
        DictionaryType(label_type, scope.tensor_type()))
    probability_map_variable = scope.declare_local_variable(
        'output_probability', probability_type)

    this_operator.outputs.append(output_label)
    this_operator.outputs.append(probability_map_variable)
    return this_operator.outputs
def test_feature_union_transformer_weights_1(self):
    """Convert a weighted PCA + TruncatedSVD ``FeatureUnion`` on int64 data."""
    digits = load_digits()
    X = digits.data.astype(np.int64)
    y = digits.target
    X_train, X_test, *_ = train_test_split(
        X, y, test_size=0.5, random_state=42)

    union = FeatureUnion(
        [('pca', PCA()), ('svd', TruncatedSVD())],
        transformer_weights={'pca': 10, 'svd': 3})
    model = union.fit(X_train)

    input_types = [('input', Int64TensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(
        model, 'feature union', input_types,
        target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X_test, model, model_onnx,
        basename="SklearnFeatureUnionTransformerWeights1-Dec4")
print(r2_score(y_test, pred)) #################################### # Conversion to ONNX format # +++++++++++++++++++++++++ # # We use module # `sklearn-onnx <https://github.com/onnx/sklearn-onnx>`_ # to convert the model into ONNX format. from skl2onnx import convert_sklearn from skl2onnx.common.data_types import DictionaryType, FloatTensorType, Int64TensorType, SequenceType # initial_type = [('float_input', DictionaryType(Int64TensorType([1]), FloatTensorType([])))] initial_type = [("float_input", DictionaryType(Int64TensorType([1]), FloatTensorType([])))] onx = convert_sklearn(pipe, initial_types=initial_type) with open("pipeline_vectorize.onnx", "wb") as f: f.write(onx.SerializeToString()) ################################## # We load the model with ONNX Runtime and look at # its input and output. import onnxruntime as rt from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument sess = rt.InferenceSession("pipeline_vectorize.onnx", providers=rt.get_available_providers()) import numpy
def get_defined_outputs(outputs, onnx_node, typed_inputs=None, variables=None,
                        dtype=None, schema=None, schema_inputs=None):
    """
    Gets types of predefined outputs when they cannot be inferred.
    Some part of it should be automated based
    on type constraints.

    :param outputs: requested outputs
    :param onnx_node: :epkg:`ONNX` node definition
    :param typed_inputs: known typed inputs of the node as
        `tuple(name, type)`
    :param variables: registered variables created
        by previous operators (only used in error messages)
    :param dtype: float computational type
    :param schema: defined outputs by schema (*expected_outputs*),
        it must contain exactly one entry, either a type instance
        or a ``tuple(name, type)``
    :param schema_inputs: defined inputs by schema (*expected_inputs*)
    :return: typed outputs as ``tuple(name, type)``
    :raises ValueError: if *schema* does not contain exactly one output
    :raises RuntimeError: if *typed_inputs* does not have the number of
        entries expected by the operator (TopK, ArrayFeatureExtractor,
        Scan)
    :raises NotImplementedError: if an output ends up with an undefined
        type or a name which is not a string
    """
    # Default float tensor class used when nothing better is known.
    if schema is None:
        ft = DoubleTensorType if dtype == numpy.float64 else FloatTensorType
    elif len(schema) != 1:
        raise ValueError(  # pragma: no cover
            "schema should only contain one output not {}.".format(schema))
    else:
        first = schema[0]
        # The entry is either a type instance or a tuple (name, type).
        # Testing the entry itself fixes the case of a schema given as
        # ``[type]``; the test on *schema* is kept for compatibility.
        if isinstance(schema, DataType) or isinstance(first, DataType):
            ft = first.__class__
        else:
            ft = first[1].__class__

    if onnx_node.op_type in {
            'ZipMap', 'ArgMin', 'ArgMax', 'Shape',
            'Greater', 'Less', 'Equal', 'TopK',
            'Cast', 'ArrayFeatureExtractor',
            'Reshape', 'Transpose', 'Scan',
            'ConstantOfShape'}:
        if onnx_node.op_type == "ZipMap":
            # ZipMap
            otype = SequenceType(DictionaryType(Int64Type(), ft()))
            outputs = [(name, otype) for name in outputs]
        elif (onnx_node.op_type in ("ArgMin", "ArgMax", 'Shape') and
                len(outputs) == 1):
            # ArgMin, ArgMax, Shape
            outputs = [(outputs[0], Int64TensorType())]
        elif (onnx_node.op_type in ("Greater", "Less", 'Equal') and
                len(outputs) == 1):
            # Greater, Less, Equal
            outputs = [(outputs[0], BooleanTensorType())]
        elif onnx_node.op_type == "TopK" and len(outputs) == 2:
            # TopK
            if len(typed_inputs) != 2:
                raise RuntimeError(  # pragma: no cover
                    "Wrong typed_inputs, got {}.".format(typed_inputs))
            outputs = [(outputs[0], typed_inputs[0][1]),
                       (outputs[1], Int64TensorType())]
        elif onnx_node.op_type == "Cast" and len(outputs) == 1:
            # Cast: the output type is read from the first attribute.
            ttyp = _guess_type_proto(onnx_node.attribute[0].i, dims=None)
            outputs = [(outputs[0], ttyp)]
        elif onnx_node.op_type == "ArrayFeatureExtractor":
            # ArrayFeatureExtractor
            if len(typed_inputs) != 2:
                raise RuntimeError(  # pragma: no cover
                    "Wrong typed_inputs, got {}.".format(typed_inputs))
            outputs = [(outputs[0], typed_inputs[0][1])]
        elif onnx_node.op_type in ('Reshape', 'Transpose'):
            # Reshape, Transpose: same type class as the first input,
            # instantiated without a shape.
            outputs = [(outputs[0], typed_inputs[0][1].__class__())]
        elif onnx_node.op_type == 'Scan':
            # Scan
            if len(outputs) != len(typed_inputs):
                raise RuntimeError(  # pragma: no cover
                    "Dimension mismatch, operator Scan should have "
                    "the same number of inputs and outputs {} != {}"
                    ".".format(len(outputs), len(typed_inputs)))
            outputs = [(o, t[1].__class__())
                       for o, t in zip(outputs, typed_inputs)]
        elif onnx_node.op_type == "ConstantOfShape":
            # ConstantOfShape
            outputs = [(outputs[0], ft())]
    elif 'Classifier' in onnx_node.op_type:
        # Good chance that's a classifier.
        outputs = [(outputs[0], Int64TensorType()),
                   (outputs[1], ft())]
    else:
        if schema_inputs is not None and schema is not None:
            # Maps every schema type name to the matching typed input.
            dt = {}
            for got, exp in zip(typed_inputs, schema_inputs):
                if isinstance(exp[1], str):
                    dt[exp[1]] = got
            out = []
            for i, o in enumerate(outputs):
                if isinstance(o, str):
                    name = o
                elif (isinstance(o, tuple) and
                        (isinstance(o[1], str) or o[1] is None)):
                    name = o[0]
                else:
                    # Already typed, kept as is.
                    out.append(o)
                    continue
                exp = schema[i]
                if exp[1] in dt:
                    out.append((name, dt[exp[1]][1].__class__()))
                else:
                    nt = _guess_type_proto_str(exp[1], None)
                    out.append((name, nt))
            outputs = out
        elif (typed_inputs is not None and len(typed_inputs) == 1 and
                len(outputs) == 1):
            # Default case
            # Assuming the only output is the same as the only input.
            outputs = [(outputs[0], typed_inputs[0][1])]
        else:
            # Default
            outputs = [(name, ft()) for name in outputs]

    for name, typ in outputs:
        if typ in ('T', None, '', 'I'):
            raise NotImplementedError(  # pragma: no cover
                "Undefined output type: %r (outputs=%r, typed_inputs=%r, "
                "dtype=%r, schema=%r, schema_inputs=%r, onnx_node=%r, "
                "variables=%r)." % (
                    typ, outputs, typed_inputs, dtype,
                    schema, schema_inputs, onnx_node, variables))
        if not isinstance(name, str):
            raise NotImplementedError(  # pragma: no cover
                "Output name must be a string, got %r with type %r "
                "(outputs=%r, typed_inputs=%r, "
                "dtype=%r, schema=%r, schema_inputs=%r, onnx_node=%r, "
                "variables=%r)." % (
                    name, typ, outputs, typed_inputs, dtype,
                    schema, schema_inputs, onnx_node, variables))
    return outputs
def test_onnxt_runtime_topk(self):
    """Run ``TopK`` through ``OnnxInference`` for several (axis, k) pairs."""
    X = numpy.array([[0, 1, 2, 3, 4],
                     [1, -1, -2, 4, 5],
                     [2, -2, -3, 5, -4]],
                    dtype=numpy.float32)

    def run_topk(k, axis):
        # Builds the TopK graph, runs it on X and checks the output names.
        node = OnnxTopK('X', numpy.array([k], dtype=numpy.int64),
                        axis=axis, output_names=['Y', 'Yi'])
        model_def = node.to_onnx(
            {'X': X.astype(numpy.float32)},
            outputs=[('Y', FloatTensorType(X.shape)),
                     ('Yi', Int64TensorType(X.shape))])
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X})
        self.assertEqual(list(sorted(got)), ['Y', 'Yi'])
        return got

    # axis=0, k=-1
    got = run_topk(-1, 0)
    exp = numpy.array([[0., -2., -3., 3., -4.],
                       [1., -1., -2., 4., 4.],
                       [2., 1., 2., 5., 5.]],
                      dtype=numpy.float32)
    self.assertEqualArray(exp, got['Y'])

    # axis=0, k=2
    got = run_topk(2, 0)
    exp = numpy.array([[1., -1., -2., 4., 4.],
                       [2., 1., 2., 5., 5.]],
                      dtype=numpy.float32)
    self.assertEqualArray(exp, got['Y'])

    # axis=1, k=-1
    got = run_topk(-1, 1)
    exp = numpy.array([[0., 1., 2., 3., 4.],
                       [-2., -1., 1., 4., 5.],
                       [-4., -3., -2., 2., 5.]],
                      dtype=numpy.float32)
    self.assertEqualArray(exp, got['Y'])

    # axis=1, k=2
    got = run_topk(2, 1)
    exp = numpy.array([[3., 4.],
                       [4., 5.],
                       [2., 5.]],
                      dtype=numpy.float32)
    self.assertEqualArray(exp, got['Y'])
    exp = numpy.array([[3, 4],
                       [3, 4],
                       [0, 3]],
                      dtype=numpy.int64)
    self.assertEqualArray(exp, got['Yi'])

    # axis=-1, k=2
    got = run_topk(2, -1)
    exp = numpy.array([[3., 4.],
                       [4., 5.],
                       [2., 5.]],
                      dtype=numpy.float32)
    self.assertEqualArray(exp, got['Y'])
pred = pipe.predict(X_test_dict)
print(r2_score(y_test, pred))

####################################
# Conversion to ONNX format
# +++++++++++++++++++++++++
#
# We use module
# `sklearn-onnx <https://github.com/onnx/sklearn-onnx>`_
# to convert the model into ONNX format.

from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType, DictionaryType, SequenceType

# The single input is declared as a dictionary type whose keys are
# int64 tensors and whose values are float tensors.
initial_type = [('float_input', DictionaryType(Int64TensorType([1]), FloatTensorType([])))]
onx = convert_sklearn(pipe, initial_types=initial_type)
with open("pipeline_vectorize.onnx", "wb") as f:
    f.write(onx.SerializeToString())

##################################
# We load the model with ONNX Runtime and look at
# its input and output.

import onnxruntime as rt
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument

# The providers are given explicitly: recent onnxruntime versions
# refuse to create a session without them when the build ships
# more than the CPU execution provider.
sess = rt.InferenceSession("pipeline_vectorize.onnx",
                           providers=rt.get_available_providers())

import numpy

inp, out = sess.get_inputs()[0], sess.get_outputs()[0]
print("input name='{}' and shape={} and type={}".format(inp.name, inp.shape, inp.type))
def test_model_normalizer(self):
    """Convert an unfitted L2 ``Normalizer``; expect a one-node graph."""
    normalizer = Normalizer(norm='l2')
    input_types = [('input', Int64TensorType([1, 1]))]
    model_onnx = convert_sklearn(
        normalizer, 'scikit-learn normalizer', input_types)
    self.assertIsNotNone(model_onnx)
    self.assertEqual(len(model_onnx.graph.node), 1)
input_names = [i.name for i in sess.get_inputs()]
output_names = [o.name for o in sess.get_outputs()]
print("inputs=%r, outputs=%r" % (input_names, output_names))
print(sess.run(None, {input_names[0]: X_test[:2]}))

####################################
# Changes the output names
# ++++++++++++++++++++++++
#
# It is possible to change the output names by using the
# parameter *final_types*.

onx = to_onnx(clr, X, options={'zipmap': False},
              final_types=[('L', Int64TensorType([None])),
                           ('P', FloatTensorType([None, 3]))])

sess = InferenceSession(onx.SerializeToString())
input_names = [i.name for i in sess.get_inputs()]
output_names = [o.name for o in sess.get_outputs()]
print("inputs=%r, outputs=%r" % (input_names, output_names))
print(sess.run(None, {input_names[0]: X_test[:2]}))

####################################
# Renaming intermediate results
# +++++++++++++++++++++++++++++
#
# It is possible to rename intermediate results by using a prefix
# or by using a function. The result will be post-processed in order
# to make names unique. It does not impact the graph inputs or outputs.
def get_defined_outputs(outputs, onnx_node, typed_inputs=None,
                        variables=None, dtype=None):
    """
    Gets types of predefined outputs when they cannot be inferred.
    Some part of it should be automated based
    on type constraints.

    @param      outputs         requested outputs
    @param      onnx_node       :epkg:`ONNX` node definition
    @param      typed_inputs    known typed inputs of the node
                                as ``tuple(name, type)``, it may be
                                None for operators which do not need it
    @param      variables       registered variables created
                                by previous operators (unused here)
    @param      dtype           float computational type
    @return                     typed outputs
                                as ``tuple(name, type)``
    """
    # Default float tensor class used when nothing better is known.
    ft = DoubleTensorType if dtype == numpy.float64 else FloatTensorType

    # ZipMap
    if onnx_node.op_type == "ZipMap":
        otype = SequenceType(DictionaryType(Int64Type(), ft()))
        outputs = [(name, otype) for name in outputs]
    # ArgMin, ArgMax, Shape
    elif (onnx_node.op_type in ("ArgMin", "ArgMax", 'Shape') and
            len(outputs) == 1):
        outputs = [(outputs[0], Int64TensorType())]
    # Greater, Less, Equal
    elif (onnx_node.op_type in ("Greater", "Less", 'Equal') and
            len(outputs) == 1):
        outputs = [(outputs[0], BooleanTensorType())]
    # TopK
    elif onnx_node.op_type == "TopK" and len(outputs) == 2:
        if len(typed_inputs) != 2:
            raise RuntimeError(
                "Wrong typed_inputs, got {}.".format(typed_inputs))
        outputs = [(outputs[0], typed_inputs[0][1]),
                   (outputs[1], Int64TensorType())]
    # Cast: the output type is read from the first attribute.
    elif onnx_node.op_type == "Cast" and len(outputs) == 1:
        ttyp = _guess_type_proto(onnx_node.attribute[0].i, dims=None)
        outputs = [(outputs[0], ttyp)]
    # ArrayFeatureExtractor
    elif onnx_node.op_type == "ArrayFeatureExtractor":
        if len(typed_inputs) != 2:
            raise RuntimeError(
                "Wrong typed_inputs, got {}.".format(typed_inputs))
        outputs = [(outputs[0], typed_inputs[0][1])]
    elif 'Classifier' in onnx_node.op_type:
        # Good chance that's a classifier.
        outputs = [(outputs[0], Int64TensorType()),
                   (outputs[1], ft())]
    # Reshape, Transpose: same type class as the first input,
    # instantiated without a shape.
    elif onnx_node.op_type in ('Reshape', 'Transpose'):
        outputs = [(outputs[0], typed_inputs[0][1].__class__())]
    # Scan
    elif onnx_node.op_type == 'Scan':
        if len(outputs) != len(typed_inputs):
            raise RuntimeError("Dimension mismatch, operator Scan should have "
                               "the same number of inputs and outputs {} != {}"
                               ".".format(len(outputs), len(typed_inputs)))
        outputs = [(o, t[1].__class__())
                   for o, t in zip(outputs, typed_inputs)]
    # ConstantOfShape
    elif onnx_node.op_type == "ConstantOfShape":
        outputs = [(outputs[0], ft())]
    # Default case
    # Assuming the only output is the same as the only input.
    # typed_inputs defaults to None, in which case the generic
    # default below applies instead of failing on len(None).
    elif (typed_inputs is not None and len(typed_inputs) == 1 and
            len(outputs) == 1):
        outputs = [(outputs[0], typed_inputs[0][1])]
    # Default
    else:
        outputs = [(name, ft()) for name in outputs]
    return outputs