def __init__(self, *args, **kwargs):
    "Overloads the constructor."
    runtime_options = kwargs.pop('runtime_options', {})
    disable_optimisation = runtime_options.pop('disable_optimisation', False)
    if disable_optimisation:
        if 'sess_options' in kwargs:
            raise RuntimeError(
                "Incompatible options, 'disable_optimisation' and "
                "'sess_options' cannot be specified at the same time.")
        kwargs['sess_options'] = SessionOptions()
        kwargs['sess_options'].graph_optimization_level = (
            GraphOptimizationLevel.ORT_DISABLE_ALL)
    self.sess, self.outi, self.erri = _capture_output(
        lambda: InferenceSession(*args, **kwargs), 'c')
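# Hedged usage sketch for the wrapper above: the same effect written out with
# onnxruntime directly; "model.onnx" is an illustrative path, not from the
# original source.
from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions

sess_options = SessionOptions()
# Turn off every graph optimization, exactly what 'disable_optimisation' selects.
sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
sess = InferenceSession("model.onnx", sess_options=sess_options,
                        providers=["CPUExecutionProvider"])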
def fcts_model(X, y, n_jobs):
    "LinearRegression."
    model = LinearRegression(n_jobs=n_jobs)
    model.fit(X, y)

    initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
    onx = convert_sklearn(model, initial_types=initial_types)
    sess = InferenceSession(onx.SerializeToString())
    outputs = [o.name for o in sess.get_outputs()]
    oinf = OnnxInference(onx, runtime="python")

    def predict_skl_predict(X, model=model):
        return model.predict(X)

    def predict_onnxrt_predict(X, sess=sess):
        return sess.run(outputs[:1], {'X': X})[0]

    def predict_onnx_inference(X, oinf=oinf):
        return oinf.run({'X': X})["variable"]

    return {
        'predict': (predict_skl_predict, predict_onnxrt_predict,
                    predict_onnx_inference)
    }
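# Hypothetical benchmark driver for the factory above (the data, the repeat
# count and the printed labels are illustrative): it times scikit-learn
# against both ONNX runtimes on the same batch.
import numpy as np
from timeit import timeit

X = np.random.randn(1000, 10).astype(np.float32)
y = X.sum(axis=1)
fcts = fcts_model(X, y, n_jobs=1)
for name, fct in zip(('skl', 'onnxruntime', 'python'), fcts['predict']):
    print(name, timeit(lambda: fct(X), number=10))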
def test_onnx_init_sparse_coo(self):
    row = np.array([0, 0, 1, 3, 1], dtype=np.int64)
    col = np.array([0, 2, 1, 3, 1], dtype=np.int64)
    data = np.array([1, 1, 1, 1, 1], dtype=np.float32)
    X = coo_matrix((data, (row, col)), shape=(4, 4))

    node = OnnxAdd('X', X, output_names=['Y'],
                   op_version=TARGET_OPSET)
    model_def = node.to_onnx({'X': X},
                             outputs=[('Y', FloatTensorType())])
    try:
        sess = InferenceSession(model_def.SerializeToString())
    except (RuntimeError, OrtInvalidArgument):
        # Sparse tensor is not supported for constant.
        return
    try:
        res = sess.run(None, {'X': X})[0]
    except RuntimeError as e:
        # Sparse tensor is not supported for constant.
        warnings.warn(
            "Unable to run with %r\n---\n%s\n%s" % (
                {'X': X}, model_def, e))
        return
    assert_almost_equal((X + X).todense(), res)
def test_model_mlp_regressor_default(self):
    model, X_test = fit_regression_model(MLPRegressor(random_state=42))
    exp = model.predict(X_test)
    for opv in (1, 2, 7, 8, 9, 10, 11, 12, onnx_opset_version()):
        if opv is not None and opv > get_latest_tested_opset_version():
            continue
        try:
            onx = convert_sklearn(
                model, "scikit-learn MLPRegressor",
                [("input", FloatTensorType([None, X_test.shape[1]]))],
                target_opset=opv)
        except RuntimeError as e:
            if ("is higher than the number of the "
                    "installed onnx package") in str(e):
                continue
            raise
        as_string = onx.SerializeToString()
        try:
            ort = InferenceSession(as_string)
        except (RuntimeError, InvalidGraph, Fail) as e:
            if opv in (None, 1, 2):
                continue
            if opv >= onnx_opset_version():
                continue
            if ("No suitable kernel definition found for "
                    "op Cast(9)") in str(e):
                # too old onnxruntime
                continue
            raise AssertionError(
                "Unable to load opv={}\n---\n{}\n---".format(
                    opv, onx)) from e
        res_out = ort.run(None, {'input': X_test})
        assert len(res_out) == 1
        res = res_out[0]
        assert_almost_equal(exp.ravel(), res.ravel(), decimal=4)
def optimize_model(model_path: Path):
    '''
    Generates a model to which graph optimizations
    (constant folding, etc.) have been applied.

    :param model_path: path to the original ONNX model
    :return: optimized ONNX model
    '''
    opt_model_path = generate_identified_filename(model_path, "-opt")
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()
    sess_option.graph_optimization_level = \
        GraphOptimizationLevel.ORT_ENABLE_BASIC
    _ = InferenceSession(model_path.as_posix(), sess_option,
                         providers=['CPUExecutionProvider'])
    optimized_model = onnx.load(opt_model_path.as_posix())
    return optimized_model
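# Hedged usage sketch: creating the session above persists the optimized graph
# to `optimized_model_filepath` as a side effect, which is what optimize_model
# relies on; "model.onnx" is an illustrative path.
from pathlib import Path

optimized_model = optimize_model(Path("model.onnx"))
# The optimized copy is also written next to the original, with the "-opt"
# suffix added by generate_identified_filename.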
def test_model_tfidf_vectorizer11_nolowercase(self):
    corpus = numpy.array([
        "This is the first document.",
        "This document is the second document.",
        "And this is the third one.",
        "Is this the first document?",
    ]).reshape((4, 1))
    vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, lowercase=False)
    vect.fit(corpus.ravel())
    model_onnx = convert_sklearn(vect, "TfidfVectorizer",
                                 [("input", StringTensorType())],
                                 options=self.get_options(),
                                 target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(
        corpus, vect, model_onnx,
        basename="SklearnTfidfVectorizer11NoL-OneOff-SklCol")

    sess = InferenceSession(model_onnx.SerializeToString())
    res = sess.run(None, {'input': corpus.ravel()})[0]
    assert res.shape == (4, 11)
def __init__(self, onnx_data, runtime, runtime_options=None):
    """
    @param  onnx_data       :epkg:`ONNX` model or data
    @param  runtime         runtime to be used, mostly :epkg:`onnxruntime`
    @param  runtime_options runtime options
    """
    if runtime != 'onnxruntime1':
        raise NotImplementedError(  # pragma: no cover
            "runtime '{}' is not implemented.".format(runtime))
    if hasattr(onnx_data, 'SerializeToString'):
        onnx_data = onnx_data.SerializeToString()
    self.runtime = runtime
    sess_options = SessionOptions()
    self.run_options = RunOptions()
    try:
        sess_options.session_log_verbosity_level = 0
    except AttributeError:  # pragma: no cover
        # onnxruntime not recent enough.
        pass
    try:
        self.run_options.run_log_verbosity_level = 0
    except AttributeError:  # pragma: no cover
        # onnxruntime not recent enough.
        pass
    if (runtime_options is not None
            and runtime_options.get('disable_optimisation', False)):
        # 'disable_optimisation' turns every graph optimization off.
        sess_options.graph_optimization_level = (
            GraphOptimizationLevel.ORT_DISABLE_ALL)
    try:
        self.sess = InferenceSession(onnx_data, sess_options=sess_options)
    except (OrtFail, OrtNotImplemented, OrtInvalidGraph,
            OrtInvalidArgument, OrtRuntimeException, RuntimeError) as e:
        raise RuntimeError(
            "Unable to create InferenceSession due to '{}'\n{}.".format(
                e, display_onnx(onnx.load(BytesIO(onnx_data))))) from e
def test_pipeline_tfidf_svc(self):
    pipe = Pipeline([('tfidf', TfidfVectorizer()),
                     ('clf_svc', SVC(probability=True, kernel='linear'))])
    data = numpy.array([
        "first sentence", "second sentence", "many sentences",
        "dummy sentence", "no sentence at all"
    ])
    y = numpy.array([0, 0, 1, 0, 1])
    pipe.fit(data, y)
    expected_label = pipe.predict(data)
    expected_proba = pipe.predict_proba(data)
    df = pandas.DataFrame(data)
    df.columns = ['text']

    # first conversion with shape=[None, 1]
    model_onnx = convert_sklearn(
        pipe, initial_types=[('text', StringTensorType([None, 1]))],
        target_opset=TARGET_OPSET,
        options={id(pipe): {'zipmap': False}})
    sess = InferenceSession(model_onnx.SerializeToString())
    got = sess.run(None, {'text': data.reshape((-1, 1))})
    assert_almost_equal(expected_proba, got[1])
    assert_almost_equal(expected_label, got[0])
    # sess.run(None, {'text': df}) --> failure
    # sess.run(None, {'text': df["text"]}) --> failure

    # second conversion with shape=[None]
    model_onnx = convert_sklearn(
        pipe, initial_types=[('text', StringTensorType([None]))],
        target_opset=TARGET_OPSET,
        options={id(pipe): {'zipmap': False}})
    sess = InferenceSession(model_onnx.SerializeToString())
    got = sess.run(None, {'text': data})
    assert_almost_equal(expected_proba, got[1])
    assert_almost_equal(expected_label, got[0])
    # sess.run(None, {'text': df}) --> failure
    # sess.run(None, {'text': df["text"]}) --> failure
    sess.run(None, {'text': df["text"].values})  # success
def test_extratreesclassifier_decision_path(self):
    model = ExtraTreesClassifier(max_depth=2, n_estimators=3)
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    initial_types = [('input', FloatTensorType((None, X.shape[1])))]
    model_onnx = convert_sklearn(
        model, initial_types=initial_types,
        options={id(model): {'decision_path': True, 'zipmap': False}},
        target_opset=TARGET_OPSET)
    sess = InferenceSession(model_onnx.SerializeToString())
    res = sess.run(None, {'input': X.astype(numpy.float32)})
    pred = model.predict(X)
    assert_almost_equal(pred, res[0].ravel())
    prob = model.predict_proba(X)
    assert_almost_equal(prob, res[1])
    dec = model.decision_path(X)
    exp = binary_array_to_string(dec[0].todense())
    got = numpy.array([''.join(row) for row in res[2]])
    assert exp == got.ravel().tolist()
def test_decisiontree_regressor_decision_path_leaf(self):
    model = DecisionTreeRegressor(max_depth=2)
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    initial_types = [('input', FloatTensorType((None, X.shape[1])))]
    model_onnx = convert_sklearn(
        model, initial_types=initial_types,
        options={id(model): {'decision_leaf': True,
                             'decision_path': True}},
        target_opset=TARGET_OPSET)
    sess = InferenceSession(model_onnx.SerializeToString())
    res = sess.run(None, {'input': X.astype(np.float32)})
    pred = model.predict(X)
    assert_almost_equal(pred, res[0].ravel())
    dec = model.decision_path(X)
    exp_leaf = path_to_leaf(model.tree_, dec.todense())
    exp_path = binary_array_to_string(dec.todense())
    assert exp_path == res[1].ravel().tolist()
    assert exp_leaf.tolist() == res[2].ravel().tolist()
def check_outputs(self, model, model_onnx, Xtest,
                  predict_attributes, decimal=5,
                  skip_if_float32=False, disable_optimisation=True):
    if "TransposeScaleMatMul" in str(model_onnx):
        raise RuntimeError("This node must not be added.")
    if predict_attributes is None:
        predict_attributes = {}
    exp = model.predict(Xtest, **predict_attributes)
    if disable_optimisation and GraphOptimizationLevel is not None:
        opts = SessionOptions()
        opts.graph_optimization_level = (
            GraphOptimizationLevel.ORT_DISABLE_ALL)
        sess = InferenceSession(
            model_onnx.SerializeToString(), sess_options=opts)
    else:
        sess = InferenceSession(model_onnx.SerializeToString())
    got = sess.run(None, {'X': Xtest})
    if isinstance(exp, tuple):
        if len(exp) != len(got):
            raise AssertionError("Mismatched number of outputs.")
        for i, (e, g) in enumerate(zip(exp, got)):
            if skip_if_float32 and g.dtype == np.float32:
                continue
            try:
                assert_almost_equal(self.remove_dim1(e),
                                    self.remove_dim1(g),
                                    decimal=decimal)
            except AssertionError as e:  # noqa
                raise AssertionError(
                    "Mismatch for output {} and attributes {}"
                    ".".format(i, predict_attributes)) from e
    else:
        if skip_if_float32 and Xtest.dtype == np.float32:
            return
        assert_almost_equal(np.squeeze(exp), np.squeeze(got),
                            decimal=decimal)
def test_kernel_cosine_double(self):
    ker = PairwiseKernel(metric='cosine')
    onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float64,
                         op_version=_TARGET_OPSET_)
    model_onnx = onx.to_onnx(
        inputs=[('X', DoubleTensorType([None, None]))],
        target_opset=TARGET_OPSET)
    x = np.random.randn(4, 3)
    x[0, 0] = x[1, 1] = x[2, 2] = 10.
    x[3, 2] = 5.
    try:
        sess = InferenceSession(model_onnx.SerializeToString())
    except NotImplementedError:
        # Failed to find kernel for FusedMatMul(1).
        return
    res = sess.run(None, {'X': x.astype(np.float64)})[0]
    m1 = res
    m2 = ker(x)
    assert_almost_equal(m1, m2, decimal=5)
def do_onnx_inference_test(module, input, *, training=False):
    with tempfile.NamedTemporaryFile() as file:
        onnx_export_to(file.name, module, input, training=training)
        onnx.checker.check_model(onnx.load(file.name), full_check=True)

        # run inference through the exported model
        input = torch.randn_like(input)
        output, = InferenceSession(file.name).run(
            ['output'], {'input': input.numpy()}
        )
        assert torch.allclose(
            torch.from_numpy(output), module(input), rtol=5e-5, atol=1e-6
        )
def test_local_outlier_factor_metric(self):
    for metric in ['cityblock', 'euclidean', 'manhattan', 'sqeuclidean']:
        with self.subTest(metric=metric):
            lof = LocalOutlierFactor(n_neighbors=2, novelty=True,
                                     metric=metric)
            data = np.array([[-1.1, -1.2], [0.3, 0.2],
                             [0.5, 0.4], [100., 99.]], dtype=np.float32)
            model = lof.fit(data)
            model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET)

            data = data.copy()
            data[:, 0] += 0.1

            sess = InferenceSession(model_onnx.SerializeToString())
            names = [o.name for o in sess.get_outputs()]
            self.assertEqual(names, ['label', 'scores'])
            got = sess.run(None, {'X': data})
            self.assertEqual(len(got), 2)
            expected_label = lof.predict(data)
            expected_decif = lof.decision_function(data)
            assert_almost_equal(expected_label, got[0].ravel())
            assert_almost_equal(expected_decif, got[1].ravel(), decimal=4)
def fcts_model(X, y, max_depth, n_estimators, n_jobs):
    "RandomForestClassifier."
    rf = RandomForestClassifier(max_depth=max_depth,
                                n_estimators=n_estimators,
                                n_jobs=n_jobs)
    rf.fit(X, y)

    initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
    onx = convert_sklearn(rf, initial_types=initial_types,
                          options={RandomForestClassifier: {
                              'zipmap': False}})
    content = onx.SerializeToString()
    sess = InferenceSession(content)
    outputs = [o.name for o in sess.get_outputs()]

    def predict_skl_predict(X, model=rf):
        return model.predict(X)

    def predict_skl_predict_proba(X, model=rf):
        return model.predict_proba(X)

    def predict_onnxrt_predict(X, sess=sess):
        return sess.run(outputs[:1], {'X': X})[0]

    def predict_onnxrt_predict_proba(X, sess=sess):
        return sess.run(outputs[1:], {'X': X})[0]

    return {
        'predict': (predict_skl_predict, predict_onnxrt_predict),
        'predict_proba': (predict_skl_predict_proba,
                          predict_onnxrt_predict_proba)
    }
def prepare(cls, model, device=None, **kwargs):
    """
    Loads the model and creates a :class:`onnxruntime.InferenceSession`
    ready to be used as a backend.

    :param model: ModelProto (returned by `onnx.load`),
        string for a filename or bytes for a serialized model
    :param device: requested device for the computation,
        None means the default one which depends on
        the compilation settings
    :param kwargs: see :class:`onnxruntime.SessionOptions`
    :return: :class:`onnxruntime.InferenceSession`
    """
    if isinstance(model, OnnxRuntimeBackendRep):
        return model
    elif isinstance(model, InferenceSession):
        return OnnxRuntimeBackendRep(model)
    elif isinstance(model, (str, bytes)):
        options = SessionOptions()
        for k, v in kwargs.items():
            if hasattr(options, k):
                setattr(options, k, v)
        inf = InferenceSession(model, options)
        # backend API is primarily used for ONNX test/validation.
        # As such, we should disable session.run() fallback
        # which may hide test failures.
        inf.disable_fallback()
        if device is not None and not cls.supports_device(device):
            raise RuntimeError(
                "Incompatible device expected '{0}', got '{1}'".format(
                    device, get_device()))
        return cls.prepare(inf, device, **kwargs)
    else:
        # type: ModelProto
        check_model(model)
        serialized = model.SerializeToString()
        return cls.prepare(serialized, device, **kwargs)
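# Hedged usage sketch of the standard ONNX backend API implemented by
# `prepare` above; onnxruntime ships such an implementation as
# onnxruntime.backend. "model.onnx" is an illustrative path.
import onnx
import onnxruntime.backend as backend

model = onnx.load("model.onnx")
rep = backend.prepare(model, "CPU")
# outputs = rep.run(inputs)  # inputs: numpy arrays matching the model inputs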
def create_ort_session(model_path, use_gpu):
    from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions
    from onnxruntime import __version__ as ort_version
    from onnxruntime import get_available_providers

    sess_options = SessionOptions()
    sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
    execution_providers = (["CUDAExecutionProvider", "CPUExecutionProvider"]
                           if use_gpu else ["CPUExecutionProvider"])
    if use_gpu:
        if "CUDAExecutionProvider" not in get_available_providers():
            raise RuntimeError(
                "CUDAExecutionProvider is not available for --use_gpu!")
        else:
            print("use CUDAExecutionProvider")
    ort_session = InferenceSession(model_path, sess_options,
                                   providers=execution_providers)
    return ort_session
def _test_lgbm(self, X, model, extra_config=None):
    # Create ONNX-ML model
    onnx_ml_model = convert_model(
        model, 'lgbm-onnxml',
        [("input", FloatTensorType([X.shape[0], X.shape[1]]))])[0]

    # Create ONNX model
    onnx_model = convert_model(
        model, 'lgbm-onnx',
        [("input", FloatTensorType([X.shape[0], X.shape[1]]))],
        without_onnx_ml=True)[0]

    try:
        from onnxruntime import InferenceSession
    except ImportError:
        # onnxruntime not installed (python 2.7)
        return

    # Get the predictions for the ONNX-ML model
    session = InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [session.get_outputs()[i].name
                    for i in range(len(session.get_outputs()))]
    onnx_ml_pred = [[] for i in range(len(output_names))]
    inputs = {session.get_inputs()[0].name: X}
    pred = session.run(output_names, inputs)
    for i in range(len(output_names)):
        if output_names[i] == "label":
            onnx_ml_pred[1] = pred[i]
        else:
            onnx_ml_pred[0] = pred[i]

    # Get the predictions for the ONNX model
    session = InferenceSession(onnx_model.SerializeToString())
    onnx_pred = [[] for i in range(len(output_names))]
    pred = session.run(output_names, inputs)
    for i in range(len(output_names)):
        if output_names[i] == "label":
            onnx_pred[1] = pred[i]
        else:
            onnx_pred[0] = pred[i]

    return onnx_ml_pred, onnx_pred, output_names
def test_squeeze(self):
    x = numpy.random.randn(20, 1).astype(numpy.float32)
    y = numpy.squeeze(x)
    for opset in range(10, 20):
        if opset > TARGET_OPSET:
            continue
        with self.subTest(opset=opset):
            onx = OnnxSqueezeApi11('X', axes=[1], output_names=['Y'],
                                   op_version=opset)
            model_def = onx.to_onnx({'X': x.astype(numpy.float32)},
                                    target_opset=opset)
            got = InferenceSession(model_def.SerializeToString()).run(
                None, {'X': x})
            assert_almost_equal(y, got[0])
def test_onnx_example_pdist(self):
    x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))

    diff = OnnxSub('next_in', 'next', output_names=['diff'],
                   op_version=onnx.defs.onnx_opset_version())
    id_next = OnnxIdentity('next_in', output_names=['next_out'],
                           op_version=onnx.defs.onnx_opset_version())
    norm = OnnxReduceSumSquare(diff, output_names=['norm'], axes=[1],
                               op_version=onnx.defs.onnx_opset_version())
    flat = OnnxSqueeze(norm, output_names=['scan_out'], axes=[1],
                       op_version=onnx.defs.onnx_opset_version())
    scan_body = id_next.to_onnx(
        OrderedDict([('next_in', x), ('next', FloatTensorType())]),
        outputs=[('next_out', FloatTensorType([3, 2])),
                 ('scan_out', FloatTensorType([3]))],
        other_outputs=[flat])

    sess = InferenceSession(scan_body.SerializeToString())
    res = sess.run(None, {'next_in': x, 'next': x[:1]})
    assert_almost_equal(x, res[0])
    exp = np.array([0., 18., 20.], dtype=np.float32)
    assert_almost_equal(exp, res[1])

    node = OnnxScan('x', 'x', output_names=['y', 'z'],
                    num_scan_inputs=1, body=scan_body.graph,
                    op_version=onnx.defs.onnx_opset_version())
    model_def = node.to_onnx({'x': x},
                             outputs=[('y', FloatTensorType([3, 2])),
                                      ('z', FloatTensorType([3, 3]))])
    try:
        onnx.checker.check_model(model_def)
    except ValidationError as e:
        if StrictVersion(onnx__version__) <= StrictVersion("1.5.0"):
            warnings.warn(str(e))
        else:
            raise

    sess = InferenceSession(model_def.SerializeToString())
    res = sess.run(None, {'x': x})
    exp = squareform(pdist(x, metric="sqeuclidean"))
    assert_almost_equal(x, res[0])
    assert_almost_equal(exp, res[1])
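# Numpy sketch of what the Scan body above computes: every iteration receives
# one row `next` and emits the squared Euclidean distances from that row to
# all rows of `next_in`; stacking the iterations yields
# squareform(pdist(x, metric="sqeuclidean")).
import numpy as np

x = np.array([1, 2, 4, 5, 5, 4], dtype=np.float32).reshape((3, 2))
exp = np.array([((x - row) ** 2).sum(axis=1) for row in x])
# First row: [0., 18., 20.], matching the expected values in the test above.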
def test_unsqueeze(self):
    x = numpy.random.randn(1, 3, 1, 5).astype(numpy.float32)
    y = numpy.expand_dims(x, axis=-2)
    for opset in (10, 11, 12, 13):
        if opset > TARGET_OPSET:
            continue
        with self.subTest(opset=opset):
            onx = OnnxUnsqueezeApi11('X', axes=[-2], output_names=['Y'],
                                     op_version=opset)
            model_def = onx.to_onnx({'X': x.astype(numpy.float32)},
                                    target_opset=opset)
            got = InferenceSession(model_def.SerializeToString()).run(
                None, {'X': x})
            assert_almost_equal(y, got[0])
def _predict(session: rt.InferenceSession, data: pd.DataFrame) -> pd.Series:
    def _correctly_typed_column(column: pd.Series) -> pd.Series:
        if column.dtype == 'float64':
            return column.astype(np.float32)
        return column

    def _correctly_shaped_values(values):
        return values.reshape((values.shape[0], 1))

    inputs = {
        c: _correctly_shaped_values(_correctly_typed_column(data[c]).values)
        for c in data.columns
    }

    return pd.Series(session.run(None, inputs)[0].reshape(-1),
                     index=data.index)
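# Hypothetical call of _predict (the model path and column names are
# illustrative): every DataFrame column becomes one (n, 1) input tensor
# named after the column.
import numpy as np
import pandas as pd
import onnxruntime as rt

session = rt.InferenceSession("pipeline.onnx",
                              providers=["CPUExecutionProvider"])
frame = pd.DataFrame({"age": [33.0, 48.0], "income": [1200.0, 3400.0]})
print(_predict(session, frame))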
def test_cascade_scaler(self):

    def generate_onnx_graph(dim, nbnode, input_name='X1', opv=1):
        i1 = input_name
        scale = list(np.ones((1, dim)).ravel())
        for i in range(nbnode - 1):
            i2 = list(map(float,
                          np.ones((1, dim)).astype(np.float32).ravel()))
            node = OnnxScaler(i1, offset=i2, scale=scale, op_version=opv)
            i1 = node
        i2 = list(map(float, np.ones((1, dim)).astype(np.float32).ravel()))
        node = OnnxScaler(i1, offset=i2, scale=scale,
                          output_names=['Y'], op_version=opv)
        onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))],
                           outputs=[('Y', FloatTensorType((None, dim)))])
        return onx

    exp = [np.zeros((1, 5)), np.zeros((1, 5)),
           np.zeros((1, 5)), np.zeros((1, 5))]
    for opv in (1, 2, 3, None):
        for i, nbnode in enumerate((1, 2, 3, 100)):
            onx = generate_onnx_graph(5, nbnode, opv=opv)
            as_string = onx.SerializeToString()
            try:
                ort = InferenceSession(as_string)
            except InvalidGraph as e:
                if opv in (3, ):
                    continue
                if opv is not None and opv >= onnx_opset_version():
                    continue
                raise AssertionError(
                    "Unable to load opv={}\n---\n{}\n---".format(
                        opv, onx)) from e
            X = (np.ones((1, 5)) * nbnode).astype(np.float32)
            res_out = ort.run(None, {'X1': X})
            assert len(res_out) == 1
            res = res_out[0]
            assert_almost_equal(exp[i], res)

    dim = 10
    onx = generate_onnx_graph(dim, 300)
    as_string = onx.SerializeToString()
    ort = InferenceSession(as_string)
    X = (np.ones((1, dim)) * nbnode).astype(np.float32)
    res_out = ort.run(None, {'X1': X})
    assert len(res_out) == 1
    res = res_out[0]
    assert res.shape[1] == dim
def test_batch_normalization(self):

    def _batchnorm_test_mode(x, s, bias, mean, var, epsilon=1e-5):
        dims_x = len(x.shape)
        dim_ones = (1, ) * (dims_x - 2)
        s = s.reshape(-1, *dim_ones)
        bias = bias.reshape(-1, *dim_ones)
        mean = mean.reshape(-1, *dim_ones)
        var = var.reshape(-1, *dim_ones)
        return s * (x - mean) / np.sqrt(var + epsilon) + bias

    # input size: (1, 2, 1, 3)
    x = np.array([[[[-1, 0, 1]], [[2, 3, 4]]]]).astype(np.float32)
    s = np.array([1.0, 1.5]).astype(np.float32)
    bias = np.array([0, 1]).astype(np.float32)
    mean = np.array([0, 3]).astype(np.float32)
    var = np.array([1, 1.5]).astype(np.float32)
    y = _batchnorm_test_mode(x, s, bias, mean, var).astype(np.float32)

    onx = OnnxBatchNormalization(
        'X', s, bias, mean, var, output_names=['Y'],
        op_version=TARGET_OPSET)
    model_def = onx.to_onnx({'X': x.astype(np.float32)},
                            target_opset=TARGET_OPSET)
    oinf = InferenceSession(model_def.SerializeToString())
    got = oinf.run(None, {'X': x})
    assert_almost_equal(y, got[0], decimal=5)

    # input size: (2, 3, 4, 5)
    x = np.random.randn(2, 3, 4, 5).astype(np.float32)
    s = np.random.randn(3).astype(np.float32)
    bias = np.random.randn(3).astype(np.float32)
    mean = np.random.randn(3).astype(np.float32)
    var = np.random.rand(3).astype(np.float32)
    epsilon = 1e-2
    y = _batchnorm_test_mode(x, s, bias, mean, var,
                             epsilon).astype(np.float32)

    onx = OnnxBatchNormalization(
        'X', s, bias, mean, var, output_names=['Y'],
        epsilon=epsilon, op_version=TARGET_OPSET)
    model_def = onx.to_onnx({'X': x.astype(np.float32)},
                            target_opset=TARGET_OPSET)
    oinf = InferenceSession(model_def.SerializeToString())
    got = oinf.run(None, {'X': x})
    assert_almost_equal(y, got[0], decimal=5)
def _predict_with_onnx(model, X):
    session = InferenceSession(model.SerializeToString())
    output_names = [s_output.name for s_output in session.get_outputs()]
    input_names = [s_input.name for s_input in session.get_inputs()]
    if len(input_names) > 1:
        raise RuntimeError(
            "Test expects one input. Found multiple inputs: %r."
            % input_names)
    input_name = input_names[0]
    if hasattr(X, "values"):
        return session.run(output_names, {input_name: X.values})[0][:, 0]
    return session.run(output_names, {input_name: X})[0][:, 0]
def __init__(
    self,
    module="",
    args=None,
    args_parser=None,
):
    self.device = "cpu"
    self.module = module
    self.args_parser = args_parser
    if not args:
        args = argparse.Namespace()
    self.args = args
    self.args = self.args_parser(self.args)
    if not os.path.isdir(self.args.output_dir):
        get_model_dir(
            output_dir=self.args.output_dir,
            add_ro=self.args.add_ro,
            module=self.module,
            onnx=self.args.onnx,
        )
    # onnx = True
    self.tokenizer = BertTokenizer.from_pretrained(self.args.output_dir,
                                                   do_lower_case=False)
    self.options = SessionOptions()
    # 1 thread ensures higher throughput overall
    # self.options.enable_profiling = True
    self.options.intra_op_num_threads = 1
    self.options.inter_op_num_threads = 1
    # self.options.log_severity_level = 1
    self.options.execution_mode = ExecutionMode.ORT_SEQUENTIAL
    # the stored optimized onnx model for the module given by the output_dir
    # value is pre-optimized for this hardware, so on-the-fly optimizations
    # could be turned off:
    # self.options.graph_optimization_level = \
    #     rt.GraphOptimizationLevel.ORT_DISABLE_ALL
    # still faster on the fly
    self.model_quant = os.path.join(self.args.output_dir,
                                    "converted-optimized.onnx")
    self.session = InferenceSession(self.model_quant, self.options)
def convert_to_onnx(self, onnx_output_dir=None, set_onnx_arg=True):
    """Convert the model to ONNX format and save to output_dir

    Args:
        onnx_output_dir (str, optional): If specified, ONNX model will be
            saved to output_dir (else args.output_dir will be used).
            Defaults to None.
        set_onnx_arg (bool, optional): Updates the model args to set
            onnx=True. Defaults to True.
    """  # noqa
    if not onnx_output_dir:
        onnx_output_dir = os.path.join(self.options.output_dir,
                                       self.options.model_type,
                                       self.options.model_name, "onnx")
    os.makedirs(onnx_output_dir, exist_ok=True)

    if not os.listdir(onnx_output_dir):
        onnx_model_name = os.path.join(onnx_output_dir, "onnx_model.onnx")
        with tempfile.TemporaryDirectory() as temp_dir:
            basedir = os.path.basename(temp_dir)
            temp_dir = os.path.join(self.options.output_dir, basedir)
            self.save_model(output_dir=temp_dir, model=self.model)
            convert(
                framework="pt",
                model=temp_dir,
                tokenizer=self.tokenizer,
                output=Path(onnx_model_name),
                pipeline_name="ner",
                opset=11,
            )
        self.tokenizer.save_pretrained(onnx_output_dir)
        self.config.save_pretrained(onnx_output_dir)

    onnx_options = SessionOptions()
    use_cuda = self._device.type != 'cpu'
    onnx_execution_provider = ("CUDAExecutionProvider" if use_cuda
                               else "CPUExecutionProvider")
    onnx_options.intra_op_num_threads = 1
    onnx_options.execution_mode = ExecutionMode.ORT_SEQUENTIAL

    onnx_model_path = os.path.join(onnx_output_dir, "onnx_model.onnx")
    if self.options.dynamic_quantize:
        # Append "-quantized" at the end of the model's name
        quantized_model_path = generate_identified_filename(
            Path(onnx_model_path), "-quantized")
        quantize_dynamic(Path(onnx_model_path), quantized_model_path)
        onnx_model_path = quantized_model_path.as_posix()
    return InferenceSession(onnx_model_path, onnx_options,
                            providers=[onnx_execution_provider])
def verify_onnx(model: T5EncoderDecoderInit, ort_session: InferenceSession,
                device: torch.device, max_cases=4):
    """Compare the results from PyTorch and OnnxRuntime to verify that
    the ONNX model is good."""
    ort_inputs = ort_session.get_inputs()
    use_decoder_input_ids = len(ort_inputs) == 3

    test_cases = [(4, 11), (1, 2), (3, 1), (8, 5)]
    test_cases_max_diff = []
    for (batch_size, encode_sequence_length) in test_cases[:max_cases]:
        inputs = T5EncoderDecoderInitInputs.create_dummy(
            model.config, batch_size, encode_sequence_length,
            use_decoder_input_ids=use_decoder_input_ids, device=device)

        ort_outputs = T5EncoderDecoderInitHelper.onnxruntime_inference(
            ort_session, inputs)

        # Run inference of PyTorch model
        input_list = inputs.to_list()
        torch_outputs = model(*input_list)

        assert torch_outputs[0].cpu().numpy().shape == ort_outputs[0].shape
        max_diff = numpy.amax(
            numpy.abs(torch_outputs[0].cpu().numpy() - ort_outputs[0]))
        logger.debug(f"logits max_diff={max_diff}")
        max_diff_all = max_diff

        assert torch_outputs[1].cpu().numpy().shape == ort_outputs[1].shape
        max_diff = numpy.amax(
            numpy.abs(torch_outputs[1].cpu().numpy() - ort_outputs[1]))
        logger.debug(f"encoder_hidden_states max_diff={max_diff}")
        max_diff_all = max(max_diff_all, max_diff)

        for i in range(2 * model.config.num_layers):
            max_diff = numpy.amax(
                numpy.abs(torch_outputs[2][i].cpu().numpy()
                          - ort_outputs[2 + i]))
            logger.debug(f"self attention past state {i} max_diff={max_diff}")
            max_diff_all = max(max_diff_all, max_diff)

        for i in range(2 * model.config.num_layers):
            max_diff = numpy.amax(
                numpy.abs(torch_outputs[3][i].cpu().numpy()
                          - ort_outputs[2 + 2 * model.config.num_layers + i]))
            logger.debug(
                f"cross attention past state {i} max_diff={max_diff}")
            max_diff_all = max(max_diff_all, max_diff)

        test_cases_max_diff.append(max_diff_all)
        logger.info(
            f"batch_size={batch_size} "
            f"encode_sequence_length={encode_sequence_length}, "
            f"max_diff={max_diff_all}")

    return max(test_cases_max_diff)
class NegLogLearningLoss(BaseLearningLoss):
    """
    Implements the negative log loss
    `loss(yt, yp) = -(1 - yt) \\log(1 - yp) - yt \\log(yp)`.
    This only works for binary classification, where *yp* is the
    predicted probability and *yt* is the expected probability.
    *yt* is expected to be binary, *yp* is a matrix with two columns
    whose sum on every line is 1.
    However, this loss is usually applied after a softmax function;
    the gradient is computed directly from the loss with respect to
    the raw scores, before they go through the softmax function
    (see class `Log
    <https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/
    linear_model/_sgd_fast.pyx#L236>`_).

    :param eps: clipping value for probabilities,
        avoids computing `log(0)`
    :param probability_function: function to convert raw scores into
        probabilities, default value is `sigmoid` for
        a logistic regression
    """

    def __init__(self, eps=1e-5, probability_function='sigmoid'):
        BaseLearningLoss.__init__(self)
        self.eps = eps
        self.probability_function = probability_function

    def build_onnx_function(self, opset, device, weight_name):
        so = SessionOptions()
        so.log_severity_level = 4

        # loss_grad
        fct_name = f"grad_{self.probability_function}_neg_log_loss_error"
        self.loss_grad_onnx_ = function_onnx_graph(
            fct_name, target_opset=opset, weight_name=weight_name,
            eps=self.eps)
        self.loss_grad_sess_ = InferenceSession(
            self.loss_grad_onnx_.SerializeToString(), so,
            providers=device_to_providers(device))
        self.loss_grad_sess_bind_ = (
            self.loss_grad_sess_.io_binding()._iobinding)

        # score
        self.build_onnx_score_function(opset, device, weight_name)
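# Minimal numpy sketch of the loss documented above; it mirrors the formula,
# not the exact ONNX graph produced by function_onnx_graph, and the helper
# names are illustrative.
import numpy as np

def _sigmoid(raw):
    return 1.0 / (1.0 + np.exp(-raw))

def _neg_log_loss(y_true, raw_score, eps=1e-5):
    # Clip probabilities to avoid log(0), as the `eps` parameter does above.
    y_prob = np.clip(_sigmoid(raw_score), eps, 1 - eps)
    return -(1 - y_true) * np.log(1 - y_prob) - y_true * np.log(y_prob)

# With a sigmoid, the gradient of this loss with respect to the raw score
# simplifies to (y_prob - y_true), which is what a fused
# "grad_sigmoid_neg_log_loss_error" graph can return alongside the loss.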
def test_cascade_add(self):

    def generate_onnx_graph(dim, nbnode, input_name='X1', opv=None):
        i1 = input_name
        for i in range(nbnode - 1):
            i2 = (np.ones((1, dim)) * nbnode * 10).astype(np.float32)
            node = OnnxAdd(i1, i2, op_version=opv)
            i1 = node
        i2 = (np.ones((1, dim)) * nbnode * 10).astype(np.float32)
        node = OnnxAdd(i1, i2, output_names=['Y'], op_version=opv)
        onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))],
                           outputs=[('Y', FloatTensorType())],
                           target_opset=opv)
        return onx

    exp = [np.array([[11., 11., 11., 11., 11.]]),
           np.array([[42., 42., 42., 42., 42.]]),
           np.array([[93., 93., 93., 93., 93.]]),
           np.array([[100100., 100100., 100100., 100100., 100100.]])]
    for opv in ({'': 10}, 9, 10, 11, 12, onnx_opset_version()):
        if isinstance(opv, dict):
            if opv[''] > get_latest_tested_opset_version():
                continue
        elif opv is not None and opv > get_latest_tested_opset_version():
            continue
        for i, nbnode in enumerate((1, 2, 3, 100)):
            onx = generate_onnx_graph(5, nbnode, opv=opv)
            as_string = onx.SerializeToString()
            try:
                ort = InferenceSession(as_string)
            except InvalidGraph as e:
                latest = opv[''] if isinstance(opv, dict) else opv
                if latest is not None and latest >= onnx_opset_version():
                    continue
                raise AssertionError(
                    "Unable to load opv={}\n---\n{}\n---".format(
                        opv, onx)) from e
            X = (np.ones((1, 5)) * nbnode).astype(np.float32)
            res_out = ort.run(None, {'X1': X})
            assert len(res_out) == 1
            res = res_out[0]
            assert_almost_equal(exp[i], res)

    dim = 10
    onx = generate_onnx_graph(dim, 300, opv=11)
    as_string = onx.SerializeToString()
    ort = InferenceSession(as_string)
    X = (np.ones((1, dim)) * nbnode).astype(np.float32)
    res_out = ort.run(None, {'X1': X})
    assert len(res_out) == 1
    res = res_out[0]
    assert res.shape[1] == dim
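# Sanity check for the expected values in the test above: an input of
# ones * nbnode passes through nbnode Add nodes, each adding
# ones * nbnode * 10, so every component equals nbnode + 10 * nbnode ** 2.
for nbnode, expected in zip((1, 2, 3, 100), (11., 42., 93., 100100.)):
    assert nbnode + 10 * nbnode ** 2 == expected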