def _provider_name_to_device_type(provider_name): if provider_name == 'CPUExecutionProvider': return OrtDevice.cpu() if provider_name == 'CUDAExecutionProvider': # pragma: no cover return OrtDevice.cuda() raise ValueError( # pragma: no cover f'Unexpected provider name {provider_name!r}.')
def device_name(device):
    """
    Returns the device name of a device.

    :param device: OrtDevice
    :return: string
    """
    # Compare the device's type against the known OrtDevice constants.
    dev_type = device.device_type()
    if dev_type == OrtDevice.cpu():
        return 'Cpu'
    if dev_type == OrtDevice.cuda():  # pragma: no cover
        return 'Gpu'
    raise RuntimeError(  # pragma: no cover
        f"Unexpected value for device type {device.device_type()!r}.")
def get_ort_device_type(device):
    """
    Converts device into device type.

    :param device: string (``'cpu'``/``'cuda'``) or an object
        exposing a ``device_type`` method
    :return: device type
    :raises ValueError: unknown device string or device type value
    :raises TypeError: object without a ``device_type`` method
    """
    if isinstance(device, str):
        # Known string spellings map directly onto the C_OrtDevice factories.
        factories = {'cuda': C_OrtDevice.cuda, 'cpu': C_OrtDevice.cpu}
        if device in factories:
            return factories[device]()
        raise ValueError(  # pragma: no cover
            f'Unsupported device type: {device!r}.')
    if not hasattr(device, 'device_type'):
        raise TypeError(f'Unsupported device type: {type(device)!r}.')
    # device_type() may return either a string or an integer code.
    dtype = device.device_type()
    if dtype in ('cuda', 1):
        return C_OrtDevice.cuda()
    if dtype in ('cpu', 0):
        return C_OrtDevice.cpu()
    raise ValueError(  # pragma: no cover
        f'Unsupported device type: {dtype!r}.')
def fcts_model(X, y, n_jobs):
    """
    Fits a :epkg:`LinearRegression` on (X, y) and returns one prediction
    function per runtime: scikit-learn, onnxruntime (``run``),
    mlprodict python runtime, and onnxruntime with IO binding.

    :param X: training features, 2D numpy array
    :param y: training target
    :param n_jobs: forwarded to :epkg:`LinearRegression`
    :return: ``{'predict': {'skl': f, 'ort': f, 'numpy': f, 'ort-bind': f}}``
    """
    model = LinearRegression(n_jobs=n_jobs)
    model.fit(X, y)

    # Conversion to ONNX, forcing the generic decomposition of the
    # regressor (black_op) rather than the LinearRegressor operator.
    initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
    onx = to_onnx(model, initial_types=initial_types,
                  black_op={'LinearRegressor'})
    sess = InferenceSession(onx.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    outputs = [o.name for o in sess.get_outputs()]
    oinf = OnnxInference(onx, runtime="python")
    # IO binding reuses the same binding object across calls.
    bind = SessionIOBinding(sess._sess)
    # ort_device = C_OrtDevice.cpu()
    ort_device = C_OrtDevice(
        C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)

    def predict_skl_predict(X, model=model):
        # scikit-learn baseline
        return model.predict(X)

    def predict_onnxrt_predict(X, sess=sess):
        # onnxruntime with the regular python API
        return sess.run(outputs[:1], {'X': X})[0]

    def predict_onnx_inference(X, oinf=oinf):
        # mlprodict pure python runtime
        return oinf.run({'X': X})["variable"]

    def predict_onnxrt_predict_bind(X, sess=sess, bind=bind,
                                    ort_device=ort_device):
        # onnxruntime with IO binding: binds the raw numpy buffer address,
        # which only works for contiguous arrays.
        if X.__array_interface__['strides'] is not None:
            raise RuntimeError("onnxruntime only supports contiguous arrays.")
        bind.bind_input('X', ort_device, X.dtype, X.shape,
                        X.__array_interface__['data'][0])
        bind.bind_output('variable', ort_device)
        sess._sess.run_with_iobinding(bind, None)
        ortvalues = bind.get_outputs()
        return ortvalues[0].numpy()

    return {'predict': {
        'skl': predict_skl_predict,
        'ort': predict_onnxrt_predict,
        'numpy': predict_onnx_inference,
        'ort-bind': predict_onnxrt_predict_bind
    }}
def test_bind_input_types(self):
    """
    Checks IO binding for every supported element type, on CPU and,
    when available, CUDA: an Identity model is run twice, once binding
    an OrtValue (`bind_ortvalue_input`) and once binding the raw
    buffer address (`bind_input`); both must return the input unchanged.
    """
    opset = onnx_opset_version()
    devices = [
        (C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0),
         ['CPUExecutionProvider'])]
    if "CUDAExecutionProvider" in onnxrt.get_all_providers():
        devices.append(
            (C_OrtDevice(C_OrtDevice.cuda(), C_OrtDevice.default_memory(), 0),
             ['CUDAExecutionProvider']))
    for device, provider in devices:
        for dtype in [np.float32, np.float64, np.int32, np.uint32,
                      np.int64, np.uint64, np.int16, np.uint16,
                      np.int8, np.uint8, np.float16, np.bool_]:
            with self.subTest(dtype=dtype, device=str(device)):
                x = np.arange(8).reshape((-1, 2)).astype(dtype)
                proto_dtype = NP_TYPE_TO_TENSOR_TYPE[x.dtype]
                X = helper.make_tensor_value_info(
                    'X', proto_dtype, [None, x.shape[1]])
                Y = helper.make_tensor_value_info(
                    'Y', proto_dtype, [None, x.shape[1]])
                # inference
                node_add = helper.make_node('Identity', ['X'], ['Y'])
                # graph
                graph_def = helper.make_graph([node_add], 'lr', [X], [Y], [])
                model_def = helper.make_model(
                    graph_def, producer_name='dummy', ir_version=7,
                    producer_version="0",
                    opset_imports=[helper.make_operatorsetid('', opset)])
                sess = onnxrt.InferenceSession(
                    model_def.SerializeToString(), providers=provider)
                # First run: bind an existing OrtValue as input.
                bind = SessionIOBinding(sess._sess)
                ort_value = C_OrtValue.ortvalue_from_numpy(x, device)
                bind.bind_ortvalue_input('X', ort_value)
                bind.bind_output('Y', device)
                sess._sess.run_with_iobinding(bind, None)
                ortvalue = bind.get_outputs()[0]
                y = ortvalue.numpy()
                assert_almost_equal(x, y)
                # Second run: bind the raw data pointer of the same buffer.
                bind = SessionIOBinding(sess._sess)
                bind.bind_input(
                    'X', device, dtype, x.shape, ort_value.data_ptr())
                bind.bind_output('Y', device)
                sess._sess.run_with_iobinding(bind, None)
                ortvalue = bind.get_outputs()[0]
                y = ortvalue.numpy()
                assert_almost_equal(x, y)
def benchmark(name, onx, fct_numpy, *args,
              dims=(1, 10, 100, 200, 500, 1000, 2000, 10000)):
    """
    Measures execution time of several onnxruntime running modes against
    a numpy implementation, for increasing batch sizes.

    :param name: label stored in every result row
    :param onx: ONNX model (protobuf object exposing ``SerializeToString``)
    :param fct_numpy: numpy implementation of the same computation
    :param args: one numpy array per model input, reshaped to each dimension
    :param dims: batch sizes to benchmark
    :return: list of dictionaries (one per dimension and implementation)
    :raises RuntimeError: if the number of arrays does not match the
        number of model inputs
    """
    # providers must be explicit with recent onnxruntime releases;
    # every other session in this file already specifies it.
    sess = InferenceSession(onx.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
    names = [i.name for i in sess.get_inputs()]
    out_names = [o.name for o in sess.get_outputs()]
    if len(names) != len(args):
        raise RuntimeError(f"Size mismatch {len(names)} != {len(args)}.")
    rows = []
    for dim in tqdm(dims):
        new_args = [reshape(a, dim) for a in args]
        ortvalues = [C_OrtValue.ortvalue_from_numpy(a, device)
                     for a in new_args]

        # numpy baseline
        # (default arguments bind the loop variables eagerly, B023)
        ms = measure_time(lambda na=new_args: fct_numpy(*na),
                          repeat=50, number=100)
        ms.update(dict(name=name, impl='numpy', dim=dim))
        rows.append(ms)

        # regular onnxruntime API
        inps = dict(zip(names, new_args))
        ms = measure_time(lambda inps=inps: sess.run(None, inps))
        ms.update(dict(name=name, impl='sess', dim=dim))
        rows.append(ms)

        # IO binding: bind the inputs then run
        bind = SessionIOBinding(sess._sess)
        ms = measure_time(
            lambda bind=bind, ortvalues=ortvalues: bind_and_run(
                sess._sess, bind, names, ortvalues, out_names, device))
        ms.update(dict(name=name, impl='bind_run', dim=dim))
        rows.append(ms)

        # IO binding: run only, inputs already bound by the previous call
        ms = measure_time(
            lambda bind=bind: nobind_just_run(sess._sess, bind))
        ms.update(dict(name=name, impl='run', dim=dim))
        rows.append(ms)
    return rows
def get_ort_device(device):
    """
    Converts device into :epkg:`C_OrtDevice`.

    :param device: any type
    :return: :epkg:`C_OrtDevice`

    Example:

    ::

        get_ort_device('cpu')
        get_ort_device('gpu')
        get_ort_device('cuda')
        get_ort_device('cuda:0')
    """
    if isinstance(device, C_OrtDevice):
        # Already a device, nothing to convert.
        return device
    if not isinstance(device, str):
        raise TypeError(  # pragma: no cover
            "Unable to interpret type %r, (%r) as de device." % (
                type(device), device))

    def _make(device_type, index):
        # Builds a device on the default memory for the given type/index.
        return C_OrtDevice(device_type, C_OrtDevice.default_memory(), index)

    if device == 'cpu':
        return _make(C_OrtDevice.cpu(), 0)
    if device in {'gpu', 'cuda:0', 'cuda', 'gpu:0'}:
        return _make(C_OrtDevice.cuda(), 0)
    # 'gpu:<i>' / 'cuda:<i>' select a specific GPU index.
    for prefix in ('gpu:', 'cuda:'):
        if device.startswith(prefix):
            return _make(C_OrtDevice.cuda(), int(device[len(prefix):]))
    raise ValueError(  # pragma: no cover
        "Unable to interpret string %r as a device." % device)
def ort_device_to_string(device):
    """
    Returns a string representing the device.
    Opposite of function @see fn get_ort_device.

    :param device: see :epkg:`C_OrtDevice`
    :return: string
    """
    if not isinstance(device, C_OrtDevice):
        raise TypeError(
            f"device must be of type C_OrtDevice not {type(device)!r}.")
    ty = device.device_type()
    if ty == C_OrtDevice.cpu():
        name = 'cpu'
    elif ty == C_OrtDevice.cuda():
        name = 'cuda'
    else:
        raise NotImplementedError(  # pragma: no cover
            f"Unable to guess device for {device!r} and type={ty!r}.")
    # Index 0 is implicit, any other index is appended as 'name:idx'.
    index = device.device_id()
    return name if index == 0 else "%s:%d" % (name, index)
def forward_no_training(self, exc=None, verbose=False):
    """
    Trains a linear regression, converts it to ONNX, and checks that
    :class:`OrtGradientForwardBackward` produces the same predictions
    through every supported input container: list of OrtValues,
    OrtValueVector and plain numpy arrays.

    :param exc: if None, defaults to True when not run as a script;
        when True, also checks that a bad provider raises ValueError
    :param verbose: prints progress messages
    """
    if exc is None:
        exc = __name__ != '__main__'
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtDevice as C_OrtDevice, OrtMemType)
    from onnxruntime.capi._pybind_state import (OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, _ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    # coef_ reshaped so the converter emits a 2D initializer
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})

    # starts testing
    if verbose:
        print("[forward_no_training] start testing")
    if exc:
        if verbose:
            print("[forward_no_training] check exception")
        # an unknown provider must be rejected
        self.assertRaise(
            lambda: OrtGradientForwardBackward(
                onx, debug=True, enable_logging=True, providers=['NONE']),
            ValueError)
    if verbose:
        print("[forward_no_training] instantiate")
    forback = OrtGradientForwardBackward(
        onx, debug=True, enable_logging=True)
    self.assertEqual(repr(forback), "OrtGradientForwardBackward(...)")
    # check the generated class exposes the expected graph metadata
    self.assertTrue(hasattr(forback, 'cls_type_'))
    self.assertEqual(forback.cls_type_._onx_inp,
                     ['X', 'coef', 'intercept'])
    self.assertEqual(forback.cls_type_._onx_out,
                     ['X_grad', 'coef_grad', 'intercept_grad'])
    self.assertEqual(forback.cls_type_._weights_to_train,
                     ['coef', 'intercept'])
    self.assertEqual(forback.cls_type_._grad_input_names,
                     ['X', 'coef', 'intercept'])
    self.assertEqual(forback.cls_type_._input_names, ['X'])
    self.assertEqual(forback.cls_type_._bw_fetches_names,
                     ['X_grad', 'coef_grad', 'intercept_grad'])
    self.assertEqual(forback.cls_type_._output_names, ['variable'])

    if verbose:
        print("[forward_no_training] expected prediction")
    expected = reg.predict(X_test)
    coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
    intercept = numpy.array([reg.intercept_], dtype=numpy.float32)

    if verbose:
        print("[forward_no_training] InferenceSession")
    providers = device_to_providers('cpu')
    sess0 = InferenceSession(onx.SerializeToString(), providers=providers)
    inames = [i.name for i in sess0.get_inputs()]  # pylint: disable=E1101
    self.assertEqual(inames, ['X'])
    got = sess0.run(None, {'X': X_test})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    if verbose:
        print("[forward_no_training] evaluation")
    # the evaluation session takes the weights as explicit inputs
    sess_eval = forback.cls_type_._sess_eval  # pylint: disable=E1101
    inames = [i.name for i in sess_eval.get_inputs()]
    self.assertEqual(inames, ['X', 'coef', 'intercept'])
    got = sess_eval.run(None, {'X': X_test, 'coef': coef,
                               'intercept': intercept})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValue
    if verbose:
        print("[forward_no_training] OrtValue")
    inst = forback.new_instance()
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

    # list of OrtValues
    inputs = []
    for a in [X_test, coef, intercept]:
        inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
    got_ort = inst.forward(inputs)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValueVector
    if verbose:
        print("[forward_no_training] OrtValueVector")
    inputs = OrtValueVector()
    for a in [X_test, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)

    # numpy
    if verbose:
        print("[forward_no_training] numpy")
    inputs = [X_test, coef, intercept]
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)
    if verbose:
        print("[forward_no_training] end")
def forward_training(self, model, debug=False, n_classes=3, add_print=False):
    """
    Trains *model* (classifier or regressor), converts it to ONNX and
    exercises one full forward/backward cycle of
    :class:`OrtGradientForwardBackward` with every supported input
    container (OrtValueVector, list of OrtValues, numpy arrays).

    :param model: scikit-learn estimator; a classifier is detected via
        ``predict_proba`` and converted with ``zipmap`` disabled
    :param debug: if True, dumps intermediate ONNX graphs to a temp folder
    :param n_classes: number of classes for the classification dataset
    :param add_print: prints separators around forward/backward calls
    """
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtMemType, OrtDevice as C_OrtDevice)
    from onnxruntime.capi._pybind_state import (OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward

    def to_proba(yt):
        # one-hot encodes integer labels into a float32 matrix
        mx = yt.max() + 1
        new_yt = numpy.zeros((yt.shape[0], mx), dtype=numpy.float32)
        for i, y in enumerate(yt):
            new_yt[i, y] = 1
        return new_yt

    # classification or regression dataset depending on the model kind
    if hasattr(model.__class__, 'predict_proba'):
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10, n_classes=n_classes, n_informative=7)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
    else:
        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    reg = model
    reg.fit(X_train, y_train)
    # needs if skl2onnx<1.10.4
    # reg.coef_ = reg.coef_.reshape((1, -1))
    # reg.intercept_ = reg.intercept_.reshape((-1, ))
    if hasattr(model.__class__, 'predict_proba'):
        # keep only the probability output of the classifier graph
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearClassifier'},
                      options={'zipmap': False})
        onx = select_model_inputs_outputs(
            onx, outputs=[onx.graph.output[1].name])
    else:
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearRegressor'})

    # remove batch possibility
    #onx.graph.input[0].type.tensor_type.shape.dim[0].dim_value = 0
    #onx.graph.input[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"
    #onx.graph.output[0].type.tensor_type.shape.dim[0].dim_value = 0
    #onx.graph.output[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"
    providers = device_to_providers('cpu')
    sess = InferenceSession(onx.SerializeToString(), providers=providers)
    # sanity check: the converted model runs at all
    sess.run(None, {'X': X_test[:1]})

    # starts testing
    forback = OrtGradientForwardBackward(
        onx, debug=True, enable_logging=True)
    if debug:
        # dump the original, training and pre-gradient graphs for inspection
        n = model.__class__.__name__
        temp = get_temp_folder(__file__, f"temp_forward_training_{n}")
        with open(os.path.join(temp, f"model_{n}.onnx"), "wb") as f:
            f.write(onx.SerializeToString())
        with open(os.path.join(temp, f"fw_train_{n}.onnx"), "wb") as f:
            f.write(forback.cls_type_._trained_onnx.SerializeToString())
        with open(os.path.join(temp, f"fw_pre_{n}.onnx"), "wb") as f:
            gr = forback.cls_type_._optimized_pre_grad_model
            f.write(gr.SerializeToString())

    if hasattr(model.__class__, 'predict_proba'):
        expected = reg.predict_proba(X_test)
        coef = reg.coef_.astype(numpy.float32).T
        intercept = reg.intercept_.astype(numpy.float32)
        # only one observation
        X_test1 = X_test[:1]
        y_test = to_proba(y_test).astype(numpy.float32)
        y_test1 = y_test[:1]
        expected1 = expected[:1]
    else:
        expected = reg.predict(X_test)
        coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
        intercept = numpy.array([reg.intercept_], dtype=numpy.float32)
        # only one observation
        X_test1 = X_test[:1]
        y_test1 = y_test[0].reshape((1, -1))
        expected1 = expected[:1]

    # OrtValueVector
    inst = forback.new_instance()
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

    if add_print:
        print("\n\n######################\nFORWARD")
    inputs = OrtValueVector()
    for a in [X_test1, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs, training=True)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected1.ravel(), got[0].numpy().ravel(),
                          decimal=4)

    if add_print:
        print("\n\n######################\nBACKWARD")
    outputs = OrtValueVector()
    outputs.push_back(C_OrtValue.ortvalue_from_numpy(y_test1, device))
    got = inst.backward(outputs)
    # gradients for X, coef and intercept
    self.assertEqual(len(got), 3)
    if add_print:
        print("\n######################\nEND\n")

    # OrtValueVectorN
    inputs = OrtValueVector()
    for a in [X_test, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs, training=True)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)
    outputs = OrtValueVector()
    outputs.push_back(
        C_OrtValue.ortvalue_from_numpy(y_test.reshape((1, -1)), device))
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)

    # list of OrtValues
    inputs = []
    for a in [X_test, coef, intercept]:
        inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
    got_ort = inst.forward(inputs, training=True)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)
    outputs = [
        C_OrtValue.ortvalue_from_numpy(y_test.reshape((1, -1)), device)]
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)

    # numpy
    inputs = [X_test, coef, intercept]
    got_ort = inst.forward(inputs, training=True)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)
    outputs = [y_test.reshape((1, -1))]
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)
def test_forward_no_training_pickle(self):
    """
    Checks that :class:`OrtGradientForwardBackward` survives a
    pickle round-trip: the restored object must expose the same graph
    metadata and produce the same forward predictions with lists of
    OrtValues, OrtValueVector and numpy arrays.
    """
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtMemType, OrtDevice as C_OrtDevice)
    from onnxruntime.capi._pybind_state import (OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, _ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    forback0 = OrtGradientForwardBackward(onx, debug=True)

    # pickle round-trip
    st = io.BytesIO()
    pickle.dump(forback0, st)
    st2 = io.BytesIO(st.getvalue())
    forback = pickle.load(st2)

    # the restored object must carry the same graph metadata
    self.assertTrue(hasattr(forback, 'cls_type_'))
    self.assertEqual(forback.cls_type_._onx_inp,
                     ['X', 'coef', 'intercept'])
    self.assertEqual(forback.cls_type_._onx_out,
                     ['X_grad', 'coef_grad', 'intercept_grad'])
    self.assertEqual(forback.cls_type_._weights_to_train,
                     ['coef', 'intercept'])
    self.assertEqual(forback.cls_type_._grad_input_names,
                     ['X', 'coef', 'intercept'])
    self.assertEqual(forback.cls_type_._input_names, ['X'])
    self.assertEqual(forback.cls_type_._bw_fetches_names,
                     ['X_grad', 'coef_grad', 'intercept_grad'])
    self.assertEqual(forback.cls_type_._output_names, ['variable'])

    expected = reg.predict(X_test)
    coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
    intercept = numpy.array([reg.intercept_], dtype=numpy.float32)

    providers = device_to_providers('cpu')
    sess0 = InferenceSession(onx.SerializeToString(), providers=providers)
    inames = [i.name for i in sess0.get_inputs()]
    self.assertEqual(inames, ['X'])
    got = sess0.run(None, {'X': X_test})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # the evaluation session of the restored object still works
    sess_eval = forback.cls_type_._sess_eval  # pylint: disable=W0212
    inames = [i.name for i in sess_eval.get_inputs()]
    self.assertEqual(inames, ['X', 'coef', 'intercept'])
    got = sess_eval.run(None, {'X': X_test, 'coef': coef,
                               'intercept': intercept})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValue
    inst = forback.new_instance()
    inputs = []
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)
    for a in [X_test, coef, intercept]:
        inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
    got_ort = inst.forward(inputs)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValueVector
    inputs = OrtValueVector()
    for a in [X_test, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)

    # numpy
    inputs = [X_test, coef, intercept]
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)
################################# # Profiling # +++++++++ # # Let's choose the device available on this machine. # batch dimension is set to 10. batch = 10 if get_device().upper() == 'GPU': ort_device = C_OrtDevice(C_OrtDevice.cuda(), C_OrtDevice.default_memory(), 0) provider = 'CUDAExecutionProvider' else: ort_device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0) provider = 'CPUExecutionProvider' print(f"provider = {provider!r}") #################################### # We load the graph. with open(filename, 'rb') as f: onx = onnx.load(f) ############################### # Create of the session. so = SessionOptions()
# Benchmark the low-level C API (`sess._sess.run`) with numpy inputs.
print('ort-c')
sess = InferenceSession(onx.SerializeToString(),
                        providers=['CPUExecutionProvider'])
ro = RunOptions()
output_names = [o.name for o in sess.get_outputs()]
obs = measure_time(lambda: sess._sess.run(output_names, {'X': X}, ro),
                   context=dict(sess=sess, X=X),
                   repeat=repeat, number=number)
obs['name'] = 'ort-c'
data.append(obs)

###################################
# onnxruntime: run_with_ort_values

# Same benchmark but the input is converted to an OrtValue once,
# outside of the measured loop.
print('ort-ov-c')
device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)
Xov = C_OrtValue.ortvalue_from_numpy(X, device)
sess = InferenceSession(onx.SerializeToString(),
                        providers=['CPUExecutionProvider'])
ro = RunOptions()
output_names = [o.name for o in sess.get_outputs()]
obs = measure_time(
    lambda: sess._sess.run_with_ort_values({'X': Xov}, output_names, ro),
    context=dict(sess=sess),
    repeat=repeat, number=number)
obs['name'] = 'ort-ov'
data.append(obs)