def _provider_name_to_device_type(provider_name):
    """
    Maps an execution provider name onto the matching device type.

    :param provider_name: ``'CPUExecutionProvider'`` or
        ``'CUDAExecutionProvider'``
    :return: result of ``OrtDevice.cpu()`` or ``OrtDevice.cuda()``
    :raises ValueError: if the provider name is neither of the two
    """
    if provider_name == 'CPUExecutionProvider':
        device_type = OrtDevice.cpu()
    elif provider_name == 'CUDAExecutionProvider':  # pragma: no cover
        device_type = OrtDevice.cuda()
    else:
        raise ValueError(  # pragma: no cover
            f'Unexpected provider name {provider_name!r}.')
    return device_type
def device_name(device):
    """
    Gives the short name of the device an OrtDevice refers to.

    :param device: OrtDevice
    :return: string, ``'Cpu'`` or ``'Gpu'``
    :raises RuntimeError: if the device type is neither CPU nor CUDA
    """
    kind = device.device_type()
    if kind == OrtDevice.cpu():
        return 'Cpu'
    if kind == OrtDevice.cuda():  # pragma: no cover
        return 'Gpu'
    raise RuntimeError(  # pragma: no cover
        f"Unexpected value for device type {kind!r}.")
def fcts_model(X, y, n_jobs):
    """
    Fits a LinearRegression, converts it to ONNX and returns the
    prediction functions to compare.

    :param X: training features, also used to infer the number of columns
    :param y: training target
    :param n_jobs: forwarded to ``LinearRegression``
    :return: dictionary ``{'predict': {implementation name: function}}``
        with keys ``'skl'``, ``'ort'``, ``'numpy'``, ``'ort-bind'``
    """
    model = LinearRegression(n_jobs=n_jobs)
    model.fit(X, y)

    input_type = FloatTensorType([None, X.shape[1]])
    onx = to_onnx(model, initial_types=[('X', input_type)],
                  black_op={'LinearRegressor'})
    serialized = onx.SerializeToString()
    sess = InferenceSession(serialized, providers=['CPUExecutionProvider'])
    outputs = [o.name for o in sess.get_outputs()]
    oinf = OnnxInference(onx, runtime="python")
    bind = SessionIOBinding(sess._sess)
    # ort_device = C_OrtDevice.cpu()
    ort_device = C_OrtDevice(
        C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)

    def predict_skl_predict(X, model=model):
        return model.predict(X)

    def predict_onnxrt_predict(X, sess=sess):
        first_output = sess.run(outputs[:1], {'X': X})
        return first_output[0]

    def predict_onnx_inference(X, oinf=oinf):
        results = oinf.run({'X': X})
        return results["variable"]

    def predict_onnxrt_predict_bind(X, sess=sess, bind=bind,
                                    ort_device=ort_device):
        interface = X.__array_interface__
        # strides is None only for C-contiguous arrays
        if interface['strides'] is not None:
            raise RuntimeError("onnxruntime only supports contiguous arrays.")
        # the input is bound through its raw memory address
        address = interface['data'][0]
        bind.bind_input('X', ort_device, X.dtype, X.shape, address)
        bind.bind_output('variable', ort_device)
        sess._sess.run_with_iobinding(bind, None)
        ortvalues = bind.get_outputs()
        return ortvalues[0].numpy()

    implementations = {
        'skl': predict_skl_predict,
        'ort': predict_onnxrt_predict,
        'numpy': predict_onnx_inference,
        'ort-bind': predict_onnxrt_predict_bind,
    }
    return {'predict': implementations}
def test_bind_input_types(self):
    """
    Runs an ``Identity`` model through ``SessionIOBinding`` for every
    tested element type, binding the input first as an OrtValue, then
    as a raw pointer, and checks the output equals the input.
    The CUDA device is added when ``CUDAExecutionProvider`` is listed
    by ``onnxrt.get_all_providers()``.
    """
    opset = onnx_opset_version()

    cpu_device = C_OrtDevice(
        C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
    configurations = [(cpu_device, ['CPUExecutionProvider'])]
    if "CUDAExecutionProvider" in onnxrt.get_all_providers():
        cuda_device = C_OrtDevice(
            C_OrtDevice.cuda(), C_OrtDevice.default_memory(), 0)
        configurations.append((cuda_device, ['CUDAExecutionProvider']))

    element_types = (
        np.float32, np.float64, np.int32, np.uint32,
        np.int64, np.uint64, np.int16, np.uint16,
        np.int8, np.uint8, np.float16, np.bool_)

    def run_and_fetch(sess, binding, device):
        # binds the output, runs the session, returns output 0 as numpy
        binding.bind_output('Y', device)
        sess._sess.run_with_iobinding(binding, None)
        return binding.get_outputs()[0].numpy()

    for device, provider in configurations:
        for dtype in element_types:
            with self.subTest(dtype=dtype, device=str(device)):
                x = np.arange(8).reshape((-1, 2)).astype(dtype)
                proto_dtype = NP_TYPE_TO_TENSOR_TYPE[x.dtype]

                X = helper.make_tensor_value_info(
                    'X', proto_dtype, [None, x.shape[1]])
                Y = helper.make_tensor_value_info(
                    'Y', proto_dtype, [None, x.shape[1]])

                # inference
                node_add = helper.make_node('Identity', ['X'], ['Y'])

                # graph
                graph_def = helper.make_graph(
                    [node_add], 'lr', [X], [Y], [])
                model_def = helper.make_model(
                    graph_def, producer_name='dummy', ir_version=7,
                    producer_version="0",
                    opset_imports=[helper.make_operatorsetid('', opset)])

                sess = onnxrt.InferenceSession(
                    model_def.SerializeToString(), providers=provider)

                # first pass: the input is bound as an OrtValue
                bind = SessionIOBinding(sess._sess)
                ort_value = C_OrtValue.ortvalue_from_numpy(x, device)
                bind.bind_ortvalue_input('X', ort_value)
                y = run_and_fetch(sess, bind, device)
                assert_almost_equal(x, y)

                # second pass: the input is bound through its pointer
                bind = SessionIOBinding(sess._sess)
                bind.bind_input('X', device, dtype, x.shape,
                                ort_value.data_ptr())
                y = run_and_fetch(sess, bind, device)
                assert_almost_equal(x, y)
def benchmark(name, onx, fct_numpy, *args,
              dims=(1, 10, 100, 200, 500, 1000, 2000, 10000)):
    """
    Measures four ways of computing the same function for several
    dimensions: the numpy function, ``InferenceSession.run``,
    binding + run and run only with an existing binding.

    :param name: name stored in every returned row
    :param onx: ONNX model, must expose ``SerializeToString``
    :param fct_numpy: numpy implementation, called as
        ``fct_numpy(*new_args)``
    :param args: one array per model input, each is passed to
        ``reshape(a, dim)`` before being measured
    :param dims: dimensions to test
    :return: list of dictionaries, one per implementation and
        dimension, the output of ``measure_time`` completed with
        keys ``name``, ``impl``, ``dim``
    :raises RuntimeError: if the number of arrays differs from the
        number of model inputs
    """
    # Every OrtValue below lives on a CPU device, so the session is pinned
    # to the CPU provider. onnxruntime>=1.9 requires *providers* to be
    # explicit on builds exposing more than the CPU provider and raises
    # ValueError otherwise.
    sess = InferenceSession(onx.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    device = C_OrtDevice(
        C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
    names = [i.name for i in sess.get_inputs()]
    out_names = [o.name for o in sess.get_outputs()]
    if len(names) != len(args):
        raise RuntimeError(f"Size mismatch {len(names)} != {len(args)}.")

    rows = []
    for dim in tqdm(dims):
        new_args = [reshape(a, dim) for a in args]
        ortvalues = [
            C_OrtValue.ortvalue_from_numpy(a, device)
            for a in new_args
        ]

        # numpy implementation
        ms = measure_time(lambda: fct_numpy(*new_args),
                          repeat=50, number=100)
        ms.update(dict(name=name, impl='numpy', dim=dim))
        rows.append(ms)

        # regular onnxruntime API, inputs given as numpy arrays
        inps = dict(zip(names, new_args))
        ms = measure_time(lambda: sess.run(None, inps))
        ms.update(dict(name=name, impl='sess', dim=dim))
        rows.append(ms)

        # binding inputs and outputs then running
        bind = SessionIOBinding(sess._sess)
        ms = measure_time(
            lambda: bind_and_run(
                sess._sess, bind, names, ortvalues, out_names, device))
        ms.update(dict(name=name, impl='bind_run', dim=dim))
        rows.append(ms)

        # running only, the binding created above is reused
        ms = measure_time(lambda: nobind_just_run(sess._sess, bind))
        ms.update(dict(name=name, impl='run', dim=dim))
        rows.append(ms)

    return rows
def ort_device_to_string(device):
    """
    Converts a device into a string such as ``'cpu'``, ``'cuda'``
    or ``'cuda:1'``. The index is omitted when it is 0.
    Opposite of function @see fn get_ort_device.

    :param device: see :epkg:`C_OrtDevice`
    :return: string
    :raises TypeError: if *device* is not a :epkg:`C_OrtDevice`
    :raises NotImplementedError: if the device type is neither
        CPU nor CUDA
    """
    if not isinstance(device, C_OrtDevice):
        raise TypeError(
            f"device must be of type C_OrtDevice not {type(device)!r}.")

    kind = device.device_type()
    if kind == C_OrtDevice.cpu():
        label = 'cpu'
    elif kind == C_OrtDevice.cuda():
        label = 'cuda'
    else:
        raise NotImplementedError(  # pragma: no cover
            f"Unable to guess device for {device!r} and type={kind!r}.")

    index = device.device_id()
    return label if index == 0 else "%s:%d" % (label, index)
def get_ort_device_type(device):
    """
    Converts device into device type.

    :param device: string (``'cpu'`` or ``'cuda'``) or any object
        with a method ``device_type()`` returning ``'cpu'``, ``0``,
        ``'cuda'`` or ``1``
    :return: device type, result of ``C_OrtDevice.cpu()`` or
        ``C_OrtDevice.cuda()``
    :raises TypeError: if *device* is not a string and has no
        attribute ``device_type``
    :raises ValueError: if the device type is not recognized
    """
    if isinstance(device, str):
        kind = device
    elif hasattr(device, 'device_type'):
        kind = device.device_type()
    else:
        raise TypeError(f'Unsupported device type: {type(device)!r}.')

    # a string never equals 0 or 1, so one lookup serves both cases
    if kind in ('cuda', 1):
        return C_OrtDevice.cuda()
    if kind in ('cpu', 0):
        return C_OrtDevice.cpu()
    raise ValueError(  # pragma: no cover
        f'Unsupported device type: {kind!r}.')
def get_ort_device(device):
    """
    Converts device into :epkg:`C_OrtDevice`.
    A :epkg:`C_OrtDevice` is returned unchanged. A string is
    interpreted as ``'cpu'``, ``'gpu'``, ``'cuda'``, ``'gpu:<index>'``
    or ``'cuda:<index>'``.

    :param device: any type
    :return: :epkg:`C_OrtDevice`
    :raises ValueError: if the string cannot be interpreted
    :raises TypeError: if *device* is neither a string nor
        a :epkg:`C_OrtDevice`

    Example:

    ::

        get_ort_device('cpu')
        get_ort_device('gpu')
        get_ort_device('cuda')
        get_ort_device('cuda:0')
    """
    if isinstance(device, C_OrtDevice):
        return device
    if not isinstance(device, str):
        raise TypeError(  # pragma: no cover
            "Unable to interpret type %r, (%r) as de device." % (
                type(device), device))

    if device == 'cpu':
        return C_OrtDevice(
            C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
    if device in {'gpu', 'cuda:0', 'cuda', 'gpu:0'}:
        return C_OrtDevice(
            C_OrtDevice.cuda(), C_OrtDevice.default_memory(), 0)

    # 'gpu:<index>' and 'cuda:<index>' both designate a CUDA device
    for prefix in ('gpu:', 'cuda:'):
        if device.startswith(prefix):
            idx = int(device[len(prefix):])
            return C_OrtDevice(
                C_OrtDevice.cuda(), C_OrtDevice.default_memory(), idx)

    raise ValueError(  # pragma: no cover
        "Unable to interpret string %r as a device." % device)
def forward_no_training(self, exc=None, verbose=False):
    """
    Trains a linear regression, converts it into ONNX and checks that
    ``OrtGradientForwardBackward`` returns the same predictions as
    scikit-learn when ``forward`` receives a list of OrtValue,
    an OrtValueVector or numpy arrays.

    :param exc: checks that an unknown provider raises ``ValueError``,
        if None, the check is done unless the module runs as
        ``__main__``
    :param verbose: prints the progress
    """
    if exc is None:
        exc = __name__ != '__main__'
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtDevice as C_OrtDevice, OrtMemType)
    from onnxruntime.capi._pybind_state import (OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward

    def trace(message):
        if verbose:
            print(message)

    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, _ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})

    # starts testing
    trace("[forward_no_training] start testing")
    if exc:
        trace("[forward_no_training] check exception")
        self.assertRaise(
            lambda: OrtGradientForwardBackward(
                onx, debug=True, enable_logging=True,
                providers=['NONE']),
            ValueError)

    trace("[forward_no_training] instantiate")
    forback = OrtGradientForwardBackward(
        onx, debug=True, enable_logging=True)
    self.assertEqual(repr(forback), "OrtGradientForwardBackward(...)")
    self.assertTrue(hasattr(forback, 'cls_type_'))
    attribute_checks = [
        ('_onx_inp', ['X', 'coef', 'intercept']),
        ('_onx_out', ['X_grad', 'coef_grad', 'intercept_grad']),
        ('_weights_to_train', ['coef', 'intercept']),
        ('_grad_input_names', ['X', 'coef', 'intercept']),
        ('_input_names', ['X']),
        ('_bw_fetches_names', ['X_grad', 'coef_grad', 'intercept_grad']),
        ('_output_names', ['variable']),
    ]
    for attribute, names in attribute_checks:
        self.assertEqual(getattr(forback.cls_type_, attribute), names)

    trace("[forward_no_training] expected prediction")
    expected = reg.predict(X_test)
    coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
    intercept = numpy.array([reg.intercept_], dtype=numpy.float32)
    arrays = [X_test, coef, intercept]

    trace("[forward_no_training] InferenceSession")
    providers = device_to_providers('cpu')
    sess0 = InferenceSession(onx.SerializeToString(), providers=providers)
    inames = [i.name for i in sess0.get_inputs()]  # pylint: disable=E1101
    self.assertEqual(inames, ['X'])
    got = sess0.run(None, {'X': X_test})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    trace("[forward_no_training] evaluation")
    sess_eval = forback.cls_type_._sess_eval  # pylint: disable=E1101
    inames = [i.name for i in sess_eval.get_inputs()]
    self.assertEqual(inames, ['X', 'coef', 'intercept'])
    feeds = {'X': X_test, 'coef': coef, 'intercept': intercept}
    got = sess_eval.run(None, feeds)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValue
    trace("[forward_no_training] OrtValue")
    inst = forback.new_instance()
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

    # list of OrtValues
    inputs = [C_OrtValue.ortvalue_from_numpy(a, device) for a in arrays]
    got_ort = inst.forward(inputs)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValueVector
    trace("[forward_no_training] OrtValueVector")
    inputs = OrtValueVector()
    for a in arrays:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(
        expected.ravel(), got[0].numpy().ravel(), decimal=4)

    # numpy
    trace("[forward_no_training] numpy")
    inputs = [X_test, coef, intercept]
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(
        expected.ravel(), got[0].numpy().ravel(), decimal=4)
    trace("[forward_no_training] end")
def forward_training(self, model, debug=False, n_classes=3,
                     add_print=False):
    """
    Trains *model*, converts it into ONNX and checks the forward and
    backward steps of ``OrtGradientForwardBackward`` in training mode
    with one observation then with the whole test set, inputs being
    given as OrtValueVector, list of OrtValue or numpy arrays.

    :param model: scikit-learn model, it is handled as a classifier
        if its class has ``predict_proba``, as a regressor otherwise
    :param debug: saves the ONNX graphs into a temporary folder
    :param n_classes: number of classes for a classifier
    :param add_print: prints markers before and after the first
        forward and backward steps
    """
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtMemType, OrtDevice as C_OrtDevice)
    from onnxruntime.capi._pybind_state import (OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward

    def to_proba(yt):
        # one-hot encoding of the labels
        width = yt.max() + 1
        one_hot = numpy.zeros((yt.shape[0], width), dtype=numpy.float32)
        one_hot[numpy.arange(yt.shape[0]), yt] = 1
        return one_hot

    is_classifier = hasattr(model.__class__, 'predict_proba')

    if is_classifier:
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10, n_classes=n_classes, n_informative=7)
        target_dtype = numpy.int64
    else:
        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2)
        target_dtype = numpy.float32
    X = X.astype(numpy.float32)
    y = y.astype(target_dtype)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    reg = model
    reg.fit(X_train, y_train)
    # needs if skl2onnx<1.10.4
    # reg.coef_ = reg.coef_.reshape((1, -1))
    # reg.intercept_ = reg.intercept_.reshape((-1, ))
    if is_classifier:
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearClassifier'},
                      options={'zipmap': False})
        onx = select_model_inputs_outputs(
            onx, outputs=[onx.graph.output[1].name])
    else:
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearRegressor'})

    # remove batch possibility
    # onx.graph.input[0].type.tensor_type.shape.dim[0].dim_value = 0
    # onx.graph.input[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"
    # onx.graph.output[0].type.tensor_type.shape.dim[0].dim_value = 0
    # onx.graph.output[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"
    providers = device_to_providers('cpu')
    sess = InferenceSession(onx.SerializeToString(), providers=providers)
    sess.run(None, {'X': X_test[:1]})

    # starts testing
    forback = OrtGradientForwardBackward(
        onx, debug=True, enable_logging=True)

    if debug:
        n = model.__class__.__name__
        temp = get_temp_folder(__file__, f"temp_forward_training_{n}")

        def save(filename, proto):
            with open(os.path.join(temp, filename), "wb") as f:
                f.write(proto.SerializeToString())

        save(f"model_{n}.onnx", onx)
        save(f"fw_train_{n}.onnx", forback.cls_type_._trained_onnx)
        save(f"fw_pre_{n}.onnx", forback.cls_type_._optimized_pre_grad_model)

    if is_classifier:
        expected = reg.predict_proba(X_test)
        coef = reg.coef_.astype(numpy.float32).T
        intercept = reg.intercept_.astype(numpy.float32)
        # only one observation
        X_test1 = X_test[:1]
        y_test = to_proba(y_test).astype(numpy.float32)
        y_test1 = y_test[:1]
        expected1 = expected[:1]
    else:
        expected = reg.predict(X_test)
        coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
        intercept = numpy.array([reg.intercept_], dtype=numpy.float32)
        # only one observation
        X_test1 = X_test[:1]
        y_test1 = y_test[0].reshape((1, -1))
        expected1 = expected[:1]

    inst = forback.new_instance()
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

    def to_ort_list(arrays):
        return [C_OrtValue.ortvalue_from_numpy(a, device) for a in arrays]

    def to_ort_vector(arrays):
        vector = OrtValueVector()
        for a in arrays:
            vector.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
        return vector

    def check_backward(outputs):
        grads = inst.backward(outputs)
        self.assertEqual(len(grads), 3)

    # OrtValueVector, one observation
    if add_print:
        print("\n\n######################\nFORWARD")
    got = inst.forward(
        to_ort_vector([X_test1, coef, intercept]), training=True)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(
        expected1.ravel(), got[0].numpy().ravel(), decimal=4)

    if add_print:
        print("\n\n######################\nBACKWARD")
    check_backward(to_ort_vector([y_test1]))
    if add_print:
        print("\n######################\nEND\n")

    # OrtValueVectorN, whole test set
    got = inst.forward(
        to_ort_vector([X_test, coef, intercept]), training=True)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(
        expected.ravel(), got[0].numpy().ravel(), decimal=4)
    check_backward(to_ort_vector([y_test.reshape((1, -1))]))

    # list of OrtValues
    got_ort = inst.forward(
        to_ort_list([X_test, coef, intercept]), training=True)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)
    check_backward(to_ort_list([y_test.reshape((1, -1))]))

    # numpy
    got_ort = inst.forward([X_test, coef, intercept], training=True)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)
    check_backward([y_test.reshape((1, -1))])
def test_forward_no_training_pickle(self):
    """
    Pickles then restores an ``OrtGradientForwardBackward`` and checks
    the restored instance exposes the same names and returns the same
    predictions as scikit-learn when ``forward`` receives a list of
    OrtValue, an OrtValueVector or numpy arrays.
    """
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtMemType, OrtDevice as C_OrtDevice)
    from onnxruntime.capi._pybind_state import (OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward

    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, _ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})

    # round trip through pickle
    forback0 = OrtGradientForwardBackward(onx, debug=True)
    st = io.BytesIO()
    pickle.dump(forback0, st)
    st2 = io.BytesIO(st.getvalue())
    forback = pickle.load(st2)

    self.assertTrue(hasattr(forback, 'cls_type_'))
    attribute_checks = [
        ('_onx_inp', ['X', 'coef', 'intercept']),
        ('_onx_out', ['X_grad', 'coef_grad', 'intercept_grad']),
        ('_weights_to_train', ['coef', 'intercept']),
        ('_grad_input_names', ['X', 'coef', 'intercept']),
        ('_input_names', ['X']),
        ('_bw_fetches_names', ['X_grad', 'coef_grad', 'intercept_grad']),
        ('_output_names', ['variable']),
    ]
    for attribute, names in attribute_checks:
        self.assertEqual(getattr(forback.cls_type_, attribute), names)

    expected = reg.predict(X_test)
    coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
    intercept = numpy.array([reg.intercept_], dtype=numpy.float32)
    arrays = [X_test, coef, intercept]

    # original graph
    providers = device_to_providers('cpu')
    sess0 = InferenceSession(onx.SerializeToString(), providers=providers)
    inames = [i.name for i in sess0.get_inputs()]
    self.assertEqual(inames, ['X'])
    got = sess0.run(None, {'X': X_test})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # graph with weights as inputs
    sess_eval = forback.cls_type_._sess_eval  # pylint: disable=W0212
    inames = [i.name for i in sess_eval.get_inputs()]
    self.assertEqual(inames, ['X', 'coef', 'intercept'])
    feeds = {'X': X_test, 'coef': coef, 'intercept': intercept}
    got = sess_eval.run(None, feeds)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValue
    inst = forback.new_instance()
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)
    inputs = [C_OrtValue.ortvalue_from_numpy(a, device) for a in arrays]
    got_ort = inst.forward(inputs)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValueVector
    inputs = OrtValueVector()
    for a in arrays:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(
        expected.ravel(), got[0].numpy().ravel(), decimal=4)

    # numpy
    inputs = [X_test, coef, intercept]
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(
        expected.ravel(), got[0].numpy().ravel(), decimal=4)
shape = inp.shape res[name] = random_input(typ, shape, batch) return res ################################# # Profiling # +++++++++ # # Let's choose the device available on this machine. # batch dimension is set to 10. batch = 10 if get_device().upper() == 'GPU': ort_device = C_OrtDevice(C_OrtDevice.cuda(), C_OrtDevice.default_memory(), 0) provider = 'CUDAExecutionProvider' else: ort_device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0) provider = 'CPUExecutionProvider' print(f"provider = {provider!r}") #################################### # We load the graph. with open(filename, 'rb') as f: onx = onnx.load(f)
def _create_onnx_graphs(self):
    """
    Builds the forward and backward ONNX graphs and returns, as a
    dictionary, the attributes of the new class.

    The dictionary has the following keys:

    * `_run_options`: see :epkg:`RunOptions`
    * `_sess`: :epkg:`InferenceSession` on the training graph
    * `_sess_eval`: :epkg:`InferenceSession` on the graph
        with weights as inputs
    * `_training_agent`: :epkg:`TrainingAgent`
    * `_cache`: :epkg:`OrtValueCache`
    * `_logger`: logger
    * `_input_names`: input names
    * `_debug`: use debug mode
    * `_grad_input_names`: gradient input names
    * `_output_names`: output names
    * `_weights_to_train`: sorted names of the weights to train
    * `_devices`: one device per gradient input, the last device of
        `_fw_outputs_device_info` is repeated if needed
    * `_onx_inp`, `_onx_out`: input and output names of the
        training graph

    Training attributes

    * `_bw_fetches_names`: bw_fetches_names,
    * `_fw_outputs_device_info`: fw_outputs_device_info,
    * `_bw_outputs_device_info`: bw_outputs_device_info,
    * `_fw_no_grad_output_device_info`: fw_no_grad_output_device_info,
    * `_graph_info`: graph_info

    ONNX graphs and builder:

    * `_trained_onnx`: ONNX graph for the gradient
    * `_optimized_pre_grad_model`: evaluation ONNX graph taking
        weights as inputs
    * `_graph_builder`: :epkg:`OrtModuleGraphBuilder`

    :return: dictionary
    :raises RuntimeError: if the training graph does not have as many
        inputs as outputs
    """
    logger = self._logger
    verbose = logger is not None

    if verbose:
        logger.info("[OrtGradientForwardBackward] create training onnx")
        logger.info("[OrtGradientForwardBackward] input_names=%r",
                    self.input_names)
        logger.info("[OrtGradientForwardBackward] output_names=%r",
                    self.output_names)
        logger.info("[OrtGradientForwardBackward] weights_to_train=%r",
                    self.weights_to_train)

    builder = OrtModuleGraphBuilder()

    if verbose:
        # configurations are only read when they are logged
        cf = self.graph_builder_config.graph_transformer_config
        cfp = cf.propagate_cast_ops_config
        show = OrtGradientForwardBackward._repr_helper_
        logger.info("[OrtGradientForwardBackward] "
                    "OrtModuleGraphBuilder.initialize")
        logger.info(
            "[OrtGradientForwardBackward] graph_builder_config=%s",
            show(self.graph_builder_config, indent=4))
        logger.info(
            "[OrtGradientForwardBackward] graph_builder_config."
            "graph_transformer_config=%s",
            show(cf, indent=4))
        logger.info(
            "[OrtGradientForwardBackward] graph_builder_config."
            "graph_transformer_config.propagate_cast_ops_config=%s",
            show(cfp, indent=4))

    builder.initialize(
        self.onnx_model.SerializeToString(), self.graph_builder_config)

    if verbose:
        logger.info(
            "[OrtGradientForwardBackward] OrtModuleGraphBuilder.build")
    builder.build()

    if verbose:
        logger.info(
            "[OrtGradientForwardBackward] OrtModuleGraphBuilder.get_model")

    train_onnx_model_serialized = builder.get_model()
    optimized_pre_grad_model = builder.get_inference_optimized_model()
    graph_info = builder.get_graph_info()

    if verbose:
        logger.info(
            "[OrtGradientForwardBackward] graph_info=%s",
            OrtGradientForwardBackward._repr_helper_(
                graph_info, indent=4))
        logger.info("[OrtGradientForwardBackward] create TrainSession")
        logger.info(
            "[OrtGradientForwardBackward] sess_options=%s",
            OrtGradientForwardBackward._repr_helper_(
                self.sess_options, indent=4))
        logger.info("[OrtGradientForwardBackward] providers=%r",
                    self.providers)

    sess = InferenceSession(
        train_onnx_model_serialized, sess_options=self.sess_options,
        provider_options=self.provider_options, providers=self.providers)

    if verbose:
        logger.info("[OrtGradientForwardBackward] create InferenceSession")

    sess_eval = InferenceSession(
        optimized_pre_grad_model, sess_options=self.sess_options,
        provider_options=self.provider_options, providers=self.providers)

    if verbose:
        logger.info("[OrtGradientForwardBackward] create training agent")

    grad_input_names = [obj.name for obj in sess.get_inputs()]
    bw_fetches_names = [obj.name for obj in sess.get_outputs()]

    def new_device(provider_name):
        # a new OrtDevice for the given provider and the device index
        return OrtDevice(
            OrtGradientForwardBackward._provider_name_to_device_type(
                provider_name),
            OrtDevice.default_memory(), self.device_index)

    # one device per provider for the forward outputs
    fw_outputs_device_info = [new_device(p) for p in self.providers]
    # the first provider is used for every other output
    bw_outputs_device_info = [
        new_device(self.providers[0]) for _ in bw_fetches_names]
    fw_no_grad_output_device_info = [
        new_device(self.providers[0]) for _ in self.output_names]

    agent_args = (sess._sess, grad_input_names, fw_outputs_device_info,
                  bw_fetches_names, bw_outputs_device_info)
    try:
        # onnxruntime>=1.12
        training_agent = TrainingAgent(*agent_args, 0)
    except TypeError:
        # onnxruntime<=1.11
        training_agent = TrainingAgent(*agent_args)

    if verbose:
        logger.info(
            "[OrtGradientForwardBackward] instantiate dynamic class %r",
            self.class_name)
        logger.info("[OrtGradientForwardBackward] weights_to_train=%r",
                    self.weights_to_train)
        logger.info("[OrtGradientForwardBackward] grad_input_names=%r",
                    grad_input_names)
        logger.info("[OrtGradientForwardBackward] bw_fetches_names=%r",
                    bw_fetches_names)
        logger.info("[OrtGradientForwardBackward] device_index=%r",
                    self.device_index)

    # the last device is repeated until every gradient input has one
    devices = list(fw_outputs_device_info)
    while len(devices) < len(grad_input_names):
        devices.append(devices[-1])

    trained_onnx = onnx.load(BytesIO(train_onnx_model_serialized))
    onnx_loss = onnx.load(BytesIO(optimized_pre_grad_model))

    # unnamed nodes receive a name built from their position
    for proto in (trained_onnx, onnx_loss):
        for i, node in enumerate(proto.graph.node):
            if node.name == '':
                node.name = "N%d" % i

    kwargs = {
        '_run_options': self.run_options,
        '_sess': sess,
        '_sess_eval': sess_eval,
        '_training_agent': training_agent,
        '_cache': OrtValueCache(),
        '_logger': logger,
        '_input_names': self.input_names,
        '_grad_input_names': grad_input_names,
        '_output_names': self.output_names,
        '_bw_fetches_names': bw_fetches_names,
        '_fw_outputs_device_info': fw_outputs_device_info,
        '_bw_outputs_device_info': bw_outputs_device_info,
        '_fw_no_grad_output_device_info': fw_no_grad_output_device_info,
        '_weights_to_train': sorted(self.weights_to_train),
        '_graph_info': graph_info,
        #
        '_trained_onnx': trained_onnx,
        '_optimized_pre_grad_model': onnx_loss,
        '_graph_builder': builder,
        '_devices': devices,
        '_debug': self.debug,
    }
    graph = trained_onnx.graph
    kwargs['_onx_inp'] = [o.name for o in graph.input]
    kwargs['_onx_out'] = [o.name for o in graph.output]

    if len(kwargs['_onx_inp']) != len(kwargs['_onx_out']):
        raise RuntimeError(  # pragma: no cover
            "Gradient input and output are inconsistant: "
            "%r != %r" % (kwargs['_onx_inp'], kwargs['_onx_out']))
    return kwargs
print('ort-c')
# A new CPU session is created, then the call goes through the object
# stored in its private attribute `_sess`, with explicit output names
# and a RunOptions instance.
sess = InferenceSession(onx.SerializeToString(),
                        providers=['CPUExecutionProvider'])
ro = RunOptions()
output_names = [o.name for o in sess.get_outputs()]
obs = measure_time(lambda: sess._sess.run(output_names, {'X': X}, ro),
                   context=dict(sess=sess, X=X),
                   repeat=repeat, number=number)
# the measure is stored under the name printed above
obs['name'] = 'ort-c'
data.append(obs)

###################################
# onnxruntime: run_with_ort_values

print('ort-ov-c')
device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

# X is converted into an OrtValue once, outside of the measured lambda.
Xov = C_OrtValue.ortvalue_from_numpy(X, device)

sess = InferenceSession(onx.SerializeToString(),
                        providers=['CPUExecutionProvider'])
ro = RunOptions()
output_names = [o.name for o in sess.get_outputs()]
obs = measure_time(
    lambda: sess._sess.run_with_ort_values(
        {'X': Xov}, output_names, ro),
    context=dict(sess=sess),
    repeat=repeat, number=number)
# NOTE(review): the label printed above is 'ort-ov-c' but the measure
# is stored as 'ort-ov' -- confirm which name is intended.
obs['name'] = 'ort-ov'
data.append(obs)