def update_weights(self, n_bind, device, statei, gradienti, batch_size,
                   velocity=None):
    """
    Runs the prepared *axpyw* onnx session to update one weight tensor.

    Inputs ``X1``/``X2``/``G`` are bound to the gradient, the state and
    the velocity; scalars ``alpha`` (``-value / batch_size``) and
    ``beta`` (``momentum``) are refreshed in the cached numpy buffers.
    Outputs ``Y`` and ``Z`` are bound back onto *statei* and *velocity*.

    :param n_bind: index of the io_binding prepared by
        ``build_onnx_function``
    :param device: :epkg:`C_OrtDevice` hosting the tensors
    :param statei: weight tensor, receives output ``Y``
    :param gradienti: gradient tensor
    :param batch_size: divides the learning rate ``self.value``
    :param velocity: velocity tensor, receives output ``Z``, mandatory
    :return: outputs of the io_binding
    """
    missing = [attr for attr in ("axpyw_onnx_", "axpyw_sess_binds_")
               if not hasattr(self, attr)]
    if missing:
        raise RuntimeError(  # pragma: no cover
            "Attributes 'axpyw_sess_binds_' or "
            "'axpyw_onnx_' is missing. Method "
            "'build_onnx_function' has not been called.")
    if velocity is None:
        raise RuntimeError(  # pragma: no cover
            "Velocity must not be None for this way of updating weights.")

    io_bind = self.axpyw_sess_binds_[n_bind]

    # refresh the scalar coefficients kept in reusable numpy buffers
    self.alpha_[0] = - self.value / batch_size  # pylint: disable=E1130
    self.beta_[0] = self.momentum
    ort_alpha = C_OrtValue.ortvalue_from_numpy(self.alpha_, device)
    ort_beta = C_OrtValue.ortvalue_from_numpy(self.beta_, device)

    # bind every input of the axpyw session
    for input_name, tensor in (("X1", gradienti), ("X2", statei),
                               ("G", velocity), ("alpha", ort_alpha),
                               ("beta", ort_beta)):
        self._bind_input_ortvalue(input_name, io_bind, tensor, device,
                                  cache=True)

    # outputs overwrite the state and the velocity in place
    self._bind_output_ortvalue('Y', io_bind, statei, cache=True)
    self._bind_output_ortvalue('Z', io_bind, velocity, cache=True)

    self._call_iobinding(self.axpyw_sess_._sess, io_bind)
    return io_bind.get_outputs()
def __iter__(self):
    """
    Iterates over the dataset by drawing *batch_size* consecutive
    observations starting at a random position.

    Yields pairs ``(X_batch, y_batch)`` converted to :epkg:`C_OrtValue`
    on ``self.device``.  Roughly ``len(self) / batch_size`` batches are
    produced; batches may overlap since each start index is drawn
    independently.
    """
    N = 0
    n = len(self)
    # b is the largest start index leaving a full batch_size window
    b = n - self.batch_size
    while N < n:
        if b <= 0:
            # Dataset no larger than one batch: the previous code called
            # randint(0, b) which raised ValueError; yield from 0 instead.
            i = 0
        else:
            # randint's upper bound is exclusive: use b + 1 so the last
            # valid window [b, b + batch_size) can also be drawn (the
            # previous code could never select it).
            i = numpy.random.randint(0, b + 1)
        N += self.batch_size
        yield (C_OrtValue.ortvalue_from_numpy(
            self.X[i:i + self.batch_size], self.device),
            C_OrtValue.ortvalue_from_numpy(
                self.y[i:i + self.batch_size], self.device))
def test_print_ortvalue(self):
    """Checks str_ortvalue on a small float tensor and a long int64 one."""
    dev = get_ort_device('cpu')

    # small tensor: every value appears in the text
    data = numpy.array([[0, 1, 4, 4.5]], dtype=numpy.float32)
    ov = C_OrtValue.ortvalue_from_numpy(data, dev)
    expected = ("device=Cpu dtype=dtype('float32') shape=(1, 4) "
                "value=[0.0, 1.0, 4.0, 4.5]")
    for _ in range(2):  # pylint: disable=W0212
        # calling twice must produce the same text
        self.assertEqual(expected, str_ortvalue(ov))

    # long tensor: the representation is truncated with '...'
    data = numpy.arange(100).astype(numpy.int64)
    ov = C_OrtValue.ortvalue_from_numpy(data, dev)
    expected = ("device=Cpu dtype=dtype('int64') shape=(100,) "
                "value=[0, 1, 2, 3, 4, '...', 95, 96, 97, 98, 99]")
    self.assertEqual(expected, str_ortvalue(ov))
def numpy_to_ort_value(arr, device=None):
    """
    Converts a numpy array to :epkg:`C_OrtValue`.

    :param arr: numpy array
    :param device: :epkg:`C_OrtDevice` or None for cpu
    :return: :epkg:`C_OrtValue`
    """
    # default to the CPU device when none is supplied
    target = device if device is not None else get_ort_device('cpu')
    return C_OrtValue.ortvalue_from_numpy(arr, target)
def input_to_ort(tensors, devices, debug):
    "Converts a list of tensors into an :epkg:`OrtValueVector`."

    def _validate_(tensors):
        # Raises when a tensor's device does not match the expected device
        # at the same position in *devices*.
        # NOTE(review): the check uses
        # OrtGradientForwardBackwardFunction.device_name while the error
        # message uses OrtGradientForwardBackward.device_name -- presumably
        # both resolve to the same helper; confirm.
        if any(map(
                lambda tu: (tu[0].device_name() !=
                            OrtGradientForwardBackwardFunction.device_name(
                                tu[1])),
                zip(tensors, devices))):
            raise RuntimeError(  # pragma: no cover
                "Not all inputs are on the same device %r != %r." % (
                    [OrtGradientForwardBackward.device_name(d)
                     for d in devices],
                    [x.device_name() for x in tensors]))

    # Case 1: already an OrtValueVector, returned as is.
    if isinstance(tensors, OrtValueVector):
        if debug:
            _validate_(tensors)
        return tensors

    # Case 2: a sequence of C_OrtValue, copied into a new vector.
    if all(map(lambda t: isinstance(t, C_OrtValue), tensors)):
        if debug:
            _validate_(tensors)
        vect = OrtValueVector()
        vect.reserve(len(tensors))
        for t in tensors:
            if t is None:
                raise NotImplementedError(  # pragma: no cover
                    "Empty vector found.")
            vect.push_back(t)
        return vect

    # generic case: anything else (presumably framework tensors exposing
    # .data / .as_contiguous -- TODO confirm the expected type)
    vect = OrtValueVector()
    vect.reserve(len(tensors))
    for t, dev in zip(tensors, devices):
        if t is None:
            # if gradient then
            # grad_output = torch.zeros(shape, device=device, dtype=dtype)
            raise NotImplementedError(  # pragma: no cover
                "Empty vector found.")
        # NOTE(review): for a torch tensor, `.contiguous` is a bound method
        # and is always truthy, so this branch would never trigger --
        # `.is_contiguous()` may have been intended; verify.
        if not t.data.contiguous:
            t = t.as_contiguous()  # pragma: no cover
        vect.push_back(C_OrtValue.ortvalue_from_numpy(t, dev))
    if debug:
        # sanity checks only enabled in debug mode
        if len(vect) != len(tensors):
            raise RuntimeError(  # pragma: no cover
                "Unexpected array length %d != %d (len(devices)=%d)." % (
                    len(vect), len(tensors), len(devices)))
        _validate_(vect)
    return vect
def test_bind_input_types(self):
    # Runs a one-node Identity model for every supported dtype and device
    # and checks both ways of binding an input to a SessionIOBinding:
    # binding an existing OrtValue, and binding raw memory by pointer.
    opset = onnx_opset_version()
    devices = [(C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(),
                            0), ['CPUExecutionProvider'])]
    if "CUDAExecutionProvider" in onnxrt.get_all_providers():
        # also exercise the GPU when the CUDA provider is available
        devices.append((C_OrtDevice(C_OrtDevice.cuda(),
                                    C_OrtDevice.default_memory(), 0),
                        ['CUDAExecutionProvider']))
    for device, provider in devices:
        for dtype in [np.float32, np.float64, np.int32, np.uint32,
                      np.int64, np.uint64, np.int16, np.uint16,
                      np.int8, np.uint8, np.float16, np.bool_]:
            with self.subTest(dtype=dtype, device=str(device)):
                x = np.arange(8).reshape((-1, 2)).astype(dtype)
                proto_dtype = NP_TYPE_TO_TENSOR_TYPE[x.dtype]
                X = helper.make_tensor_value_info(
                    'X', proto_dtype, [None, x.shape[1]])
                Y = helper.make_tensor_value_info(
                    'Y', proto_dtype, [None, x.shape[1]])

                # inference
                node_add = helper.make_node('Identity', ['X'], ['Y'])

                # graph
                graph_def = helper.make_graph([node_add], 'lr', [X], [Y], [])
                model_def = helper.make_model(
                    graph_def, producer_name='dummy', ir_version=7,
                    producer_version="0",
                    opset_imports=[helper.make_operatorsetid('', opset)])

                sess = onnxrt.InferenceSession(
                    model_def.SerializeToString(), providers=provider)

                # first way: bind an existing OrtValue
                bind = SessionIOBinding(sess._sess)
                ort_value = C_OrtValue.ortvalue_from_numpy(x, device)
                bind.bind_ortvalue_input('X', ort_value)
                bind.bind_output('Y', device)
                sess._sess.run_with_iobinding(bind, None)
                ortvalue = bind.get_outputs()[0]
                y = ortvalue.numpy()
                # Identity: the output must equal the input
                assert_almost_equal(x, y)

                # second way: bind the raw memory of the same OrtValue
                bind = SessionIOBinding(sess._sess)
                bind.bind_input('X', device, dtype, x.shape,
                                ort_value.data_ptr())
                bind.bind_output('Y', device)
                sess._sess.run_with_iobinding(bind, None)
                ortvalue = bind.get_outputs()[0]
                y = ortvalue.numpy()
                assert_almost_equal(x, y)
def fit(self, X, y):
    """
    Trains the model.

    :param X: features
    :param y: expected output
    :return: self
    """
    # one onnxruntime training session for the whole fit
    self.train_session_ = create_training_session(
        self.model_onnx, self.weights_to_train,
        loss_output_name=self.loss_output_name,
        training_optimizer_name=self.training_optimizer_name,
        device=self.device)

    loader = DataLoaderDevice(
        X, y, batch_size=self.batch_size, device=self.device)

    self.input_names_ = [
        inp.name for inp in self.train_session_.get_inputs()]
    self.output_names_ = [
        out.name for out in self.train_session_.get_outputs()]
    self.loss_index_ = self.output_names_.index(self.loss_output_name)

    bind = self.train_session_.io_binding()._iobinding
    lr = self._init_learning_rate()
    iterations = range(self.max_iter)
    loop = tqdm(iterations) if self.verbose else iterations

    train_losses = []
    for it in loop:
        # the learning rate is fed to the session as an OrtValue
        bind_lr = C_OrtValue.ortvalue_from_numpy(
            numpy.array([lr], dtype=numpy.float32), self.device)
        loss = self._iteration(loader, bind_lr, bind)
        lr = self._update_learning_rate(it, lr)
        if self.verbose > 1:
            loop.set_description(f"loss={loss:1.3g} lr={lr:1.3g}")
        train_losses.append(loss)

    self.train_losses_ = train_losses
    self.trained_coef_ = self.train_session_.get_state()
    return self
def benchmark(name, onx, fct_numpy, *args,
              dims=(1, 10, 100, 200, 500, 1000, 2000, 10000)):
    """
    Measures, for several input sizes, the time spent by a numpy
    implementation and by three onnxruntime execution paths
    (``run``, io_binding with binding, io_binding without rebinding).

    :param name: label stored in every result row
    :param onx: onnx model
    :param fct_numpy: numpy implementation taking the same inputs
    :param args: baseline inputs, reshaped for every dimension in *dims*
    :param dims: tested sizes
    :return: list of dictionaries produced by ``measure_time``
    """
    sess = InferenceSession(onx.SerializeToString())
    device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
    names = [i.name for i in sess.get_inputs()]
    out_names = [o.name for o in sess.get_outputs()]
    if len(names) != len(args):
        raise RuntimeError(f"Size mismatch {len(names)} != {len(args)}.")

    rows = []
    for dim in tqdm(dims):
        new_args = [reshape(a, dim) for a in args]
        ortvalues = [C_OrtValue.ortvalue_from_numpy(a, device)
                     for a in new_args]

        def _record(fct, impl, **kwargs):
            # measures fct and appends a tagged result row
            ms = measure_time(fct, **kwargs)
            ms.update(dict(name=name, impl=impl, dim=dim))
            rows.append(ms)

        # numpy baseline
        _record(lambda: fct_numpy(*new_args), 'numpy',
                repeat=50, number=100)

        # plain sess.run with numpy inputs
        inps = {n: a for n, a in zip(names, new_args)}
        _record(lambda: sess.run(None, inps), 'sess')

        # io_binding: bind + run every call
        bind = SessionIOBinding(sess._sess)
        _record(lambda: bind_and_run(sess._sess, bind, names, ortvalues,
                                     out_names, device), 'bind_run')

        # io_binding: run only, inputs already bound
        _record(lambda: nobind_just_run(sess._sess, bind), 'run')
    return rows
# Builds a profiled session; the optimized graph is dumped next to the
# current working directory.
so = SessionOptions()
so.enable_profiling = True
so.optimized_model_filepath = os.path.split(filename)[-1] + ".optimized.onnx"
sess = InferenceSession(onx.SerializeToString(), so, providers=[provider])
bind = SessionIOBinding(sess._sess)

print("graph_optimization_level:", so.graph_optimization_level)

#####################################
# Creates random data
feed = random_feed(sess, batch)

#####################################
# moving the data on CPU or GPU
# maps input name -> (OrtValue located on ort_device, original numpy dtype)
feed_ort_value = OrderedDict(
    (name, (C_OrtValue.ortvalue_from_numpy(v, ort_device), v.dtype))
    for name, v in feed.items())

outputs = [o.name for o in sess.get_outputs()]


#######################################
# A function which calls the API for any device.


def run_with_iobinding(sess, bind, ort_device, feed_ort_value, outputs):
    # Binds every input by raw pointer (no copy), binds every output on the
    # same device, then runs through the io_binding API.
    for name, (value, dtype) in feed_ort_value.items():
        bind.bind_input(name, ort_device, dtype, value.shape(),
                        value.data_ptr())
    for out in outputs:
        bind.bind_output(out, ort_device)
    sess._sess.run_with_iobinding(bind, None)
    # NOTE(review): the bound outputs are fetched but not returned here --
    # presumably the function continues past this chunk; confirm.
    ortvalues = bind.get_outputs()
providers=['CPUExecutionProvider'])
# (the line above closes an InferenceSession(...) call started before this
# chunk)
ro = RunOptions()
output_names = [o.name for o in sess.get_outputs()]
# measures the C API entry point sess._sess.run
obs = measure_time(lambda: sess._sess.run(output_names, {'X': X}, ro),
                   context=dict(sess=sess, X=X),
                   repeat=repeat, number=number)
obs['name'] = 'ort-c'
data.append(obs)

###################################
# onnxruntime: run_with_ort_values
print('ort-ov-c')
device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)
# the input is moved on the device once, outside of the measured function
Xov = C_OrtValue.ortvalue_from_numpy(X, device)
sess = InferenceSession(onx.SerializeToString(),
                        providers=['CPUExecutionProvider'])
ro = RunOptions()
output_names = [o.name for o in sess.get_outputs()]
# measures run_with_ort_values: no numpy conversion on the inputs
obs = measure_time(
    lambda: sess._sess.run_with_ort_values({'X': Xov}, output_names, ro),
    context=dict(sess=sess),
    repeat=repeat, number=number)
obs['name'] = 'ort-ov'
data.append(obs)

###################################
# onnxruntime: run_with_iobinding
# Creates the training session; only initializers 'coefs' and 'intercept'
# are trained.
train_session = create_training_session(onx_train, ['coefs', 'intercept'],
                                        device=device)
print(train_session)

##########################################
# The coefficients.

state_tensors = train_session.get_state()
pprint(state_tensors)

######################################
# We can now check the coefficients are updated after one iteration.

dev = get_ort_device(device)
# a single observation is enough to trigger one update
ortx = C_OrtValue.ortvalue_from_numpy(X_train[:1], dev)
orty = C_OrtValue.ortvalue_from_numpy(y_train[:1].reshape((-1, 1)), dev)
# fixed learning rate fed as a 1x1 float32 tensor
ortlr = C_OrtValue.ortvalue_from_numpy(
    numpy.array([0.01], dtype=numpy.float32), dev)

# binds inputs and the loss output, then runs one training iteration
bind = train_session.io_binding()._iobinding
bind.bind_ortvalue_input('X', ortx)
bind.bind_ortvalue_input('label', orty)
bind.bind_ortvalue_input('Learning_Rate', ortlr)
bind.bind_output('loss', dev)
train_session._sess.run_with_iobinding(bind, None)
outputs = bind.copy_outputs_to_cpu()
pprint(outputs)

##########################################
# We check the coefficients have changed.
def benchmark_op(repeat=10, number=10, name="Slice", shape_slice_fct=None,
                 save=None, opset=14, repeat_profile=1500, verbose=1):
    """
    Benchmarks one operator (numpy vs onnxruntime on CPU, and on GPU when
    available), then profiles the onnxruntime sessions and plots the
    results.

    :param repeat: *repeat* argument of ``measure_time``
    :param number: *number* argument of ``measure_time``
    :param name: operator name used in titles and result rows
    :param shape_slice_fct: callable ``dim -> (shape, slices)``
    :param save: forwarded to ``build_ort_op``
    :param opset: onnx opset used to build the model
    :param repeat_profile: number of runs executed while profiling
    :param verbose: verbosity level
    :return: tuple ``(dfprof, dfprofgpu, df, rs, ax)``
    """
    if verbose:
        print("[benchmark_op] start repeat=%d number=%d repeat_profile=%d"
              " opset=%d." % (repeat, number, repeat_profile, opset))
    res = []
    for dim in tqdm([8, 16, 32, 64, 100, 128, 200, 256, 400, 512, 600,
                     784, 800, 1000, 1024, 1200]):
        shape, slices = shape_slice_fct(dim)
        onx, ort_fct, npy_fct, ort_fct_gpu = build_ort_op(
            save=save, op_version=opset, slices=slices)

        # fewer arrays for large dimensions to keep memory bounded
        n_arrays = 20
        if dim >= 512:
            n_arrays = 10
        xs = [numpy.random.rand(*shape).astype(numpy.float32)
              for _ in range(n_arrays)]
        info = dict(shape=shape)

        ctx = dict(xs=xs, loop_fct=loop_fct)

        # numpy
        ctx['fct'] = npy_fct
        obs = measure_time(lambda: loop_fct(npy_fct, xs),
                           div_by_number=True, context=ctx,
                           repeat=repeat, number=number)
        obs['dim'] = dim
        obs['fct'] = 'numpy'
        obs['shape'] = ",".join(map(str, shape))
        obs['slices'] = str(slices)
        obs.update(info)
        res.append(obs)

        # onnxruntime
        ctx['fct'] = ort_fct
        obs = measure_time(lambda: loop_fct(ort_fct, xs),
                           div_by_number=True, context=ctx,
                           repeat=repeat, number=number)
        obs['dim'] = dim
        obs['fct'] = 'ort'
        obs['shape'] = ",".join(map(str, shape))
        obs['slices'] = str(slices)
        obs.update(info)
        res.append(obs)

        if ort_fct_gpu is not None:
            # onnxruntime on GPU: inputs moved to cuda:0 beforehand
            dev = get_ort_device('cuda:0')
            ctx['xs'] = [C_OrtValue.ortvalue_from_numpy(x, dev) for x in xs]
            ctx['fct'] = ort_fct_gpu
            obs = measure_time(lambda: loop_fct(ort_fct_gpu, ctx['xs']),
                               div_by_number=True, context=ctx,
                               repeat=repeat, number=number)
            obs['dim'] = dim
            obs['fct'] = 'ort_gpu'
            obs['shape'] = ",".join(map(str, shape))
            obs['slices'] = str(slices)
            obs.update(info)
            res.append(obs)

    # profiling CPU (uses the model built for the last dimension)
    if verbose:
        print("[benchmark_op] done.")
        print("[benchmark_op] profile CPU.")
    so = SessionOptions()
    so.enable_profiling = True
    sess = InferenceSession(onx.SerializeToString(), so,
                            providers=["CPUExecutionProvider"])
    for i in range(0, repeat_profile):
        sess.run(None, {'X': xs[-1]}, )
    prof = sess.end_profiling()
    with open(prof, "r") as f:
        js = json.load(f)
    dfprof = DataFrame(OnnxWholeSession.process_profiling(js))
    dfprof['shape'] = ",".join(map(str, shape))
    dfprof['slices'] = str(slices)
    if verbose:
        print("[benchmark_op] done.")

    # profiling GPU
    if ort_fct_gpu is not None:
        if verbose:
            print("[benchmark_op] profile GPU.")
        so = SessionOptions()
        so.enable_profiling = True
        sess = InferenceSession(onx.SerializeToString(), so,
                                providers=["CUDAExecutionProvider"])
        io_binding = sess.io_binding()._iobinding
        # NOTE(review): binds on the 'cpu' device while the session runs on
        # CUDA and the inputs live on the GPU -- presumably relies on
        # onnxruntime copying; confirm this is intended.
        device = get_ort_device('cpu')
        for i in range(0, repeat_profile):
            x = ctx['xs'][-1]
            io_binding.bind_input('X', device, numpy.float32, x.shape(),
                                  x.data_ptr())
            io_binding.bind_output('Y', device)
            sess._sess.run_with_iobinding(io_binding, None)
        prof = sess.end_profiling()
        with open(prof, "r") as f:
            js = json.load(f)
        dfprofgpu = DataFrame(OnnxWholeSession.process_profiling(js))
        dfprofgpu['shape'] = ",".join(map(str, shape))
        dfprofgpu['slices'] = str(slices)
        if verbose:
            print("[benchmark_op] profile done.")
    else:
        dfprofgpu = None

    # Dataframes
    shape_name = str(shape).replace(str(dim), "N")
    df = pandas.DataFrame(res)
    # NOTE(review): positional arguments of DataFrame.pivot were removed in
    # pandas 2.0 -- works with the pandas version this file targets.
    piv = df.pivot('shape', 'fct', 'average')
    rs = piv.copy()
    for c in ['numpy', 'ort', 'ort_gpu']:
        if c in rs.columns:
            # speedup columns: numpy time divided by each backend time
            rs[f"numpy/{c}"] = rs['numpy'] / rs[c]
    rs = rs[[c for c in rs.columns if "/numpy" not in c]].copy()

    # Graphs.
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    piv.plot(logx=True, logy=True, ax=ax[0],
             title=f"{name} benchmark\n{shape_name!r} lower better")
    ax[0].legend(prop={"size": 9})
    rs.plot(
        logx=True, logy=True, ax=ax[1],
        title=f"{name} Speedup, baseline=numpy\n{shape_name!r} higher better")
    # reference lines at 0.5x and 2x
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})
    return dfprof, dfprofgpu, df, rs, ax
def forward_no_training(self, exc=None, verbose=False):
    """
    Checks the forward (inference) pass of
    :class:`OrtGradientForwardBackward` on a linear regression and
    compares it with scikit-learn and a plain InferenceSession,
    feeding inputs as a list of OrtValue, an OrtValueVector and numpy.

    :param exc: if None, enabled unless run as a script; when true the
        provider-validation exception is also tested
    :param verbose: prints progress messages
    """
    if exc is None:
        exc = __name__ != '__main__'
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtDevice as C_OrtDevice, OrtMemType)
    from onnxruntime.capi._pybind_state import (OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward

    # a random linear regression problem converted to onnx
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, _ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})

    # starts testing
    if verbose:
        print("[forward_no_training] start testing")
    if exc:
        if verbose:
            print("[forward_no_training] check exception")
        # an unknown provider must raise ValueError
        self.assertRaise(
            lambda: OrtGradientForwardBackward(
                onx, debug=True, enable_logging=True, providers=['NONE']),
            ValueError)
    if verbose:
        print("[forward_no_training] instantiate")
    forback = OrtGradientForwardBackward(onx, debug=True,
                                         enable_logging=True)
    self.assertEqual(repr(forback), "OrtGradientForwardBackward(...)")

    # checks the metadata stored on the generated class
    self.assertTrue(hasattr(forback, 'cls_type_'))
    self.assertEqual(forback.cls_type_._onx_inp,
                     ['X', 'coef', 'intercept'])
    self.assertEqual(forback.cls_type_._onx_out,
                     ['X_grad', 'coef_grad', 'intercept_grad'])
    self.assertEqual(forback.cls_type_._weights_to_train,
                     ['coef', 'intercept'])
    self.assertEqual(forback.cls_type_._grad_input_names,
                     ['X', 'coef', 'intercept'])
    self.assertEqual(forback.cls_type_._input_names, ['X'])
    self.assertEqual(forback.cls_type_._bw_fetches_names,
                     ['X_grad', 'coef_grad', 'intercept_grad'])
    self.assertEqual(forback.cls_type_._output_names, ['variable'])

    if verbose:
        print("[forward_no_training] expected prediction")

    expected = reg.predict(X_test)
    coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
    intercept = numpy.array([reg.intercept_], dtype=numpy.float32)

    if verbose:
        print("[forward_no_training] InferenceSession")

    # reference: plain InferenceSession on the original model
    providers = device_to_providers('cpu')
    sess0 = InferenceSession(onx.SerializeToString(), providers=providers)
    inames = [i.name for i in sess0.get_inputs()]  # pylint: disable=E1101
    self.assertEqual(inames, ['X'])
    got = sess0.run(None, {'X': X_test})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    if verbose:
        print("[forward_no_training] evaluation")

    # evaluation session generated by OrtGradientForwardBackward
    sess_eval = forback.cls_type_._sess_eval  # pylint: disable=E1101
    inames = [i.name for i in sess_eval.get_inputs()]
    self.assertEqual(inames, ['X', 'coef', 'intercept'])
    got = sess_eval.run(None, {
        'X': X_test, 'coef': coef, 'intercept': intercept})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValue
    if verbose:
        print("[forward_no_training] OrtValue")
    inst = forback.new_instance()
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

    # list of OrtValues
    inputs = []
    for a in [X_test, coef, intercept]:
        inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
    got_ort = inst.forward(inputs)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValueVector
    if verbose:
        print("[forward_no_training] OrtValueVector")
    inputs = OrtValueVector()
    for a in [X_test, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)

    # numpy
    if verbose:
        print("[forward_no_training] numpy")
    inputs = [X_test, coef, intercept]
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)
    if verbose:
        print("[forward_no_training] end")
def forward_training(self, model, debug=False, n_classes=3, add_print=False):
    """
    Checks the forward and backward passes of
    :class:`OrtGradientForwardBackward` against *model*
    (a classifier when it exposes ``predict_proba``, a regressor
    otherwise), with OrtValueVector, list of OrtValue and numpy inputs.

    :param model: scikit-learn estimator to convert and test
    :param debug: dumps the intermediate onnx files in a temp folder
    :param n_classes: number of classes for the classification problem
    :param add_print: prints separators between forward and backward
    """
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtMemType, OrtDevice as C_OrtDevice)
    from onnxruntime.capi._pybind_state import (OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward

    def to_proba(yt):
        # one-hot encodes integer labels into a float32 matrix
        mx = yt.max() + 1
        new_yt = numpy.zeros((yt.shape[0], mx), dtype=numpy.float32)
        for i, y in enumerate(yt):
            new_yt[i, y] = 1
        return new_yt

    # builds a classification or regression problem depending on the model
    if hasattr(model.__class__, 'predict_proba'):
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10, n_classes=n_classes, n_informative=7)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
    else:
        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    reg = model
    reg.fit(X_train, y_train)
    # needs if skl2onnx<1.10.4
    # reg.coef_ = reg.coef_.reshape((1, -1))
    # reg.intercept_ = reg.intercept_.reshape((-1, ))

    if hasattr(model.__class__, 'predict_proba'):
        # keep only the probability output, drop the label output
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearClassifier'},
                      options={'zipmap': False})
        onx = select_model_inputs_outputs(
            onx, outputs=[onx.graph.output[1].name])
    else:
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearRegressor'})

    # remove batch possibility
    #onx.graph.input[0].type.tensor_type.shape.dim[0].dim_value = 0
    #onx.graph.input[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"
    #onx.graph.output[0].type.tensor_type.shape.dim[0].dim_value = 0
    #onx.graph.output[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"

    # sanity check: the converted model runs with onnxruntime
    providers = device_to_providers('cpu')
    sess = InferenceSession(onx.SerializeToString(), providers=providers)
    sess.run(None, {'X': X_test[:1]})

    # starts testing
    forback = OrtGradientForwardBackward(onx, debug=True,
                                         enable_logging=True)
    if debug:
        # dump the original, training and pre-gradient onnx graphs
        n = model.__class__.__name__
        temp = get_temp_folder(__file__, f"temp_forward_training_{n}")
        with open(os.path.join(temp, f"model_{n}.onnx"), "wb") as f:
            f.write(onx.SerializeToString())
        with open(os.path.join(temp, f"fw_train_{n}.onnx"), "wb") as f:
            f.write(forback.cls_type_._trained_onnx.SerializeToString())
        with open(os.path.join(temp, f"fw_pre_{n}.onnx"), "wb") as f:
            gr = forback.cls_type_._optimized_pre_grad_model
            f.write(gr.SerializeToString())

    # expected predictions and initial weights
    if hasattr(model.__class__, 'predict_proba'):
        expected = reg.predict_proba(X_test)
        coef = reg.coef_.astype(numpy.float32).T
        intercept = reg.intercept_.astype(numpy.float32)
        # only one observation
        X_test1 = X_test[:1]
        y_test = to_proba(y_test).astype(numpy.float32)
        y_test1 = y_test[:1]
        expected1 = expected[:1]
    else:
        expected = reg.predict(X_test)
        coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
        intercept = numpy.array([reg.intercept_], dtype=numpy.float32)
        # only one observation
        X_test1 = X_test[:1]
        y_test1 = y_test[0].reshape((1, -1))
        expected1 = expected[:1]

    # OrtValueVector, single observation
    inst = forback.new_instance()
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

    if add_print:
        print("\n\n######################\nFORWARD")
    inputs = OrtValueVector()
    for a in [X_test1, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs, training=True)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected1.ravel(), got[0].numpy().ravel(),
                          decimal=4)

    if add_print:
        print("\n\n######################\nBACKWARD")
    outputs = OrtValueVector()
    outputs.push_back(C_OrtValue.ortvalue_from_numpy(y_test1, device))
    got = inst.backward(outputs)
    # one gradient per grad input: X, coef, intercept
    self.assertEqual(len(got), 3)
    if add_print:
        print("\n######################\nEND\n")

    # OrtValueVectorN: the whole test set
    inputs = OrtValueVector()
    for a in [X_test, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs, training=True)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)
    outputs = OrtValueVector()
    outputs.push_back(
        C_OrtValue.ortvalue_from_numpy(y_test.reshape((1, -1)), device))
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)

    # list of OrtValues
    inputs = []
    for a in [X_test, coef, intercept]:
        inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
    got_ort = inst.forward(inputs, training=True)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)
    outputs = [
        C_OrtValue.ortvalue_from_numpy(y_test.reshape((1, -1)), device)]
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)

    # numpy
    inputs = [X_test, coef, intercept]
    got_ort = inst.forward(inputs, training=True)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)
    outputs = [y_test.reshape((1, -1))]
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)
def test_forward_no_training_pickle(self):
    """
    Checks that :class:`OrtGradientForwardBackward` survives a pickle
    round-trip and still predicts like scikit-learn afterwards.
    """
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtMemType, OrtDevice as C_OrtDevice)
    from onnxruntime.capi._pybind_state import (OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward

    # a simple linear regression converted to onnx
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, _ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})

    # pickle round-trip
    forback0 = OrtGradientForwardBackward(onx, debug=True)
    st = io.BytesIO()
    pickle.dump(forback0, st)
    st2 = io.BytesIO(st.getvalue())
    forback = pickle.load(st2)

    # the restored object must expose the same generated class metadata
    self.assertTrue(hasattr(forback, 'cls_type_'))
    self.assertEqual(forback.cls_type_._onx_inp,
                     ['X', 'coef', 'intercept'])
    self.assertEqual(forback.cls_type_._onx_out,
                     ['X_grad', 'coef_grad', 'intercept_grad'])
    self.assertEqual(forback.cls_type_._weights_to_train,
                     ['coef', 'intercept'])
    self.assertEqual(forback.cls_type_._grad_input_names,
                     ['X', 'coef', 'intercept'])
    self.assertEqual(forback.cls_type_._input_names, ['X'])
    self.assertEqual(forback.cls_type_._bw_fetches_names,
                     ['X_grad', 'coef_grad', 'intercept_grad'])
    self.assertEqual(forback.cls_type_._output_names, ['variable'])

    expected = reg.predict(X_test)
    coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
    intercept = numpy.array([reg.intercept_], dtype=numpy.float32)

    # reference predictions with a plain InferenceSession
    providers = device_to_providers('cpu')
    sess0 = InferenceSession(onx.SerializeToString(), providers=providers)
    inames = [i.name for i in sess0.get_inputs()]
    self.assertEqual(inames, ['X'])
    got = sess0.run(None, {'X': X_test})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # evaluation session restored from the pickled object
    sess_eval = forback.cls_type_._sess_eval  # pylint: disable=W0212
    inames = [i.name for i in sess_eval.get_inputs()]
    self.assertEqual(inames, ['X', 'coef', 'intercept'])
    got = sess_eval.run(None, {
        'X': X_test, 'coef': coef, 'intercept': intercept})
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValue
    inst = forback.new_instance()
    inputs = []
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)
    for a in [X_test, coef, intercept]:
        inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
    got_ort = inst.forward(inputs)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

    # OrtValueVector
    inputs = OrtValueVector()
    for a in [X_test, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)

    # numpy
    inputs = [X_test, coef, intercept]
    got = inst.forward(inputs)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].numpy().ravel(),
                          decimal=4)
# picks the device: CUDA when onnxruntime reports a GPU build, CPU otherwise
if get_device().upper() == 'GPU':
    ort_device = C_OrtDevice(C_OrtDevice.cuda(), C_OrtDevice.default_memory(),
                             0)
else:
    ort_device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(),
                             0)

# session
# NOTE(review): CPUExecutionProvider is listed first; provider order is a
# priority order, so the CPU provider may be preferred even when the CUDA
# device was selected above -- confirm the intended ordering.
sess = InferenceSession(
    onx.SerializeToString(), so,
    providers=['CPUExecutionProvider', 'CUDAExecutionProvider'])
bind = SessionIOBinding(sess._sess)

# moving the data on CPU or GPU
ort_value = C_OrtValue.ortvalue_from_numpy(X, ort_device)


#######################################
# A function which calls the API for any device.


def run_with_iobinding(sess, bind, ort_device, ort_value, dtype):
    # binds the input by raw pointer, binds the output on the same device,
    # runs, then copies the first output back to numpy
    bind.bind_input('X', ort_device, dtype, ort_value.shape(),
                    ort_value.data_ptr())
    bind.bind_output('variable', ort_device)
    sess._sess.run_with_iobinding(bind, None)
    ortvalues = bind.get_outputs()
    return ortvalues[0].numpy()


#######################################
def test_gradient_mlpregressor(self):
    """
    Compares the gradients computed through onnxcustom on a small MLP
    with the ones computed internally by scikit-learn's ``_backprop``.
    """
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)

    # small deterministic training set
    X = numpy.arange(30).reshape((-1, 3)).astype(numpy.float32) / 100
    y = numpy.arange(X.shape[0]).astype(numpy.float32)
    y = y.reshape((-1, 1))
    # momentum and regularisation disabled so both gradients match
    reg = MLPRegressor(hidden_layer_sizes=(5,), max_iter=2,
                       activation='logistic', momentum=0,
                       nesterovs_momentum=False, alpha=0)
    reg.fit(X, y.ravel())

    onx = to_onnx(reg, X, target_opset=opset)
    onx = onnx_rename_weights(onx)
    inits = ["I0_coefficient", 'I1_intercepts', 'I2_coefficient1',
             'I3_intercepts1']

    # evaluation points for the gradient comparison
    xp = numpy.arange(2 * X.shape[1]).reshape((2, -1)).astype(
        numpy.float32) / 10
    yp = numpy.array([0.5, -0.5], dtype=numpy.float32).reshape((-1, 1))

    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, learning_rate=1e-5,
        warm_start=True, max_iter=2, batch_size=10)
    train_session.fit(X, y)
    state = train_session.get_state()
    state_np = [st.numpy() for st in state]

    # gradient scikit-learn
    coef_grads = state_np[::2]
    intercept_grads = state_np[1::2]
    layer_units = [3, 5, 1]
    activations = [xp] + [None] * (len(layer_units) - 1)
    deltas = [None] * (len(activations) - 1)

    skl_pred = reg.predict(xp)

    batch_loss, coef_grads, intercept_grads = reg._backprop(  # pylint: disable=W0212
        xp, yp, activations, deltas, coef_grads, intercept_grads)
    # output-layer delta of the squared loss
    deltas = activations[-1] - yp

    # gradient onnxcustom
    ort_xp = C_OrtValue.ortvalue_from_numpy(xp, train_session.device)
    ort_yp = C_OrtValue.ortvalue_from_numpy(yp, train_session.device)
    ort_state = [ort_xp] + state
    prediction = train_session.train_function_.forward(
        ort_state, training=True)

    ort_pred = prediction[0].numpy()
    self.assertEqualArray(skl_pred.ravel(), ort_pred.ravel(), decimal=2)

    loss, loss_gradient = train_session.learning_loss.loss_gradient(
        train_session.device, ort_yp, prediction[0])

    gradient = train_session.train_function_.backward([loss_gradient])

    # comparison: loss (scaled by batch size) and output deltas
    self.assertEqualArray(
        batch_loss, loss.numpy() / xp.shape[0], decimal=3)
    self.assertEqualArray(deltas, loss_gradient.numpy(), decimal=3)

    # do not use iterator for gradient, it may crash
    # gradient[0] is the gradient w.r.t. X, dropped by the [1:]
    ort_grad = [gradient[i].numpy() / xp.shape[0]
                for i in range(len(gradient))][1:]
    self.assertEqualArray(
        intercept_grads[1], ort_grad[3].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[1], ort_grad[2], decimal=2)
    self.assertEqualArray(
        intercept_grads[0], ort_grad[1].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[0], ort_grad[0], decimal=2)
# With onnxruntime. sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) y_cpu = sess.run(None, {'X': x})[0] ####################################### # Execution on GPU # ++++++++++++++++ # # If available... if get_device().upper() == 'GPU': dev = get_ort_device('cuda:0') try: gx = C_OrtValue.ortvalue_from_numpy(x, dev) cuda = True except RuntimeError as e: print(e) cuda = False else: cuda = False if cuda: sessg = InferenceSession(onx.SerializeToString(), providers=["CUDAExecutionProvider"]) io_binding = sessg.io_binding()._iobinding io_binding.bind_input('X', dev, numpy.float32, gx.shape(), gx.data_ptr()) io_binding.bind_output('Y', dev) sessg._sess.run_with_iobinding(io_binding, None)