def test_onnx_remove_unused_outputs_new(self):
    dtype = numpy.float32
    x = numpy.array([1, 2, 4, 5, 5, 4]).astype(
        numpy.float32).reshape((3, 2))
    cop = OnnxAdd('X', numpy.array([1], dtype=dtype),
                  op_version=TARGET_OPSET)
    cop2 = OnnxAdd('X', numpy.array([1], dtype=dtype),
                   op_version=TARGET_OPSET)
    cop3 = OnnxAdd('X', numpy.array([2], dtype=dtype),
                   op_version=TARGET_OPSET,
                   output_names=['inter'])
    cop4 = OnnxSub(OnnxMul(cop, cop3, op_version=TARGET_OPSET),
                   cop2, output_names=['final'],
                   op_version=TARGET_OPSET)
    model_def0 = cop4.to_onnx({'X': x})
    model_def = select_model_inputs_outputs(
        model_def0, "inter", infer_shapes=True, remove_unused=False)
    stats = onnx_statistics(model_def, optim=True)
    c1 = model_def.SerializeToString()
    new_model = select_model_inputs_outputs(
        model_def0, "inter", infer_shapes=True)
    # the first extracted model must not be modified by the second call
    c2 = model_def.SerializeToString()
    self.assertEqual(c1, c2)
    stats2 = onnx_statistics(model_def, optim=True)
    stats3 = onnx_statistics(new_model, optim=False)
    self.assertEqual(stats['ninits'], 2)
    self.assertEqual(stats2['ninits'], 2)
    self.assertEqual(stats3['ninits'], 1)
    self.assertEqual(stats2['nnodes'], 1)
    self.assertEqual(stats3['nnodes'], 1)
    oinf1 = OnnxInference(model_def)
    y1 = oinf1.run({'X': x})
    oinf2 = OnnxInference(new_model)
    y2 = oinf2.run({'X': x})
    self.assertNotIn('final', y1)
    self.assertNotIn('final', y2)
    self.assertIn('inter', y1)
    self.assertIn('inter', y2)
    self.assertEqualArray(y1['inter'], y2['inter'])
def wtest_ort_gradient_optimizers_fw_sgd_binary(self, use_weight):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import LearningRateSGD
    from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
    X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
    y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1)) > 10
    X = X.astype(numpy.float32)
    y = y.astype(numpy.int64)
    y[0, 0] = 0
    y[-1, 0] = 1
    w = (numpy.random.rand(y.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = SGDClassifier(loss='log')
    if use_weight:
        reg.fit(X_train, y_train.ravel(),
                sample_weight=w_train.astype(numpy.float64))
    else:
        reg.fit(X_train, y_train.ravel())
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'},
                  options={'zipmap': False, 'raw_scores': True})
    onx = select_model_inputs_outputs(onx, outputs=['score'])
    self.assertIn("output: name='score'", onnx_simple_text_plot(onx))
    inits = ['coef', 'intercept']
    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, weight_name='weight' if use_weight else None,
        learning_rate=LearningRateSGD(1e10),
        learning_loss=NegLogLearningLoss(),
        warm_start=False, max_iter=100, batch_size=10,
        enable_logging=False)
    self.assertIsInstance(train_session.learning_loss, NegLogLearningLoss)
    self.assertEqual(train_session.learning_loss.eps, 1e-5)
    y_train = y_train.reshape((-1, 1))
    if use_weight:
        train_session.fit(X_train, y_train, w_train.reshape((-1, 1)))
    else:
        train_session.fit(X_train, y_train)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    if any(map(numpy.isnan, losses)):
        raise AssertionError(losses)
def wtest_ort_gradient_optimizers_grid_cls(self, use_weight=False):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import LearningRateSGD
    from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
    values = [1e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4,
              1e-3, 5e-3, 1e-2, 1e-1, 1, 10, 100, 1000]
    X = numpy.random.randn(30, 3).astype(numpy.float32)
    y = (X.sum(axis=1) >= 0).astype(numpy.int64).reshape((-1, 1))
    X += numpy.random.randn(30, 3).astype(numpy.float32) / 10
    X_train, _, y_train, __ = train_test_split(X, y)
    scorer = make_scorer(
        lambda y_true, y_pred: -log_loss(y_true, y_pred))  # pylint: disable=E1130
    reg = GridSearchCV(SGDClassifier(max_iter=20),
                       param_grid={'eta0': values},
                       scoring=scorer, cv=3)
    reg.fit(X_train, y_train.ravel())
    self.assertIsInstance(reg.best_params_, dict)
    self.assertIn('eta0', reg.best_params_)

    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearClassifier'},
                  options={'zipmap': False})
    onx = select_model_inputs_outputs(onx, outputs=['score'])
    onx = onnx_rename_weights(onx)
    inits = ['I0_coef', 'I1_intercept']

    cvalues = [LearningRateSGD(v) for v in values]
    grid = GridSearchCV(
        OrtGradientForwardBackwardOptimizer(
            onx, inits, weight_name='weight' if use_weight else None,
            learning_rate=LearningRateSGD(1e-4),
            learning_loss=NegLogLearningLoss(),
            warm_start=False, max_iter=20, batch_size=10,
            enable_logging=False, exc=False),
        param_grid={'learning_rate': cvalues}, cv=3)
    if use_weight:
        grid.fit(X_train, y_train)
    else:
        grid.fit(X_train, y_train)
    self.assertIsInstance(grid.best_params_, dict)
    self.assertEqual(len(grid.best_params_), 1)
    self.assertIsInstance(grid.best_params_['learning_rate'],
                          LearningRateSGD)
def wtest_ort_gradient_optimizers_fw_nesterov_binary_mlp(
        self, use_weight=True):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import LearningRateSGDNesterov
    from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
    X, y = make_classification(  # pylint: disable=W0632
        100, n_features=10, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.int64)
    w = (numpy.random.rand(y.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = MLPClassifier(solver='sgd')
    reg.fit(X_train, y_train)
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'},
                  options={'zipmap': False})
    onx = select_model_inputs_outputs(
        onx, outputs=['out_activations_result'])
    self.assertIn("output: name='out_activations_result'",
                  onnx_simple_text_plot(onx))
    set_model_props(onx, {'info': 'unit test'})
    onx = onnx_rename_weights(onx)
    inits = ['I0_coefficient', 'I1_intercepts',
             'I2_coefficient1', 'I3_intercepts1']
    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, weight_name='weight' if use_weight else None,
        learning_rate=LearningRateSGDNesterov(
            1e-4, nesterov=False, momentum=0.9),
        learning_loss=NegLogLearningLoss(),
        warm_start=False, max_iter=100, batch_size=10)
    self.assertIsInstance(train_session.learning_loss, NegLogLearningLoss)
    self.assertEqual(train_session.learning_loss.eps, 1e-5)
    if use_weight:
        train_session.fit(X_train, y_train, w_train)
    else:
        train_session.fit(X_train, y_train)
    temp = get_temp_folder(
        __file__,
        "temp_ort_gradient_optimizers_fw_nesterov_binary_mlp%d" % use_weight)
    train_session.save_onnx_graph(temp)
def forward_training(self, model, debug=False, n_classes=3, add_print=False):
    from onnxruntime.capi._pybind_state import (
        OrtValue as C_OrtValue, OrtMemType, OrtDevice as C_OrtDevice,
        OrtValueVector)
    from onnxcustom.training.ortgradient import OrtGradientForwardBackward

    def to_proba(yt):
        # one-hot encoding of the labels
        mx = yt.max() + 1
        new_yt = numpy.zeros((yt.shape[0], mx), dtype=numpy.float32)
        for i, y in enumerate(yt):
            new_yt[i, y] = 1
        return new_yt

    if hasattr(model.__class__, 'predict_proba'):
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10, n_classes=n_classes, n_informative=7)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
    else:
        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    reg = model
    reg.fit(X_train, y_train)
    # needed if skl2onnx<1.10.4
    # reg.coef_ = reg.coef_.reshape((1, -1))
    # reg.intercept_ = reg.intercept_.reshape((-1, ))
    if hasattr(model.__class__, 'predict_proba'):
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearClassifier'},
                      options={'zipmap': False})
        onx = select_model_inputs_outputs(
            onx, outputs=[onx.graph.output[1].name])
    else:
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearRegressor'})

    # remove batch possibility
    # onx.graph.input[0].type.tensor_type.shape.dim[0].dim_value = 0
    # onx.graph.input[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"
    # onx.graph.output[0].type.tensor_type.shape.dim[0].dim_value = 0
    # onx.graph.output[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"

    providers = device_to_providers('cpu')
    sess = InferenceSession(onx.SerializeToString(), providers=providers)
    sess.run(None, {'X': X_test[:1]})

    # starts testing
    forback = OrtGradientForwardBackward(
        onx, debug=True, enable_logging=True)
    if debug:
        n = model.__class__.__name__
        temp = get_temp_folder(__file__, f"temp_forward_training_{n}")
        with open(os.path.join(temp, f"model_{n}.onnx"), "wb") as f:
            f.write(onx.SerializeToString())
        with open(os.path.join(temp, f"fw_train_{n}.onnx"), "wb") as f:
            f.write(forback.cls_type_._trained_onnx.SerializeToString())
        with open(os.path.join(temp, f"fw_pre_{n}.onnx"), "wb") as f:
            gr = forback.cls_type_._optimized_pre_grad_model
            f.write(gr.SerializeToString())

    if hasattr(model.__class__, 'predict_proba'):
        expected = reg.predict_proba(X_test)
        coef = reg.coef_.astype(numpy.float32).T
        intercept = reg.intercept_.astype(numpy.float32)
        # only one observation
        X_test1 = X_test[:1]
        y_test = to_proba(y_test).astype(numpy.float32)
        y_test1 = y_test[:1]
        expected1 = expected[:1]
    else:
        expected = reg.predict(X_test)
        coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
        intercept = numpy.array([reg.intercept_], dtype=numpy.float32)
        # only one observation
        X_test1 = X_test[:1]
        y_test1 = y_test[0].reshape((1, -1))
        expected1 = expected[:1]

    # OrtValueVector
    inst = forback.new_instance()
    device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

    if add_print:
        print("\n\n######################\nFORWARD")
    inputs = OrtValueVector()
    for a in [X_test1, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs, training=True)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(
        expected1.ravel(), got[0].numpy().ravel(), decimal=4)

    if add_print:
        print("\n\n######################\nBACKWARD")
    outputs = OrtValueVector()
    outputs.push_back(C_OrtValue.ortvalue_from_numpy(y_test1, device))
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)
    if add_print:
        print("\n######################\nEND\n")

    # OrtValueVector, whole test set
    inputs = OrtValueVector()
    for a in [X_test, coef, intercept]:
        inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
    got = inst.forward(inputs, training=True)
    self.assertEqual(len(got), 1)
    self.assertEqualArray(
        expected.ravel(), got[0].numpy().ravel(), decimal=4)
    outputs = OrtValueVector()
    outputs.push_back(
        C_OrtValue.ortvalue_from_numpy(y_test.reshape((1, -1)), device))
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)

    # list of OrtValues
    inputs = []
    for a in [X_test, coef, intercept]:
        inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
    got_ort = inst.forward(inputs, training=True)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)
    outputs = [
        C_OrtValue.ortvalue_from_numpy(y_test.reshape((1, -1)), device)]
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)

    # numpy arrays
    inputs = [X_test, coef, intercept]
    got_ort = inst.forward(inputs, training=True)
    got = [v.numpy() for v in got_ort]
    self.assertEqual(len(got), 1)
    self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)
    outputs = [y_test.reshape((1, -1))]
    got = inst.backward(outputs)
    self.assertEqual(len(got), 3)
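# Hypothetical call sites for the helper above (the estimator classes and
# test names are assumptions, not taken from this file): a model exposing
# predict_proba exercises the classification branch, any other estimator the
# regression branch.

def test_forward_training_logistic_regression(self):
    from sklearn.linear_model import LogisticRegression
    self.forward_training(LogisticRegression(), n_classes=3)

def test_forward_training_linear_regression(self):
    from sklearn.linear_model import LinearRegression
    self.forward_training(LinearRegression())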
def test_gradient_mlpclassifier(self):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
    X = numpy.arange(30).reshape((-1, 3)).astype(numpy.float32) / 100
    y = numpy.arange(X.shape[0]).astype(numpy.float32)
    y = (y.reshape((-1, 1)) >= 15).astype(numpy.int64)
    reg = MLPClassifier(hidden_layer_sizes=(5,), max_iter=2,
                        activation='logistic',
                        momentum=0, nesterovs_momentum=False, alpha=0)
    reg.fit(X, y.ravel())

    onx = to_onnx(reg, X, target_opset=opset, options={'zipmap': False})
    onx = select_model_inputs_outputs(
        onx, outputs=['add_result1'], infer_shapes=True)
    text = onnx_simple_text_plot(onx)
    self.assertIn("output: name='add_result1'", text)

    onx = onnx_rename_weights(onx)
    inits = ["I0_coefficient", 'I1_intercepts',
             'I2_coefficient1', 'I3_intercepts1']

    xp = numpy.arange(2 * X.shape[1]).reshape((2, -1)).astype(
        numpy.float32) / 100
    xp[0, 0] -= 4
    xp[1, :] += 4
    yp = numpy.array([0, 1], dtype=numpy.int64).reshape((-1, 1))

    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, learning_rate=1e-5,
        warm_start=True, max_iter=2, batch_size=10,
        learning_loss=NegLogLearningLoss())
    train_session.fit(X, y)
    state = train_session.get_state()
    state_np = [st.numpy() for st in state]

    # gradient from scikit-learn
    coef_grads = state_np[::2]
    intercept_grads = state_np[1::2]
    layer_units = [3, 5, 1]
    activations = [xp] + [None] * (len(layer_units) - 1)
    deltas = [None] * (len(activations) - 1)

    skl_pred = reg.predict_proba(xp)

    batch_loss, coef_grads, intercept_grads = reg._backprop(  # pylint: disable=W0212
        xp, yp, activations, deltas, coef_grads, intercept_grads)
    deltas = activations[-1] - yp

    # gradient from onnxcustom
    ort_xp = C_OrtValue.ortvalue_from_numpy(xp, train_session.device)
    ort_yp = C_OrtValue.ortvalue_from_numpy(yp, train_session.device)
    ort_state = [ort_xp] + state
    prediction = train_session.train_function_.forward(
        ort_state, training=True)

    ort_pred = prediction[0].numpy()
    self.assertEqualArray(skl_pred[:, 1:2], expit(ort_pred), decimal=2)

    loss, loss_gradient = train_session.learning_loss.loss_gradient(
        train_session.device, ort_yp, prediction[0])

    gradient = train_session.train_function_.backward([loss_gradient])

    # comparison: the ONNX loss appears to sum over the batch while
    # scikit-learn averages it, hence the factor 2 (batch of 2) and the
    # division by xp.shape[0] below
    self.assertEqualArray(batch_loss * 2, loss.numpy(), decimal=3)
    self.assertEqualArray(deltas, loss_gradient.numpy(), decimal=3)

    # do not use iterator for gradient, it may crash
    ort_grad = [gradient[i].numpy() / xp.shape[0]
                for i in range(len(gradient))][1:]
    self.assertEqualArray(
        intercept_grads[1], ort_grad[3].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[1], ort_grad[2], decimal=2)
    self.assertEqualArray(
        intercept_grads[0], ort_grad[1].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[0], ort_grad[0], decimal=2)
def wtest_ort_gradient_optimizers_fw_nesterov_binary(self, use_weight):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import LearningRateSGDNesterov
    from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
    X, y = make_classification(  # pylint: disable=W0632
        100, n_features=10, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.int64)
    w = (numpy.random.rand(y.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = SGDClassifier(loss='log')
    if use_weight:
        reg.fit(X_train, y_train,
                sample_weight=w_train.astype(numpy.float64))
    else:
        reg.fit(X_train, y_train)
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'},
                  options={'zipmap': False, 'raw_scores': True})
    onx2 = onx
    onx = select_model_inputs_outputs(onx, outputs=['score'])
    self.assertIn("output: name='score'", onnx_simple_text_plot(onx))
    set_model_props(onx, {'info': 'unit test'})
    inits = ['coef', 'intercept']
    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, weight_name='weight' if use_weight else None,
        learning_rate=LearningRateSGDNesterov(
            1e-4, nesterov=False, momentum=0.9),
        learning_loss=NegLogLearningLoss(),
        warm_start=False, max_iter=100, batch_size=10)
    self.assertIsInstance(train_session.learning_loss, NegLogLearningLoss)
    self.assertEqual(train_session.learning_loss.eps, 1e-5)
    y_train = y_train.reshape((-1, 1))
    if use_weight:
        train_session.fit(X_train, y_train, w_train.reshape((-1, 1)))
    else:
        train_session.fit(X_train, y_train)
    temp = get_temp_folder(
        __file__, "temp_ort_gradient_optimizers_fw_nesterov_binary")
    train_session.save_onnx_graph(temp)

    # get_trained_onnx
    trained_onnx = train_session.get_trained_onnx(model=onx2)
    sess = InferenceSession(onx2.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    got1 = sess.run(None, {'X': X_train})
    sess = InferenceSession(trained_onnx.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    got2 = sess.run(None, {'X': X_train})
    self.assertEqual(len(got1), len(got2))
    self.assertEqual(got1[0].shape, got2[0].shape)

    # state
    state = train_session.get_state()
    self.assertIsInstance(state, list)
    train_session.set_state(state)
    for k in range(len(state)):  # pylint: disable=C0200
        state[k] = state[k].numpy()
    train_session.set_state(state)
def unreduced_onnx_loss(onx, output_name='score'):
    """
    Every loss function reduces its results to compute a single loss value.
    The score function needs the loss for every observation,
    not the aggregated loss. This function looks for the reducing node
    and removes it before exposing its input as the only output.

    :param onx: ONNX graph
    :param output_name: new output name
    :return: new ONNX graph
    """
    from mlprodict.onnx_tools.onnx_manipulations import (  # pylint: disable=C0415
        select_model_inputs_outputs)

    graph = onx.graph
    found = []
    for node in graph.node:
        if node.op_type.startswith('Reduce'):
            found.append(node)
    if len(found) != 1:
        raise RuntimeError(  # pragma: no cover
            "Unable to find one unique Reducing node but found %d - %r."
            "" % (len(found), [(n.op_type, n.name) for n in found]))
    node = found[0]
    input_name = node.input[0]
    new_onx = select_model_inputs_outputs(
        onx, outputs=[input_name], infer_shapes=True)

    inits = new_onx.graph.initializer
    inputs = new_onx.graph.input  # pylint: disable=E1101
    existing_names = _existing_names(new_onx)
    new_name = _unique_name(existing_names, output_name)
    new_nodes = list(new_onx.graph.node)  # pylint: disable=E1101
    elem = graph.output[0].type.tensor_type.elem_type
    new_output = [make_tensor_value_info(new_name, elem, [None, 1])]

    if node.op_type == "ReduceSumSquare":
        new_node = make_node('Mul', [input_name, input_name], [new_name])
        new_nodes.append(new_node)
    elif node.op_type == 'ReduceSum':
        new_node = make_node('Identity', [input_name], [new_name])
        new_nodes.append(new_node)
    else:
        raise RuntimeError(  # pragma: no cover
            f"Unable to unreduce node {node.op_type!r}.")

    graph = make_graph(new_nodes, graph.name, inputs, new_output, inits)
    new_model = make_model(graph)
    new_model.ir_version = onx.ir_version
    new_model.producer_name = onx.producer_name
    new_model.producer_version = onx.producer_version
    new_model.domain = onx.domain
    new_model.model_version = onx.model_version
    new_model.doc_string = onx.doc_string
    if hasattr(onx, 'value_info'):
        graph.value_info.extend(onx.value_info)  # pylint: disable=E1101
    del new_model.opset_import[:]  # pylint: disable=E1101
    for oimp in onx.opset_import:
        op_set = new_model.opset_import.add()  # pylint: disable=E1101
        op_set.domain = oimp.domain
        op_set.version = oimp.version
    return new_model
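# A minimal usage sketch of unreduced_onnx_loss (not taken from the library's
# tests): the tiny graph below, built with onnx.helper only for illustration,
# ends with a ReduceSumSquare node computing sum((X1 - X2)^2); the function
# replaces that reduction with an element-wise square exposed under the name
# 'score'. It assumes mlprodict is installed, since select_model_inputs_outputs
# is imported inside the function.

from onnx import TensorProto
from onnx.helper import (
    make_graph, make_model, make_node, make_tensor_value_info)

X1 = make_tensor_value_info('X1', TensorProto.FLOAT, [None, 1])
X2 = make_tensor_value_info('X2', TensorProto.FLOAT, [None, 1])
Y = make_tensor_value_info('Y', TensorProto.FLOAT, None)
nodes = [make_node('Sub', ['X1', 'X2'], ['diff']),
         make_node('ReduceSumSquare', ['diff'], ['Y'])]
onx_loss = make_model(make_graph(nodes, 'squared_loss', [X1, X2], [Y]))

new_onx = unreduced_onnx_loss(onx_loss, output_name='score')
print([o.name for o in new_onx.graph.output])  # expected: ['score']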
onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=15,
              options={'zipmap': False})

try:
    print(onnx_simple_text_plot(onx))
except RuntimeError as e:
    print("You should upgrade mlprodict.")
    print(e)

##########################################
# Raw scores are the input of operator *Sigmoid*.

onx = select_model_inputs_outputs(
    onx, outputs=["add_result2"], infer_shapes=True)
print(onnx_simple_text_plot(onx))

#########################################
# And the weights are renamed so that they follow the alphabetical order
# (see :class:`OrtGradientForwardBackward
# <onnxcustom.training.ortgradient.OrtGradientForwardBackward>`).

onx = onnx_rename_weights(onx)
print(onnx_simple_text_plot(onx))

################################################
# We select the log loss (see :class:`NegLogLearningLoss
# <onnxcustom.training.sgd_learning_loss.NegLogLearningLoss>`,
# a simple regularization defined with :class:`ElasticLearningPenalty