def test_gradient_mlpclassifier(self):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
    # The following imports live at module level in the original test file;
    # they are gathered here so the snippet is self-contained. The exact
    # import paths are assumptions based on onnxcustom's test suite and may
    # differ between versions.
    import numpy
    from scipy.special import expit
    from sklearn.neural_network import MLPClassifier
    from skl2onnx import to_onnx
    from onnxruntime.capi._pybind_state import OrtValue as C_OrtValue
    from mlprodict.onnx_tools.onnx_manipulations import (
        select_model_inputs_outputs)
    from mlprodict.plotting.text_plot import onnx_simple_text_plot
    from onnxcustom import __max_supported_opset__ as opset
    from onnxcustom.utils.onnx_helper import onnx_rename_weights

    # 10 samples, 3 features, binary labels.
    X = numpy.arange(30).reshape((-1, 3)).astype(numpy.float32) / 100
    y = numpy.arange(X.shape[0]).astype(numpy.float32)
    y = (y.reshape((-1, 1)) >= 15).astype(numpy.int64)
    reg = MLPClassifier(hidden_layer_sizes=(5,), max_iter=2,
                        activation='logistic', momentum=0,
                        nesterovs_momentum=False, alpha=0)
    reg.fit(X, y.ravel())

    # Convert to ONNX and truncate the graph at 'add_result1', the raw
    # pre-sigmoid score, so it can be compared against expit later.
    onx = to_onnx(reg, X, target_opset=opset, options={'zipmap': False})
    onx = select_model_inputs_outputs(
        onx, outputs=['add_result1'], infer_shapes=True)
    text = onnx_simple_text_plot(onx)
    self.assertIn("output: name='add_result1'", text)

    onx = onnx_rename_weights(onx)
    inits = ["I0_coefficient", 'I1_intercepts',
             'I2_coefficient1', 'I3_intercepts1']

    # A small probe batch of two samples, pushed to opposite sides of the
    # decision boundary.
    xp = numpy.arange(2 * X.shape[1]).reshape((2, -1)).astype(
        numpy.float32) / 100
    xp[0, 0] -= 4
    xp[1, :] += 4
    yp = numpy.array([0, 1], dtype=numpy.int64).reshape((-1, 1))

    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, learning_rate=1e-5, warm_start=True, max_iter=2,
        batch_size=10, learning_loss=NegLogLearningLoss())
    train_session.fit(X, y)
    state = train_session.get_state()
    state_np = [st.numpy() for st in state]

    # gradient scikit-learn
    coef_grads = state_np[::2]
    intercept_grads = state_np[1::2]
    layer_units = [3, 5, 1]
    activations = [xp] + [None] * (len(layer_units) - 1)
    deltas = [None] * (len(activations) - 1)
    skl_pred = reg.predict_proba(xp)
    batch_loss, coef_grads, intercept_grads = reg._backprop(  # pylint: disable=W0212
        xp, yp, activations, deltas, coef_grads, intercept_grads)
    deltas = activations[-1] - yp

    # gradient onnxcustom
    ort_xp = C_OrtValue.ortvalue_from_numpy(xp, train_session.device)
    ort_yp = C_OrtValue.ortvalue_from_numpy(yp, train_session.device)
    ort_state = [ort_xp] + state
    prediction = train_session.train_function_.forward(
        ort_state, training=True)

    ort_pred = prediction[0].numpy()
    self.assertEqualArray(skl_pred[:, 1:2], expit(ort_pred), decimal=2)

    loss, loss_gradient = train_session.learning_loss.loss_gradient(
        train_session.device, ort_yp, prediction[0])
    gradient = train_session.train_function_.backward([loss_gradient])

    # comparison: scikit-learn averages the loss over the batch (2 samples
    # here) while the onnxcustom loss is summed, hence the factor 2.
    self.assertEqualArray(batch_loss * 2, loss.numpy(), decimal=3)
    self.assertEqualArray(deltas, loss_gradient.numpy(), decimal=3)

    # do not use an iterator over gradient, it may crash;
    # gradient[0] is the gradient w.r.t. the input X, hence the [1:];
    # the ONNX gradients are summed over the batch while scikit-learn's
    # are averaged, hence the division by xp.shape[0].
    ort_grad = [gradient[i].numpy() / xp.shape[0]
                for i in range(len(gradient))][1:]
    self.assertEqualArray(
        intercept_grads[1], ort_grad[3].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[1], ort_grad[2], decimal=2)
    self.assertEqualArray(
        intercept_grads[0], ort_grad[1].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[0], ort_grad[0], decimal=2)
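
# A reference sketch (added for clarity, not part of the original test): the
# comparisons above hinge on the standard binary cross-entropy identities.
# With p = sigmoid(logit) and labels y in {0, 1}, the summed negative
# log-likelihood is
#     loss = -sum(y * log(p) + (1 - y) * log(1 - p))
# and its gradient with respect to the raw logit is simply p - y, which is
# why `activations[-1] - yp` matches `loss_gradient` in the test. The helper
# name below is hypothetical; it only mirrors what the assertions check.
def _neglog_reference(logits, y):
    """Summed negative log-likelihood and its gradient w.r.t. the logits."""
    import numpy
    from scipy.special import expit
    p = expit(logits)  # sigmoid maps raw scores to probabilities
    loss = -float(numpy.sum(y * numpy.log(p) + (1 - y) * numpy.log(1 - p)))
    grad = p - y  # gradient of the summed loss w.r.t. each logit
    return loss, grad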