def test_ort_gradient_optimizers_use_numpy_nan_w(self):
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = LinearRegression()
    reg.fit(X_train, y_train, sample_weight=w_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx, weight_name='weight')
    inits = ['intercept', 'coef']
    # An oversized learning rate (1e3) makes the loss diverge,
    # which must be reported as a ConvergenceError.
    train_session = OrtGradientOptimizer(onx_loss, inits, learning_rate=1e3)
    self.assertRaise(
        lambda: train_session.fit(
            X_train, y_train, w_train, use_numpy=True),
        ConvergenceError)
def test_ort_gradient_optimizers_use_numpy_nesterov(self):
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx)
    inits = ['intercept', 'coef']
    self.assertRaise(
        lambda: OrtGradientOptimizer(
            onx_loss, inits, learning_rate="Nesterov"),
        NotImplementedError)
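# A minimal sketch (not part of the original tests) of the ways to specify
# a learning rate that this file actually exercises; 'Nesterov' is not one
# of them and raises NotImplementedError above. The module path for
# LearningRateSGD follows onnxcustom's documentation and is an assumption
# about where the class lives.
def _example_learning_rates(onx_loss, inits):
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    from onnxcustom.training.sgd_learning_rate import LearningRateSGD
    # a plain float: used as the initial rate with the default
    # 'invscaling' schedule
    OrtGradientOptimizer(onx_loss, inits, learning_rate=1e-3)
    # an explicit schedule object, as in the 'optimal' test below
    return OrtGradientOptimizer(
        onx_loss, inits,
        learning_rate=LearningRateSGD(learning_rate='optimal'))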
def wtest_ort_gradient_optimizers_binary(self, use_weight=False):
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
    y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape(
        (-1, 1)) > 10
    X = X.astype(numpy.float32)
    y = y.astype(numpy.int64)
    y[0, 0] = 0
    y[-1, 0] = 1
    w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = SGDClassifier(loss='log')
    reg.fit(X_train, y_train.ravel())
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearClassifier'},
                  options={'zipmap': False})
    onx_loss = add_loss_output(
        onx, 'log', output_index=1,
        weight_name='weight' if use_weight else None)
    inits = ['intercept', 'coef']
    inputs = onx_loss.graph.input
    self.assertEqual(len(inputs), 3 if use_weight else 2)
    # the expected-label input of the loss graph must be INT64
    dt = inputs[1].type.tensor_type.elem_type
    self.assertEqual(TensorProto.INT64, dt)  # pylint: disable=E1101
    train_session = OrtGradientOptimizer(onx_loss, inits,
                                         learning_rate=1e9)
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    if use_weight:
        train_session.fit(X_train, y_train.reshape((-1, 1)),
                          w_train.reshape((-1, 1)), use_numpy=False)
    else:
        train_session.fit(X_train, y_train.reshape((-1, 1)),
                          use_numpy=False)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    if any(map(numpy.isnan, losses)):
        raise AssertionError(losses)
def test_ort_gradient_optimizers_use_numpy_w_l1(self):
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = LinearRegression()
    reg.fit(X_train, y_train, sample_weight=w_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx, weight_name='weight', score_name='l1')
    inits = ['intercept', 'coef']
    train_session = OrtGradientOptimizer(onx_loss, inits,
                                         learning_rate=1e-3)
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    train_session.fit(X_train, y_train, w_train, use_numpy=True)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))
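# For intuition only: a numpy sketch of the weighted losses that
# add_loss_output is asked to build above. Whether the generated graph
# reduces with a sum or a mean is an assumption; the test only checks
# that training runs without producing NaN.
def _weighted_losses_sketch(expected, predicted, weight):
    diff = expected.ravel() - predicted.ravel()
    squared = (weight * diff ** 2).sum()   # default squared error
    l1 = (weight * numpy.abs(diff)).sum()  # score_name='l1'
    return squared, l1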
def test_ort_gradient_optimizers_optimal_use_ort(self):
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    from onnxcustom.training.sgd_learning_rate import LearningRateSGD
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    onx_loss = add_loss_output(onx)
    inits = ['intercept', 'coef']
    train_session = OrtGradientOptimizer(
        onx_loss, inits, max_iter=10,
        learning_rate=LearningRateSGD(learning_rate='optimal'))
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    train_session.fit(X_train, y_train, use_numpy=False)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='optimal'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))
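# The schedule names mirror scikit-learn's SGD conventions. Assuming
# LearningRateSGD keeps scikit-learn's definitions unchanged (an
# assumption; the tests only assert the names), the rate at step t is:
#   'invscaling': eta(t) = eta0 / t**power_t
#   'optimal':    eta(t) = 1.0 / (alpha * (t + t0))
def _eta_sketch(t, schedule='invscaling', eta0=0.01, power_t=0.25,
                alpha=1e-4, t0=1.0):
    if schedule == 'invscaling':
        return eta0 / t ** power_t
    return 1.0 / (alpha * (t + t0))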
def test_ort_gradient_optimizers_use_numpy_pickle(self):
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx)
    inits = ['intercept', 'coef']
    train_session0 = OrtGradientOptimizer(onx_loss, inits)
    # pickle round trip before training
    st = io.BytesIO()
    pickle.dump(train_session0, st)
    st2 = io.BytesIO(st.getvalue())
    train_session1 = pickle.load(st2)
    train_session1.fit(X_train, y_train, use_numpy=True)
    # pickle round trip after training
    st = io.BytesIO()
    pickle.dump(train_session1, st)
    st2 = io.BytesIO(st.getvalue())
    train_session = pickle.load(st2)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    train_session.fit(X_train, y_train, use_numpy=True)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))
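# Why the pickle round trip above can work (a generic sketch, not
# necessarily the library's exact implementation): onnxruntime sessions
# are not picklable, so an optimizer like this one must keep the
# serialized ONNX bytes in its state and rebuild the session after load.
class _PicklableOptimizerSketch:
    def __init__(self, model_onnx):
        self.model_onnx = model_onnx  # an onnx.ModelProto

    def __getstate__(self):
        # drop live sessions, keep only serializable bytes
        return {'model_onnx': self.model_onnx.SerializeToString()}

    def __setstate__(self, state):
        from io import BytesIO
        from onnx import load
        self.model_onnx = load(BytesIO(state['model_onnx']))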
def wtest_ort_gradient_optimizers_reg(self, use_weight=False):
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
    y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
    y[0, 0] += 1
    y[-1, 0] += 1
    w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = SGDRegressor()
    reg.fit(X_train, y_train.ravel())
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    onx_loss = add_loss_output(
        onx, 'squared_error',
        weight_name='weight' if use_weight else None)
    inits = ['intercept', 'coef']
    inputs = onx_loss.graph.input
    self.assertEqual(len(inputs), 3 if use_weight else 2)
    train_session = OrtGradientOptimizer(onx_loss, inits,
                                         learning_rate=1e9)
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    if use_weight:
        self.assertRaise(
            lambda: train_session.fit(
                X_train, y_train.reshape((-1, 1)),
                w_train.reshape((-1, 1)), use_numpy=False),
            ConvergenceError)
    else:
        self.assertRaise(
            lambda: train_session.fit(
                X_train, y_train.reshape((-1, 1)), use_numpy=False),
            ConvergenceError)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    if any(map(numpy.isnan, losses)):
        raise AssertionError(losses)
def test_ort_gradient_optimizers_use_numpy_saved(self):
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx)
    inits = ['intercept', 'coef']
    temp = get_temp_folder(__file__, "temp_OrtGradientOptimizer")
    filename = os.path.join(temp, "saved.onnx")
    train_session = OrtGradientOptimizer(
        onx_loss, inits, learning_rate=1e-3, saved_gradient=filename)
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    train_session.fit(X_train, y_train, use_numpy=True)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))
    self.assertExists(filename)
# Stochastic Gradient Descent
# +++++++++++++++++++++++++++
#
# The training logic is hidden in the class
# :class:`OrtGradientOptimizer
# <onnxcustom.training.optimizers.OrtGradientOptimizer>`.
# It follows the :epkg:`scikit-learn` API (see `SGDRegressor
# <https://scikit-learn.org/stable/modules/
# generated/sklearn.linear_model.SGDRegressor.html>`_).
# The gradient graph is not available at this stage.

train_session = OrtGradientOptimizer(
    onx_train, list(weights), device=device, verbose=1,
    learning_rate=1e-2, warm_start=False, max_iter=200,
    batch_size=10, saved_gradient="saved_gradient.onnx")

train_session.fit(X, y)

######################################
# And the trained coefficients are...

state_tensors = train_session.get_state()
pprint(["trained coefficients:", state_tensors])
print("last_losses:", train_session.train_losses_[-5:])

min_length = min(len(train_session.train_losses_), len(lr.loss_curve_))
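######################################
# A possible continuation (sketch): compare both loss curves over the
# first *min_length* iterations. *lr* is assumed to be the scikit-learn
# model fitted earlier in this example.

import matplotlib.pyplot as plt

plt.plot(train_session.train_losses_[:min_length],
         label="OrtGradientOptimizer")
plt.plot(lr.loss_curve_[:min_length], label="scikit-learn")
plt.yscale('log')
plt.legend()
plt.title("Training loss")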
def benchmark(N=1000, n_features=100, hidden_layer_sizes="50,10",
              max_iter=1000, learning_rate_init=1e-4, batch_size=100,
              run_skl=True, device='cpu', opset=14):
    """
    Compares :epkg:`onnxruntime-training` to :epkg:`scikit-learn`
    for training. The training algorithm is SGD.

    :param N: number of observations to train on
    :param n_features: number of features
    :param hidden_layer_sizes: hidden layer sizes, comma separated values
    :param max_iter: number of iterations
    :param learning_rate_init: initial learning rate
    :param batch_size: batch size
    :param run_skl: train scikit-learn under the same conditions (True)
        or just run a single iteration with *scikit-learn*
    :param device: `'cpu'` or `'cuda'`
    :param opset: opset to choose for the conversion
    """
    N = int(N)
    n_features = int(n_features)
    max_iter = int(max_iter)
    learning_rate_init = float(learning_rate_init)
    batch_size = int(batch_size)
    run_skl = run_skl in (1, True, '1', 'True')

    print("N=%d" % N)
    print("n_features=%d" % n_features)
    print(f"hidden_layer_sizes={hidden_layer_sizes!r}")
    print("max_iter=%d" % max_iter)
    print(f"learning_rate_init={learning_rate_init:f}")
    print("batch_size=%d" % batch_size)
    print(f"run_skl={run_skl!r}")
    print(f"opset={opset!r}")
    print(f"device={device!r}")
    print('------------------')

    if not isinstance(hidden_layer_sizes, tuple):
        hidden_layer_sizes = tuple(map(int, hidden_layer_sizes.split(",")))
    X, y = make_regression(N, n_features=n_features, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    nn = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                      max_iter=max_iter if run_skl else 1,
                      solver='sgd', learning_rate_init=learning_rate_init,
                      n_iter_no_change=max_iter, batch_size=batch_size)

    begin = time.perf_counter()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nn.fit(X_train, y_train)
    dur_skl = time.perf_counter() - begin

    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))

    # conversion to ONNX
    onx = to_onnx(nn, X_train[:1].astype(numpy.float32),
                  target_opset=opset)

    # add loss
    onx_train = add_loss_output(onx)

    # list of weights
    weights = get_train_initializer(onx)
    print('weights:', list(sorted(weights)))

    # training
    print(f"device={device!r} get_device()={get_device()!r}")

    #######################################
    # The training session.

    train_session = OrtGradientOptimizer(
        onx_train, list(weights), device=device, verbose=0,
        learning_rate=learning_rate_init, warm_start=False,
        max_iter=max_iter, batch_size=batch_size)

    begin = time.perf_counter()
    train_session.fit(X, y)
    dur_ort = time.perf_counter() - begin

    # recap of both timings
    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))
    print("time_ort=%r, last_trained_error=%r" % (
        dur_ort, train_session.train_losses_[-1]))
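# One way to invoke the benchmark (a hypothetical entry point; the
# original script may expose a different one):
if __name__ == "__main__":
    benchmark(N=1000, n_features=100, hidden_layer_sizes="50,10",
              max_iter=100, batch_size=100, device='cpu')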
def wtest_ort_gradient_optimizers_binary(self, use_weight=False):
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_classification(  # pylint: disable=W0632
        100, n_features=10, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.int64)
    w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = SGDClassifier(loss='log')
    reg.fit(X_train, y_train)
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearClassifier'},
                  options={'zipmap': False})
    onx2 = load_onnx(BytesIO(onx.SerializeToString()))
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(
        onx, 'log', output_index=1,
        weight_name='weight' if use_weight else None)
    inits = ['intercept', 'coef']
    inputs = onx_loss.graph.input
    self.assertEqual(len(inputs), 3 if use_weight else 2)
    dt = inputs[1].type.tensor_type.elem_type
    self.assertEqual(TensorProto.INT64, dt)  # pylint: disable=E1101
    train_session = OrtGradientOptimizer(onx_loss, inits,
                                         learning_rate=1e-3)
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    if use_weight:
        train_session.fit(X_train, y_train.reshape((-1, 1)),
                          w_train.reshape((-1, 1)), use_numpy=False)
    else:
        train_session.fit(X_train, y_train.reshape((-1, 1)),
                          use_numpy=False)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))

    # get_trained_onnx: export the trained weights into a copy of the model
    trained_onnx = train_session.get_trained_onnx(model=onx2)
    sess = InferenceSession(onx2.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    got1 = sess.run(None, {'X': X_train})
    sess = InferenceSession(trained_onnx.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    got2 = sess.run(None, {'X': X_train})
    self.assertEqual(len(got1), len(got2))
    self.assertEqual(got1[0].shape, got2[0].shape)

    # state round trip
    state = train_session.get_state()
    self.assertIsInstance(state, dict)
    train_session.set_state(state)
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    nn.fit(X_train, y_train)

########################################
# Conversion to ONNX and trainer initialization

onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=15)
onx_train = add_loss_output(onx)

weights = get_train_initializer(onx)
pprint(list((k, v[0].shape) for k, v in weights.items()))

train_session = OrtGradientOptimizer(
    onx_train, list(weights), device='cpu', learning_rate=1e-5,
    warm_start=False, max_iter=max_iter, batch_size=batch_size)

benches = [benchmark(nn, train_session, name='NN-CPU')]

######################################
# Profiling
# +++++++++


def clean_name(text):
    # strip long absolute file paths down to the package name
    pos = text.find('onnxruntime')
    if pos >= 0:
        return text[pos:]
    pos = text.find('onnxcustom')
    if pos >= 0:
        return text[pos:]
    return text
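######################################
# Hypothetical usage of *clean_name*: shorten the absolute paths reported
# by a profiler so they stay readable in a table.

print(clean_name(
    "/usr/lib/python3/site-packages/onnxruntime/capi/_pybind_state.py"))
# -> 'onnxruntime/capi/_pybind_state.py'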