Example #1
 def test_ort_gradient_optimizers_use_numpy_nesterov(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     X_train, _, y_train, __ = train_test_split(X, y)
     reg = LinearRegression()
     reg.fit(X_train, y_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx)
     inits = ['intercept', 'coef']
     self.assertRaise(
         lambda: OrtGradientOptimizer(
             onx_loss, inits, learning_rate="Nesterov"),
         NotImplementedError)
Example #2
 def test_ort_gradient_optimizers_use_numpy_w_l1(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
     X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
     reg = LinearRegression()
     reg.fit(X_train, y_train, sample_weight=w_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx, weight_name='weight', score_name='l1')
     inits = ['intercept', 'coef']
     train_session = OrtGradientOptimizer(onx_loss,
                                          inits,
                                          learning_rate=1e-3)
     self.assertRaise(lambda: train_session.get_state(), AttributeError)
     train_session.fit(X_train, y_train, w_train, use_numpy=True)
     state_tensors = train_session.get_state()
     self.assertEqual(len(state_tensors), 2)
     r = repr(train_session)
     self.assertIn("OrtGradientOptimizer(model_onnx=", r)
     self.assertIn("learning_rate='invscaling'", r)
     losses = train_session.train_losses_
     self.assertGreater(len(losses), 1)
     self.assertFalse(any(map(numpy.isnan, losses)))
Example #3
    def test_add_loss_output_cls(self):
        from onnxcustom.utils.orttraining_helper import add_loss_output
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        reg = LogisticRegression()
        reg.fit(X_train, y_train)
        reg.coef_ = reg.coef_.reshape((1, -1))
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearClassifier'},
                      options={'zipmap': False})
        onx_loss = add_loss_output(onx,
                                   'log',
                                   output_index='probabilities',
                                   eps=1e-6)
        try:
            text = onnx_simple_text_plot(onx_loss)
        except RuntimeError:
            text = ""
        if text:
            self.assertIn("Clip(probabilities", text)

        oinf = OnnxInference(onx_loss)
        output = oinf.run({'X': X_test, 'label': y_test.reshape((-1, 1))})
        loss = output['loss']
        skl_loss = log_loss(y_test, reg.predict_proba(X_test), eps=1e-6)
        self.assertLess(numpy.abs(skl_loss - loss[0, 0]), 1e-5)
Example #4
 def test_ort_gradient_optimizers_optimal_use_ort(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     X_train, _, y_train, __ = train_test_split(X, y)
     reg = LinearRegression()
     reg.fit(X_train, y_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     onx_loss = add_loss_output(onx)
     inits = ['intercept', 'coef']
     train_session = OrtGradientOptimizer(
         onx_loss,
         inits,
         max_iter=10,
         learning_rate=LearningRateSGD(learning_rate='optimal'))
     self.assertRaise(lambda: train_session.get_state(), AttributeError)
     train_session.fit(X_train, y_train, use_numpy=False)
     state_tensors = train_session.get_state()
     self.assertEqual(len(state_tensors), 2)
     r = repr(train_session)
     self.assertIn("OrtGradientOptimizer(model_onnx=", r)
     self.assertIn("learning_rate='optimal'", r)
     losses = train_session.train_losses_
     self.assertGreater(len(losses), 1)
     self.assertFalse(any(map(numpy.isnan, losses)))
Example #5
 def test_ort_gradient_optimizers_use_numpy_nan_w(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
     X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
     reg = LinearRegression()
     reg.fit(X_train, y_train, w_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx, weight_name='weight')
     inits = ['intercept', 'coef']
     train_session = OrtGradientOptimizer(onx_loss,
                                          inits,
                                          learning_rate=1e3)
     self.assertRaise(
         lambda: train_session.fit(
             X_train, y_train, w_train, use_numpy=True), ConvergenceError)
Example #6
 def wtest_ort_gradient_optimizers_binary(self, use_weight=False):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
     y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape(
         (-1, 1)) > 10
     X = X.astype(numpy.float32)
     y = y.astype(numpy.int64)
     y[0, 0] = 0
     y[-1, 0] = 1
     w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
     X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
     reg = SGDClassifier(loss='log')
     reg.fit(X_train, y_train.ravel())
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearClassifier'},
                   options={'zipmap': False})
     onx_loss = add_loss_output(
         onx,
         'log',
         output_index=1,
         weight_name='weight' if use_weight else None)
     inits = ['intercept', 'coef']
     inputs = onx_loss.graph.input
     self.assertEqual(len(inputs), 3 if use_weight else 2)
     dt = inputs[1].type.tensor_type.elem_type
     self.assertEqual(TensorProto.INT64, dt)  # pylint: disable=E1101
     train_session = OrtGradientOptimizer(onx_loss,
                                          inits,
                                          learning_rate=1e9)
     self.assertRaise(lambda: train_session.get_state(), AttributeError)
     if use_weight:
         train_session.fit(X_train,
                           y_train.reshape((-1, 1)),
                           w_train.reshape((-1, 1)),
                           use_numpy=False)
     else:
         train_session.fit(X_train,
                           y_train.reshape((-1, 1)),
                           use_numpy=False)
     state_tensors = train_session.get_state()
     self.assertEqual(len(state_tensors), 2)
     r = repr(train_session)
     self.assertIn("OrtGradientOptimizer(model_onnx=", r)
     self.assertIn("learning_rate='invscaling'", r)
     losses = train_session.train_losses_
     self.assertGreater(len(losses), 1)
     if any(map(numpy.isnan, losses)):
         raise AssertionError(losses)
Example #7
 def test_add_log_loss(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     ide = OnnxIdentity("X", op_version=opset, output_names=['Y'])
     onx = ide.to_onnx(inputs={'X': DoubleTensorType()},
                       outputs={'Y': DoubleTensorType()},
                       target_opset=opset)
     onx_loss = add_loss_output(onx, 'log', eps=1e-6)
     x1 = numpy.array([0, 0, 0.2, 0.5, 0.8, 1, 1])
     X = numpy.vstack([1 - x1, x1]).T.astype(numpy.float64)
     y = numpy.array([0, 1, 0, 1, 1, 1, 0], dtype=numpy.int64)
     oinf = OnnxInference(onx_loss)
     output = oinf.run({'X': X, 'label': y.reshape((-1, 1))})
     loss = output['loss']
     skl_loss = log_loss(y, X[:, 1], eps=1e-6)
     self.assertLess(numpy.abs(skl_loss - loss[0, 0]), 1e-5)
Example #8
    def test_ort_gradient_optimizers_use_numpy_pickle(self):
        from onnxcustom.utils.orttraining_helper import add_loss_output
        from onnxcustom.training.optimizers import OrtGradientOptimizer
        X, y = make_regression(  # pylint: disable=W0632
            100,
            n_features=10,
            bias=2,
            random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
        X_train, _, y_train, __ = train_test_split(X, y)
        reg = LinearRegression()
        reg.fit(X_train, y_train)
        reg.coef_ = reg.coef_.reshape((1, -1))
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearRegressor'})
        set_model_props(onx, {'info': 'unit test'})
        onx_loss = add_loss_output(onx)
        inits = ['intercept', 'coef']
        train_session0 = OrtGradientOptimizer(onx_loss, inits)

        st = io.BytesIO()
        pickle.dump(train_session0, st)
        st2 = io.BytesIO(st.getvalue())
        train_session1 = pickle.load(st2)

        train_session1.fit(X_train, y_train, use_numpy=True)

        st = io.BytesIO()
        pickle.dump(train_session1, st)
        st2 = io.BytesIO(st.getvalue())
        train_session = pickle.load(st2)
        state_tensors = train_session.get_state()
        self.assertEqual(len(state_tensors), 2)

        train_session.fit(X_train, y_train, use_numpy=True)
        state_tensors = train_session.get_state()
        self.assertEqual(len(state_tensors), 2)
        r = repr(train_session)
        self.assertIn("OrtGradientOptimizer(model_onnx=", r)
        self.assertIn("learning_rate='invscaling'", r)
        losses = train_session.train_losses_
        self.assertGreater(len(losses), 1)
        self.assertFalse(any(map(numpy.isnan, losses)))
Example #9
 def wtest_ort_gradient_optimizers_reg(self, use_weight=False):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
     y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
     y[0, 0] += 1
     y[-1, 0] += 1
     w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
     X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
     reg = SGDRegressor()
     reg.fit(X_train, y_train.ravel())
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     onx_loss = add_loss_output(
         onx, 'squared_error', weight_name='weight' if use_weight else None)
     inits = ['intercept', 'coef']
     inputs = onx_loss.graph.input
     self.assertEqual(len(inputs), 3 if use_weight else 2)
     train_session = OrtGradientOptimizer(onx_loss,
                                          inits,
                                          learning_rate=1e9)
     self.assertRaise(lambda: train_session.get_state(), AttributeError)
     if use_weight:
         self.assertRaise(
             lambda: train_session.fit(X_train,
                                       y_train.reshape((-1, 1)),
                                       w_train.reshape((-1, 1)),
                                       use_numpy=False), ConvergenceError)
     else:
         self.assertRaise(
             lambda: train_session.fit(
                 X_train, y_train.reshape((-1, 1)), use_numpy=False),
             ConvergenceError)
     state_tensors = train_session.get_state()
     self.assertEqual(len(state_tensors), 2)
     r = repr(train_session)
     self.assertIn("OrtGradientOptimizer(model_onnx=", r)
     self.assertIn("learning_rate='invscaling'", r)
     losses = train_session.train_losses_
     self.assertGreater(len(losses), 1)
     if any(map(numpy.isnan, losses)):
         raise AssertionError(losses)
Example #10
 def test_add_loss_output_reg_l1(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     X, y = make_regression(  # pylint: disable=W0632
         100, n_features=10, bias=2)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     reg = LinearRegression()
     reg.fit(X_train, y_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     onx_loss = add_loss_output(onx, 'l1')
     oinf = OnnxInference(onx_loss)
     output = oinf.run({'X': X_test, 'label': y_test.reshape((-1, 1))})
     loss = output['loss']
      skl_loss = mean_absolute_error(reg.predict(X_test), y_test)
     self.assertLess(numpy.abs(skl_loss - loss[0, 0]), 1e-2)
Example #11
 def test_ort_gradient_optimizers_use_numpy_saved(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     X_train, _, y_train, __ = train_test_split(X, y)
     reg = LinearRegression()
     reg.fit(X_train, y_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx)
     inits = ['intercept', 'coef']
     temp = get_temp_folder(__file__, "temp_OrtGradientOptimizer")
     filename = os.path.join(temp, "saved.onnx")
     train_session = OrtGradientOptimizer(onx_loss,
                                          inits,
                                          learning_rate=1e-3,
                                          saved_gradient=filename)
     self.assertRaise(lambda: train_session.get_state(), AttributeError)
     train_session.fit(X_train, y_train, use_numpy=True)
     state_tensors = train_session.get_state()
     self.assertEqual(len(state_tensors), 2)
     r = repr(train_session)
     self.assertIn("OrtGradientOptimizer(model_onnx=", r)
     self.assertIn("learning_rate='invscaling'", r)
     losses = train_session.train_losses_
     self.assertGreater(len(losses), 1)
     self.assertFalse(any(map(numpy.isnan, losses)))
     self.assertExists(filename)
Example #12
 def test_grad_helper_loss(self):
     temp = get_temp_folder(__file__, "temp_grad_helper_loss")
     grad_file = os.path.join(temp, "grad.onnx")
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     reg = LinearRegression()
     reg.fit(X, y)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg, X, target_opset=opset, black_op={'LinearRegressor'})
     onx_loss = add_loss_output(onx)
     text1 = onnx_simple_text_plot(onx_loss)
     new_onx = onnx_derivative(onx,
                               options=DerivativeOptions.Loss,
                               label='variable',
                               loss='loss',
                               path_name=grad_file)
     text2 = onnx_simple_text_plot(new_onx)
     self.assertNotEqual(text1, text2)
Example #13
# `lr` is assumed to be the fitted scikit-learn model converted below;
# the beginning of this gallery snippet is truncated in the source.
onx = to_onnx(lr,
              X_train[:1].astype(numpy.float32),
              target_opset=15,
              black_op={'LinearRegressor'})

###############################################
# Choosing a loss
# +++++++++++++++
#
# The training requires a loss function. By default, it
# is the squared error, but it could be the absolute error or
# include regularization. Function
# :func:`add_loss_output
# <onnxcustom.utils.orttraining_helper.add_loss_output>`
# appends the loss function to the ONNX graph.

onx_train = add_loss_output(onx)

plot_onnxs(onx,
           onx_train,
           title=['Linear Regression', 'Linear Regression + Loss with ONNX'])

#####################################
# Let's check inference is working.

sess = InferenceSession(onx_train.SerializeToString(),
                        providers=['CPUExecutionProvider'])
res = sess.run(None, {'X': X_test, 'label': y_test.reshape((-1, 1))})
print(f"onnx loss={res[0][0, 0] / X_test.shape[0]!r}")

#####################################
# Weights
# +++++++
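#
# A minimal sketch of what this truncated section likely shows, assuming
# :func:`get_train_initializer
# <onnxcustom.utils.orttraining_helper.get_train_initializer>` is the
# helper intended here (it is the one Example #14 below uses): it returns
# the initializers, i.e. the weights the training session may update.

from onnxcustom.utils.orttraining_helper import get_train_initializer

weights = get_train_initializer(onx)
print('weights:', list(sorted(weights)))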
Example #14
def benchmark(N=1000, n_features=100, hidden_layer_sizes="50,10", max_iter=1000,
              learning_rate_init=1e-4, batch_size=100, run_skl=True,
              device='cpu', opset=14):
    """
    Compares :epkg:`onnxruntime-training` to :epkg:`scikit-learn` for
    training. Training algorithm is SGD.

    :param N: number of observations to train on
    :param n_features: number of features
    :param hidden_layer_sizes: hidden layer sizes, comma separated values
    :param max_iter: number of iterations
    :param learning_rate_init: initial learning rate
    :param batch_size: batch size
    :param run_skl: train scikit-learn under the same conditions (True) or
        just run a single iteration with *scikit-learn*
    :param device: `'cpu'` or `'cuda'`
    :param opset: opset to choose for the conversion
    """
    N = int(N)
    n_features = int(n_features)
    max_iter = int(max_iter)
    learning_rate_init = float(learning_rate_init)
    batch_size = int(batch_size)
    run_skl = run_skl in (1, True, '1', 'True')

    print("N=%d" % N)
    print("n_features=%d" % n_features)
    print(f"hidden_layer_sizes={hidden_layer_sizes!r}")
    print("max_iter=%d" % max_iter)
    print(f"learning_rate_init={learning_rate_init:f}")
    print("batch_size=%d" % batch_size)
    print(f"run_skl={run_skl!r}")
    print(f"opset={opset!r}")
    print(f"device={device!r}")
    print('------------------')

    if not isinstance(hidden_layer_sizes, tuple):
        hidden_layer_sizes = tuple(map(int, hidden_layer_sizes.split(",")))
    X, y = make_regression(N, n_features=n_features, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    nn = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                      max_iter=max_iter if run_skl else 1,
                      solver='sgd', learning_rate_init=learning_rate_init,
                      n_iter_no_change=max_iter, batch_size=batch_size)

    begin = time.perf_counter()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nn.fit(X_train, y_train)
    dur_skl = time.perf_counter() - begin

    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))

    # conversion to ONNX
    onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=opset)

    # add loss
    onx_train = add_loss_output(onx)

    # list of weights
    weights = get_train_initializer(onx)
    print('weights:', list(sorted(weights)))

    # training
    print(f"device={device!r} get_device()={get_device()!r}")

    #######################################
    # The training session.

    train_session = OrtGradientOptimizer(
        onx_train, list(weights), device=device, verbose=0,
        learning_rate=learning_rate_init,
        warm_start=False, max_iter=max_iter, batch_size=batch_size)

    begin = time.perf_counter()
    train_session.fit(X, y)
    dur_ort = time.perf_counter() - begin
    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))
    print("time_ort=%r, last_trained_error=%r" % (
        dur_ort, train_session.train_losses_[-1]))
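
A hedged usage sketch (not part of the original listing): every argument of
benchmark() is coerced to its expected type inside the function, so it can be
called directly or fed string values coming from a command line.

if __name__ == "__main__":
    # small CPU run; hidden_layer_sizes is parsed from the comma-separated
    # string inside benchmark()
    benchmark(N=1000, n_features=100, hidden_layer_sizes="50,10",
              max_iter=200, learning_rate_init=1e-4, batch_size=100,
              run_skl=True, device='cpu', opset=14)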
Example #15
    def wtest_ort_gradient_optimizers_binary(self, use_weight=False):
        from onnxcustom.utils.orttraining_helper import add_loss_output
        from onnxcustom.training.optimizers import OrtGradientOptimizer
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10, random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
        w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
        X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
        reg = SGDClassifier(loss='log')
        reg.fit(X_train, y_train)
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearClassifier'},
                      options={'zipmap': False})
        onx2 = load_onnx(BytesIO(onx.SerializeToString()))
        set_model_props(onx, {'info': 'unit test'})
        onx_loss = add_loss_output(
            onx,
            'log',
            output_index=1,
            weight_name='weight' if use_weight else None)
        inits = ['intercept', 'coef']
        inputs = onx_loss.graph.input
        self.assertEqual(len(inputs), 3 if use_weight else 2)
        dt = inputs[1].type.tensor_type.elem_type
        self.assertEqual(TensorProto.INT64, dt)  # pylint: disable=E1101
        train_session = OrtGradientOptimizer(onx_loss,
                                             inits,
                                             learning_rate=1e-3)
        self.assertRaise(lambda: train_session.get_state(), AttributeError)
        if use_weight:
            train_session.fit(X_train,
                              y_train.reshape((-1, 1)),
                              w_train.reshape((-1, 1)),
                              use_numpy=False)
        else:
            train_session.fit(X_train,
                              y_train.reshape((-1, 1)),
                              use_numpy=False)
        state_tensors = train_session.get_state()
        self.assertEqual(len(state_tensors), 2)
        r = repr(train_session)
        self.assertIn("OrtGradientOptimizer(model_onnx=", r)
        self.assertIn("learning_rate='invscaling'", r)
        losses = train_session.train_losses_
        self.assertGreater(len(losses), 1)
        self.assertFalse(any(map(numpy.isnan, losses)))

        # get_trained_weight
        trained_onnx = train_session.get_trained_onnx(model=onx2)
        sess = InferenceSession(onx2.SerializeToString(),
                                providers=['CPUExecutionProvider'])
        got1 = sess.run(None, {'X': X_train})
        sess = InferenceSession(trained_onnx.SerializeToString(),
                                providers=['CPUExecutionProvider'])
        got2 = sess.run(None, {'X': X_train})
        self.assertEqual(len(got1), len(got2))
        self.assertEqual(got1[0].shape, got2[0].shape)

        # state
        state = train_session.get_state()
        self.assertIsInstance(state, dict)
        train_session.set_state(state)