Esempio n. 1
0
 def test_ort_gradient_optimizers_use_numpy_nan_w(self):
     """
     Trains a weighted linear regression with a very large learning
     rate (``1e3``) and ``use_numpy=True``, and expects the training
     to raise ``ConvergenceError``.
     """
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer

     # float32 regression problem, one random weight in [1, 2) per sample
     features, target = make_regression(  # pylint: disable=W0632
         100, n_features=10, bias=2, random_state=0)
     features = features.astype(numpy.float32)
     target = target.astype(numpy.float32)
     weights = (numpy.random.rand(target.shape[0]) + 1).astype(
         features.dtype)
     X_train, _, y_train, _, w_train, _ = train_test_split(
         features, target, weights)

     # the fitted scikit-learn model provides the ONNX graph to train
     model = LinearRegression()
     model.fit(X_train, y_train, sample_weight=w_train)
     model.coef_ = model.coef_.reshape((1, -1))
     onx = to_onnx(
         model, X_train, target_opset=opset, black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx, weight_name='weight')

     optimizer = OrtGradientOptimizer(
         onx_loss, ['intercept', 'coef'], learning_rate=1e3)

     def fit_with_weights():
         return optimizer.fit(X_train, y_train, w_train, use_numpy=True)

     self.assertRaise(fit_with_weights, ConvergenceError)
Esempio n. 2
0
 def test_ort_gradient_optimizers_use_numpy_nesterov(self):
     """
     Expects the constructor of ``OrtGradientOptimizer`` to raise
     ``NotImplementedError`` when ``learning_rate="Nesterov"``.
     """
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer

     features, target = make_regression(  # pylint: disable=W0632
         100, n_features=10, bias=2, random_state=0)
     features = features.astype(numpy.float32)
     target = target.astype(numpy.float32)
     X_train, _, y_train, _ = train_test_split(features, target)

     # the fitted scikit-learn model provides the ONNX graph to train
     model = LinearRegression()
     model.fit(X_train, y_train)
     model.coef_ = model.coef_.reshape((1, -1))
     onx = to_onnx(
         model, X_train, target_opset=opset, black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx)
     inits = ['intercept', 'coef']

     def build_optimizer():
         return OrtGradientOptimizer(
             onx_loss, inits, learning_rate="Nesterov")

     self.assertRaise(build_optimizer, NotImplementedError)
Esempio n. 3
0
 def wtest_ort_gradient_optimizers_binary(self, use_weight=False):
     """
     Trains a binary classifier converted from ``SGDClassifier`` with
     ``OrtGradientOptimizer`` (``learning_rate=1e9``,
     ``use_numpy=False``) and checks the state, the representation
     and the losses of the optimizer.

     :param use_weight: if True, the loss takes an additional input
         ``'weight'`` and sample weights are given to ``fit``
     """
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer

     # 20 observations with 3 features, label is 1 from the 12th row on
     X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
     rank = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
     X = X.astype(numpy.float32)
     y = (rank > 10).astype(numpy.int64)
     y[0, 0] = 0
     y[-1, 0] = 1
     # weights are always drawn, they are only used if use_weight is True
     w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
     X_train, _, y_train, _, w_train, _ = train_test_split(X, y, w)

     clf = SGDClassifier(loss='log')
     clf.fit(X_train, y_train.ravel())
     onx = to_onnx(
         clf, X_train, target_opset=opset,
         black_op={'LinearClassifier'}, options={'zipmap': False})
     weight_name = 'weight' if use_weight else None
     onx_loss = add_loss_output(
         onx, 'log', output_index=1, weight_name=weight_name)

     # the loss graph takes X, the label (int64) and optionally the weight
     graph_inputs = onx_loss.graph.input
     expected_inputs = 3 if use_weight else 2
     self.assertEqual(len(graph_inputs), expected_inputs)
     label_type = graph_inputs[1].type.tensor_type.elem_type
     self.assertEqual(TensorProto.INT64, label_type)  # pylint: disable=E1101

     optimizer = OrtGradientOptimizer(
         onx_loss, ['intercept', 'coef'], learning_rate=1e9)
     # no state is available before the training
     self.assertRaise(lambda: optimizer.get_state(), AttributeError)

     fit_args = [X_train, y_train.reshape((-1, 1))]
     if use_weight:
         fit_args.append(w_train.reshape((-1, 1)))
     optimizer.fit(*fit_args, use_numpy=False)

     self.assertEqual(len(optimizer.get_state()), 2)
     text = repr(optimizer)
     self.assertIn("OrtGradientOptimizer(model_onnx=", text)
     self.assertIn("learning_rate='invscaling'", text)
     losses = optimizer.train_losses_
     self.assertGreater(len(losses), 1)
     if any(numpy.isnan(value) for value in losses):
         raise AssertionError(losses)
Esempio n. 4
0
 def test_ort_gradient_optimizers_use_numpy_w_l1(self):
     """
     Trains a weighted linear regression with a loss built with
     ``score_name='l1'`` and ``use_numpy=True``, then checks the
     state, the representation and the losses of the optimizer.
     """
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer

     # float32 regression problem, one random weight in [1, 2) per sample
     features, target = make_regression(  # pylint: disable=W0632
         100, n_features=10, bias=2, random_state=0)
     features = features.astype(numpy.float32)
     target = target.astype(numpy.float32)
     weights = (numpy.random.rand(target.shape[0]) + 1).astype(
         features.dtype)
     X_train, _, y_train, _, w_train, _ = train_test_split(
         features, target, weights)

     model = LinearRegression()
     model.fit(X_train, y_train, sample_weight=w_train)
     model.coef_ = model.coef_.reshape((1, -1))
     onx = to_onnx(
         model, X_train, target_opset=opset, black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx, weight_name='weight', score_name='l1')

     optimizer = OrtGradientOptimizer(
         onx_loss, ['intercept', 'coef'], learning_rate=1e-3)
     # no state is available before the training
     self.assertRaise(lambda: optimizer.get_state(), AttributeError)
     optimizer.fit(X_train, y_train, w_train, use_numpy=True)

     self.assertEqual(len(optimizer.get_state()), 2)
     text = repr(optimizer)
     self.assertIn("OrtGradientOptimizer(model_onnx=", text)
     self.assertIn("learning_rate='invscaling'", text)
     losses = optimizer.train_losses_
     self.assertGreater(len(losses), 1)
     has_nan = any(numpy.isnan(value) for value in losses)
     self.assertFalse(has_nan)
Esempio n. 5
0
 def test_ort_gradient_optimizers_optimal_use_ort(self):
     """
     Trains a linear regression for at most 10 iterations with
     ``LearningRateSGD(learning_rate='optimal')`` and
     ``use_numpy=False``, then checks the state, the representation
     and the losses of the optimizer.
     """
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer

     features, target = make_regression(  # pylint: disable=W0632
         100, n_features=10, bias=2, random_state=0)
     features = features.astype(numpy.float32)
     target = target.astype(numpy.float32)
     X_train, _, y_train, _ = train_test_split(features, target)

     model = LinearRegression()
     model.fit(X_train, y_train)
     model.coef_ = model.coef_.reshape((1, -1))
     onx = to_onnx(
         model, X_train, target_opset=opset, black_op={'LinearRegressor'})
     onx_loss = add_loss_output(onx)

     schedule = LearningRateSGD(learning_rate='optimal')
     optimizer = OrtGradientOptimizer(
         onx_loss, ['intercept', 'coef'],
         max_iter=10, learning_rate=schedule)
     # no state is available before the training
     self.assertRaise(lambda: optimizer.get_state(), AttributeError)
     optimizer.fit(X_train, y_train, use_numpy=False)

     self.assertEqual(len(optimizer.get_state()), 2)
     text = repr(optimizer)
     self.assertIn("OrtGradientOptimizer(model_onnx=", text)
     self.assertIn("learning_rate='optimal'", text)
     losses = optimizer.train_losses_
     self.assertGreater(len(losses), 1)
     has_nan = any(numpy.isnan(value) for value in losses)
     self.assertFalse(has_nan)
Esempio n. 6
0
    def test_ort_gradient_optimizers_use_numpy_pickle(self):
        """
        Checks that an ``OrtGradientOptimizer`` survives a pickle
        round trip before and after training, and that the restored
        optimizer can be trained again.
        """
        from onnxcustom.utils.orttraining_helper import add_loss_output
        from onnxcustom.training.optimizers import OrtGradientOptimizer

        features, target = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2, random_state=0)
        features = features.astype(numpy.float32)
        target = target.astype(numpy.float32)
        X_train, _, y_train, _ = train_test_split(features, target)

        model = LinearRegression()
        model.fit(X_train, y_train)
        model.coef_ = model.coef_.reshape((1, -1))
        onx = to_onnx(
            model, X_train, target_opset=opset,
            black_op={'LinearRegressor'})
        set_model_props(onx, {'info': 'unit test'})
        onx_loss = add_loss_output(onx)

        # first round trip: the optimizer has not been trained yet
        untrained = OrtGradientOptimizer(onx_loss, ['intercept', 'coef'])
        restored = pickle.loads(pickle.dumps(untrained))
        restored.fit(X_train, y_train, use_numpy=True)

        # second round trip: the state must be restored with the optimizer
        optimizer = pickle.loads(pickle.dumps(restored))
        self.assertEqual(len(optimizer.get_state()), 2)

        # the restored optimizer can be trained again
        optimizer.fit(X_train, y_train, use_numpy=True)
        self.assertEqual(len(optimizer.get_state()), 2)
        text = repr(optimizer)
        self.assertIn("OrtGradientOptimizer(model_onnx=", text)
        self.assertIn("learning_rate='invscaling'", text)
        losses = optimizer.train_losses_
        self.assertGreater(len(losses), 1)
        has_nan = any(numpy.isnan(value) for value in losses)
        self.assertFalse(has_nan)
Esempio n. 7
0
 def wtest_ort_gradient_optimizers_reg(self, use_weight=False):
     """
     Trains a regressor converted from ``SGDRegressor`` with
     ``OrtGradientOptimizer`` (``learning_rate=1e9``,
     ``use_numpy=False``), expects ``fit`` to raise
     ``ConvergenceError`` and then checks the state, the
     representation and the losses of the optimizer.

     :param use_weight: if True, the loss takes an additional input
         ``'weight'`` and sample weights are given to ``fit``
     """
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer

     # 20 observations with 3 features, target is the row index
     # except for the first and last rows which are shifted by one
     X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
     y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
     for row in (0, -1):
         y[row, 0] += 1
     # weights are always drawn, they are only used if use_weight is True
     w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
     X_train, _, y_train, _, w_train, _ = train_test_split(X, y, w)

     model = SGDRegressor()
     model.fit(X_train, y_train.ravel())
     onx = to_onnx(
         model, X_train, target_opset=opset, black_op={'LinearRegressor'})
     weight_name = 'weight' if use_weight else None
     onx_loss = add_loss_output(
         onx, 'squared_error', weight_name=weight_name)

     expected_inputs = 3 if use_weight else 2
     self.assertEqual(len(onx_loss.graph.input), expected_inputs)

     optimizer = OrtGradientOptimizer(
         onx_loss, ['intercept', 'coef'], learning_rate=1e9)
     # no state is available before the training
     self.assertRaise(lambda: optimizer.get_state(), AttributeError)

     fit_args = [X_train, y_train.reshape((-1, 1))]
     if use_weight:
         fit_args.append(w_train.reshape((-1, 1)))
     self.assertRaise(
         lambda: optimizer.fit(*fit_args, use_numpy=False),
         ConvergenceError)

     # the interrupted training still leaves a state and some losses
     self.assertEqual(len(optimizer.get_state()), 2)
     text = repr(optimizer)
     self.assertIn("OrtGradientOptimizer(model_onnx=", text)
     self.assertIn("learning_rate='invscaling'", text)
     losses = optimizer.train_losses_
     self.assertGreater(len(losses), 1)
     if any(numpy.isnan(value) for value in losses):
         raise AssertionError(losses)
Esempio n. 8
0
 def test_ort_gradient_optimizers_use_numpy_saved(self):
     """
     Trains a linear regression with ``saved_gradient`` set to a
     file name and checks, on top of the usual verifications on the
     state and the losses, that this file exists after the training.
     """
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer

     features, target = make_regression(  # pylint: disable=W0632
         100, n_features=10, bias=2, random_state=0)
     features = features.astype(numpy.float32)
     target = target.astype(numpy.float32)
     X_train, _, y_train, _ = train_test_split(features, target)

     model = LinearRegression()
     model.fit(X_train, y_train)
     model.coef_ = model.coef_.reshape((1, -1))
     onx = to_onnx(
         model, X_train, target_opset=opset, black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx)

     # file expected to be created by the optimizer
     folder = get_temp_folder(__file__, "temp_OrtGradientOptimizer")
     saved_path = os.path.join(folder, "saved.onnx")
     optimizer = OrtGradientOptimizer(
         onx_loss, ['intercept', 'coef'],
         learning_rate=1e-3, saved_gradient=saved_path)
     # no state is available before the training
     self.assertRaise(lambda: optimizer.get_state(), AttributeError)
     optimizer.fit(X_train, y_train, use_numpy=True)

     self.assertEqual(len(optimizer.get_state()), 2)
     text = repr(optimizer)
     self.assertIn("OrtGradientOptimizer(model_onnx=", text)
     self.assertIn("learning_rate='invscaling'", text)
     losses = optimizer.train_losses_
     self.assertGreater(len(losses), 1)
     has_nan = any(numpy.isnan(value) for value in losses)
     self.assertFalse(has_nan)
     self.assertExists(saved_path)
# Stochastic Gradient Descent
# +++++++++++++++++++++++++++
#
# The training logic is hidden in class
# :class:`OrtGradientOptimizer
# <onnxcustom.training.optimizers.OrtGradientOptimizer>`.
# It follows :epkg:`scikit-learn` API (see `SGDRegressor
# <https://scikit-learn.org/stable/modules/
# generated/sklearn.linear_model.SGDRegressor.html>`_).
# The gradient graph is not available at this stage.

# `onx_train`, `weights`, `device`, `X` and `y` are defined earlier
# in the script. `saved_gradient` is a file name; presumably the
# gradient graph is saved there during the training (to be confirmed
# against the OrtGradientOptimizer documentation).
train_session = OrtGradientOptimizer(onx_train,
                                     list(weights),
                                     device=device,
                                     verbose=1,
                                     learning_rate=1e-2,
                                     warm_start=False,
                                     max_iter=200,
                                     batch_size=10,
                                     saved_gradient="saved_gradient.onnx")

train_session.fit(X, y)

######################################
# And the trained coefficients are...

state_tensors = train_session.get_state()
pprint(["trained coefficients:", state_tensors])
# only the last five recorded training losses are displayed
print("last_losses:", train_session.train_losses_[-5:])

# number of loss values available for both this training and `lr`
# (`lr` is defined earlier in the script and exposes `loss_curve_`)
min_length = min(len(train_session.train_losses_), len(lr.loss_curve_))
Esempio n. 10
0
def benchmark(N=1000, n_features=100, hidden_layer_sizes="50,10", max_iter=1000,
              learning_rate_init=1e-4, batch_size=100, run_skl=True,
              device='cpu', opset=14):
    """
    Compares :epkg:`onnxruntime-training` to :epkg:`scikit-learn` for
    training. Training algorithm is SGD. The function prints its
    parameters, the training durations and the errors on the standard
    output, it does not return anything.

    Numerical parameters are converted with ``int`` or ``float``,
    which means they can be given as strings.

    :param N: number of generated observations
    :param n_features: number of features
    :param hidden_layer_sizes: hidden layer sizes, either a string of
        comma separated values (``"50,10"``), a sequence of integers
        (``(50, 10)``, ``[50, 10]``) or a single integer (``50``)
    :param max_iter: number of iterations
    :param learning_rate_init: initial learning rate
    :param batch_size: batch size
    :param run_skl: train scikit-learn in the same condition (True) or
        just walk through one iteration with *scikit-learn*,
        the values ``1``, ``True``, ``'1'``, ``'True'`` mean True
    :param device: `'cpu'` or `'cuda'`
    :param opset: opset to choose for the conversion
    """
    N = int(N)
    n_features = int(n_features)
    max_iter = int(max_iter)
    learning_rate_init = float(learning_rate_init)
    batch_size = int(batch_size)
    run_skl = run_skl in (1, True, '1', 'True')

    print(f"N={N}")
    print(f"n_features={n_features}")
    print(f"hidden_layer_sizes={hidden_layer_sizes!r}")
    print(f"max_iter={max_iter}")
    print(f"learning_rate_init={learning_rate_init:f}")
    print(f"batch_size={batch_size}")
    print(f"run_skl={run_skl!r}")
    print(f"opset={opset!r}")
    print(f"device={device!r}")
    print('------------------')

    # MLPRegressor needs a tuple of integers, a tuple is left untouched,
    # other supported forms (string, integer, sequence) are converted
    if isinstance(hidden_layer_sizes, str):
        hidden_layer_sizes = tuple(map(int, hidden_layer_sizes.split(",")))
    elif isinstance(hidden_layer_sizes, (int, numpy.integer)):
        hidden_layer_sizes = (int(hidden_layer_sizes), )
    elif not isinstance(hidden_layer_sizes, tuple):
        hidden_layer_sizes = tuple(map(int, hidden_layer_sizes))

    X, y = make_regression(N, n_features=n_features, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    # the test part of the split is not used by this benchmark
    X_train, _, y_train, _ = train_test_split(X, y)

    # a single iteration is enough to get a model to convert
    # when scikit-learn is not part of the comparison
    nn = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                      max_iter=max_iter if run_skl else 1,
                      solver='sgd', learning_rate_init=learning_rate_init,
                      n_iter_no_change=max_iter, batch_size=batch_size)

    begin = time.perf_counter()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nn.fit(X_train, y_train)
    dur_skl = time.perf_counter() - begin

    # nn is not modified afterwards, the error is computed only once
    # and displayed again next to onnxruntime results
    skl_error = mean_squared_error(y_train, nn.predict(X_train))
    skl_report = f"time_skl={dur_skl!r}, mean_squared_error={skl_error!r}"
    print(skl_report)

    # conversion to ONNX
    onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=opset)

    # add loss
    onx_train = add_loss_output(onx)

    # list of weights
    weights = get_train_initializer(onx)
    print('weights:', sorted(weights))

    # training
    print(f"device={device!r} get_device()={get_device()!r}")

    #######################################
    # The training session.

    train_session = OrtGradientOptimizer(
        onx_train, list(weights), device=device, verbose=0,
        learning_rate=learning_rate_init,
        warm_start=False, max_iter=max_iter, batch_size=batch_size)

    # NOTE(review): onnxruntime is trained on the whole dataset (X, y)
    # whereas scikit-learn is trained on (X_train, y_train) only,
    # durations and errors are not measured on the same data - confirm
    # this is intended
    begin = time.perf_counter()
    train_session.fit(X, y)
    dur_ort = time.perf_counter() - begin
    print(skl_report)
    last_loss = train_session.train_losses_[-1]
    print(f"time_ort={dur_ort!r}, last_trained_error={last_loss!r}")
Esempio n. 11
0
    def wtest_ort_gradient_optimizers_binary(self, use_weight=False):
        """
        Trains a binary classifier converted from ``SGDClassifier``
        with ``OrtGradientOptimizer`` (``learning_rate=1e-3``,
        ``use_numpy=False``), checks the state, the representation
        and the losses of the optimizer, then checks the trained ONNX
        model returns outputs with the same structure as the
        original model and that the state can be set back.

        :param use_weight: if True, the loss takes an additional input
            ``'weight'`` and sample weights are given to ``fit``
        """
        from onnxcustom.utils.orttraining_helper import add_loss_output
        from onnxcustom.training.optimizers import OrtGradientOptimizer

        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10, random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
        # weights are always drawn, they are only used if use_weight is True
        w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
        X_train, _, y_train, _, w_train, _ = train_test_split(X, y, w)

        clf = SGDClassifier(loss='log')
        clf.fit(X_train, y_train)
        onx = to_onnx(
            clf, X_train, target_opset=opset,
            black_op={'LinearClassifier'}, options={'zipmap': False})
        # copy of the converted model built from its serialization,
        # taken before set_model_props is called on the model
        onx_copy = load_onnx(BytesIO(onx.SerializeToString()))
        set_model_props(onx, {'info': 'unit test'})
        weight_name = 'weight' if use_weight else None
        onx_loss = add_loss_output(
            onx, 'log', output_index=1, weight_name=weight_name)

        # the loss graph takes X, the label (int64) and optionally the weight
        graph_inputs = onx_loss.graph.input
        expected_inputs = 3 if use_weight else 2
        self.assertEqual(len(graph_inputs), expected_inputs)
        label_type = graph_inputs[1].type.tensor_type.elem_type
        self.assertEqual(TensorProto.INT64, label_type)  # pylint: disable=E1101

        optimizer = OrtGradientOptimizer(
            onx_loss, ['intercept', 'coef'], learning_rate=1e-3)
        # no state is available before the training
        self.assertRaise(lambda: optimizer.get_state(), AttributeError)

        fit_args = [X_train, y_train.reshape((-1, 1))]
        if use_weight:
            fit_args.append(w_train.reshape((-1, 1)))
        optimizer.fit(*fit_args, use_numpy=False)

        self.assertEqual(len(optimizer.get_state()), 2)
        text = repr(optimizer)
        self.assertIn("OrtGradientOptimizer(model_onnx=", text)
        self.assertIn("learning_rate='invscaling'", text)
        losses = optimizer.train_losses_
        self.assertGreater(len(losses), 1)
        has_nan = any(numpy.isnan(value) for value in losses)
        self.assertFalse(has_nan)

        # get_trained_weight: runs the original copy, then the trained model
        trained_onnx = optimizer.get_trained_onnx(model=onx_copy)
        outputs = []
        for proto in (onx_copy, trained_onnx):
            sess = InferenceSession(proto.SerializeToString(),
                                    providers=['CPUExecutionProvider'])
            outputs.append(sess.run(None, {'X': X_train}))
        before, after = outputs
        self.assertEqual(len(before), len(after))
        self.assertEqual(before[0].shape, after[0].shape)

        # state
        state = optimizer.get_state()
        self.assertIsInstance(state, dict)
        optimizer.set_state(state)
Esempio n. 12
0
# `nn`, `X_train`, `y_train`, `max_iter` and `batch_size` are defined
# earlier in the script. Warnings raised while fitting are silenced.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    nn.fit(X_train, y_train)

########################################
# Conversion to ONNX and trainer initialization

# a single float32 row of the training data is given to the converter
onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=15)
onx_train = add_loss_output(onx)

# the weights are extracted from the converted model `onx`,
# the name and the shape of each of them are displayed
weights = get_train_initializer(onx)
pprint(list((k, v[0].shape) for k, v in weights.items()))

train_session = OrtGradientOptimizer(
    onx_train, list(weights), device='cpu', learning_rate=1e-5,
    warm_start=False, max_iter=max_iter, batch_size=batch_size)


# `benchmark` is defined earlier in the script, its results
# are collected in a list
benches = [benchmark(nn, train_session, name='NN-CPU')]

######################################
# Profiling
# +++++++++


def clean_name(text):
    pos = text.find('onnxruntime')
    if pos >= 0:
        return text[pos:]
    pos = text.find('onnxcustom')