def wtest_ort_gradient_optimizers_grid_cls(self, use_weight=False):
        from onnxcustom.training.optimizers_partial import (
            OrtGradientForwardBackwardOptimizer)
        from onnxcustom.training.sgd_learning_rate import LearningRateSGD
        from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
        values = [
            1e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 1e-1,
            1, 10, 100, 1000
        ]
        X = numpy.random.randn(30, 3).astype(numpy.float32)
        y = (X.sum(axis=1) >= 0).astype(numpy.int64).reshape((-1, 1))
        X += numpy.random.randn(30, 3).astype(numpy.float32) / 10
        X_train, _, y_train, __ = train_test_split(X, y)
        scorer = make_scorer(lambda y_true, y_pred:
                             (-log_loss(y_true, y_pred)))  # pylint: disable=E1130
        reg = GridSearchCV(SGDClassifier(max_iter=20),
                           param_grid={'eta0': values},
                           scoring=scorer,
                           cv=3)
        reg.fit(X_train, y_train.ravel())
        self.assertIsInstance(reg.best_params_, dict)
        self.assertIn('eta0', reg.best_params_)
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearClassifier'},
                      options={'zipmap': False})
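        # Keep only the raw 'score' output for training and prefix the
        # initializers (I0_, I1_, ...) so they can be listed in a fixed order.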
        onx = select_model_inputs_outputs(onx, outputs=['score'])
        onx = onnx_rename_weights(onx)
        inits = ['I0_coef', 'I1_intercept']

        cvalues = [LearningRateSGD(v) for v in values]
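        # The optimizer follows the scikit-learn estimator API
        # (get_params/set_params/fit/score), so GridSearchCV can cross-validate
        # it directly; each candidate value is a LearningRateSGD instance.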
        grid = GridSearchCV(OrtGradientForwardBackwardOptimizer(
            onx,
            inits,
            weight_name='weight' if use_weight else None,
            learning_rate=LearningRateSGD(1e-4),
            learning_loss=NegLogLearningLoss(),
            warm_start=False,
            max_iter=20,
            batch_size=10,
            enable_logging=False,
            exc=False),
                            param_grid={'learning_rate': cvalues},
                            cv=3)
        # No sample-weight array is built in this test, so the fit call is
        # identical whether or not use_weight is set.
        grid.fit(X_train, y_train)
        self.assertIsInstance(grid.best_params_, dict)
        self.assertEqual(len(grid.best_params_), 1)
        self.assertIsInstance(grid.best_params_['learning_rate'],
                              LearningRateSGD)

    def wtest_ort_gradient_optimizers_grid_reg(self, use_weight=False):
        from onnxcustom.training.optimizers_partial import (
            OrtGradientForwardBackwardOptimizer)
        from onnxcustom.training.sgd_learning_rate import LearningRateSGD
        from onnxcustom.training.sgd_learning_loss import SquareLearningLoss
        values = [
            1e-6, 1e-5, 5e-5, 8e-5, 1e-4, 2e-4, 5e-4, 1e-3, 1e-2, 1e-1, 1
        ]
        X = numpy.random.randn(30, 3).astype(numpy.float32)
        y = X.sum(axis=1).reshape((-1, 1))
        y += numpy.random.randn(y.shape[0]).astype(numpy.float32).reshape(
            (-1, 1)) / 10
        X_train, _, y_train, __ = train_test_split(X, y)
        scorer = make_scorer(lambda y_true, y_pred:
                             (-mean_squared_error(y_true, y_pred)))  # pylint: disable=E1130
        reg = GridSearchCV(SGDRegressor(max_iter=20),
                           param_grid={'eta0': values},
                           scoring=scorer,
                           cv=3,
                           error_score='raise')
        reg.fit(X_train, y_train.ravel())
        self.assertIsInstance(reg.best_params_, dict)
        self.assertIn('eta0', reg.best_params_)
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearRegressor'})
        onx = onnx_rename_weights(onx)
        inits = ['I0_coef', 'I1_intercept']

        cvalues = [LearningRateSGD(v) for v in values]
        grid = GridSearchCV(OrtGradientForwardBackwardOptimizer(
            onx,
            inits,
            weight_name='weight' if use_weight else None,
            learning_rate=LearningRateSGD(1e-4),
            learning_loss=SquareLearningLoss(),
            warm_start=False,
            max_iter=20,
            batch_size=10,
            enable_logging=False,
            exc=False),
                            param_grid={'learning_rate': cvalues},
                            cv=3)
        # As in the classifier test, no sample-weight array exists here, so
        # the fit call is the same either way.
        grid.fit(X_train, y_train)
        self.assertIsInstance(grid.best_params_, dict)
        self.assertEqual(len(grid.best_params_), 1)
        self.assertIsInstance(grid.best_params_['learning_rate'],
                              LearningRateSGD)

    def wtest_ort_gradient_optimizers_fw_nesterov_binary_mlp(
            self, use_weight=True):
        from onnxcustom.training.optimizers_partial import (
            OrtGradientForwardBackwardOptimizer)
        from onnxcustom.training.sgd_learning_rate import (
            LearningRateSGDNesterov)
        from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10, random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
        w = (numpy.random.rand(y.shape[0]) + 1).astype(numpy.float32)
        X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
        reg = MLPClassifier(solver='sgd')
        reg.fit(X_train, y_train)
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearRegressor'},
                      options={'zipmap': False})
        onx = select_model_inputs_outputs(onx,
                                          outputs=['out_activations_result'])
        self.assertIn("output: name='out_activations_result'",
                      onnx_simple_text_plot(onx))
        set_model_props(onx, {'info': 'unit test'})
        onx = onnx_rename_weights(onx)
        inits = [
            'I0_coefficient', 'I1_intercepts', 'I2_coefficient1',
            'I3_intercepts1'
        ]

        train_session = OrtGradientForwardBackwardOptimizer(
            onx,
            inits,
            weight_name='weight' if use_weight else None,
            learning_rate=LearningRateSGDNesterov(1e-4,
                                                  nesterov=False,
                                                  momentum=0.9),
            learning_loss=NegLogLearningLoss(),
            warm_start=False,
            max_iter=100,
            batch_size=10)
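        # Note: nesterov=False disables the lookahead step, so this schedule
        # behaves as classical momentum SGD (momentum=0.9) despite its name.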
        self.assertIsInstance(train_session.learning_loss, NegLogLearningLoss)
        self.assertEqual(train_session.learning_loss.eps, 1e-5)
        if use_weight:
            train_session.fit(X_train, y_train, w_train)
        else:
            train_session.fit(X_train, y_train)
        temp = get_temp_folder(
            __file__, "temp_ort_gradient_optimizers_fw_nesterov_binary_mlp%d" %
            use_weight)
        train_session.save_onnx_graph(temp)

    def test_onnx_rename_weights(self):
        N, D_in, D_out, H = 3, 3, 3, 3
        var = [('X', FloatTensorType([N, D_in]))]
        w1 = numpy.random.randn(D_in, H).astype(numpy.float32)
        w2 = numpy.random.randn(H, D_out).astype(numpy.float32)
        opv = 14
        onx_alg = OnnxMatMul(
            OnnxRelu(OnnxMatMul(*var, w1, op_version=opv),
                     op_version=opv),
            w2, op_version=opv, output_names=['Y'])
        onx = onx_alg.to_onnx(
            var, target_opset=opv, outputs=[('Y', FloatTensorType())])

        onx = onnx_rename_weights(onx)
        names = [init.name for init in onx.graph.initializer]
        self.assertEqual(['I0_Ma_MatMulcst', 'I1_Ma_MatMulcst1'], names)
        self.assertEqual(get_onnx_opset(onx), 14)
        self.assertRaise(lambda: get_onnx_opset(onx, "H"), ValueError)

    def wtest_ort_gradient_optimizers_score_reg(self, use_weight=False):
        from onnxcustom.training.optimizers_partial import (
            OrtGradientForwardBackwardOptimizer)
        from onnxcustom.training.sgd_learning_rate import LearningRateSGD
        from onnxcustom.training.sgd_learning_loss import SquareLearningLoss
        X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
        y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
        y[0, 0] += 1
        y[-1, 0] += 1
        w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
        X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
        reg = SGDRegressor(max_iter=20)
        reg.fit(X_train, y_train.ravel())
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearRegressor'})
        onx = onnx_rename_weights(onx)
        inits = ['I0_coef', 'I1_intercept']

        model = OrtGradientForwardBackwardOptimizer(
            onx,
            inits,
            weight_name='weight' if use_weight else None,
            learning_rate=LearningRateSGD(1e-4),
            learning_loss=SquareLearningLoss(),
            warm_start=False,
            max_iter=20,
            batch_size=10)
        if use_weight:
            model.fit(X_train, y_train, w_train)
            losses = model.losses(X_train, y_train, w_train)
            score = model.score(X_train, y_train, w_train)
        else:
            model.fit(X_train, y_train)
            losses = model.losses(X_train, y_train)
            score = model.score(X_train, y_train)
        self.assertEqual(losses.shape[0], y_train.shape[0])
        self.assertFalse(any(map(numpy.isnan, losses)))
        self.assertIsInstance(score, numbers.Number)
        params = model.get_params()
        self.assertIsInstance(params['device'], str)


def benchmark(N=1000,
              n_features=100,
              hidden_layer_sizes="50,50",
              max_iter=500,
              learning_rate_init=1e-8,
              batch_size=15,
              run_skl=True,
              device='cpu',
              opset=14):
    """
    Compares :epkg:`onnxruntime-training` to :epkg:`scikit-learn` for
    training. Training algorithm is SGD.

    :param N: number of observations to train on
    :param n_features: number of features
    :param hidden_layer_sizes: hidden layer sizes, comma separated values
    :param max_iter: number of iterations
    :param learning_rate_init: initial learning rate
    :param batch_size: batch size
    :param run_skl: train scikit-learn in the same condition (True) or
        just walk through one iterator with *scikit-learn*
    :param device: `'cpu'` or `'cuda'`
    :param opset: opset to choose for the conversion
    """
    N = int(N)
    n_features = int(n_features)
    max_iter = int(max_iter)
    learning_rate_init = float(learning_rate_init)
    batch_size = int(batch_size)
    run_skl = run_skl in (1, True, '1', 'True')

    print("N=%d" % N)
    print("n_features=%d" % n_features)
    print(f"hidden_layer_sizes={hidden_layer_sizes!r}")
    print("max_iter=%d" % max_iter)
    print(f"learning_rate_init={learning_rate_init:f}")
    print("batch_size=%d" % batch_size)
    print(f"run_skl={run_skl!r}")
    print(f"opset={opset!r}")
    print(f"device={device!r}")
    print('------------------')

    if not isinstance(hidden_layer_sizes, tuple):
        hidden_layer_sizes = tuple(map(int, hidden_layer_sizes.split(",")))
    X, y = make_regression(N, n_features=n_features, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    nn = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                      max_iter=max_iter if run_skl else 1,
                      solver='sgd',
                      learning_rate_init=learning_rate_init,
                      n_iter_no_change=max_iter,
                      batch_size=batch_size,
                      alpha=0,
                      nesterovs_momentum=False,
                      momentum=0,
                      learning_rate="invscaling")

    begin = time.perf_counter()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nn.fit(X_train, y_train)
    dur_skl = time.perf_counter() - begin

    print("time_skl=%r, mean_squared_error=%r" %
          (dur_skl, mean_squared_error(y_train, nn.predict(X_train))))

    # conversion to ONNX
    onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=opset)
    onx = onnx_rename_weights(onx)

    # list of weights
    weights = get_train_initializer(onx)
    print('weights:', sorted(weights))

    # training
    print(f"device={device!r} get_device()={get_device()!r}")

    #######################################
    # The training session.

    train_session = OrtGradientForwardBackwardOptimizer(
        onx,
        list(weights),
        device=device,
        verbose=0,
        learning_rate=learning_rate_init,
        warm_start=False,
        max_iter=max_iter,
        batch_size=batch_size)

    begin = time.perf_counter()
    # Fit on the same training split as scikit-learn for a fair comparison.
    train_session.fit(X_train, y_train)
    dur_ort = time.perf_counter() - begin
    print("time_skl=%r, mean_squared_error=%r" %
          (dur_skl, mean_squared_error(y_train, nn.predict(X_train))))
    print("time_ort=%r, last_trained_error=%r" %
          (dur_ort, train_session.train_losses_[-1]))
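
# A short usage sketch of the benchmark above: run it once with settings
# that simply echo the defaults (values are illustrative, not tuned).
benchmark(N=1000, n_features=100, hidden_layer_sizes="50,50",
          max_iter=500, learning_rate_init=1e-8, batch_size=15,
          run_skl=True, device='cpu', opset=14)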


nn = MLPRegressor(hidden_layer_sizes=(50, 10), max_iter=max_iter,
                  solver='sgd', learning_rate_init=5e-4, alpha=0,
                  n_iter_no_change=max_iter * 3, batch_size=batch_size,
                  nesterovs_momentum=False, momentum=0,
                  learning_rate="invscaling")

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    nn.fit(X_train, y_train)

########################################
# Conversion to ONNX and trainer initialization

onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=15)
onx = onnx_rename_weights(onx)

train_session = OrtGradientForwardBackwardOptimizer(
    onx, device='cpu', learning_rate=1e-5,
    warm_start=False, max_iter=max_iter, batch_size=batch_size)


benches = [benchmark(X_train, y_train, nn, train_session, name='NN-CPU')]

######################################
# Profiling
# +++++++++


def clean_name(text):
    # Minimal sketch: keep only the path suffix starting at 'onnxruntime'.
    pos = text.find('onnxruntime')
    return text[pos:] if pos >= 0 else text

    def test_gradient_mlpregressor(self):
        from onnxcustom.training.optimizers_partial import (
            OrtGradientForwardBackwardOptimizer)
        X = numpy.arange(30).reshape((-1, 3)).astype(numpy.float32) / 100
        y = numpy.arange(X.shape[0]).astype(numpy.float32)
        y = y.reshape((-1, 1))
        reg = MLPRegressor(hidden_layer_sizes=(5,), max_iter=2,
                           activation='logistic',
                           momentum=0, nesterovs_momentum=False,
                           alpha=0)
        reg.fit(X, y.ravel())

        onx = to_onnx(reg, X, target_opset=opset)
        onx = onnx_rename_weights(onx)
        inits = ["I0_coefficient", 'I1_intercepts', 'I2_coefficient1',
                 'I3_intercepts1']

        xp = numpy.arange(2 * X.shape[1]).reshape((2, -1)).astype(
            numpy.float32) / 10
        yp = numpy.array([0.5, -0.5], dtype=numpy.float32).reshape((-1, 1))

        train_session = OrtGradientForwardBackwardOptimizer(
            onx, inits, learning_rate=1e-5,
            warm_start=True, max_iter=2, batch_size=10)
        train_session.fit(X, y)
        state = train_session.get_state()
        state_np = [st.numpy() for st in state]

        # gradient scikit-learn

        coef_grads = state_np[::2]
        intercept_grads = state_np[1::2]
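        # The optimizer state alternates weights and biases in the order of
        # `inits`: [coef0, intercept0, coef1, intercept1].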
        layer_units = [3, 5, 1]
        activations = [xp] + [None] * (len(layer_units) - 1)
        deltas = [None] * (len(activations) - 1)

        skl_pred = reg.predict(xp)

        batch_loss, coef_grads, intercept_grads = reg._backprop(  # pylint: disable=W0212
            xp, yp, activations, deltas,
            coef_grads, intercept_grads)
        deltas = activations[-1] - yp

        # gradient onnxcustom

        ort_xp = C_OrtValue.ortvalue_from_numpy(xp, train_session.device)
        ort_yp = C_OrtValue.ortvalue_from_numpy(yp, train_session.device)
        ort_state = [ort_xp] + state
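        # forward() takes the input tensor followed by the weights, in the
        # same order as the renamed initializers.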
        prediction = train_session.train_function_.forward(
            ort_state, training=True)

        ort_pred = prediction[0].numpy()
        self.assertEqualArray(skl_pred.ravel(), ort_pred.ravel(), decimal=2)

        loss, loss_gradient = train_session.learning_loss.loss_gradient(
            train_session.device, ort_yp, prediction[0])

        gradient = train_session.train_function_.backward([loss_gradient])

        # comparison
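        # The ORT loss and gradients are divided by the batch size below
        # because scikit-learn reports batch-averaged values.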

        self.assertEqualArray(
            batch_loss, loss.numpy() / xp.shape[0], decimal=3)
        self.assertEqualArray(deltas, loss_gradient.numpy(), decimal=3)

        # do not use iterator for gradient, it may crash
        ort_grad = [gradient[i].numpy() / xp.shape[0]
                    for i in range(len(gradient))][1:]
        self.assertEqualArray(
            intercept_grads[1], ort_grad[3].ravel(), decimal=2)
        self.assertEqualArray(coef_grads[1], ort_grad[2], decimal=2)
        self.assertEqualArray(
            intercept_grads[0], ort_grad[1].ravel(), decimal=2)
        self.assertEqualArray(coef_grads[0], ort_grad[0], decimal=2)