Example #1
 def update_weights(self, n_bind, device, statei, gradienti, batch_size,
                    velocity=None):
     if (not hasattr(self, "axpyw_onnx_") or
             not hasattr(self, "axpyw_sess_binds_")):
         raise RuntimeError(  # pragma: no cover
             "Attribute 'axpyw_sess_binds_' or "
             "'axpyw_onnx_' is missing. Method "
             "'build_onnx_function' has not been called.")
     if velocity is None:
         raise RuntimeError(  # pragma: no cover
             "Velocity must not be None for this way of updating weights.")
     bind = self.axpyw_sess_binds_[n_bind]
     self._bind_input_ortvalue("X1", bind, gradienti, device, cache=True)
     self._bind_input_ortvalue("X2", bind, statei, device, cache=True)
     self._bind_input_ortvalue("G", bind, velocity, device, cache=True)
     self.alpha_[0] = - self.value / batch_size  # pylint: disable=E1130
     self.beta_[0] = self.momentum
     ort_alpha = C_OrtValue.ortvalue_from_numpy(self.alpha_, device)
     ort_beta = C_OrtValue.ortvalue_from_numpy(self.beta_, device)
     self._bind_input_ortvalue("alpha", bind, ort_alpha, device, cache=True)
     self._bind_input_ortvalue("beta", bind, ort_beta, device, cache=True)
     self._bind_output_ortvalue('Y', bind, statei, cache=True)
     self._bind_output_ortvalue('Z', bind, velocity, cache=True)
     self._call_iobinding(self.axpyw_sess_._sess, bind)
     return bind.get_outputs()  # loss, velocity
Example #2
 def __iter__(self):
     """
     Iterates over the datasets by drawing
     *batch_size* consecutive observations.
     """
     N = 0
     b = len(self) - self.batch_size
     while N < len(self):
         i = numpy.random.randint(0, b)
         N += self.batch_size
         yield (C_OrtValue.ortvalue_from_numpy(
             self.X[i:i + self.batch_size], self.device),
                C_OrtValue.ortvalue_from_numpy(
                    self.y[i:i + self.batch_size], self.device))
Example #3
    def test_print_ortvalue(self):
        expected = ("device=Cpu dtype=dtype('float32') shape=(1, 4) "
                    "value=[0.0, 1.0, 4.0, 4.5]")
        value = numpy.array([[0, 1, 4, 4.5]], dtype=numpy.float32)
        dev = get_ort_device('cpu')
        ort = C_OrtValue.ortvalue_from_numpy(value, dev)
        text = str_ortvalue(ort)
        self.assertEqual(expected, text)
        text = str_ortvalue(ort)  # pylint: disable=W0212
        self.assertEqual(expected, text)

        expected = ("device=Cpu dtype=dtype('int64') shape=(100,) "
                    "value=[0, 1, 2, 3, 4, '...', 95, 96, 97, 98, 99]")
        value = numpy.arange(100).astype(numpy.int64)
        ort = C_OrtValue.ortvalue_from_numpy(value, dev)
        text = str_ortvalue(ort)  # pylint: disable=W0212
        self.assertEqual(expected, text)
Example #4
def numpy_to_ort_value(arr, device=None):
    """
    Converts a numpy array to :epkg:`C_OrtValue`.

    :param arr: numpy array
    :param device: :epkg:`C_OrtDevice` or None for cpu
    :return: :epkg:`C_OrtValue`
    """
    if device is None:
        device = get_ort_device('cpu')
    return C_OrtValue.ortvalue_from_numpy(arr, device)
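
# Usage sketch for the helper above (not part of the original function):
# the array content is illustrative and the round trip relies on
# C_OrtValue.numpy(), used in the other examples as well.
arr = numpy.array([[0, 1, 2]], dtype=numpy.float32)
ort = numpy_to_ort_value(arr)                         # CPU by default
ort_cpu = numpy_to_ort_value(arr, get_ort_device('cpu'))
assert (ort.numpy() == arr).all()
assert (ort_cpu.numpy() == arr).all()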
Example #5
    def input_to_ort(tensors, devices, debug):
        "Converts a list of tensors into an :epkg:`OrtValueVector`."

        def _validate_(tensors):
            if any(map(
                    lambda tu: tu[0].device_name() !=
                    OrtGradientForwardBackwardFunction.device_name(tu[1]),
                    zip(tensors, devices))):
                raise RuntimeError(  # pragma: no cover
                    "Not all inputs are on the same device %r != %r." % (
                        [OrtGradientForwardBackward.device_name(d)
                         for d in devices],
                        [x.device_name() for x in tensors]))

        if isinstance(tensors, OrtValueVector):
            if debug:
                _validate_(tensors)
            return tensors
        if all(map(lambda t: isinstance(t, C_OrtValue), tensors)):
            if debug:
                _validate_(tensors)
            vect = OrtValueVector()
            vect.reserve(len(tensors))
            for t in tensors:
                if t is None:
                    raise NotImplementedError(  # pragma: no cover
                        "Empty vector found.")
                vect.push_back(t)
            return vect

        # generic case
        vect = OrtValueVector()
        vect.reserve(len(tensors))
        for t, dev in zip(tensors, devices):
            if t is None:
                # if gradient then
                # grad_output = torch.zeros(shape, device=device, dtype=dtype)
                raise NotImplementedError(  # pragma: no cover
                    "Empty vector found.")
            if not t.data.contiguous:
                t = numpy.ascontiguousarray(t)  # pragma: no cover
            vect.push_back(C_OrtValue.ortvalue_from_numpy(t, dev))
        if debug:
            if len(vect) != len(tensors):
                raise RuntimeError(  # pragma: no cover
                    "Unexpected array length %d != %d (len(devices)=%d)." %
                    (len(vect), len(tensors), len(devices)))
            _validate_(vect)
        return vect
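
# A standalone sketch of the generic conversion path above, outside the
# class: it only relies on OrtValueVector and
# C_OrtValue.ortvalue_from_numpy, both used throughout these examples;
# the function name is illustrative.
def numpy_list_to_ort_vector(arrays, device):
    "Converts a list of numpy arrays into an OrtValueVector."
    vect = OrtValueVector()
    vect.reserve(len(arrays))
    for a in arrays:
        # onnxruntime expects contiguous memory
        vect.push_back(C_OrtValue.ortvalue_from_numpy(
            numpy.ascontiguousarray(a), device))
    return vect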
Example #6
    def test_bind_input_types(self):

        opset = onnx_opset_version()
        devices = [
            (C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0),
             ['CPUExecutionProvider'])]
        if "CUDAExecutionProvider" in onnxrt.get_all_providers():
            devices.append(
                (C_OrtDevice(C_OrtDevice.cuda(), C_OrtDevice.default_memory(), 0),
                 ['CUDAExecutionProvider']))

        for device, provider in devices:
            for dtype in [np.float32, np.float64, np.int32, np.uint32,
                          np.int64, np.uint64, np.int16, np.uint16,
                          np.int8, np.uint8, np.float16, np.bool_]:
                with self.subTest(dtype=dtype, device=str(device)):

                    x = np.arange(8).reshape((-1, 2)).astype(dtype)
                    proto_dtype = NP_TYPE_TO_TENSOR_TYPE[x.dtype]

                    X = helper.make_tensor_value_info('X', proto_dtype, [None, x.shape[1]])
                    Y = helper.make_tensor_value_info('Y', proto_dtype, [None, x.shape[1]])

                    # inference
                    node_add = helper.make_node('Identity', ['X'], ['Y'])

                    # graph
                    graph_def = helper.make_graph([node_add], 'lr', [X], [Y], [])
                    model_def = helper.make_model(
                        graph_def, producer_name='dummy', ir_version=7,
                        producer_version="0",
                        opset_imports=[helper.make_operatorsetid('', opset)])

                    sess = onnxrt.InferenceSession(model_def.SerializeToString(), providers=provider)

                    bind = SessionIOBinding(sess._sess)
                    ort_value = C_OrtValue.ortvalue_from_numpy(x, device)
                    bind.bind_ortvalue_input('X', ort_value)
                    bind.bind_output('Y', device)
                    sess._sess.run_with_iobinding(bind, None)
                    ortvalue = bind.get_outputs()[0]
                    y = ortvalue.numpy()
                    assert_almost_equal(x, y)

                    bind = SessionIOBinding(sess._sess)
                    bind.bind_input('X', device, dtype, x.shape, ort_value.data_ptr())
                    bind.bind_output('Y', device)
                    sess._sess.run_with_iobinding(bind, None)
                    ortvalue = bind.get_outputs()[0]
                    y = ortvalue.numpy()
                    assert_almost_equal(x, y)
Example #7
    def fit(self, X, y):
        """
        Trains the model.
        :param X: features
        :param y: expected output
        :return: self
        """
        self.train_session_ = create_training_session(
            self.model_onnx,
            self.weights_to_train,
            loss_output_name=self.loss_output_name,
            training_optimizer_name=self.training_optimizer_name,
            device=self.device)

        data_loader = DataLoaderDevice(X,
                                       y,
                                       batch_size=self.batch_size,
                                       device=self.device)
        lr = self._init_learning_rate()
        self.input_names_ = [i.name for i in self.train_session_.get_inputs()]
        self.output_names_ = [
            o.name for o in self.train_session_.get_outputs()
        ]
        self.loss_index_ = self.output_names_.index(self.loss_output_name)

        bind = self.train_session_.io_binding()._iobinding

        loop = (tqdm(range(self.max_iter))
                if self.verbose else range(self.max_iter))
        train_losses = []
        for it in loop:
            bind_lr = C_OrtValue.ortvalue_from_numpy(
                numpy.array([lr], dtype=numpy.float32), self.device)
            loss = self._iteration(data_loader, bind_lr, bind)
            lr = self._update_learning_rate(it, lr)
            if self.verbose > 1:
                loop.set_description(f"loss={loss:1.3g} lr={lr:1.3g}")
            train_losses.append(loss)
        self.train_losses_ = train_losses
        self.trained_coef_ = self.train_session_.get_state()
        return self
Example #8
def benchmark(name,
              onx,
              fct_numpy,
              *args,
              dims=(1, 10, 100, 200, 500, 1000, 2000, 10000)):
    sess = InferenceSession(onx.SerializeToString())
    device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
    names = [i.name for i in sess.get_inputs()]
    out_names = [o.name for o in sess.get_outputs()]
    if len(names) != len(args):
        raise RuntimeError(f"Size mismatch {len(names)} != {len(args)}.")

    rows = []
    for dim in tqdm(dims):
        new_args = [reshape(a, dim) for a in args]
        ortvalues = [
            C_OrtValue.ortvalue_from_numpy(a, device) for a in new_args
        ]

        ms = measure_time(lambda: fct_numpy(*new_args), repeat=50, number=100)
        ms.update(dict(name=name, impl='numpy', dim=dim))
        rows.append(ms)

        inps = {n: a for n, a in zip(names, new_args)}
        ms = measure_time(lambda: sess.run(None, inps))
        ms.update(dict(name=name, impl='sess', dim=dim))
        rows.append(ms)

        bind = SessionIOBinding(sess._sess)
        ms = measure_time(lambda: bind_and_run(sess._sess, bind, names,
                                               ortvalues, out_names, device))
        ms.update(dict(name=name, impl='bind_run', dim=dim))
        rows.append(ms)

        ms = measure_time(lambda: nobind_just_run(sess._sess, bind))
        ms.update(dict(name=name, impl='run', dim=dim))
        rows.append(ms)

    return rows
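
# The helpers bind_and_run and nobind_just_run are not shown above; a
# minimal sketch of what they may look like, based only on the
# io-binding calls used in the other examples (names and signatures are
# assumptions, not the original implementation).
def bind_and_run(sess, bind, names, ortvalues, out_names, device):
    "Binds inputs and outputs, runs, returns the bound outputs."
    for name, value in zip(names, ortvalues):
        bind.bind_ortvalue_input(name, value)
    for out in out_names:
        bind.bind_output(out, device)
    sess.run_with_iobinding(bind, None)
    return bind.get_outputs()


def nobind_just_run(sess, bind):
    "Runs again with the bindings left by the previous call."
    sess.run_with_iobinding(bind, None)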
Example #9
so = SessionOptions()
so.enable_profiling = True
so.optimized_model_filepath = os.path.split(filename)[-1] + ".optimized.onnx"
sess = InferenceSession(onx.SerializeToString(), so, providers=[provider])
bind = SessionIOBinding(sess._sess)

print("graph_optimization_level:", so.graph_optimization_level)

#####################################
# Creates random data
feed = random_feed(sess, batch)

#####################################
# moving the data on CPU or GPU
feed_ort_value = OrderedDict(
    (name, (C_OrtValue.ortvalue_from_numpy(v, ort_device), v.dtype))
    for name, v in feed.items())
outputs = [o.name for o in sess.get_outputs()]

#######################################
# A function which calls the API for any device.


def run_with_iobinding(sess, bind, ort_device, feed_ort_value, outputs):
    for name, (value, dtype) in feed_ort_value.items():
        bind.bind_input(name, ort_device, dtype, value.shape(),
                        value.data_ptr())
    for out in outputs:
        bind.bind_output(out, ort_device)
    sess._sess.run_with_iobinding(bind, None)
    ortvalues = bind.get_outputs()
    return ortvalues
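

# Usage sketch (not part of the original script): run once with the
# bindings prepared above and copy the outputs back to numpy, whatever
# the device.
run_with_iobinding(sess, bind, ort_device, feed_ort_value, outputs)
print([o.shape for o in bind.copy_outputs_to_cpu()])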
Example #10
sess = InferenceSession(onx.SerializeToString(),
                        providers=['CPUExecutionProvider'])
ro = RunOptions()
output_names = [o.name for o in sess.get_outputs()]
obs = measure_time(lambda: sess._sess.run(output_names, {'X': X}, ro),
                   context=dict(sess=sess, X=X),
                   repeat=repeat,
                   number=number)
obs['name'] = 'ort-c'
data.append(obs)

###################################
# onnxruntime: run_with_ort_values
print('ort-ov-c')
device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

Xov = C_OrtValue.ortvalue_from_numpy(X, device)

sess = InferenceSession(onx.SerializeToString(),
                        providers=['CPUExecutionProvider'])
ro = RunOptions()
output_names = [o.name for o in sess.get_outputs()]
obs = measure_time(
    lambda: sess._sess.run_with_ort_values({'X': Xov}, output_names, ro),
    context=dict(sess=sess),
    repeat=repeat,
    number=number)
obs['name'] = 'ort-ov'
data.append(obs)

###################################
# onnxruntime: run_with_iobinding
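# The original excerpt stops here; a sketch of the io-binding
# measurement this banner announces, following the pattern above
# (the 'ort-iob' label and the reuse of SessionIOBinding are
# assumptions).
print('ort-iob')
bind = SessionIOBinding(sess._sess)
bind.bind_ortvalue_input('X', Xov)
for name in output_names:
    bind.bind_output(name, device)
obs = measure_time(
    lambda: sess._sess.run_with_iobinding(bind, None),
    context=dict(sess=sess, bind=bind),
    repeat=repeat,
    number=number)
obs['name'] = 'ort-iob'
data.append(obs)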
Example #11
train_session = create_training_session(onx_train, ['coefs', 'intercept'],
                                        device=device)
print(train_session)

##########################################
# The coefficients.

state_tensors = train_session.get_state()
pprint(state_tensors)

######################################
# We can now check the coefficients are updated after one iteration.

dev = get_ort_device(device)
ortx = C_OrtValue.ortvalue_from_numpy(X_train[:1], dev)
orty = C_OrtValue.ortvalue_from_numpy(y_train[:1].reshape((-1, 1)), dev)
ortlr = C_OrtValue.ortvalue_from_numpy(
    numpy.array([0.01], dtype=numpy.float32), dev)

bind = train_session.io_binding()._iobinding
bind.bind_ortvalue_input('X', ortx)
bind.bind_ortvalue_input('label', orty)
bind.bind_ortvalue_input('Learning_Rate', ortlr)
bind.bind_output('loss', dev)
train_session._sess.run_with_iobinding(bind, None)
outputs = bind.copy_outputs_to_cpu()
pprint(outputs)

##########################################
# We check the coefficients have changed.
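
# A minimal sketch of that check (an addition, assuming get_state()
# returns the same mapping of names to numpy arrays printed above).
new_state_tensors = train_session.get_state()
for name, old_value in state_tensors.items():
    print(name, numpy.abs(new_state_tensors[name] - old_value).max())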
Example #12
def benchmark_op(repeat=10,
                 number=10,
                 name="Slice",
                 shape_slice_fct=None,
                 save=None,
                 opset=14,
                 repeat_profile=1500,
                 verbose=1):
    if verbose:
        print("[benchmark_op] start repeat=%d number=%d repeat_profile=%d"
              " opset=%d." % (repeat, number, repeat_profile, opset))
    res = []
    for dim in tqdm([
            8, 16, 32, 64, 100, 128, 200, 256, 400, 512, 600, 784, 800, 1000,
            1024, 1200
    ]):
        shape, slices = shape_slice_fct(dim)
        onx, ort_fct, npy_fct, ort_fct_gpu = build_ort_op(save=save,
                                                          op_version=opset,
                                                          slices=slices)

        n_arrays = 20
        if dim >= 512:
            n_arrays = 10
        xs = [
            numpy.random.rand(*shape).astype(numpy.float32)
            for _ in range(n_arrays)
        ]
        info = dict(shape=shape)

        ctx = dict(xs=xs, loop_fct=loop_fct)

        # numpy
        ctx['fct'] = npy_fct
        obs = measure_time(lambda: loop_fct(npy_fct, xs),
                           div_by_number=True,
                           context=ctx,
                           repeat=repeat,
                           number=number)
        obs['dim'] = dim
        obs['fct'] = 'numpy'
        obs['shape'] = ",".join(map(str, shape))
        obs['slices'] = str(slices)
        obs.update(info)
        res.append(obs)

        # onnxruntime
        ctx['fct'] = ort_fct
        obs = measure_time(lambda: loop_fct(ort_fct, xs),
                           div_by_number=True,
                           context=ctx,
                           repeat=repeat,
                           number=number)
        obs['dim'] = dim
        obs['fct'] = 'ort'
        obs['shape'] = ",".join(map(str, shape))
        obs['slices'] = str(slices)
        obs.update(info)
        res.append(obs)

        if ort_fct_gpu is not None:

            # onnxruntime
            dev = get_ort_device('cuda:0')
            ctx['xs'] = [C_OrtValue.ortvalue_from_numpy(x, dev) for x in xs]
            ctx['fct'] = ort_fct_gpu
            obs = measure_time(lambda: loop_fct(ort_fct_gpu, ctx['xs']),
                               div_by_number=True,
                               context=ctx,
                               repeat=repeat,
                               number=number)
            obs['dim'] = dim
            obs['fct'] = 'ort_gpu'
            obs['shape'] = ",".join(map(str, shape))
            obs['slices'] = str(slices)
            obs.update(info)
            res.append(obs)

    # profiling CPU
    if verbose:
        print("[benchmark_op] done.")
        print("[benchmark_op] profile CPU.")
    so = SessionOptions()
    so.enable_profiling = True
    sess = InferenceSession(onx.SerializeToString(),
                            so,
                            providers=["CPUExecutionProvider"])
    for i in range(0, repeat_profile):
        sess.run(
            None,
            {'X': xs[-1]},
        )
    prof = sess.end_profiling()
    with open(prof, "r") as f:
        js = json.load(f)
    dfprof = DataFrame(OnnxWholeSession.process_profiling(js))
    dfprof['shape'] = ",".join(map(str, shape))
    dfprof['slices'] = str(slices)
    if verbose:
        print("[benchmark_op] done.")

    # profiling CPU
    if ort_fct_gpu is not None:
        if verbose:
            print("[benchmark_op] profile GPU.")
        so = SessionOptions()
        so.enable_profiling = True
        sess = InferenceSession(onx.SerializeToString(),
                                so,
                                providers=["CUDAExecutionProvider"])
        io_binding = sess.io_binding()._iobinding
        device = get_ort_device('cpu')

        for i in range(0, repeat_profile):
            x = ctx['xs'][-1]
            io_binding.bind_input('X', device, numpy.float32, x.shape(),
                                  x.data_ptr())
            io_binding.bind_output('Y', device)
            sess._sess.run_with_iobinding(io_binding, None)

        prof = sess.end_profiling()
        with open(prof, "r") as f:
            js = json.load(f)
        dfprofgpu = DataFrame(OnnxWholeSession.process_profiling(js))
        dfprofgpu['shape'] = ",".join(map(str, shape))
        dfprofgpu['slices'] = str(slices)
        if verbose:
            print("[benchmark_op] profile done.")
    else:
        dfprofgpu = None

    # Dataframes
    shape_name = str(shape).replace(str(dim), "N")
    df = pandas.DataFrame(res)
    piv = df.pivot(index='shape', columns='fct', values='average')

    rs = piv.copy()
    for c in ['numpy', 'ort', 'ort_gpu']:
        if c in rs.columns:
            rs[f"numpy/{c}"] = rs['numpy'] / rs[c]
    rs = rs[[c for c in rs.columns if "/numpy" not in c]].copy()

    # Graphs.
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    piv.plot(logx=True,
             logy=True,
             ax=ax[0],
             title=f"{name} benchmark\n{shape_name!r} lower better")
    ax[0].legend(prop={"size": 9})
    rs.plot(
        logx=True,
        logy=True,
        ax=ax[1],
        title=f"{name} Speedup, baseline=numpy\n{shape_name!r} higher better")
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})
    return dfprof, dfprofgpu, df, rs, ax
Example #13
    def forward_no_training(self, exc=None, verbose=False):
        if exc is None:
            exc = __name__ != '__main__'
        from onnxruntime.capi._pybind_state import (OrtValue as C_OrtValue,
                                                    OrtDevice as C_OrtDevice,
                                                    OrtMemType)
        from onnxruntime.capi._pybind_state import (OrtValueVector)
        from onnxcustom.training.ortgradient import OrtGradientForwardBackward

        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
        X_train, X_test, y_train, _ = train_test_split(X, y)
        reg = LinearRegression()
        reg.fit(X_train, y_train)
        reg.coef_ = reg.coef_.reshape((1, -1))
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearRegressor'})

        # starts testing
        if verbose:
            print("[forward_no_training] start testing")
        if exc:
            if verbose:
                print("[forward_no_training] check exception")
            self.assertRaise(
                lambda: OrtGradientForwardBackward(
                    onx, debug=True, enable_logging=True, providers=['NONE']),
                ValueError)
        if verbose:
            print("[forward_no_training] instantiate")
        forback = OrtGradientForwardBackward(onx,
                                             debug=True,
                                             enable_logging=True)
        self.assertEqual(repr(forback), "OrtGradientForwardBackward(...)")
        self.assertTrue(hasattr(forback, 'cls_type_'))
        self.assertEqual(forback.cls_type_._onx_inp,
                         ['X', 'coef', 'intercept'])
        self.assertEqual(forback.cls_type_._onx_out,
                         ['X_grad', 'coef_grad', 'intercept_grad'])
        self.assertEqual(forback.cls_type_._weights_to_train,
                         ['coef', 'intercept'])
        self.assertEqual(forback.cls_type_._grad_input_names,
                         ['X', 'coef', 'intercept'])
        self.assertEqual(forback.cls_type_._input_names, ['X'])
        self.assertEqual(forback.cls_type_._bw_fetches_names,
                         ['X_grad', 'coef_grad', 'intercept_grad'])
        self.assertEqual(forback.cls_type_._output_names, ['variable'])

        if verbose:
            print("[forward_no_training] expected prediction")

        expected = reg.predict(X_test)
        coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
        intercept = numpy.array([reg.intercept_], dtype=numpy.float32)

        if verbose:
            print("[forward_no_training] InferenceSession")

        providers = device_to_providers('cpu')
        sess0 = InferenceSession(onx.SerializeToString(), providers=providers)
        inames = [i.name for i in sess0.get_inputs()]  # pylint: disable=E1101
        self.assertEqual(inames, ['X'])
        got = sess0.run(None, {'X': X_test})
        self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

        if verbose:
            print("[forward_no_training] evaluation")

        sess_eval = forback.cls_type_._sess_eval  # pylint: disable=E1101
        inames = [i.name for i in sess_eval.get_inputs()]
        self.assertEqual(inames, ['X', 'coef', 'intercept'])
        got = sess_eval.run(None, {
            'X': X_test,
            'coef': coef,
            'intercept': intercept
        })
        self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

        # OrtValue
        if verbose:
            print("[forward_no_training] OrtValue")
        inst = forback.new_instance()
        device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

        # list of OrtValues
        inputs = []
        for a in [X_test, coef, intercept]:
            inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
        got_ort = inst.forward(inputs)
        got = [v.numpy() for v in got_ort]
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

        # OrtValueVector
        if verbose:
            print("[forward_no_training] OrtValueVector")
        inputs = OrtValueVector()
        for a in [X_test, coef, intercept]:
            inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
        got = inst.forward(inputs)
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected.ravel(),
                              got[0].numpy().ravel(),
                              decimal=4)

        # numpy
        if verbose:
            print("[forward_no_training] numpy")
        inputs = [X_test, coef, intercept]
        got = inst.forward(inputs)
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected.ravel(),
                              got[0].numpy().ravel(),
                              decimal=4)
        if verbose:
            print("[forward_no_training] end")
Example #14
    def forward_training(self,
                         model,
                         debug=False,
                         n_classes=3,
                         add_print=False):
        from onnxruntime.capi._pybind_state import (OrtValue as C_OrtValue,
                                                    OrtMemType, OrtDevice as
                                                    C_OrtDevice)
        from onnxruntime.capi._pybind_state import (OrtValueVector)
        from onnxcustom.training.ortgradient import OrtGradientForwardBackward

        def to_proba(yt):
            mx = yt.max() + 1
            new_yt = numpy.zeros((yt.shape[0], mx), dtype=numpy.float32)
            for i, y in enumerate(yt):
                new_yt[i, y] = 1
            return new_yt

        if hasattr(model.__class__, 'predict_proba'):
            X, y = make_classification(  # pylint: disable=W0632
                100,
                n_features=10,
                n_classes=n_classes,
                n_informative=7)
            X = X.astype(numpy.float32)
            y = y.astype(numpy.int64)
        else:
            X, y = make_regression(  # pylint: disable=W0632
                100, n_features=10, bias=2)
            X = X.astype(numpy.float32)
            y = y.astype(numpy.float32)
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        reg = model
        reg.fit(X_train, y_train)
        # needed if skl2onnx<1.10.4
        # reg.coef_ = reg.coef_.reshape((1, -1))
        # reg.intercept_ = reg.intercept_.reshape((-1, ))
        if hasattr(model.__class__, 'predict_proba'):
            onx = to_onnx(reg,
                          X_train,
                          target_opset=opset,
                          black_op={'LinearClassifier'},
                          options={'zipmap': False})
            onx = select_model_inputs_outputs(
                onx, outputs=[onx.graph.output[1].name])
        else:
            onx = to_onnx(reg,
                          X_train,
                          target_opset=opset,
                          black_op={'LinearRegressor'})

        # remove batch possibility
        #onx.graph.input[0].type.tensor_type.shape.dim[0].dim_value = 0
        #onx.graph.input[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"
        #onx.graph.output[0].type.tensor_type.shape.dim[0].dim_value = 0
        #onx.graph.output[0].type.tensor_type.shape.dim[0].dim_param = "batch_size"
        providers = device_to_providers('cpu')
        sess = InferenceSession(onx.SerializeToString(), providers=providers)
        sess.run(None, {'X': X_test[:1]})

        # starts testing
        forback = OrtGradientForwardBackward(onx,
                                             debug=True,
                                             enable_logging=True)
        if debug:
            n = model.__class__.__name__
            temp = get_temp_folder(__file__, f"temp_forward_training_{n}")
            with open(os.path.join(temp, f"model_{n}.onnx"), "wb") as f:
                f.write(onx.SerializeToString())
            with open(os.path.join(temp, f"fw_train_{n}.onnx"), "wb") as f:
                f.write(forback.cls_type_._trained_onnx.SerializeToString())
            with open(os.path.join(temp, f"fw_pre_{n}.onnx"), "wb") as f:
                gr = forback.cls_type_._optimized_pre_grad_model
                f.write(gr.SerializeToString())

        if hasattr(model.__class__, 'predict_proba'):
            expected = reg.predict_proba(X_test)
            coef = reg.coef_.astype(numpy.float32).T
            intercept = reg.intercept_.astype(numpy.float32)
            # only one observation
            X_test1 = X_test[:1]
            y_test = to_proba(y_test).astype(numpy.float32)
            y_test1 = y_test[:1]
            expected1 = expected[:1]
        else:
            expected = reg.predict(X_test)
            coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
            intercept = numpy.array([reg.intercept_], dtype=numpy.float32)
            # only one observation
            X_test1 = X_test[:1]
            y_test1 = y_test[0].reshape((1, -1))
            expected1 = expected[:1]

        # OrtValueVector
        inst = forback.new_instance()
        device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)

        if add_print:
            print("\n\n######################\nFORWARD")
        inputs = OrtValueVector()
        for a in [X_test1, coef, intercept]:
            inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
        got = inst.forward(inputs, training=True)
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected1.ravel(),
                              got[0].numpy().ravel(),
                              decimal=4)

        if add_print:
            print("\n\n######################\nBACKWARD")
        outputs = OrtValueVector()
        outputs.push_back(C_OrtValue.ortvalue_from_numpy(y_test1, device))
        got = inst.backward(outputs)
        self.assertEqual(len(got), 3)
        if add_print:
            print("\n######################\nEND\n")

        # OrtValueVector, all observations
        inputs = OrtValueVector()
        for a in [X_test, coef, intercept]:
            inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
        got = inst.forward(inputs, training=True)
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected.ravel(),
                              got[0].numpy().ravel(),
                              decimal=4)

        outputs = OrtValueVector()
        outputs.push_back(
            C_OrtValue.ortvalue_from_numpy(y_test.reshape((1, -1)), device))
        got = inst.backward(outputs)
        self.assertEqual(len(got), 3)

        # list of OrtValues
        inputs = []
        for a in [X_test, coef, intercept]:
            inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
        got_ort = inst.forward(inputs, training=True)
        got = [v.numpy() for v in got_ort]
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

        outputs = [
            C_OrtValue.ortvalue_from_numpy(y_test.reshape((1, -1)), device)
        ]
        got = inst.backward(outputs)
        self.assertEqual(len(got), 3)

        # numpy
        inputs = [X_test, coef, intercept]
        got_ort = inst.forward(inputs, training=True)
        got = [v.numpy() for v in got_ort]
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

        outputs = [y_test.reshape((1, -1))]
        got = inst.backward(outputs)
        self.assertEqual(len(got), 3)
Example #15
    def test_forward_no_training_pickle(self):
        from onnxruntime.capi._pybind_state import (OrtValue as C_OrtValue,
                                                    OrtMemType, OrtDevice as
                                                    C_OrtDevice)
        from onnxruntime.capi._pybind_state import (OrtValueVector)
        from onnxcustom.training.ortgradient import OrtGradientForwardBackward
        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
        X_train, X_test, y_train, _ = train_test_split(X, y)
        reg = LinearRegression()
        reg.fit(X_train, y_train)
        reg.coef_ = reg.coef_.reshape((1, -1))
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearRegressor'})
        forback0 = OrtGradientForwardBackward(onx, debug=True)
        st = io.BytesIO()
        pickle.dump(forback0, st)
        st2 = io.BytesIO(st.getvalue())
        forback = pickle.load(st2)

        self.assertTrue(hasattr(forback, 'cls_type_'))
        self.assertEqual(forback.cls_type_._onx_inp,
                         ['X', 'coef', 'intercept'])
        self.assertEqual(forback.cls_type_._onx_out,
                         ['X_grad', 'coef_grad', 'intercept_grad'])
        self.assertEqual(forback.cls_type_._weights_to_train,
                         ['coef', 'intercept'])
        self.assertEqual(forback.cls_type_._grad_input_names,
                         ['X', 'coef', 'intercept'])
        self.assertEqual(forback.cls_type_._input_names, ['X'])
        self.assertEqual(forback.cls_type_._bw_fetches_names,
                         ['X_grad', 'coef_grad', 'intercept_grad'])
        self.assertEqual(forback.cls_type_._output_names, ['variable'])

        expected = reg.predict(X_test)
        coef = reg.coef_.astype(numpy.float32).reshape((-1, 1))
        intercept = numpy.array([reg.intercept_], dtype=numpy.float32)

        providers = device_to_providers('cpu')
        sess0 = InferenceSession(onx.SerializeToString(), providers=providers)
        inames = [i.name for i in sess0.get_inputs()]
        self.assertEqual(inames, ['X'])
        got = sess0.run(None, {'X': X_test})
        self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

        sess_eval = forback.cls_type_._sess_eval  # pylint: disable=W0212
        inames = [i.name for i in sess_eval.get_inputs()]
        self.assertEqual(inames, ['X', 'coef', 'intercept'])
        got = sess_eval.run(None, {
            'X': X_test,
            'coef': coef,
            'intercept': intercept
        })
        self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

        # OrtValue
        inst = forback.new_instance()
        inputs = []
        device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)
        for a in [X_test, coef, intercept]:
            inputs.append(C_OrtValue.ortvalue_from_numpy(a, device))
        got_ort = inst.forward(inputs)
        got = [v.numpy() for v in got_ort]
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected.ravel(), got[0].ravel(), decimal=4)

        # OrtValueVector
        inputs = OrtValueVector()
        for a in [X_test, coef, intercept]:
            inputs.push_back(C_OrtValue.ortvalue_from_numpy(a, device))
        got = inst.forward(inputs)
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected.ravel(),
                              got[0].numpy().ravel(),
                              decimal=4)

        # numpy
        inputs = [X_test, coef, intercept]
        got = inst.forward(inputs)
        self.assertEqual(len(got), 1)
        self.assertEqualArray(expected.ravel(),
                              got[0].numpy().ravel(),
                              decimal=4)
Example #16
if get_device().upper() == 'GPU':
    ort_device = C_OrtDevice(C_OrtDevice.cuda(), C_OrtDevice.default_memory(),
                             0)
else:
    ort_device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(),
                             0)

# session
sess = InferenceSession(
    onx.SerializeToString(),
    so,
    providers=['CPUExecutionProvider', 'CUDAExecutionProvider'])
bind = SessionIOBinding(sess._sess)

# moving the data on CPU or GPU
ort_value = C_OrtValue.ortvalue_from_numpy(X, ort_device)

#######################################
# A function which calls the API for any device.


def run_with_iobinding(sess, bind, ort_device, ort_value, dtype):
    bind.bind_input('X', ort_device, dtype, ort_value.shape(),
                    ort_value.data_ptr())
    bind.bind_output('variable', ort_device)
    sess._sess.run_with_iobinding(bind, None)
    ortvalues = bind.get_outputs()
    return ortvalues[0].numpy()


#######################################
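# Usage sketch (not part of the original excerpt): call the helper with
# the OrtValue prepared above and compare with a plain sess.run; the
# input name 'X' is the one bound inside the helper.
y_bind = run_with_iobinding(sess, bind, ort_device, ort_value, X.dtype)
y_run = sess.run(None, {'X': X})[0]
print(numpy.abs(y_bind - y_run).max())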
Example #17
    def test_gradient_mlpregressor(self):
        from onnxcustom.training.optimizers_partial import (
            OrtGradientForwardBackwardOptimizer)
        X = numpy.arange(30).reshape((-1, 3)).astype(numpy.float32) / 100
        y = numpy.arange(X.shape[0]).astype(numpy.float32)
        y = y.reshape((-1, 1))
        reg = MLPRegressor(hidden_layer_sizes=(5,), max_iter=2,
                           activation='logistic',
                           momentum=0, nesterovs_momentum=False,
                           alpha=0)
        reg.fit(X, y.ravel())

        onx = to_onnx(reg, X, target_opset=opset)
        onx = onnx_rename_weights(onx)
        inits = ["I0_coefficient", 'I1_intercepts', 'I2_coefficient1',
                 'I3_intercepts1']

        xp = numpy.arange(2 * X.shape[1]).reshape((2, -1)).astype(
            numpy.float32) / 10
        yp = numpy.array([0.5, -0.5], dtype=numpy.float32).reshape((-1, 1))

        train_session = OrtGradientForwardBackwardOptimizer(
            onx, inits, learning_rate=1e-5,
            warm_start=True, max_iter=2, batch_size=10)
        train_session.fit(X, y)
        state = train_session.get_state()
        state_np = [st.numpy() for st in state]

        # gradient scikit-learn

        coef_grads = state_np[::2]
        intercept_grads = state_np[1::2]
        layer_units = [3, 5, 1]
        activations = [xp] + [None] * (len(layer_units) - 1)
        deltas = [None] * (len(activations) - 1)

        skl_pred = reg.predict(xp)

        batch_loss, coef_grads, intercept_grads = reg._backprop(  # pylint: disable=W0212
            xp, yp, activations, deltas,
            coef_grads, intercept_grads)
        deltas = activations[-1] - yp

        # gradient onnxcustom

        ort_xp = C_OrtValue.ortvalue_from_numpy(xp, train_session.device)
        ort_yp = C_OrtValue.ortvalue_from_numpy(yp, train_session.device)
        ort_state = [ort_xp] + state
        prediction = train_session.train_function_.forward(
            ort_state, training=True)

        ort_pred = prediction[0].numpy()
        self.assertEqualArray(skl_pred.ravel(), ort_pred.ravel(), decimal=2)

        loss, loss_gradient = train_session.learning_loss.loss_gradient(
            train_session.device, ort_yp, prediction[0])

        gradient = train_session.train_function_.backward([loss_gradient])

        # comparison

        self.assertEqualArray(
            batch_loss, loss.numpy() / xp.shape[0], decimal=3)
        self.assertEqualArray(deltas, loss_gradient.numpy(), decimal=3)

        # do not use iterator for gradient, it may crash
        ort_grad = [gradient[i].numpy() / xp.shape[0]
                    for i in range(len(gradient))][1:]
        self.assertEqualArray(
            intercept_grads[1], ort_grad[3].ravel(), decimal=2)
        self.assertEqualArray(coef_grads[1], ort_grad[2], decimal=2)
        self.assertEqualArray(
            intercept_grads[0], ort_grad[1].ravel(), decimal=2)
        self.assertEqualArray(coef_grads[0], ort_grad[0], decimal=2)
Example #18
# With onnxruntime.

sess = InferenceSession(onx.SerializeToString(),
                        providers=["CPUExecutionProvider"])
y_cpu = sess.run(None, {'X': x})[0]

#######################################
# Execution on GPU
# ++++++++++++++++
#
# If available...

if get_device().upper() == 'GPU':
    dev = get_ort_device('cuda:0')
    try:
        gx = C_OrtValue.ortvalue_from_numpy(x, dev)
        cuda = True
    except RuntimeError as e:
        print(e)
        cuda = False
else:
    cuda = False

if cuda:
    sessg = InferenceSession(onx.SerializeToString(),
                             providers=["CUDAExecutionProvider"])

    io_binding = sessg.io_binding()._iobinding
    io_binding.bind_input('X', dev, numpy.float32, gx.shape(), gx.data_ptr())
    io_binding.bind_output('Y', dev)
    sessg._sess.run_with_iobinding(io_binding, None)
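
    # Fetch the result computed on GPU and compare with the CPU run
    # above (an added sketch; the original excerpt stops after
    # run_with_iobinding).
    y_gpu = io_binding.copy_outputs_to_cpu()[0]
    print(numpy.abs(y_cpu - y_gpu).max())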