Example #1
 def __init__(self, *args, **kwargs):
     "Overwrites the constructor."
     runtime_options = kwargs.pop('runtime_options', {})
     disable_optimisation = runtime_options.pop('disable_optimisation',
                                                False)
     if disable_optimisation:
         if 'sess_options' in kwargs:
             raise RuntimeError(
                 "Incompatible options, 'disable_options' and 'sess_options' cannot "
                 "be sepcified at the same time.")
         kwargs['sess_options'] = SessionOptions()
         kwargs['sess_options'].graph_optimization_level = (
             GraphOptimizationLevel.ORT_DISABLE_ALL)
     self.sess, self.outi, self.erri = _capture_output(
         lambda: InferenceSession(*args, **kwargs), 'c')
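
A minimal usage sketch (hypothetical names: WrappedSession is assumed to be the class owning the constructor above, and model_bytes a serialized ONNX model):

# Hypothetical: WrappedSession owns the constructor above,
# model_bytes holds a serialized ONNX model.
sess = WrappedSession(model_bytes,
                      runtime_options={'disable_optimisation': True})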
Example #2
def fcts_model(X, y, n_jobs):
    "LinearRegression."
    model = LinearRegression(n_jobs=n_jobs)
    model.fit(X, y)

    initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
    onx = convert_sklearn(model, initial_types=initial_types)
    sess = InferenceSession(onx.SerializeToString())
    outputs = [o.name for o in sess.get_outputs()]
    oinf = OnnxInference(onx, runtime="python")

    def predict_skl_predict(X, model=model):
        return model.predict(X)

    def predict_onnxrt_predict(X, sess=sess):
        return sess.run(outputs[:1], {'X': X})[0]

    def predict_onnx_inference(X, oinf=oinf):
        return oinf.run({'X': X})["variable"]

    return {
        'predict':
        (predict_skl_predict, predict_onnxrt_predict, predict_onnx_inference)
    }
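
The returned triple can be used to check the three backends against each other, for instance (a sketch, assuming X and y are float32 numpy arrays compatible with LinearRegression):

from numpy.testing import assert_almost_equal

fcts = fcts_model(X, y, n_jobs=1)
skl_fct, ort_fct, pyrt_fct = fcts['predict']
# Ravel both sides: onnxruntime returns a (n, 1) column for this model.
assert_almost_equal(skl_fct(X).ravel(), ort_fct(X).ravel(), decimal=4)
assert_almost_equal(skl_fct(X).ravel(), pyrt_fct(X).ravel(), decimal=4)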
Example #3
    def test_onnx_init_sparse_coo(self):
        row = np.array([0, 0, 1, 3, 1], dtype=np.float32)
        col = np.array([0, 2, 1, 3, 1], dtype=np.float32)
        data = np.array([1, 1, 1, 1, 1], dtype=np.float32)
        X = coo_matrix((data, (row, col)), shape=(4, 4))

        node = OnnxAdd('X', X, output_names=['Y'], op_version=TARGET_OPSET)

        model_def = node.to_onnx({'X': X}, outputs=[('Y', FloatTensorType())])

        try:
            sess = InferenceSession(model_def.SerializeToString())
        except (RuntimeError, OrtInvalidArgument):
            # Sparse tensor is not supported for constant.
            return
        try:
            res = sess.run(None, {'X': X})[0]
        except RuntimeError as e:
            # Sparse tensor is not supported for constant.
            warnings.warn("Unable to run with %r\n---\n%s\n%s" % ({
                'X': X
            }, model_def, e))
            return
        assert_almost_equal(X + X, res)
Example #4
 def test_model_mlp_regressor_default(self):
     model, X_test = fit_regression_model(MLPRegressor(random_state=42))
     exp = model.predict(X_test)
     for opv in (1, 2, 7, 8, 9, 10, 11, 12, onnx_opset_version()):
         if opv is not None and opv > get_latest_tested_opset_version():
             continue
         try:
             onx = convert_sklearn(
                 model,
                 "scikit-learn MLPRegressor",
                 [("input", FloatTensorType([None, X_test.shape[1]]))],
                 target_opset=opv)
         except RuntimeError as e:
             if ("is higher than the number of the "
                     "installed onnx package") in str(e):
                 continue
             raise e
         as_string = onx.SerializeToString()
         try:
             ort = InferenceSession(as_string)
         except (RuntimeError, InvalidGraph, Fail) as e:
             if opv in (None, 1, 2):
                 continue
             if opv >= onnx_opset_version():
                 continue
             if ("No suitable kernel definition found for "
                     "op Cast(9)") in str(e):
                 # too old onnxruntime
                 continue
             raise AssertionError(
                 "Unable to load opv={}\n---\n{}\n---".format(opv,
                                                              onx)) from e
         res_out = ort.run(None, {'input': X_test})
         assert len(res_out) == 1
         res = res_out[0]
         assert_almost_equal(exp.ravel(), res.ravel(), decimal=4)
Example #5
def optimize_model(model_path: Path):
    '''
        Generate a model with graph optimizations applied (constant folding, etc.).
        parameter model_path: path to the original onnx model
        return: optimized onnx model
    '''
    opt_model_path = generate_identified_filename(model_path, "-opt")
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()
    sess_option.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
    _ = InferenceSession(model_path.as_posix(),
                         sess_option,
                         providers=['CPUExecutionProvider'])
    optimized_model = onnx.load(opt_model_path.as_posix())
    return optimized_model
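
A minimal call sketch (the file name is hypothetical; the optimized graph is also written to disk next to the original, via generate_identified_filename):

from pathlib import Path

optimized = optimize_model(Path("model.onnx"))  # hypothetical file
print(len(optimized.graph.node))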
Example #6
    def test_model_tfidf_vectorizer11_nolowercase(self):
        corpus = numpy.array([
            "This is the first document.",
            "This document is the second document.",
            "And this is the third one.",
            "Is this the first document?",
        ]).reshape((4, 1))
        vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, lowercase=False)
        vect.fit(corpus.ravel())
        model_onnx = convert_sklearn(vect,
                                     "TfidfVectorizer",
                                     [("input", StringTensorType())],
                                     options=self.get_options(),
                                     target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            corpus,
            vect,
            model_onnx,
            basename="SklearnTfidfVectorizer11NoL-OneOff-SklCol")

        sess = InferenceSession(model_onnx.SerializeToString())
        res = sess.run(None, {'input': corpus.ravel()})[0]
        assert res.shape == (4, 11)
Example #7
 def __init__(self, onnx_data, runtime, runtime_options=None):
     """
     @param      onnx_data       :epkg:`ONNX` model or data
     @param      runtime         runtime to be used,
                                 mostly :epkg:`onnxruntime`
     @param      runtime_options runtime options
     """
     if runtime != 'onnxruntime1':
         raise NotImplementedError(  # pragma: no cover
             "runtime '{}' is not implemented.".format(runtime))
     if hasattr(onnx_data, 'SerializeToString'):
         onnx_data = onnx_data.SerializeToString()
     self.runtime = runtime
     sess_options = SessionOptions()
     self.run_options = RunOptions()
     try:
         sess_options.sessions_log_verbosity_level = 0
     except AttributeError:  # pragma: no cover
         # onnxruntime not recent enough.
         pass
     try:
         self.run_options.run_log_verbosity_level = 0
     except AttributeError:  # pragma: no cover
         # onnxruntime not recent enough.
         pass
     if (runtime_options is not None
             and runtime_options.get('disable_optimisation', False)):
         sess_options.graph_optimization_level = (
              GraphOptimizationLevel.ORT_DISABLE_ALL)
     try:
         self.sess = InferenceSession(onnx_data, sess_options=sess_options)
     except (OrtFail, OrtNotImplemented, OrtInvalidGraph,
             OrtInvalidArgument, OrtRuntimeException, RuntimeError) as e:
         raise RuntimeError(
             "Unable to create InferenceSession due to '{}'\n{}.".format(
                 e, display_onnx(onnx.load(BytesIO(onnx_data))))) from e
Example #8
    def test_pipeline_tfidf_svc(self):
        pipe = Pipeline([('tfidf', TfidfVectorizer()),
                         ('clf_svc', SVC(probability=True, kernel='linear'))])
        data = numpy.array([
            "first sentance", "second sentence", "many sentances",
            "dummy sentance", "no sentance at all"
        ])
        y = numpy.array([0, 0, 1, 0, 1])
        pipe.fit(data, y)
        expected_label = pipe.predict(data)
        expected_proba = pipe.predict_proba(data)
        df = pandas.DataFrame(data)
        df.columns = ['text']

        # first conversion with shape=[None, 1]
        model_onnx = convert_sklearn(pipe,
                                     initial_types=[
                                         ('text', StringTensorType([None, 1]))
                                     ],
                                     target_opset=TARGET_OPSET,
                                     options={id(pipe): {
                                                  'zipmap': False
                                              }})
        sess = InferenceSession(model_onnx.SerializeToString())
        got = sess.run(None, {'text': data.reshape((-1, 1))})
        assert_almost_equal(expected_proba, got[1])
        assert_almost_equal(expected_label, got[0])
        # sess.run(None, {'text': df}) --> failures
        # sess.run(None, {'text': df["text"]}) --> failures

        # second conversion with shape=[None]
        model_onnx = convert_sklearn(pipe,
                                     initial_types=[
                                         ('text', StringTensorType([None]))
                                     ],
                                     target_opset=TARGET_OPSET,
                                     options={id(pipe): {
                                                  'zipmap': False
                                              }})
        sess = InferenceSession(model_onnx.SerializeToString())
        got = sess.run(None, {'text': data})
        assert_almost_equal(expected_proba, got[1])
        assert_almost_equal(expected_label, got[0])
        # sess.run(None, {'text': df})  failure
        # sess.run(None, {'text': df["text"]})  failure
        sess.run(None, {'text': df["text"].values})  # success
Example #9
 def test_extratreesclassifier_decision_path(self):
     model = ExtraTreesClassifier(max_depth=2, n_estimators=3)
     X, y = make_classification(10, n_features=4, random_state=42)
     X = X[:, :2]
     model.fit(X, y)
     initial_types = [('input', FloatTensorType((None, X.shape[1])))]
     model_onnx = convert_sklearn(
         model,
         initial_types=initial_types,
         options={id(model): {
                      'decision_path': True,
                      'zipmap': False
                  }},
         target_opset=TARGET_OPSET)
     sess = InferenceSession(model_onnx.SerializeToString())
     res = sess.run(None, {'input': X.astype(numpy.float32)})
     pred = model.predict(X)
     assert_almost_equal(pred, res[0].ravel())
     prob = model.predict_proba(X)
     assert_almost_equal(prob, res[1])
     dec = model.decision_path(X)
     exp = binary_array_to_string(dec[0].todense())
     got = numpy.array([''.join(row) for row in res[2]])
     assert exp == got.ravel().tolist()
Example #10
 def test_decisiontree_regressor_decision_path_leaf(self):
     model = DecisionTreeRegressor(max_depth=2)
     X, y = make_classification(10, n_features=4, random_state=42)
     X = X[:, :2]
     model.fit(X, y)
     initial_types = [('input', FloatTensorType((None, X.shape[1])))]
     model_onnx = convert_sklearn(model,
                                  initial_types=initial_types,
                                  options={
                                      id(model): {
                                          'decision_leaf': True,
                                          'decision_path': True
                                      }
                                  },
                                  target_opset=TARGET_OPSET)
     sess = InferenceSession(model_onnx.SerializeToString())
     res = sess.run(None, {'input': X.astype(np.float32)})
     pred = model.predict(X)
     assert_almost_equal(pred, res[0].ravel())
     dec = model.decision_path(X)
     exp_leaf = path_to_leaf(model.tree_, dec.todense())
     exp_path = binary_array_to_string(dec.todense())
     assert exp_path == res[1].ravel().tolist()
     assert exp_leaf.tolist() == res[2].ravel().tolist()
Example #11
 def check_outputs(self, model, model_onnx, Xtest,
                   predict_attributes, decimal=5,
                   skip_if_float32=False, disable_optimisation=True):
     if "TransposeScaleMatMul" in str(model_onnx):
         raise RuntimeError("This node must not be added.")
     if predict_attributes is None:
         predict_attributes = {}
     exp = model.predict(Xtest, **predict_attributes)
     if disable_optimisation and GraphOptimizationLevel is not None:
         opts = SessionOptions()
         opts.graph_optimization_level = (
             GraphOptimizationLevel.ORT_DISABLE_ALL)
         sess = InferenceSession(
             model_onnx.SerializeToString(), sess_options=opts)
     else:
         sess = InferenceSession(model_onnx.SerializeToString())
     got = sess.run(None, {'X': Xtest})
     if isinstance(exp, tuple):
         if len(exp) != len(got):
             raise AssertionError("Mismatched number of outputs.")
         for i, (e, g) in enumerate(zip(exp, got)):
             if skip_if_float32 and g.dtype == np.float32:
                 continue
             try:
                 assert_almost_equal(self.remove_dim1(e),
                                     self.remove_dim1(g),
                                     decimal=decimal)
             except AssertionError as e:  # noqa
                 raise AssertionError(
                     "Mismatch for output {} and attributes {}"
                     ".".format(i, predict_attributes)) from e
     else:
         if skip_if_float32 and Xtest.dtype == np.float32:
             return
         assert_almost_equal(np.squeeze(exp),
                             np.squeeze(got), decimal=decimal)
Example #12
    def test_kernel_cosine_double(self):
        ker = PairwiseKernel(metric='cosine')
        onx = convert_kernel(ker,
                             'X',
                             output_names=['Y'],
                             dtype=np.float64,
                             op_version=_TARGET_OPSET_)
        model_onnx = onx.to_onnx(inputs=[('X', DoubleTensorType([None,
                                                                 None]))],
                                 target_opset=TARGET_OPSET)

        x = np.random.randn(4, 3)
        x[0, 0] = x[1, 1] = x[2, 2] = 10.
        x[3, 2] = 5.

        try:
            sess = InferenceSession(model_onnx.SerializeToString())
        except NotImplementedError:
            # Failed to find kernel for FusedMatMul(1).
            return
        res = sess.run(None, {'X': x.astype(np.float64)})[0]
        m1 = res
        m2 = ker(x)
        assert_almost_equal(m1, m2, decimal=5)
Example #13
def do_onnx_inference_test(module, input, *, training=False):
    with tempfile.NamedTemporaryFile() as file:
        onnx_export_to(file.name, module, input, training=training)
        onnx.checker.check_model(onnx.load(file.name), full_check=True)

        # run inference through the exported model
        input = torch.randn_like(input)
        output, = InferenceSession(file.name).run(
            ['output'], {'input': input.numpy()}
        )

        assert torch.allclose(
            torch.from_numpy(output),
            module(input), rtol=5e-5, atol=1e-6
        )
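
A usage sketch with a tiny module (assuming the onnx_export_to helper used above is available and names the graph input 'input' and output 'output'; the module itself is illustrative):

import torch

module = torch.nn.Linear(4, 2)
do_onnx_inference_test(module, torch.randn(3, 4))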
Example #14
    def test_local_outlier_factor_metric(self):
        for metric in ['cityblock', 'euclidean', 'manhattan', 'sqeuclidean']:
            with self.subTest(metric=metric):
                lof = LocalOutlierFactor(n_neighbors=2,
                                         novelty=True,
                                         metric=metric)
                data = np.array(
                    [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100., 99.]],
                    dtype=np.float32)
                model = lof.fit(data)
                model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET)

                data = data.copy()
                data[:, 0] += 0.1

                sess = InferenceSession(model_onnx.SerializeToString())
                names = [o.name for o in sess.get_outputs()]
                self.assertEqual(names, ['label', 'scores'])
                got = sess.run(None, {'X': data})
                self.assertEqual(len(got), 2)
                expected_label = lof.predict(data)
                expected_decif = lof.decision_function(data)
                assert_almost_equal(expected_label, got[0].ravel())
                assert_almost_equal(expected_decif, got[1].ravel(), decimal=4)
Example #15
def fcts_model(X, y, max_depth, n_estimators, n_jobs):
    "RandomForestClassifier."
    rf = RandomForestClassifier(max_depth=max_depth,
                                n_estimators=n_estimators,
                                n_jobs=n_jobs)
    rf.fit(X, y)

    initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
    onx = convert_sklearn(rf,
                          initial_types=initial_types,
                          options={RandomForestClassifier: {
                              'zipmap': False
                          }})
    f = BytesIO()
    f.write(onx.SerializeToString())
    content = f.getvalue()
    sess = InferenceSession(content)
    outputs = [o.name for o in sess.get_outputs()]

    def predict_skl_predict(X, model=rf):
        return model.predict(X)

    def predict_skl_predict_proba(X, model=rf):
        return model.predict_proba(X)

    def predict_onnxrt_predict(X, sess=sess):
        return sess.run(outputs[:1], {'X': X})[0]

    def predict_onnxrt_predict_proba(X, sess=sess):
        return sess.run(outputs[1:], {'X': X})[0]

    return {
        'predict': (predict_skl_predict, predict_onnxrt_predict),
        'predict_proba':
        (predict_skl_predict_proba, predict_onnxrt_predict_proba)
    }
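
As with the linear regression helper above, each pair can be checked for agreement (a sketch, assuming X and y are float32 classification data accepted by RandomForestClassifier):

from numpy.testing import assert_almost_equal

fcts = fcts_model(X, y, max_depth=3, n_estimators=10, n_jobs=1)
skl_proba, ort_proba = fcts['predict_proba']
assert_almost_equal(skl_proba(X), ort_proba(X), decimal=4)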
Example #16
    def prepare(cls, model, device=None, **kwargs):
        """
        Loads the model and creates a :class:`onnxruntime.InferenceSession`
        ready to be used as a backend.

        :param model: ModelProto (returned by `onnx.load`),
            string for a filename or bytes for a serialized model
        :param device: requested device for the computation,
            None means the default one which depends on
            the compilation settings
        :param kwargs: see :class:`onnxruntime.SessionOptions`
        :return: :class:`onnxruntime.InferenceSession`
        """
        if isinstance(model, OnnxRuntimeBackendRep):
            return model
        elif isinstance(model, InferenceSession):
            return OnnxRuntimeBackendRep(model)
        elif isinstance(model, (str, bytes)):
            options = SessionOptions()
            for k, v in kwargs.items():
                if hasattr(options, k):
                    setattr(options, k, v)
            inf = InferenceSession(model, options)
            # backend API is primarily used for ONNX test/validation. As such, we should disable session.run() fallback
            # which may hide test failures.
            inf.disable_fallback()
            if device is not None and not cls.supports_device(device):
                raise RuntimeError(
                    "Incompatible device expected '{0}', got '{1}'".format(
                        device, get_device()))
            return cls.prepare(inf, device, **kwargs)
        else:
            # type: ModelProto
            check_model(model)
            bin = model.SerializeToString()
            return cls.prepare(bin, device, **kwargs)
Example #17
def create_ort_session(model_path, use_gpu):
    from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions
    from onnxruntime import __version__ as ort_version
    from onnxruntime import get_available_providers

    sess_options = SessionOptions()
    sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
    execution_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if use_gpu else ["CPUExecutionProvider"]
    if use_gpu:
        if "CUDAExecutionProvider" not in get_available_providers():
            raise RuntimeError("CUDAExecutionProvider is not avaiable for --use_gpu!")
        else:
            print("use CUDAExecutionProvider")

    ort_session = InferenceSession(model_path, sess_options, providers=execution_providers)
    return ort_session
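
For example (the path is hypothetical):

sess = create_ort_session("model.onnx", use_gpu=False)  # hypothetical path
print([i.name for i in sess.get_inputs()])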
Example #18
    def _test_lgbm(self, X, model, extra_config={}):
        # Create ONNX-ML model
        onnx_ml_model = convert_model(
            model, 'lgbm-onnxml', [("input", FloatTensorType([X.shape[0], X.shape[1]]))]
        )[0]

        # Create ONNX model
        onnx_model = convert_model(
            model, 'lgbm-onnx', [("input", FloatTensorType([X.shape[0], X.shape[1]]))], without_onnx_ml=True
        )[0]

        try:
            from onnxruntime import InferenceSession
        except ImportError:
            # onnxruntime not installed (python 2.7)
            return
            
        # Get the predictions for the ONNX-ML model
        session = InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if output_names[i] == "label":
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        session = InferenceSession(onnx_model.SerializeToString())
        onnx_pred = [[] for i in range(len(output_names))]
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if output_names[i] == "label":
                onnx_pred[1] = pred[i]
            else:
                onnx_pred[0] = pred[i]

        return onnx_ml_pred, onnx_pred, output_names
Example #19
 def test_squeeze(self):
     x = numpy.random.randn(20, 1).astype(numpy.float32)
     y = numpy.squeeze(x)
     for opset in range(10, 20):
         if opset > TARGET_OPSET:
             continue
         with self.subTest(opset=opset):
             onx = OnnxSqueezeApi11('X',
                                    axes=[1],
                                    output_names=['Y'],
                                    op_version=opset)
             model_def = onx.to_onnx({'X': x.astype(numpy.float32)},
                                     target_opset=opset)
             got = InferenceSession(model_def.SerializeToString()).run(
                 None, {'X': x})
             assert_almost_equal(y, got[0])
Example #20
    def test_onnx_example_pdist(self):
        x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))

        diff = OnnxSub('next_in',
                       'next',
                       output_names=['diff'],
                       op_version=onnx.defs.onnx_opset_version())
        id_next = OnnxIdentity('next_in',
                               output_names=['next_out'],
                               op_version=onnx.defs.onnx_opset_version())
        norm = OnnxReduceSumSquare(diff,
                                   output_names=['norm'],
                                   axes=[1],
                                   op_version=onnx.defs.onnx_opset_version())
        flat = OnnxSqueeze(norm,
                           output_names=['scan_out'],
                           axes=[1],
                           op_version=onnx.defs.onnx_opset_version())
        scan_body = id_next.to_onnx(OrderedDict([('next_in', x),
                                                 ('next', FloatTensorType())]),
                                    outputs=[
                                        ('next_out', FloatTensorType([3, 2])),
                                        ('scan_out', FloatTensorType([3]))
                                    ],
                                    other_outputs=[flat])

        sess = InferenceSession(scan_body.SerializeToString())
        res = sess.run(None, {'next_in': x, 'next': x[:1]})
        assert_almost_equal(x, res[0])
        exp = np.array([0., 18., 20.], dtype=np.float32)
        assert_almost_equal(exp, res[1])

        node = OnnxScan('x',
                        'x',
                        output_names=['y', 'z'],
                        num_scan_inputs=1,
                        body=scan_body.graph,
                        op_version=onnx.defs.onnx_opset_version())
        model_def = node.to_onnx({'x': x},
                                 outputs=[('y', FloatTensorType([3, 2])),
                                          ('z', FloatTensorType([3, 3]))])
        try:
            onnx.checker.check_model(model_def)
        except ValidationError as e:
            if StrictVersion(onnx__version__) <= StrictVersion("1.5.0"):
                warnings.warn(e)
            else:
                raise e

        sess = InferenceSession(model_def.SerializeToString())
        res = sess.run(None, {'x': x})

        exp = squareform(pdist(x, metric="sqeuclidean"))
        assert_almost_equal(x, res[0])
        assert_almost_equal(exp, res[1])
Example #21
 def test_unsqueeze(self):
     x = numpy.random.randn(1, 3, 1, 5).astype(numpy.float32)
     y = numpy.expand_dims(x, axis=-2)
     for opset in (10, 11, 12, 13):
         if opset > TARGET_OPSET:
             continue
         with self.subTest(opset=opset):
             onx = OnnxUnsqueezeApi11('X',
                                      axes=[-2],
                                      output_names=['Y'],
                                      op_version=opset)
             model_def = onx.to_onnx({'X': x.astype(numpy.float32)},
                                     target_opset=opset)
             got = InferenceSession(model_def.SerializeToString()).run(
                 None, {'X': x})
             assert_almost_equal(y, got[0])
Example #22
def _predict(session: rt.InferenceSession, data: pd.DataFrame) -> pd.Series:
    def _correctly_typed_column(column: pd.Series) -> pd.Series:
        if column.dtype in ['float64']:
            return column.astype(np.float32)
        return column

    def _correctly_shaped_values(values):
        return values.reshape((values.shape[0], 1))

    inputs = {
        c: _correctly_shaped_values(_correctly_typed_column(data[c]).values)
        for c in data.columns
    }

    return pd.Series(session.run(None, inputs)[0].reshape(-1),
                     index=data.index)
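
A call sketch (names are illustrative; sess is assumed to be an existing InferenceSession whose inputs match the DataFrame columns):

import pandas as pd

df = pd.DataFrame({"f0": [1.0, 2.0], "f1": [3.0, 4.0]})  # illustrative data
preds = _predict(sess, df)  # sess: an InferenceSession built elsewhere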
Example #23
    def test_cascade_scaler(self):
        def generate_onnx_graph(dim, nbnode, input_name='X1', opv=1):
            i1 = input_name
            scale = list(np.ones((1, dim)).ravel())
            for i in range(nbnode - 1):
                i2 = list(
                    map(float,
                        np.ones((1, dim)).astype(np.float32).ravel()))
                node = OnnxScaler(i1, offset=i2, scale=scale, op_version=opv)
                i1 = node
            i2 = list(map(float, np.ones((1, dim)).astype(np.float32).ravel()))
            node = OnnxScaler(i1,
                              offset=i2,
                              scale=scale,
                              output_names=['Y'],
                              op_version=opv)
            onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))],
                               outputs=[('Y', FloatTensorType((None, dim)))])
            return onx

        exp = [
            np.zeros((1, 5)),
            np.zeros((1, 5)),
            np.zeros((1, 5)),
            np.zeros((1, 5))
        ]
        for opv in (1, 2, 3, None):
            for i, nbnode in enumerate((1, 2, 3, 100)):
                onx = generate_onnx_graph(5, nbnode, opv=opv)
                as_string = onx.SerializeToString()
                try:
                    ort = InferenceSession(as_string)
                except InvalidGraph as e:
                    if opv in (3, ):
                        continue
                    if opv >= onnx_opset_version():
                        continue
                    raise AssertionError(
                        "Unable to load opv={}\n---\n{}\n---".format(
                            opv, onx)) from e
                X = (np.ones((1, 5)) * nbnode).astype(np.float32)
                res_out = ort.run(None, {'X1': X})
                assert len(res_out) == 1
                res = res_out[0]
                assert_almost_equal(exp[i], res)

        dim = 10
        onx = generate_onnx_graph(dim, 300)
        as_string = onx.SerializeToString()
        ort = InferenceSession(as_string)
        X = (np.ones((1, dim)) * nbnode).astype(np.float32)
        res_out = ort.run(None, {'X1': X})
        assert len(res_out) == 1
        res = res_out[0]
        assert res.shape[1] == dim
Example #24
    def test_batch_normalization(self):
        def _batchnorm_test_mode(x, s, bias, mean, var, epsilon=1e-5):
            dims_x = len(x.shape)
            dim_ones = (1, ) * (dims_x - 2)
            s = s.reshape(-1, *dim_ones)
            bias = bias.reshape(-1, *dim_ones)
            mean = mean.reshape(-1, *dim_ones)
            var = var.reshape(-1, *dim_ones)
            return s * (x - mean) / np.sqrt(var + epsilon) + bias

        # input size: (1, 2, 1, 3)
        x = np.array([[[[-1, 0, 1]], [[2, 3, 4]]]]).astype(np.float32)
        s = np.array([1.0, 1.5]).astype(np.float32)
        bias = np.array([0, 1]).astype(np.float32)
        mean = np.array([0, 3]).astype(np.float32)
        var = np.array([1, 1.5]).astype(np.float32)
        y = _batchnorm_test_mode(x, s, bias, mean, var).astype(np.float32)

        onx = OnnxBatchNormalization('X',
                                     s,
                                     bias,
                                     mean,
                                     var,
                                     output_names=['Y'],
                                     op_version=TARGET_OPSET)
        model_def = onx.to_onnx({'X': x.astype(np.float32)},
                                target_opset=TARGET_OPSET)
        oinf = InferenceSession(model_def.SerializeToString())
        got = oinf.run(None, {'X': x})
        assert_almost_equal(y, got[0], decimal=5)

        # input size: (2, 3, 4, 5)
        x = np.random.randn(2, 3, 4, 5).astype(np.float32)
        s = np.random.randn(3).astype(np.float32)
        bias = np.random.randn(3).astype(np.float32)
        mean = np.random.randn(3).astype(np.float32)
        var = np.random.rand(3).astype(np.float32)
        epsilon = 1e-2
        y = _batchnorm_test_mode(x, s, bias, mean, var,
                                 epsilon).astype(np.float32)

        onx = OnnxBatchNormalization('X',
                                     s,
                                     bias,
                                     mean,
                                     var,
                                     output_names=['Y'],
                                     epsilon=epsilon,
                                     op_version=TARGET_OPSET)
        model_def = onx.to_onnx({'X': x.astype(np.float32)},
                                target_opset=TARGET_OPSET)
        oinf = InferenceSession(model_def.SerializeToString())
        got = oinf.run(None, {'X': x})
        assert_almost_equal(y, got[0], decimal=5)
Example #25
 def _predict_with_onnx(model, X):
     session = InferenceSession(model.SerializeToString())
     output_names = [s_output.name for s_output in session.get_outputs()]
     input_names = [s_input.name for s_input in session.get_inputs()]
     if len(input_names) > 1:
         raise RuntimeError(
             "Test expects one input. Found multiple inputs: %r." % input_names)
     input_name = input_names[0]
     if hasattr(X, "values"):
         return session.run(output_names, {input_name: X.values})[0][:, 0]
     return session.run(output_names, {input_name: X})[0][:, 0]
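
For instance (a sketch; model_onnx is assumed to be a converted model with a single input, X a matching float32 array):

pred = _predict_with_onnx(model_onnx, X)  # first column of the first output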
Example #26
    def __init__(
        self,
        module="",
        args=None,
        args_parser=None,
    ):

        self.device = "cpu"
        self.module = module
        self.args_parser = args_parser

        if not args:
            args = argparse.Namespace()
            self.args = args
        else:
            self.args = args
        self.args = self.args_parser(self.args)

        if not os.path.isdir(self.args.output_dir):
            get_model_dir(
                output_dir=self.args.output_dir,
                add_ro=self.args.add_ro,
                module=self.module,
                onnx=self.args.onnx,
            )  # onnx = True

        self.tokenizer = BertTokenizer.from_pretrained(self.args.output_dir,
                                                       do_lower_case=False)

        self.options = SessionOptions()
        # One intra-op thread ensures higher overall throughput here.
        # self.options.enable_profiling = True
        self.options.intra_op_num_threads = 1
        self.options.inter_op_num_threads = 1
        # self.options.log_severity_level = 1
        self.options.execution_mode = ExecutionMode.ORT_SEQUENTIAL
        # The model in output_dir is pre-optimized for this hardware, but
        # on-the-fly optimization is still faster, so it stays enabled:
        # self.options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_DISABLE_ALL

        # The stored, optimized ONNX model for the module given by output_dir.
        self.model_quant = os.path.join(self.args.output_dir,
                                        "converted-optimized.onnx")
        self.session = InferenceSession(self.model_quant, self.options)
Example #27
    def convert_to_onnx(self, onnx_output_dir=None, set_onnx_arg=True):
        """Convert the model to ONNX format and save to output_dir
        Args:
            onnx_output_dir (str, optional): If specified, ONNX model will be saved to output_dir (else args.output_dir will be used). Defaults to None.
            set_onnx_arg (bool, optional): Updates the model args to set onnx=True. Defaults to True.
        """  # noqa
        if not onnx_output_dir:
            onnx_output_dir = os.path.join(self.options.output_dir,
                                           self.options.model_type,
                                           self.options.model_name, "onnx")
        os.makedirs(onnx_output_dir, exist_ok=True)

        if not os.listdir(onnx_output_dir):
            onnx_model_name = os.path.join(onnx_output_dir, "onnx_model.onnx")
            with tempfile.TemporaryDirectory() as temp_dir:
                basedir = os.path.basename(temp_dir)
                temp_dir = os.path.join(self.options.output_dir, basedir)
                self.save_model(output_dir=temp_dir, model=self.model)

                convert(
                    framework="pt",
                    model=temp_dir,
                    tokenizer=self.tokenizer,
                    output=Path(onnx_model_name),
                    pipeline_name="ner",
                    opset=11,
                )
            self.tokenizer.save_pretrained(onnx_output_dir)
            self.config.save_pretrained(onnx_output_dir)

        onnx_options = SessionOptions()
        use_cuda = self._device.type != 'cpu'
        onnx_execution_provider = "CUDAExecutionProvider" if use_cuda else "CPUExecutionProvider"
        onnx_options.intra_op_num_threads = 1
        onnx_options.execution_mode = ExecutionMode.ORT_SEQUENTIAL
        onnx_model_path = os.path.join(onnx_output_dir, "onnx_model.onnx")
        if self.options.dynamic_quantize:
            # Append "-quantized" at the end of the model's name
            quantized_model_path = generate_identified_filename(
                Path(onnx_model_path), "-quantized")
            quantize_dynamic(Path(onnx_model_path), quantized_model_path)
            onnx_model_path = quantized_model_path.as_posix()

        return InferenceSession(onnx_model_path,
                                onnx_options,
                                providers=[onnx_execution_provider])
Example #28
    def verify_onnx(model: T5EncoderDecoderInit, ort_session: InferenceSession, device: torch.device, max_cases=4):
        """Compare the results from PyTorch and OnnxRuntime to verify that the ONNX model is good.
        """
        ort_inputs = ort_session.get_inputs()
        use_decoder_input_ids = len(ort_inputs) == 3

        test_cases = [(4, 11), (1, 2), (3, 1), (8, 5)]
        test_cases_max_diff = []
        for (batch_size, encode_sequence_length) in test_cases[:max_cases]:
            inputs = T5EncoderDecoderInitInputs.create_dummy(model.config,
                                                             batch_size,
                                                             encode_sequence_length,
                                                             use_decoder_input_ids=use_decoder_input_ids,
                                                             device=device)

            ort_outputs = T5EncoderDecoderInitHelper.onnxruntime_inference(ort_session, inputs)

            # Run inference of PyTorch model
            input_list = inputs.to_list()
            torch_outputs = model(*input_list)

            assert (torch_outputs[0].cpu().numpy().shape == ort_outputs[0].shape)
            max_diff = numpy.amax(numpy.abs(torch_outputs[0].cpu().numpy() - ort_outputs[0]))
            logger.debug(f"logits max_diff={max_diff}")
            max_diff_all = max_diff

            assert (torch_outputs[1].cpu().numpy().shape == ort_outputs[1].shape)
            max_diff = numpy.amax(numpy.abs(torch_outputs[1].cpu().numpy() - ort_outputs[1]))
            logger.debug(f"encoder_hidden_states max_diff={max_diff}")
            max_diff_all = max(max_diff_all, max_diff)

            for i in range(2 * model.config.num_layers):
                max_diff = numpy.amax(numpy.abs(torch_outputs[2][i].cpu().numpy() - ort_outputs[2 + i]))
                logger.debug(f"self attention past state {i} max_diff={max_diff}")
                max_diff_all = max(max_diff_all, max_diff)

            for i in range(2 * model.config.num_layers):
                max_diff = numpy.amax(
                    numpy.abs(torch_outputs[3][i].cpu().numpy() - ort_outputs[2 + 2 * model.config.num_layers + i]))
                logger.debug(f"cross attention past state {i} max_diff={max_diff}")
                max_diff_all = max(max_diff_all, max_diff)

            test_cases_max_diff.append(max_diff_all)
            logger.info(
                f"batch_size={batch_size} encode_sequence_length={encode_sequence_length}, max_diff={max_diff_all}")

        return max(test_cases_max_diff)
Example #29
class NegLogLearningLoss(BaseLearningLoss):
    """
    Implements a negative log loss
    `loss(yt, yp) = -(1 - yt)\\log(1 - yp) - yt\\log(yp)`.
    This only works for binary classification, where *yp* is the
    predicted probability and *yt* the expected probability.
    *yt* is expected to be binary; *yp* is a matrix with two
    columns whose rows each sum to 1.
    However, this loss is usually applied after a softmax function,
    and the gradient is computed directly from the loss with respect
    to the raw scores, before they are processed by the softmax
    (see class `Log
    <https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/
    linear_model/_sgd_fast.pyx#L236>`_).

    :param eps: clipping value for probabilities,
        avoids computing `log(0)`
    :param probability_function: function to convert
        raw scores into probabilities, default value is `sigmoid`
        for a logistic regression
    """
    def __init__(self, eps=1e-5, probability_function='sigmoid'):
        BaseLearningLoss.__init__(self)
        self.eps = eps
        self.probability_function = probability_function

    def build_onnx_function(self, opset, device, weight_name):
        so = SessionOptions()
        so.log_severity_level = 4

        # loss_grad
        fct_name = f"grad_{self.probability_function}_neg_log_loss_error"
        self.loss_grad_onnx_ = function_onnx_graph(fct_name,
                                                   target_opset=opset,
                                                   weight_name=weight_name,
                                                   eps=self.eps)
        self.loss_grad_sess_ = InferenceSession(
            self.loss_grad_onnx_.SerializeToString(),
            so,
            providers=device_to_providers(device))
        self.loss_grad_sess_bind_ = (
            self.loss_grad_sess_.io_binding()._iobinding)

        # score
        self.build_onnx_score_function(opset, device, weight_name)
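
For reference, the clipped loss described in the docstring can be written directly in numpy (a sketch; eps plays the same clipping role as in the constructor):

import numpy as np

def neg_log_loss(yt, yp, eps=1e-5):
    # yt: binary labels, yp: predicted probability of the positive class.
    yp = np.clip(yp, eps, 1 - eps)
    return -(1 - yt) * np.log(1 - yp) - yt * np.log(yp)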
Example #30
    def test_cascade_add(self):
        def generate_onnx_graph(dim, nbnode, input_name='X1', opv=None):
            i1 = input_name
            for i in range(nbnode - 1):
                i2 = (np.ones((1, dim)) * nbnode * 10).astype(np.float32)
                node = OnnxAdd(i1, i2, op_version=opv)
                i1 = node
            i2 = (np.ones((1, dim)) * nbnode * 10).astype(np.float32)
            node = OnnxAdd(i1, i2, output_names=['Y'], op_version=opv)
            onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))],
                               outputs=[('Y', FloatTensorType())],
                               target_opset=opv)
            return onx

        exp = [
            np.array([[11., 11., 11., 11., 11.]]),
            np.array([[42., 42., 42., 42., 42.]]),
            np.array([[93., 93., 93., 93., 93.]]),
            np.array([[100100., 100100., 100100., 100100., 100100.]])
        ]
        for opv in ({'': 10}, 9, 10, 11, 12, onnx_opset_version()):
            if isinstance(opv, dict):
                if opv[''] > get_latest_tested_opset_version():
                    continue
            elif opv is not None and opv > get_latest_tested_opset_version():
                continue
            for i, nbnode in enumerate((1, 2, 3, 100)):
                onx = generate_onnx_graph(5, nbnode, opv=opv)
                as_string = onx.SerializeToString()
                try:
                    ort = InferenceSession(as_string)
                except InvalidGraph as e:
                    if opv >= onnx_opset_version():
                        continue
                    raise AssertionError(
                        "Unable to load opv={}\n---\n{}\n---".format(
                            opv, onx)) from e
                X = (np.ones((1, 5)) * nbnode).astype(np.float32)
                res_out = ort.run(None, {'X1': X})
                assert len(res_out) == 1
                res = res_out[0]
                assert_almost_equal(exp[i], res)

        dim = 10
        onx = generate_onnx_graph(dim, 300, opv=11)
        as_string = onx.SerializeToString()
        ort = InferenceSession(as_string)
        X = (np.ones((1, dim)) * nbnode).astype(np.float32)
        res_out = ort.run(None, {'X1': X})
        assert len(res_out) == 1
        res = res_out[0]
        assert res.shape[1] == dim