コード例 #1
0
    def test_create_asv_benchmark_pyspy(self):
        """
        Calls ``create_asv_benchmark`` with ``add_pyspy=True`` for
        ``DecisionTreeClassifier``, then walks subfolder ``pyspy`` and
        checks that the expected script contains ``setup_profile``.
        The test fails if the expected script was never visited.
        """
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(__file__, "temp_create_asv_benchmark_pyspy")
        created = create_asv_benchmark(
            location=folder, verbose=0,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True,
            models={'DecisionTreeClassifier'}, add_pyspy=True)
        self.assertNotEmpty(created)

        # End of the name of the script the test is looking for.
        expected_suffix = (
            "bench_DecisionTreeClas_default_b_cl_1_4_%d_float_nozipmap.py"
            % TARGET_OPSET)
        found = False
        visited = []
        for root, _, filenames in os.walk(os.path.join(folder, 'pyspy')):
            for script in filenames:
                if '__init__' in script:
                    continue
                visited.append(script)
                with open(os.path.join(root, script), 'r',
                          encoding='utf-8') as fh:
                    text = fh.read()
                if not script.endswith(expected_suffix):
                    continue
                if compare_module_version(sklearn.__version__, "0.21") < 0:
                    continue
                if "setup_profile" not in text:
                    raise AssertionError(text)
                found = True
        if not found:
            # Lists what was seen to help understand why nothing matched.
            raise AssertionError("Visited files\n{}".format(
                "\n".join(visited)))
コード例 #2
0
    def test_create_asv_benchmark_pyspy_knn(self):
        """
        Calls ``create_asv_benchmark`` with ``add_pyspy=True`` for
        ``KNeighborsClassifier``, then walks subfolder ``pyspy`` and
        checks that the expected script contains ``setup_profile``.
        The test fails if the expected script was never visited.
        """
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(
            __file__, "temp_create_asv_benchmark_pyspy_knn")
        created = create_asv_benchmark(
            location=folder, verbose=0,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True,
            models={'KNeighborsClassifier'}, add_pyspy=True)
        self.assertNotEmpty(created)

        # End of the name of the script the test is looking for.
        expected_suffix = (
            "bench_KNNClas_default_k3_b_cl_64_algorithmbrute_n_neighbors3"
            "_10000_20_%d_double_optcdist-zm0.py" % TARGET_OPSET)
        found = False
        visited = []
        for root, _, filenames in os.walk(os.path.join(folder, 'pyspy')):
            for script in filenames:
                if '__init__' in script:
                    continue
                visited.append(script)
                with open(os.path.join(root, script), 'r',
                          encoding='utf-8') as fh:
                    text = fh.read()
                if not script.endswith(expected_suffix):
                    continue
                if compare_module_version(sklearn.__version__, "0.21") < 0:
                    continue
                if "setup_profile" not in text:
                    raise AssertionError(text)
                found = True
        if not found:
            # Lists what was seen to help understand why nothing matched.
            raise AssertionError("Visited files\n{}".format(
                "\n".join(visited)))
コード例 #3
0
    def test_create_asv_benchmark_pyspy_compiled(self):
        """
        Calls ``create_asv_benchmark`` with ``add_pyspy=True`` for
        ``AdaBoostRegressor`` and runtimes ``'python'`` and
        ``'python_compiled'``, then walks subfolder ``pyspy`` and checks
        that the expected script contains ``setup_profile``.
        The test fails if the expected script was never visited.
        """
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(
            __file__, "temp_create_asv_benchmark_pyspy_compiled")
        created = create_asv_benchmark(
            location=folder, verbose=0,
            runtime=('python', 'python_compiled'),
            exc=False, execute=True,
            models={'AdaBoostRegressor'}, add_pyspy=True)
        self.assertNotEmpty(created)

        # End of the name of the script the test is looking for,
        # it includes the opset returned by get_opset_number_from_onnx.
        expected_suffix = (
            "bench_AdaBoostReg_default_b_reg_nest10_1_4_%d_float_.py"
            % get_opset_number_from_onnx())
        found = False
        visited = []
        for root, _, filenames in os.walk(os.path.join(folder, 'pyspy')):
            for script in filenames:
                if '__init__' in script:
                    continue
                visited.append(script)
                with open(os.path.join(root, script), 'r',
                          encoding='utf-8') as fh:
                    text = fh.read()
                if not script.endswith(expected_suffix):
                    continue
                if compare_module_version(sklearn.__version__, "0.21") < 0:
                    continue
                if "setup_profile" not in text:
                    raise AssertionError(text)
                found = True
        if not found:
            # Lists what was seen to help understand why nothing matched.
            raise AssertionError("Visited files\n{}".format(
                "\n".join(visited)))
コード例 #4
0
ファイル: register.py プロジェクト: sdpython/mlprodict
def _register_converters_skl2onnx(exc=True):
    """
    Registers additional converters for :epkg:`skl2onnx`.

    @param      exc     if True, raises an exception if a converter cannot
                        be registered (missing package for example),
                        otherwise a warning is emitted
    @return             list of models supported by the new converters,
                        empty if the import failed
    """
    try:
        import skl2onnx.sklapi.register  # pylint: disable=W0611
        from skl2onnx.sklapi import WOETransformer
    except ImportError as e:  # pragma: no cover
        try:
            import skl2onnx
            from pyquickhelper.texthelper.version_helper import (
                compare_module_version)
        except ImportError:
            # The version cannot be checked, the initial error is
            # handled below.
            pass
        else:
            if compare_module_version(skl2onnx.__version__, '1.9.3') < 0:
                # Too old version of skl2onnx.
                return []
        if exc:
            raise e
        warnings.warn("Cannot register models from 'skl2onnx' due to %r." %
                      e)
        return []

    return [WOETransformer]
コード例 #5
0
class TestRtValidateGaussianProcessOrt2(ExtTestCase):
    """
    Runs ``enumerate_validated_operator_opsets`` on
    ``GaussianProcessRegressor`` with runtime ``'onnxruntime2'``.
    """

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning)
                     )
    @skipif_circleci("to investigate, shape of predictions are different")
    @unittest.skipIf(
        compare_module_version(ort_version, threshold) <= 0,
        reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_rt_GaussianProcessRegressor_debug_std(self):
        """
        Validates the scenarios whose name contains ``b-reg-std-NSV``
        in debug mode and checks rows and logs were produced.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        getLogger('skl2onnx').disabled = True

        captured = []

        def collect(*args, **kwargs):
            # Stores the logged message instead of printing it.
            captured.append(" ".join(str(a) for a in args))

        results = list(
            enumerate_validated_operator_opsets(
                4, models={"GaussianProcessRegressor"}, fLOG=collect,
                runtime='onnxruntime2', debug=True,
                filter_exp=lambda m, s: "b-reg-std-NSV" in s))
        self.assertGreater(len(results), 1)
        # debug mode is enabled: more than one message is expected
        self.assertGreater(len(captured), 1)
コード例 #6
0
class TestRtValidateIsolationForest(ExtTestCase):
    """
    Runs ``enumerate_validated_operator_opsets`` on ``IsolationForest``
    with runtime ``'python'``.
    """

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning)
                     )
    @unittest.skipIf(compare_module_version(skl2onnx_version, '1.11') < 0,
                     reason="converter issue")
    def test_rt_IsolationForest_python(self):
        """
        Validates every scenario whose name does not contain ``-64``
        in debug mode and checks rows and logs were produced.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        getLogger('skl2onnx').disabled = True
        # verbosity is only enabled when the file is run as a script
        verbosity = 1 if __name__ == "__main__" else 0

        captured = []

        def collect(*args, **kwargs):
            # Stores the logged message instead of printing it.
            captured.append(" ".join(str(a) for a in args))

        results = list(
            enumerate_validated_operator_opsets(
                verbosity, models={"IsolationForest"}, fLOG=collect,
                runtime='python', debug=True,
                filter_exp=lambda m, p: '-64' not in p))
        self.assertGreater(len(results), 1)
        # debug mode is enabled: more than one message is expected
        self.assertGreater(len(captured), 1)
コード例 #7
0
    def test_lightgbm_regressor(self):
        """
        Checks that the ONNX conversions of ``LGBMRegressor`` return the
        same predictions as the original model.

        The model is converted with ``to_onnx`` and, when it can be
        imported, with ``onnxmltools.convert.convert_lightgbm``. Every
        conversion is run with runtimes ``'python'`` and ``'onnxruntime1'``
        and the maximum absolute difference must be below ``1e-3``.

        The test is reported as *skipped* (instead of silently passing)
        when *onnxmltools* is missing or when the comparison of its version
        with ``'1.11'`` returns a value ``<= 0``.
        """
        try:
            from onnxmltools import __version__ as onnxmltools_version
        except ImportError:
            self.skipTest("onnxmltools is not installed")
        if compare_module_version(onnxmltools_version, '1.11') <= 0:
            self.skipTest(
                "onnxmltools %s is too old for this test" %
                onnxmltools_version)
        from lightgbm import LGBMRegressor
        try:
            from onnxmltools.convert import convert_lightgbm
        except ImportError:
            convert_lightgbm = None
        X, y = self.data_X, self.data_y

        for ne in [1, 2, 10, 50, 100, 200]:
            for mx in [1, 10]:
                if __name__ != "__main__" and mx > 5:
                    # the deepest trees are only tested when the file
                    # is run as a script
                    break
                model = LGBMRegressor(max_depth=mx,
                                      n_estimators=ne,
                                      min_child_samples=1,
                                      learning_rate=0.0000001)
                model.fit(X, y)
                expected = model.predict(X)

                model_onnx = to_onnx(model, X)
                model_onnx2 = None
                if convert_lightgbm is not None:
                    try:
                        model_onnx2 = convert_lightgbm(
                            model,
                            initial_types=[('X',
                                            FloatTensorType([None,
                                                             X.shape[1]]))])
                    except RuntimeError as e:
                        # This specific message is tolerated, the second
                        # conversion is then ignored.
                        if ("is higher than the number of the installed"
                                not in str(e)):
                            raise

                for i, mo in enumerate([model_onnx, model_onnx2]):
                    if mo is None:
                        continue
                    for rt in ['python', 'onnxruntime1']:
                        with self.subTest(i=i, rt=rt, max_depth=mx, n_est=ne):
                            oinf = OnnxInference(mo, runtime=rt)
                            got = oinf.run({'X': X})['variable']
                            diff = numpy.abs(got.ravel() -
                                             expected.ravel()).max()
                            if __name__ == "__main__":
                                print("lgb1 mx=%d ne=%d" % (mx, ne),
                                      "mlprod" if i == 0 else "mltool", rt[:6],
                                      diff)
                            self.assertLess(diff, 1e-3)
コード例 #8
0
    def test_create_asv_benchmark_rf(self):
        """
        Calls ``create_asv_benchmark`` for ``RandomForestRegressor``,
        appends a call to ``setup_cache`` to every script found in
        subfolder ``benches``, runs it and fails if the filtered standard
        error is not empty. The expected script must contain
        ``random_state=42``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(__file__, "temp_create_asv_benchmark_rf")
        created = create_asv_benchmark(
            location=folder, verbose=1, fLOG=fLOG,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True, models={'RandomForestRegressor'})
        self.assertNotEmpty(created)

        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        # Lines of the standard error containing one of these substrings
        # are not considered as failures.
        ignored = ("Warning", "No module named 'mlprodict'", "Traceback ")
        found = False
        visited = []
        for root, _, filenames in os.walk(os.path.join(folder, 'benches')):
            for script in filenames:
                if '__init__' in script:
                    continue
                fLOG("process '{}'".format(script))
                fullname = os.path.join(root, script)
                with open(fullname, 'r', encoding='utf-8') as fh:
                    source = fh.read()
                # The first class of the script is instantiated
                # at the end of the script.
                cls_name = class_pattern.findall(source)[0]
                source += "\n\ncl = %s()\ncl.setup_cache()\n" % cls_name
                visited.append(fullname)
                with open(fullname, 'w', encoding='utf-8') as fh:
                    fh.write(source)
                __, stderr = run_script(fullname, wait=True)
                # Empty lines and lines starting with a space are skipped too.
                kept = [
                    line for line in stderr.split('\n')
                    if line and line[0] != ' ' and
                    not any(sub in line for sub in ignored)]
                stderr = "\n".join(kept).strip(' \n\r')
                if stderr:
                    raise RuntimeError("Issue with '{}'\n{}".format(
                        fullname, stderr))
                if not script.endswith(
                        "bench_RandomForestReg_default_b_reg_nest100.py"):
                    continue
                if compare_module_version(sklearn.__version__, "0.21") < 0:
                    continue
                if "random_state=42" not in source:
                    raise AssertionError(source)
                found = True
        if not found:
            raise AssertionError("Visited files\n{}".format(
                "\n".join(visited)))
コード例 #9
0
    def test_xgboost_regressor(self):
        """
        Checks that the ONNX conversions of ``XGBRegressor`` return the
        same predictions as the original model.

        The model is converted with ``to_onnx`` and, when it can be
        imported, with ``onnxmltools.convert.convert_xgboost``. Every
        conversion is run with runtimes ``'python'`` and ``'onnxruntime1'``
        and the maximum absolute difference must be below ``1e-5``.

        The test is reported as *skipped* (instead of silently passing)
        when *onnxmltools* is missing or when the comparison of its version
        with ``'1.11'`` returns a value ``<= 0``.
        """
        try:
            from onnxmltools import __version__ as onnxmltools_version
        except ImportError:
            self.skipTest("onnxmltools is not installed")
        if compare_module_version(onnxmltools_version, '1.11') <= 0:
            self.skipTest(
                "onnxmltools %s is too old for this test" %
                onnxmltools_version)
        from xgboost import XGBRegressor
        try:
            from onnxmltools.convert import convert_xgboost
        except ImportError:
            convert_xgboost = None

        X, y = self.data_X, self.data_y
        model = XGBRegressor(max_depth=8,
                             n_estimators=100,
                             learning_rate=0.000001)
        model.fit(X, y)
        expected = model.predict(X)

        model_onnx = to_onnx(model, X)
        model_onnx2 = None
        if convert_xgboost is not None:
            try:
                model_onnx2 = convert_xgboost(
                    model,
                    initial_types=[('X', FloatTensorType([None, X.shape[1]]))])
            except RuntimeError as e:
                # This specific message is tolerated, the second
                # conversion is then ignored.
                if "is higher than the number of the installed" not in str(e):
                    raise

        for i, mo in enumerate([model_onnx, model_onnx2]):
            if mo is None:
                continue
            for rt in ['python', 'onnxruntime1']:
                with self.subTest(i=i, rt=rt):
                    oinf = OnnxInference(mo, runtime=rt)
                    got = oinf.run({'X': X})['variable']
                    diff = numpy.abs(got.ravel() - expected.ravel()).max()
                    if __name__ == "__main__":
                        print("xgb32", "mlprod" if i == 0 else "mltool", rt,
                              diff)
                    self.assertLess(diff, 1e-5)
コード例 #10
0
    def test_create_asv_benchmark_tiny_same(self):
        """
        Calls ``create_asv_benchmark`` with ``env='same'`` for three models,
        appends a call to ``setup_cache`` to every script found in
        subfolder ``benches``, runs it and fails if the filtered standard
        error is not empty. The script for ``NMF`` must import ``NMF``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(
            __file__, "temp_create_asv_benchmark_all_tiny_same")
        skipped_models = {
            'DictVectorizer', 'FeatureHasher',  # 'CountVectorizer'
        }
        tested_models = {
            'SelectFromModel', 'NMF', 'LatentDirichletAllocation'}
        created = create_asv_benchmark(
            location=folder, verbose=1, fLOG=fLOG,
            skip_models=skipped_models,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True, models=tested_models, env='same')
        self.assertNotEmpty(created)

        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        # Lines of the standard error containing one of these substrings
        # are not considered as failures.
        ignored = ("Warning", "No module named 'mlprodict'", "Traceback ")
        # One of these imports is expected in the script for NMF.
        nmf_imports = ("from sklearn.decomposition.nmf import NMF",
                       "from sklearn.decomposition import NMF")
        for root, _, filenames in os.walk(os.path.join(folder, 'benches')):
            for script in filenames:
                if '__init__' in script:
                    continue
                fLOG("process '{}'".format(script))
                fullname = os.path.join(root, script)
                with open(fullname, 'r', encoding='utf-8') as fh:
                    source = fh.read()
                # The first class of the script is instantiated
                # at the end of the script.
                cls_name = class_pattern.findall(source)[0]
                source += "\n\ncl = %s()\ncl.setup_cache()\n" % cls_name
                with open(fullname, 'w', encoding='utf-8') as fh:
                    fh.write(source)
                __, stderr = run_script(fullname, wait=True)
                # Empty lines and lines starting with a space are skipped too.
                kept = [
                    line for line in stderr.split('\n')
                    if line and line[0] != ' ' and
                    not any(sub in line for sub in ignored)]
                stderr = "\n".join(kept).strip(' \n\r')
                if stderr:
                    raise RuntimeError(
                        "Issue with '{}'\n{}".format(fullname, stderr))
                if not script.endswith("bench_NMF_default_num_tr_pos.py"):
                    continue
                if compare_module_version(sklearn.__version__, "0.22") < 0:
                    continue
                if not any(imp in source for imp in nmf_imports):
                    raise AssertionError(
                        "Unable to find 'import NMF' in\n{}".format(source))
コード例 #11
0
class TestNotebookOnnxSbs(ExtTestCase):
    """
    Executes notebook ``onnx_sbs`` from folder ``_doc/notebooks``.
    """

    def setUp(self):
        """
        Calls ``add_missing_development_version`` for *jyquickhelper*
        before every test.
        """
        add_missing_development_version(["jyquickhelper"], __file__, hide=True)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning)
                     )
    @unittest.skipIf(
        compare_module_version(ort_version, "0.4.0") <= 0,
        reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_notebook_onnx_sbs(self):
        """
        Runs ``test_notebook_execution_coverage`` on notebook ``onnx_sbs``.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")

        # The module itself is checked: ``mlprodict is not None`` is a
        # boolean, which is never None nor empty, so the former assertion
        # could not fail.
        self.assertNotEmpty(mlprodict)
        folder = os.path.join(os.path.dirname(__file__), "..", "..", "_doc",
                              "notebooks")
        test_notebook_execution_coverage(__file__,
                                         "onnx_sbs",
                                         folder,
                                         this_module_name="mlprodict",
                                         fLOG=fLOG)
コード例 #12
0
    def test_create_asv_benchmark_hist_gbc(self):
        """
        Calls ``create_asv_benchmark`` for ``HistGradientBoostingClassifier``
        and walks subfolder ``benches``.

        No generated file may contain ``_hist_gradient_boosting`` in its
        path. The expected script must contain ``random_state=42``, the
        import from ``sklearn.ensemble._hist_gradient_boosting`` and
        ``par_full_test_name``. The test fails with the list of visited
        files if the expected script was never found.
        """
        self.assertNotEmpty(mlprodict)
        temp = get_temp_folder(__file__, "temp_create_asv_benchmark_hist_gbc")
        created = create_asv_benchmark(
            location=temp,
            verbose=0,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False,
            execute=True,
            models={'HistGradientBoostingClassifier'})
        self.assertNotEmpty(created)

        # Substrings the expected script must contain.
        expected_content = [
            "random_state=42",
            "from sklearn.ensemble._hist_gradient_boosting."
            "gradient_boosting import",
            "par_full_test_name = 'bench",
        ]
        verif = False
        allnames = []
        for path, _, files in os.walk(os.path.join(temp, 'benches')):
            for zoo in files:
                if '__init__' in zoo:
                    continue
                # Every visited file is recorded, otherwise the final
                # error message would always be empty.
                allnames.append(zoo)
                fullname = os.path.join(path, zoo)
                if "_hist_gradient_boosting" in fullname:
                    raise AssertionError(fullname)
                with open(fullname, 'r', encoding='utf-8') as f:
                    content = f.read()
                if (zoo.endswith("bench_HGBClas_default_b_cl_mxit100.py")
                        and compare_module_version(sklearn.__version__,
                                                   "0.21") >= 0):
                    for expected in expected_content:
                        if expected not in content:
                            raise AssertionError(content)
                    verif = True
        if not verif:
            raise AssertionError("Visited files\n{}".format(
                "\n".join(allnames)))
コード例 #13
0
    def test_search_predictions_lr(self):
        """
        Builds a ``SearchEnginePredictions`` on top of a logistic regression
        trained on the first two features of the iris dataset, checks its
        representation, then fits it three different ways (arrays,
        dataframe with metadata, dataframe without metadata) and checks
        the neighbors returned for ``[0.5, 0.5]``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        iris = datasets.load_iris()
        clf = LogisticRegression()
        clf.fit(iris.data[:, :2], iris.target)

        df = pandas.DataFrame([
            dict(ind=i * 5, meta1="m%d" % i, meta2="m%d" % (i + 1),
                 f1=i * 0.05, f2=1 - i * 0.05)
            for i in range(20)])

        se = SearchEnginePredictions(clf, n_neighbors=5)
        text = repr(se)
        # Parameter l1_ratio is only expected when the comparison
        # of the scikit-learn version with '0.21.0' is not negative.
        if compare_module_version(sklearn.__version__, '0.21.0') < 0:
            middle = (
                "fit_intercept=True,intercept_scaling=1,max_iter=100,"
                "multi_class='warn',n_jobs=None,")
        else:
            middle = (
                "fit_intercept=True,intercept_scaling=1,l1_ratio=None,"
                "max_iter=100,multi_class='warn',n_jobs=None,")
        expected = "".join([
            "SearchEnginePredictions(fct=LogisticRegression(C=1.0,"
            "class_weight=None,dual=False,",
            middle,
            "penalty='l2',random_state=None,solver='warn',tol=0.0001,"
            "verbose=0,warm_start=False),",
            "fct_params=None,n_neighbors=5)"])
        self.assertEqual(text.replace("\n", "").replace(" ", ""), expected)

        def check_neighbors(score, ind):
            # Indices and scores of the five closest rows.
            self.assertIsInstance(ind, (list, numpy.ndarray))
            self.assertEqual(len(ind), 5)
            self.assertEqual(ind[0], 10)

            self.assertIsInstance(score, numpy.ndarray)
            self.assertEqual(score.shape, (5, ))
            self.assertEqual(score[0], 0)

        def check_metadata(meta):
            # Metadata attached to the five closest rows.
            self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
            self.assertEqual(meta.shape, (5, 3))
            self.assertEqual(meta.iloc[0, 0], 50)

        # features and metadata given as arrays / dataframe
        se.fit(data=None,
               features=df[["f1", "f2"]].values,
               metadata=df[["ind", "meta1", "meta2"]])
        score, ind, meta = se.kneighbors([0.5, 0.5])
        check_neighbors(score, ind)
        check_metadata(meta)

        # features and metadata given as column names
        se.fit(data=df,
               features=["f1", "f2"],
               metadata=["ind", "meta1", "meta2"])
        score, ind, meta = se.kneighbors([0.5, 0.5])
        check_neighbors(score, ind)
        check_metadata(meta)

        # no metadata
        se.fit(data=df, features=["f1", "f2"])
        score, ind, meta = se.kneighbors([0.5, 0.5])
        check_neighbors(score, ind)
        self.assertTrue(meta is None)
コード例 #14
0
class TestRtValidateGaussianProcessOrt(ExtTestCase):
    """
    Checks the conversion of kernels and runs
    ``enumerate_validated_operator_opsets`` on ``GaussianProcessRegressor``
    with runtime ``'onnxruntime1'``.
    """

    def _check_kernel(self, ker, **cmp_opts):
        # Converts kernel *ker* into ONNX, runs it with runtime
        # 'onnxruntime1' and compares the output with the kernel itself,
        # *cmp_opts* is forwarded to assertEqualArray.
        from skl2onnx.operator_converters.gaussian_process import convert_kernel
        onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=numpy.float32,
                             op_version=10)
        model_onnx = onx.to_onnx(
            inputs=[('X', FloatTensorType([None, None]))])
        model_onnx.ir_version = get_ir_version_from_onnx()
        sess = OnnxInference(model_onnx, runtime='onnxruntime1')
        Xtest_ = numpy.arange(6).reshape((3, 2))
        res = sess.run({'X': Xtest_.astype(numpy.float32)})
        self.assertEqualArray(res['Y'], ker(Xtest_), **cmp_opts)

    def _validate_gpr(self, verbose, debug, **options):
        # Logs the test name, disables logger 'skl2onnx' and validates
        # GaussianProcessRegressor, *options* is forwarded to
        # enumerate_validated_operator_opsets. Returns the rows and
        # the captured log messages.
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        getLogger('skl2onnx').disabled = True

        captured = []

        def collect(*args, **kwargs):
            captured.append(" ".join(map(str, args)))

        rows = list(enumerate_validated_operator_opsets(
            verbose, models={"GaussianProcessRegressor"},
            fLOG=collect,
            runtime='onnxruntime1', debug=debug, **options))
        return rows, captured

    @staticmethod
    def _skip_cdist(a, b, c, d, e):
        # Scenario filter: returns False when the options given for
        # GaussianProcessRegressor in *e* set 'optim' to 'cdist'.
        if isinstance(e, dict) and GaussianProcessRegressor in e:
            if e[GaussianProcessRegressor].get('optim', '') == 'cdist':
                return False
        return True

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_rbf1(self):
        "Converts an RBF kernel and compares the outputs."
        self._check_kernel(
            RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)))

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_exp_sine_squared(self):
        "Converts an ExpSineSquared kernel and compares the outputs."
        self._check_kernel(ExpSineSquared(), decimal=5)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    def test_rt_GaussianProcessRegressor_onnxruntime_nofit(self):
        "Validates the scenarios whose name contains ``NF-std``."
        rows, logs = self._validate_gpr(
            1, False, filter_exp=lambda m, s: "NF-std" in s)
        self.assertGreater(len(rows), 1)
        self.assertGreater(len(logs), 0)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    def test_rt_GaussianProcessRegressor_python_nofit(self):
        "Validates the scenarios whose name contains ``NF``."
        rows, logs = self._validate_gpr(
            1, False, filter_exp=lambda m, s: "NF" in s)
        self.assertGreater(len(rows), 6)
        self.assertGreater(len(logs), 0)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    def test_rt_GaussianProcessRegressor_python_fit(self):
        "Validates the scenarios whose name contains neither ``nofit`` nor ``multi``."
        rows, logs = self._validate_gpr(
            4, False,
            filter_exp=lambda m, s: "nofit" not in s and "multi" not in s)
        self.assertGreater(len(rows), 6)
        self.assertGreater(len(logs), 0)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_rt_GaussianProcessRegressor_debug(self):
        "Validates the scenarios whose name contains ``reg-NSV`` in debug mode."
        rows, logs = self._validate_gpr(
            2, True,
            filter_exp=lambda m, s: "reg-NSV" in s,
            filter_scenario=self._skip_cdist)
        self.assertGreater(len(rows), 1)
        self.assertGreater(len(logs), 1)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    @skipif_circleci("to investigate, shape of predictions are different")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_rt_GaussianProcessRegressor_debug_std(self):
        "Validates the scenarios whose name contains ``b-reg-std-NSV`` in debug mode."
        rows, logs = self._validate_gpr(
            4, True,
            filter_exp=lambda m, s: "b-reg-std-NSV" in s,
            filter_scenario=self._skip_cdist)
        self.assertGreater(len(rows), 1)
        self.assertGreater(len(logs), 1)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    @skipif_circleci("to investigate, shape of predictions are different")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_rt_GaussianProcessRegressor_debug_multi(self):
        "Validates the scenarios whose name contains ``m-reg-std-NSV`` in debug mode."
        rows, _ = self._validate_gpr(
            2, True, filter_exp=lambda m, s: 'm-reg-std-NSV' in s)
        self.assertGreater(len(rows), 0)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    @skipif_circleci("to investigate, shape of predictions are different")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_rt_GaussianProcessRegressor_debug_all(self):
        "Validates every scenario without any filter."
        rows, logs = self._validate_gpr(2, False)
        self.assertGreater(len(rows), 1)
        self.assertGreater(len(logs), 0)
コード例 #15
0
@brief      test log(time=4s)
"""
import unittest
import numpy as np
from scipy import sparse as sp
from sklearn import __version__ as sklearn_vers
from sklearn.utils._testing import (assert_array_equal,
                                    assert_array_almost_equal,
                                    assert_almost_equal, assert_raise_message)
from sklearn.metrics.cluster import v_measure_score
from sklearn.datasets import make_blobs
from pyquickhelper.pycode import ExtTestCase, ignore_warnings
from pyquickhelper.texthelper.version_helper import compare_module_version
from mlinsights.mlmodel import KMeansL1L2

# Version gate evaluated once at import time: set when compare_module_version
# returns a non-negative value for the installed scikit-learn against "0.23.2"
# (presumably meaning "at least 0.23.2" -- confirm against pyquickhelper).
sklearn_023 = compare_module_version(sklearn_vers, "0.23.2") >= 0


class TestKMeansL1L2Sklearn(ExtTestCase):

    # non centered, sparse centers to check the
    centers = np.array([
        [0.0, 5.0, 0.0, 0.0, 0.0],
        [1.0, 1.0, 4.0, 0.0, 0.0],
        [1.0, 0.0, 0.0, 5.0, 1.0],
    ])
    n_samples = 100
    n_clusters, n_features = centers.shape  # pylint: disable=E0633
    X, true_labels = make_blobs(n_samples=n_samples,
                                centers=centers,
                                cluster_std=1.,
コード例 #16
0
class TestOnnxrtPythonRuntimeMlText(ExtTestCase):
    def setUp(self):
        """Disable the 'skl2onnx' logger before each test."""
        getLogger('skl2onnx').disabled = True

    def test_onnxrt_label_encoder_strings(self):
        """Check LabelEncoder mapping string keys to string values."""
        node = OnnxLabelEncoder('text',
                                op_version=TARGET_OPSET,
                                keys_strings=['AA', 'BB', 'CC'],
                                values_strings=['LEAA', 'LEBB', 'LECC'],
                                output_names=['out'])
        model = node.to_onnx(inputs=[('text', StringTensorType())])
        session = OnnxInference(model)

        texts = numpy.array(['AA', 'BB', 'AA', 'CC'])
        got = session.run({'text': texts})
        self.assertEqual(list(got['out']), ['LEAA', 'LEBB', 'LEAA', 'LECC'])

    def test_onnxrt_label_encoder_floats(self):
        """Check LabelEncoder mapping float keys to float values."""
        node = OnnxLabelEncoder('text',
                                op_version=TARGET_OPSET,
                                keys_floats=[0.1, 0.2, 0.3],
                                values_floats=[0.3, 0.4, 0.5],
                                output_names=['out'])
        model = node.to_onnx(inputs=[('text', FloatTensorType())])
        session = OnnxInference(model)

        values = numpy.array([0.1, 0.2, 0.3, 0.2], dtype=numpy.float32)
        expected = numpy.array([0.3, 0.4, 0.5, 0.4], dtype=numpy.float32)
        got = session.run({'text': values})
        self.assertEqualArray(got['out'], expected)

    def test_onnxrt_label_encoder_string_floats(self):
        """Check LabelEncoder mapping string keys to float values.

        The input contains ``'DD'``, which is not one of the keys; the
        expected output for it is ``0``.
        """
        node = OnnxLabelEncoder('text',
                                op_version=TARGET_OPSET,
                                keys_strings=['AA', 'BB', 'CC'],
                                values_floats=[0.1, 0.2, 0.3],
                                output_names=['out'])
        model = node.to_onnx(inputs=[('text', StringTensorType())])
        session = OnnxInference(model)

        # The labels are fed as a single column.
        column = numpy.array(['AA', 'DD']).reshape((-1, 1))
        got = session.run({'text': column})
        self.assertEqualArray(got['out'], numpy.array([0.1, 0]))

    def test_onnxrt_label_encoder_raise(self):
        """Check the two failure modes exercised for LabelEncoder.

        Building the operator with ``classes_strings`` is expected to raise
        ``TypeError``; loading a model built with empty ``values_strings``
        is expected to raise ``RuntimeError``.
        """
        def build_with_classes_strings():
            return OnnxLabelEncoder('text',
                                    op_version=TARGET_OPSET,
                                    keys_strings=['AA', 'BB', 'CC'],
                                    classes_strings=['LEAA', 'LEBB', 'LECC'],
                                    output_names=['out'])

        self.assertRaise(build_with_classes_strings, TypeError)

        node = OnnxLabelEncoder('text',
                                op_version=TARGET_OPSET,
                                keys_strings=['AA', 'BB', 'CC'],
                                values_strings=[],
                                output_names=['out'])
        model = node.to_onnx(inputs=[('text', StringTensorType())])

        def load_model():
            return OnnxInference(model)

        self.assertRaise(load_model, RuntimeError)

    def test_onnxrt_string_normalizer(self):
        """Check StringNormalizer with no action, LOWER, UPPER and a bad action.

        The unknown action ``'UPPER2'`` is expected to fail with
        ``RuntimeError`` when the model is run, not when it is loaded.
        """
        corpus = numpy.array([
            'This is the first document.',
            'This document is the second document.',
            'And this is the third one.', 'Is this the first document?'
        ])

        def make_session(**attributes):
            # Build the operator with the given attributes and load it.
            node = OnnxStringNormalizer('text',
                                        op_version=TARGET_OPSET,
                                        output_names=['out'],
                                        **attributes)
            model = node.to_onnx(inputs=[('text', StringTensorType())])
            return OnnxInference(model)

        # No attribute: the text must come back unchanged, in 1D and 2D.
        session = make_session()
        got = session.run({'text': corpus})
        self.assertEqual(list(got['out']), list(corpus))

        grid = corpus.reshape((2, 2))
        got = session.run({'text': grid})
        self.assertEqual(got['out'].tolist(), grid.tolist())

        got = make_session(case_change_action='LOWER').run({'text': corpus})
        self.assertEqual(list(got['out']), [doc.lower() for doc in corpus])

        got = make_session(case_change_action='UPPER').run({'text': corpus})
        self.assertEqual(list(got['out']), [doc.upper() for doc in corpus])

        failing = make_session(case_change_action='UPPER2')
        self.assertRaise(lambda: failing.run({'text': corpus}), RuntimeError)

    def test_onnxrt_string_normalizer_stopwords(self):
        """Check stop-word removal with and without lower-casing.

        The expected strings are the inputs with every ``"this "``
        occurrence removed (after lower-casing in the second case).
        """
        corpus = numpy.array([
            'This is the first document.',
            'This document is the second document.',
            'And this is the third one.', 'Is this the first document?'
        ])

        # Default settings: only the lower-case occurrences are expected to go.
        node = OnnxStringNormalizer('text',
                                    op_version=TARGET_OPSET,
                                    output_names=['out'],
                                    stopwords=['this'])
        session = OnnxInference(
            node.to_onnx(inputs=[('text', StringTensorType())]))
        got = session.run({'text': corpus})
        expected = [doc.replace("this ", "") for doc in corpus]
        self.assertEqual(list(got['out']), expected)

        # Case-insensitive matching combined with lower-casing.
        node = OnnxStringNormalizer('text',
                                    op_version=TARGET_OPSET,
                                    output_names=['out'],
                                    stopwords=['this'],
                                    case_change_action='LOWER',
                                    is_case_sensitive=0)
        session = OnnxInference(
            node.to_onnx(inputs=[('text', StringTensorType())]))
        got = session.run({'text': corpus})
        expected = [doc.lower().replace("this ", "") for doc in corpus]
        self.assertEqual(list(got['out']), expected)

    def test_onnxrt_string_normalizer_stopwords_french(self):
        """Check lower-casing with ``locale='fr_FR'``.

        The expected output is the lower-cased corpus in which the
        accented ``'à'`` appears as ``'a'``.
        """
        documents = numpy.array([
            'A is the first document.',
            'This document is the second document.', 'And a is the third one.',
            'Is à the first document?'
        ])
        expected = numpy.array([
            'a is the first document.',
            'this document is the second document.', 'and a is the third one.',
            'is a the first document?'
        ])

        node = OnnxStringNormalizer('text',
                                    op_version=TARGET_OPSET,
                                    output_names=['out'],
                                    case_change_action='LOWER',
                                    locale='fr_FR')
        model = node.to_onnx(inputs=[('text', StringTensorType())])
        got = OnnxInference(model).run({'text': documents})
        self.assertEqual(list(got['out']), list(expected))

    def test_onnxrt_string_normalizer_empty(self):
        """Check that StringNormalizer leaves a corpus with an empty string as is."""
        documents = numpy.array([
            'This is the first document.',
            'This document is the second document.',
            'And this is the third one.', 'Is this the first document?'
        ])
        # The last document is replaced by an empty string before running.
        documents[-1] = ""

        node = OnnxStringNormalizer('text',
                                    op_version=TARGET_OPSET,
                                    output_names=['out'])
        model = node.to_onnx(inputs=[('text', StringTensorType())])
        got = OnnxInference(model).run({'text': documents})
        self.assertEqual(list(got['out']), list(documents))

    def test_onnxrt_tokenizer_char(self):
        """Tokenize per character (``tokenexp='.'``) on 1D and column inputs.

        In the expected arrays, rows shorter than the longest one are
        completed with ``'#'``.
        """
        texts = numpy.array(['abc', 'abc d', 'abc  e'])
        expected = numpy.array([['a', 'b', 'c', '#', '#', '#'],
                                ['a', 'b', 'c', ' ', 'd', '#'],
                                ['a', 'b', 'c', ' ', ' ', 'e']])

        node = OnnxTokenizer('text',
                             op_version=TARGET_OPSET,
                             output_names=['out'],
                             tokenexp='.')
        model = node.to_onnx(inputs=[('text', StringTensorType())],
                             outputs=[('out', StringTensorType())])
        # The serialized model must mention the custom domain and its version.
        dumped = str(model)
        for fragment in ('domain: "mlprodict"', 'version: 1'):
            self.assertIn(fragment, dumped)

        session = OnnxInference(model)
        flat = session.run({'text': texts})
        self.assertEqual(flat['out'].tolist(), expected.tolist())
        column = session.run({'text': texts.reshape((-1, 1))})
        self.assertEqual(column['out'].tolist(),
                         expected.reshape((3, 1, -1)).tolist())

    def test_onnxrt_tokenizer_char_mark(self):
        """Tokenize per character with ``mark=1``.

        The expected rows start with ``'#'`` and are completed with
        ``'#'`` up to a common length.
        """
        texts = numpy.array(['abc', 'abc d', 'abc  e'])
        expected = numpy.array([['#', 'a', 'b', 'c', '#', '#', '#', '#'],
                                ['#', 'a', 'b', 'c', ' ', 'd', '#', '#'],
                                ['#', 'a', 'b', 'c', ' ', ' ', 'e', '#']])

        node = OnnxTokenizer('text',
                             op_version=TARGET_OPSET,
                             output_names=['out'],
                             tokenexp='.',
                             mark=1)
        model = node.to_onnx(inputs=[('text', StringTensorType())],
                             outputs=[('out', StringTensorType())])
        # The serialized model must mention the custom domain and its version.
        dumped = str(model)
        for fragment in ('domain: "mlprodict"', 'version: 1'):
            self.assertIn(fragment, dumped)

        got = OnnxInference(model).run({'text': texts})
        self.assertEqual(got['out'].tolist(), expected.tolist())

    def test_onnxrt_tokenizer_word_mark(self):
        """Tokenize on the separators ``' '``, ``','``, ``'/'`` with ``mark=1``."""
        texts = numpy.array(['abc ef zoo', 'abc,d', 'ab/e'])
        expected = numpy.array([['#', 'abc', 'ef', 'zoo', '#'],
                                ['#', 'abc', 'd', '#', '#'],
                                ['#', 'ab', 'e', '#', '#']])

        node = OnnxTokenizer('text',
                             op_version=TARGET_OPSET,
                             output_names=['out'],
                             separators=[' ', ',', '/'],
                             mark=1)
        model = node.to_onnx(inputs=[('text', StringTensorType())],
                             outputs=[('out', StringTensorType())])
        got = OnnxInference(model).run({'text': texts})
        self.assertEqual(got['out'].tolist(), expected.tolist())

    def test_onnxrt_tokenizer_word_stop(self):
        """Tokenize on separators with ``mark=0`` and the stop word ``'d'``.

        The token ``'d'`` does not appear in the expected output.
        """
        texts = numpy.array(['abc ef zoo', 'abc,d', 'ab/e'])
        expected = numpy.array([['abc', 'ef', 'zoo'],
                                ['abc', '#', '#'],
                                ['ab', 'e', '#']])

        node = OnnxTokenizer('text',
                             op_version=TARGET_OPSET,
                             output_names=['out'],
                             separators=[' ', ',', '/'],
                             mark=0,
                             stopwords=['d'])
        model = node.to_onnx(inputs=[('text', StringTensorType())],
                             outputs=[('out', StringTensorType())])
        got = OnnxInference(model).run({'text': texts})
        self.assertEqual(got['out'].tolist(), expected.tolist())

    def test_onnxrt_tokenizer_word_regex_mark_split(self):
        """Tokenize with ``tokenexp='[a-c]+'`` and ``tokenexpsplit=1``.

        The expected tokens are what remains of each string once the
        part matching the expression is taken out.
        """
        texts = numpy.array(['abc ef zoo', 'abc,d', 'ab/e'])
        expected = numpy.array([['#', ' ef zoo', '#'],
                                ['#', ',d', '#'],
                                ['#', '/e', '#']])

        node = OnnxTokenizer('text',
                             op_version=TARGET_OPSET,
                             output_names=['out'],
                             mark=1,
                             tokenexp='[a-c]+',
                             tokenexpsplit=1)
        model = node.to_onnx(inputs=[('text', StringTensorType())],
                             outputs=[('out', StringTensorType())])
        got = OnnxInference(model).run({'text': texts})
        self.assertEqual(got['out'].tolist(), expected.tolist())

    def test_onnxrt_tokenizer_word_regex_mark_findall(self):
        """Tokenize with ``tokenexp='[a-c]+'`` and ``tokenexpsplit=0``.

        The expected tokens are the parts of each string matching the
        expression.
        """
        texts = numpy.array(['abc ef zoo', 'abc,d', 'ab/e'])
        expected = numpy.array([['#', 'abc', '#'],
                                ['#', 'abc', '#'],
                                ['#', 'ab', '#']])

        node = OnnxTokenizer('text',
                             op_version=TARGET_OPSET,
                             output_names=['out'],
                             mark=1,
                             tokenexp='[a-c]+',
                             tokenexpsplit=0)
        model = node.to_onnx(inputs=[('text', StringTensorType())],
                             outputs=[('out', StringTensorType())])
        got = OnnxInference(model).run({'text': texts})
        self.assertEqual(got['out'].tolist(), expected.tolist())

    def test_onnxrt_tfidf_vectorizer(self):
        """Run TfIdfVectorizer in TF mode, bigrams only, ``max_skip_count=0``."""
        tokens = numpy.array([[1, 1, 3, 3, 3, 7],
                              [8, 6, 7, 5, 6, 8]], dtype=numpy.int64)
        expected = numpy.array([[0., 0., 0., 0., 0., 0., 0.],
                                [0., 0., 0., 0., 1., 0., 1.]],
                               dtype=numpy.float32)

        # The pool stores the unigrams first, then the flattened bigrams.
        unigrams = [2, 3, 5, 4]
        bigrams = [5, 6, 7, 8, 6, 7]

        node = OnnxTfIdfVectorizer(
            'tokens',
            op_version=TARGET_OPSET,
            mode='TF',
            min_gram_length=2,
            max_gram_length=2,
            max_skip_count=0,
            ngram_counts=numpy.array([0, 4], dtype=numpy.int64),
            ngram_indexes=numpy.arange(7, dtype=numpy.int64),
            pool_int64s=numpy.array(unigrams + bigrams, dtype=numpy.int64),
            output_names=['out'])
        model = node.to_onnx(inputs=[('tokens', Int64TensorType())],
                             outputs=[('out', FloatTensorType())])
        got = OnnxInference(model).run({'tokens': tokens})
        self.assertEqual(expected.tolist(), got['out'].tolist())

    def test_onnxrt_tfidf_vectorizer_skip5(self):
        """Run TfIdfVectorizer in TF mode, bigrams only, ``max_skip_count=5``."""
        tokens = numpy.array([[1, 1, 3, 3, 3, 7],
                              [8, 6, 7, 5, 6, 8]], dtype=numpy.int64)
        expected = numpy.array([[0., 0., 0., 0., 0., 0., 0.],
                                [0., 0., 0., 0., 1., 1., 1.]],
                               dtype=numpy.float32)

        # The pool stores the unigrams first, then the flattened bigrams.
        unigrams = [2, 3, 5, 4]
        bigrams = [5, 6, 7, 8, 6, 7]

        node = OnnxTfIdfVectorizer(
            'tokens',
            op_version=TARGET_OPSET,
            mode='TF',
            min_gram_length=2,
            max_gram_length=2,
            max_skip_count=5,
            ngram_counts=numpy.array([0, 4], dtype=numpy.int64),
            ngram_indexes=numpy.arange(7, dtype=numpy.int64),
            pool_int64s=numpy.array(unigrams + bigrams, dtype=numpy.int64),
            output_names=['out'])
        model = node.to_onnx(inputs=[('tokens', Int64TensorType())],
                             outputs=[('out', FloatTensorType())])
        got = OnnxInference(model).run({'tokens': tokens})
        self.assertEqual(expected.tolist(), got['out'].tolist())

    def test_onnxrt_tfidf_vectorizer_unibi_skip5(self):
        """Run TfIdfVectorizer in TF mode, unigrams and bigrams, skip up to 5."""
        tokens = numpy.array([[1, 1, 3, 3, 3, 7],
                              [8, 6, 7, 5, 6, 8]], dtype=numpy.int64)
        expected = numpy.array([[0., 3., 0., 0., 0., 0., 0.],
                                [0., 0., 1., 0., 1., 1., 1.]],
                               dtype=numpy.float32)

        # The pool stores the unigrams first, then the flattened bigrams.
        unigrams = [2, 3, 5, 4]
        bigrams = [5, 6, 7, 8, 6, 7]

        node = OnnxTfIdfVectorizer(
            'tokens',
            op_version=TARGET_OPSET,
            mode='TF',
            min_gram_length=1,
            max_gram_length=2,
            max_skip_count=5,
            ngram_counts=numpy.array([0, 4], dtype=numpy.int64),
            ngram_indexes=numpy.arange(7, dtype=numpy.int64),
            pool_int64s=numpy.array(unigrams + bigrams, dtype=numpy.int64),
            output_names=['out'])
        model = node.to_onnx(inputs=[('tokens', Int64TensorType())],
                             outputs=[('out', FloatTensorType())])
        got = OnnxInference(model).run({'tokens': tokens})
        self.assertEqual(expected.tolist(), got['out'].tolist())

    def test_onnxrt_tfidf_vectorizer_bi_skip0(self):
        """Run TfIdfVectorizer in TF mode on one row, bigrams only, no skip."""
        tokens = numpy.array([[1, 1, 3, 3, 3, 7, 8, 6, 7, 5, 6, 8]],
                             dtype=numpy.int64)
        expected = numpy.array([[0., 0., 0., 0., 1., 1., 1.]],
                               dtype=numpy.float32)

        # The pool stores the unigrams first, then the flattened bigrams.
        unigrams = [2, 3, 5, 4]
        bigrams = [5, 6, 7, 8, 6, 7]

        node = OnnxTfIdfVectorizer(
            'tokens',
            op_version=TARGET_OPSET,
            mode='TF',
            min_gram_length=2,
            max_gram_length=2,
            max_skip_count=0,
            ngram_counts=numpy.array([0, 4], dtype=numpy.int64),
            ngram_indexes=numpy.arange(7, dtype=numpy.int64),
            pool_int64s=numpy.array(unigrams + bigrams, dtype=numpy.int64),
            output_names=['out'])
        model = node.to_onnx(inputs=[('tokens', Int64TensorType())],
                             outputs=[('out', FloatTensorType())])
        got = OnnxInference(model).run({'tokens': tokens})
        self.assertEqual(expected.tolist(), got['out'].tolist())

    def test_onnxrt_tfidf_vectorizer_empty(self):
        """Run TfIdfVectorizer in TF mode with a pool holding no unigram."""
        tokens = numpy.array([[1, 1, 3, 3, 3, 7, 8, 6, 7, 5, 6, 8]],
                             dtype=numpy.int64)
        expected = numpy.array([[1., 1., 1.]], dtype=numpy.float32)

        # ngram_counts is [0, 0]: the whole pool is made of flattened bigrams.
        bigrams = [5, 6, 7, 8, 6, 7]

        node = OnnxTfIdfVectorizer(
            'tokens',
            op_version=TARGET_OPSET,
            mode='TF',
            min_gram_length=2,
            max_gram_length=2,
            max_skip_count=0,
            ngram_counts=numpy.array([0, 0], dtype=numpy.int64),
            ngram_indexes=numpy.arange(3, dtype=numpy.int64),
            pool_int64s=numpy.array(bigrams, dtype=numpy.int64),
            output_names=['out'])
        model = node.to_onnx(inputs=[('tokens', Int64TensorType())],
                             outputs=[('out', FloatTensorType())])
        got = OnnxInference(model).run({'tokens': tokens})
        self.assertEqual(expected.tolist(), got['out'].tolist())

    @ignore_warnings(UserWarning)
    def test_onnxrt_python_count_vectorizer(self):
        """Compare a fitted CountVectorizer with its ONNX conversion."""
        documents = numpy.array([
            'This is the first document.',
            'This document is the second document.',
            'And this is the third one.', 'Is this the first document?'
        ])
        vectorizer = CountVectorizer()
        vectorizer.fit(documents)
        expected = vectorizer.transform(documents)

        model = to_onnx(vectorizer, documents, target_opset=TARGET_OPSET)
        got = OnnxInference(model).run({'X': documents})
        self.assertEqualArray(expected.todense(), got['variable'])

    @unittest.skipIf(compare_module_version(sk2ver, '1.9.3') < 0,
                     reason="fails on that example")
    @ignore_warnings(UserWarning)
    def test_multi_output_classifier(self):
        """Compare a MultiOutputClassifier pipeline with its ONNX conversion.

        The pipeline one-hot encodes two categorical columns, counts the
        words of a text column and feeds a random forest wrapped in
        ``MultiOutputClassifier``. Labels must match exactly, and the
        probabilities are compared with ``decimal=5``.
        """
        features = pandas.DataFrame({
            'CAT1': ['985332', '985333', '985334', '985335', '985336'],
            'CAT2': ['1985332', '1985333', '1985334', '1985335', '1985336'],
            'TEXT': ["abc abc", "abc def", "def ghj", "abcdef", "abc ii"]
        })
        targets = pandas.DataFrame({
            'REAL': [5, 6, 7, 6, 5],
            'CATY': [0, 1, 0, 1, 0]
        })

        # Preprocessing: one branch per kind of column.
        one_hot = OneHotEncoder(handle_unknown='ignore')
        word_counts = Pipeline(steps=[(
            'count_vect',
            CountVectorizer(max_df=0.8, min_df=0.05, max_features=1000))])
        preprocessing = ColumnTransformer(transformers=[
            ('cat_transform', one_hot, ['CAT1', 'CAT2']),
            ('count_vector', word_counts, 'TEXT'),
        ])

        forest = RandomForestClassifier(random_state=42, max_depth=50)
        classifier = MultiOutputClassifier(estimator=forest)
        model = Pipeline(steps=[('preprocessor', preprocessing),
                                ('classifier', classifier)])
        model.fit(features, targets)
        expected_label = model.predict(features)
        expected_proba = model.predict_proba(features)

        # Every column is fed to the ONNX model as a separate column vector.
        feeds = {
            name: features[name].values.reshape((-1, 1))
            for name in ('CAT1', 'CAT2', 'TEXT')
        }
        onnx_model = to_onnx(model, features, target_opset=TARGET_OPSET)
        session = OnnxInference(onnx_model)

        got = session.run(feeds)
        self.assertEqualArray(expected_label, got['label'])
        probabilities = got['probabilities']
        self.assertEqual(len(expected_proba), len(probabilities))
        for wanted, computed in zip(expected_proba, probabilities):
            self.assertEqualArray(wanted, computed, decimal=5)

    def test_onnxrt_category_mapper_intstr(self):
        """Check CategoryMapper from integers to strings.

        The value ``5`` is not a known category; the expected output for
        it is the empty string.
        """
        node = OnnxCategoryMapper('cat',
                                  op_version=TARGET_OPSET,
                                  cats_int64s=[1, 2],
                                  cats_strings=["cat1", "cat2"],
                                  output_names=['out'])
        model = node.to_onnx(inputs=[('cat', Int64TensorType())],
                             outputs=[('out', StringTensorType())])
        session = OnnxInference(model)

        categories = numpy.array([1, 2, 1, 5], dtype=numpy.int64)
        got = session.run({'cat': categories})
        self.assertEqual(got['out'].tolist(), ["cat1", "cat2", "cat1", ""])

    def test_onnxrt_category_mapper_strint(self):
        """Check CategoryMapper from strings to integers.

        The value ``"R"`` is not a known category; the expected output
        for it is ``-1``.
        """
        node = OnnxCategoryMapper('cat',
                                  op_version=TARGET_OPSET,
                                  cats_int64s=[1, 2],
                                  cats_strings=["cat1", "cat2"],
                                  output_names=['out'])
        model = node.to_onnx(inputs=[('cat', StringTensorType())],
                             outputs=[('out', Int64TensorType())])
        session = OnnxInference(model)

        categories = numpy.array(["cat1", "cat2", "cat1", "R"],
                                 dtype=numpy.str_)
        expected = numpy.array([1, 2, 1, -1], dtype=numpy.int64)
        got = session.run({'cat': categories})
        self.assertEqualArray(got['out'], expected)
コード例 #17
0
    def test_create_asv_benchmark_logreg(self):
        """Generate the asv benchmarks for LogisticRegression and run them.

        Each file found under ``benches`` gets a short driver appended
        which instantiates the first class declared in the file and
        calls ``setup_cache``; the file is then run as a script and its
        filtered error output must be empty. Two specific benchmark
        files are additionally checked for their content.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        temp = get_temp_folder(__file__, "temp_create_asv_benchmark_logreg")
        created = create_asv_benchmark(location=temp,
                                       verbose=3,
                                       fLOG=fLOG,
                                       runtime=('scikit-learn', 'python',
                                                'onnxruntime1'),
                                       exc=False,
                                       execute=True,
                                       models={'LogisticRegression'})
        if len(created) < 6:
            listing = "\n".join(sorted(created))
            raise AssertionError(
                "Number of created files is too small.\n{}".format(listing))

        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        # Lines of the error output containing one of these are ignored.
        ignored = ("Warning", "No module named 'mlprodict'", "Traceback ")
        checked = 0
        visited = []
        for folder, _, filenames in os.walk(os.path.join(temp, 'benches')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                fLOG("process '{}'".format(filename))
                fullname = os.path.join(folder, filename)
                with open(fullname, 'r', encoding='utf-8') as f:
                    content = f.read()

                # Append a driver calling setup_cache on the first class.
                first_class = class_pattern.findall(content)[0]
                content += "\n\ncl = %s()\ncl.setup_cache()\n" % first_class
                visited.append(fullname)
                with open(fullname, 'w', encoding='utf-8') as f:
                    f.write(content)

                _stdout, err = run_script(fullname, wait=True)
                # Keep the non-empty, non-indented lines which are not ignored.
                kept = [
                    line for line in err.split('\n')
                    if line and line[0] != ' '
                    and not any(token in line for token in ignored)
                ]
                err = "\n".join(kept).strip(' \n\r')
                if len(err) > 0:
                    raise RuntimeError("Issue with '{}'\n{}".format(
                        fullname, err))

                if (filename.endswith(
                        "bench_LogReg_liblinear_m_cl_solverliblinear.py")
                        and compare_module_version(sklearn.__version__,
                                                   "0.21") >= 0):
                    if ("{LogisticRegression: {'zipmap': False}}" in content
                            or "'nozipmap'" not in content
                            or 'predict_proba' not in content):
                        raise AssertionError(content)
                    checked += 1
                if (filename.endswith(
                        "bench_LogReg_liblinear_dec_b_cl_dec_solverliblinear.py"
                ) and compare_module_version(sklearn.__version__, "0.21") >=
                        0):
                    if ("{LogisticRegression: {'raw_scores': True}}" in content
                            or "'raw_scores'" not in content
                            or 'decision_function' not in content):
                        raise AssertionError(content)
                    checked += 1

        # At least one of the two specific files must have been checked.
        if not checked:
            raise AssertionError("Visited files\n{}".format(
                "\n".join(visited)))
コード例 #18
0
class TestOnnxPipeline(ExtTestCase):
    """
    Tests for ``OnnxPipeline``: the pipeline is fitted (twice in most
    tests), the type of its steps is checked, then the model is converted
    to ONNX and the ONNX outputs are compared with scikit-learn's.
    """

    @staticmethod
    def _iris_data():
        """Return the iris dataset as a ``(data, target)`` pair."""
        dataset = load_iris()
        return dataset.data, dataset.target

    @staticmethod
    def _to_onnx_no_zipmap(pipe, X):
        """
        Convert *pipe* to ONNX using the first row of *X* as sample,
        ``pipe.op_version`` as target opset and option ``zipmap=False``
        registered under ``id(pipe)``.
        """
        return to_onnx(pipe, X[:1], target_opset=pipe.op_version,
                       options={id(pipe): {'zipmap': False}})

    def test_pipeline_iris(self):
        """
        PCA + scaler + logistic regression with ``enforce_float32=True``:
        the first two steps must become ``OnnxTransformer`` and the ONNX
        label/probabilities must equal the pipeline predictions.
        """
        X, y = self._iris_data()
        steps = [('pca', PCA(n_components=2)),
                 ('no', StandardScaler()),
                 ('lr', LogisticRegression())]
        pipe = OnnxPipeline(steps, enforce_float32=True,
                            op_version=TARGET_OPSET)
        # The pipeline is fitted two times in a row on the same data.
        for _ in range(2):
            pipe.fit(X, y)

        self.assertTrue(hasattr(pipe, 'raw_steps_'))
        self.assertEqual(len(pipe.steps), 3)
        self.assertEqual(len(pipe.raw_steps_), 3)
        for position in (0, 1):
            self.assertIsInstance(pipe.steps[position][1], OnnxTransformer)

        X32 = X.astype(numpy.float32)
        model_def = self._to_onnx_no_zipmap(pipe, X32)
        got = OnnxInference(model_def).run({'X': X32})
        self.assertEqualArray(got["label"], pipe.predict(X32))
        self.assertEqualArray(got["probabilities"], pipe.predict_proba(X32))

    def test_pipeline_none_params(self):
        """An ``OnnxPipeline`` built with default parameters is not empty."""
        steps = [('scaler', StandardScaler()),
                 ('dt', DecisionTreeRegressor(max_depth=2))]
        self.assertNotEmpty(OnnxPipeline(steps))

    def test_pipeline_iris_enforce_false(self):
        """
        Same pipeline as :meth:`test_pipeline_iris` with
        ``enforce_float32=False`` and float64 inputs; also checks the
        exceptions raised for a wrong dtype, a wrong shape and an
        unexpected input name.
        """
        X, y = self._iris_data()
        steps = [('pca', PCA(n_components=2)),
                 ('no', StandardScaler()),
                 ('lr', LogisticRegression())]
        pipe = OnnxPipeline(steps, enforce_float32=False,
                            op_version=TARGET_OPSET)
        # The pipeline is fitted two times in a row on the same data.
        for _ in range(2):
            pipe.fit(X, y)

        self.assertTrue(hasattr(pipe, 'raw_steps_'))
        self.assertEqual(len(pipe.steps), 3)
        self.assertEqual(len(pipe.raw_steps_), 3)
        for position in (0, 1):
            self.assertIsInstance(pipe.steps[position][1], OnnxTransformer)

        X64 = X.astype(numpy.float64)
        model_def = self._to_onnx_no_zipmap(pipe, X64)
        sess = OnnxInference(model_def)
        got = sess.run({'X': X64})
        self.assertEqualArray(got["label"], pipe.predict(X64))
        self.assertEqualArray(got["probabilities"], pipe.predict_proba(X64))

        def run_float32():
            return sess.run({'X': X64.astype(numpy.float32)})

        def run_three_dimensions():
            return sess.run({'X': X64.reshape((2, -1, 4))})

        def run_extra_input():
            feeds = {'X': X64.astype(numpy.float64),
                     'Y': X64.astype(numpy.float64)}
            return sess.run(feeds)

        self.assertRaise(run_float32, RuntimeError)
        self.assertRaise(run_three_dimensions, (ValueError, IndexError))
        self.assertRaise(run_extra_input, KeyError)

    @unittest.skipIf(compare_module_version(s2_ver, '1.9.3') < 0,
                     reason="skl2onnx too old")
    def test_transfer_transformer(self):
        """
        A ``TransferTransformer`` wrapping a ``StandardScaler`` converts to
        ONNX and output ``variable`` matches ``transform`` (5 decimals).
        """
        _register_converters_mlinsights(True)
        X, y = self._iris_data()
        pipe = TransferTransformer(StandardScaler(), trainable=True)
        pipe.fit(X, y)

        X32 = X.astype(numpy.float32)
        model_def = to_onnx(pipe, X32[:1])
        got = OnnxInference(model_def).run({'X': X32})
        expected = pipe.transform(X32)
        self.assertEqualArray(expected, got['variable'], decimal=5)

    @unittest.skipIf(compare_module_version(s2_ver, '1.9.3') < 0,
                     reason="skl2onnx too old")
    def test_transfer_logistic_regression(self):
        """
        A ``TransferTransformer`` wrapping a liblinear logistic regression
        converts to ONNX and output ``probabilities`` matches ``transform``
        (5 decimals).
        """
        _register_converters_mlinsights(True)
        X, y = self._iris_data()
        classifier = LogisticRegression(solver='liblinear')
        pipe = TransferTransformer(classifier, trainable=True)
        pipe.fit(X, y)

        model_def = to_onnx(pipe, X[:1])
        got = OnnxInference(model_def).run({'X': X})
        expected = pipe.transform(X)
        self.assertEqualArray(expected, got['probabilities'], decimal=5)

    @unittest.skipIf(compare_module_version(s2_ver, '1.9.3') < 0,
                     reason="skl2onnx too old")
    def test_pipeline_pickable(self):
        """
        Pipeline made of a ``TransferTransformer`` followed by a logistic
        regression: checks the step types and the ONNX outputs.
        """
        _register_converters_mlinsights(True)
        X, y = self._iris_data()
        scaler = TransferTransformer(StandardScaler(), trainable=True)
        pipe = OnnxPipeline([('gm', scaler), ('lr', LogisticRegression())],
                            enforce_float32=True,
                            op_version=TARGET_OPSET)
        # The pipeline is fitted two times in a row on the same data.
        for _ in range(2):
            pipe.fit(X, y)

        self.assertTrue(hasattr(pipe, 'raw_steps_'))
        self.assertEqual(len(pipe.steps), 2)
        self.assertEqual(len(pipe.raw_steps_), 2)
        self.assertIsInstance(pipe.steps[0][1], OnnxTransformer)

        X32 = X.astype(numpy.float32)
        model_def = self._to_onnx_no_zipmap(pipe, X32)
        got = OnnxInference(model_def).run({'X': X32})
        self.assertEqual(sorted(got), ['label', 'probabilities'])
        self.assertEqualArray(got["label"], pipe.predict(X32))
        self.assertEqualArray(got["probabilities"], pipe.predict_proba(X32))

    @unittest.skipIf(compare_module_version(s2_ver, '1.9.3') < 0,
                     reason="skl2onnx too old")
    @ignore_warnings(warns=FutureWarning)
    def test_pipeline_pickable_options(self):
        """
        Pipeline with a Gaussian mixture and per-step conversion options;
        the ONNX model is run with the ``python_compiled`` runtime, with
        ``InferenceSession`` and with the default ``OnnxInference``.
        """
        _register_converters_mlinsights(True)
        X, y = self._iris_data()
        mixture = TransferTransformer(
            GaussianMixture(n_components=5, random_state=2),
            trainable=True, method='predict_proba')
        pipe = OnnxPipeline(
            [('gm', mixture), ('lr', LogisticRegression(random_state=2))],
            enforce_float32=True,
            op_version=TARGET_OPSET,
            options={'gm__score_samples': True, 'lr__zipmap': False})
        # The pipeline is fitted two times in a row on the same data.
        for _ in range(2):
            pipe.fit(X, y)

        self.assertTrue(hasattr(pipe, 'raw_steps_'))
        self.assertEqual(len(pipe.steps), 2)
        self.assertEqual(len(pipe.raw_steps_), 2)
        self.assertIsInstance(pipe.steps[0][1], OnnxTransformer)

        X32 = X.astype(numpy.float32)
        model_def = self._to_onnx_no_zipmap(pipe, X32)

        # Text form of the compiled runtime must mention the output.
        compiled = OnnxInference(model_def, runtime="python_compiled")
        self.assertIn("'probabilities': probabilities,", str(compiled))

        # The session created by InferenceSession returns two outputs.
        ort_sess = InferenceSession(model_def.SerializeToString())
        ort_outputs = ort_sess.run(None, {'X': X32})
        self.assertEqual(len(ort_outputs), 2)

        got = OnnxInference(model_def).run({'X': X32})
        self.assertEqual(sorted(got), ['label', 'probabilities'])
        self.assertEqualArray(got["probabilities"], pipe.predict_proba(X32))
        self.assertEqualArray(got["label"], pipe.predict(X32))

    def test_pipeline_iris_column_transformer(self):
        """
        Pipeline whose first step is a ``ColumnTransformer``: that step
        must become an ``OnnxTransformer`` while the final step stays a
        ``LogisticRegression``; probabilities are compared to 5 decimals.
        """
        X, y = self._iris_data()
        columns = ColumnTransformer([('pca', PCA(n_components=2), [0, 1]),
                                     ('no', StandardScaler(), [2]),
                                     ('pass', 'passthrough', [3])])
        pipe = OnnxPipeline([('col', columns), ('lr', LogisticRegression())],
                            enforce_float32=True,
                            op_version=TARGET_OPSET)
        # The pipeline is fitted two times in a row on the same data.
        for _ in range(2):
            pipe.fit(X, y)

        self.assertTrue(hasattr(pipe, 'raw_steps_'))
        self.assertEqual(len(pipe.steps), 2)
        self.assertEqual(len(pipe.raw_steps_), 2)
        first_step, last_step = pipe.steps[0][1], pipe.steps[1][1]
        self.assertIsInstance(first_step, OnnxTransformer)
        self.assertIsInstance(last_step, LogisticRegression)

        X32 = X.astype(numpy.float32)
        model_def = self._to_onnx_no_zipmap(pipe, X32)
        got = OnnxInference(model_def).run({'X': X32})
        self.assertEqualArray(got["label"], pipe.predict(X32))
        self.assertEqualArray(got["probabilities"], pipe.predict_proba(X32),
                              decimal=5)

    def test_pipeline_iris_column_transformer_nocache(self):
        """
        Same as :meth:`test_pipeline_iris_column_transformer` with a
        ``memory`` object whose ``cache`` method returns its argument
        unchanged.
        """
        class MyMemory:
            """Memory stand-in: ``cache`` hands back the object it receives."""

            def cache(self, obj):
                return obj

        X, y = self._iris_data()
        columns = ColumnTransformer([('pca', PCA(n_components=2), [0, 1]),
                                     ('no', StandardScaler(), [2]),
                                     ('pass', 'passthrough', [3])])
        pipe = OnnxPipeline([('col', columns), ('lr', LogisticRegression())],
                            enforce_float32=True,
                            op_version=TARGET_OPSET,
                            memory=MyMemory())
        # The pipeline is fitted two times in a row on the same data.
        for _ in range(2):
            pipe.fit(X, y)

        self.assertTrue(hasattr(pipe, 'raw_steps_'))
        self.assertEqual(len(pipe.steps), 2)
        self.assertEqual(len(pipe.raw_steps_), 2)
        first_step, last_step = pipe.steps[0][1], pipe.steps[1][1]
        self.assertIsInstance(first_step, OnnxTransformer)
        self.assertIsInstance(last_step, LogisticRegression)

        X32 = X.astype(numpy.float32)
        model_def = self._to_onnx_no_zipmap(pipe, X32)
        got = OnnxInference(model_def).run({'X': X32})
        self.assertEqualArray(got["label"], pipe.predict(X32))
        self.assertEqualArray(got["probabilities"], pipe.predict_proba(X32),
                              decimal=5)
コード例 #19
0
class TestOnnxrtSideBySide(ExtTestCase):
    """
    Tests scikit-learn kernels converted to ONNX on several runtimes, the
    side-by-side comparison of runtimes and ``merge_results``.

    The kernel definition, the ONNX conversion, the input dictionary and
    the creation of the ``onnxruntime2`` session are shared by several
    tests and live in private helpers.
    """

    def setUp(self):
        """Disable the ``skl2onnx`` logger before every test."""
        getLogger('skl2onnx').disabled = True

    @staticmethod
    def _two_rbf_kernel():
        """Return the sum of two constant-scaled RBF kernels (length
        scales 10 and 1) used by all the ``ker2`` tests."""
        bounds = (1e-3, 1e3)
        return Sum(
            CK(0.1, bounds) * RBF(length_scale=10,
                                  length_scale_bounds=bounds),
            CK(0.1, bounds) * RBF(length_scale=1,
                                  length_scale_bounds=bounds))

    @staticmethod
    def _kernel_to_onnx(ker, set_ir_version=False):
        """
        Convert kernel *ker* into an ONNX model with input ``X`` and
        output ``Y``, both float tensors with two unknown dimensions.

        :param ker: kernel to convert
        :param set_ir_version: if True, ``ir_version`` of the model is
            overwritten with ``get_ir_version_from_onnx()``
        :return: ONNX model
        """
        opset = get_opset_number_from_onnx()
        onx = convert_kernel(ker, 'X', output_names=['Y'],
                             dtype=numpy.float32, op_version=opset)
        model_onnx = onx.to_onnx(
            inputs=[('X', FloatTensorType([None, None]))],
            outputs=[('Y', FloatTensorType([None, None]))],
            target_opset=opset)
        if set_ir_version:
            model_onnx.ir_version = get_ir_version_from_onnx()
        return model_onnx

    @staticmethod
    def _float32_feeds():
        """Return a new input dictionary ``{'X': Xtest_ as float32}``."""
        return {'X': Xtest_.astype(numpy.float32)}

    @staticmethod
    def _onnxruntime2_session(model_onnx):
        """Create an ``OnnxInference`` with runtime ``onnxruntime2`` inside
        ``_capture_output`` and return it."""
        return _capture_output(
            lambda: OnnxInference(model_onnx.SerializeToString(),
                                  runtime="onnxruntime2"), 'c')[0]

    @unittest.skipIf(convert_kernel is None, reason="not enough recent version")
    def test_kernel_ker12_def(self):
        """Constant kernel plus constant-scaled RBF: the ONNX output ``Y``
        must equal the kernel evaluated on ``Xtest_``."""
        ker = (Sum(CK(0.1, (1e-3, 1e3)), CK(0.1, (1e-3, 1e3)) *
                   RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3))))
        model_onnx = self._kernel_to_onnx(ker)
        sess = OnnxInference(model_onnx.SerializeToString())
        res = sess.run(self._float32_feeds())
        self.assertEqualArray(res['Y'], ker(Xtest_))

    @unittest.skipIf(convert_kernel is None, reason="not enough recent version")
    def test_kernel_ker2_def(self):
        """Sum of two scaled RBF kernels on the default runtime; also runs
        with ``intermediate=True`` and expects a dict of more than 30
        results."""
        ker = self._two_rbf_kernel()
        model_onnx = self._kernel_to_onnx(ker)
        sess = OnnxInference(model_onnx.SerializeToString())

        res = sess.run(self._float32_feeds())
        self.assertEqualArray(res['Y'], ker(Xtest_))

        res = sess.run(self._float32_feeds(), intermediate=True)
        self.assertGreater(len(res), 30)
        self.assertIsInstance(res, dict)

    @unittest.skipIf(convert_kernel is None, reason="not enough recent version")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_ker2_def_ort(self):
        """
        Sum of two scaled RBF kernels on runtime ``onnxruntime2``,
        compared with the kernel to 5 decimals.

        The test returns early (and therefore passes) when the runtime
        raises a ``RuntimeError`` mentioning
        ``Got invalid dimensions for input``; any other ``RuntimeError``
        is propagated.
        """
        ker = self._two_rbf_kernel()
        model_onnx = self._kernel_to_onnx(ker, set_ir_version=True)
        sess = self._onnxruntime2_session(model_onnx)
        try:
            res = sess.run(self._float32_feeds())
        except RuntimeError as e:
            if "Got invalid dimensions for input" in str(e):
                # probable bug somewhere
                return
            # Bare raise keeps the original traceback untouched.
            raise
        self.assertEqualArray(res['Y'], ker(Xtest_), decimal=5)

    @unittest.skipIf(convert_kernel is None, reason="not enough recent version")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_ker2_def_ort1(self):
        """
        Sum of two scaled RBF kernels on runtime ``onnxruntime1`` with
        intermediate results and verbose logging, then side-by-side
        comparisons between the default runtime, ``onnxruntime1`` and
        ``onnxruntime2``.

        The test returns early (and therefore passes) when the comparison
        of the three runtimes raises a ``RuntimeError`` mentioning
        ``Got invalid dimensions for input``; any other ``RuntimeError``
        is propagated.
        """
        model_onnx = self._kernel_to_onnx(self._two_rbf_kernel(),
                                          set_ir_version=True)
        sess = OnnxInference(model_onnx.SerializeToString(),
                             runtime="onnxruntime1")

        rows = []

        def myprint(*args, **kwargs):
            # Collects what the runtime logs through ``fLOG``.
            rows.append(" ".join(map(str, args)))

        res = _capture_output(
            lambda: sess.run(self._float32_feeds(),
                             intermediate=True, verbose=1, fLOG=myprint),
            'c')[0]
        self.assertGreater(len(rows), 2)
        # NOTE: output ``Y`` is only checked to be non-empty, it is not
        # compared with the scikit-learn kernel in this test.
        self.assertNotEmpty(res['Y'])
        self.assertGreater(len(res), 2)

        cpu = OnnxInference(model_onnx.SerializeToString())
        sbs = side_by_side_by_values(
            [cpu, sess], inputs=self._float32_feeds())
        self.assertGreater(len(sbs), 2)
        self.assertIsInstance(sbs, list)
        self.assertIsInstance(sbs[0], dict)
        for key in ('step', 'metric', 'cmp'):
            self.assertIn(key, sbs[0])
            self.assertIn(key, sbs[1])

        sess3 = self._onnxruntime2_session(model_onnx)
        try:
            sbs = side_by_side_by_values(
                [cpu, sess, sess3], inputs=self._float32_feeds())
        except RuntimeError as e:
            if "Got invalid dimensions for input" in str(e):
                # probable bug somewhere
                return
            # Bare raise keeps the original traceback untouched.
            raise
        self.assertNotEmpty(sbs)

        # Same comparison with the inputs attached to every runtime.
        inputs = self._float32_feeds()
        sbs = side_by_side_by_values(
            [(cpu, inputs), (sess, inputs), (sess3, inputs)])
        self.assertNotEmpty(sbs)

    def test_merge_results(self):
        """``merge_results`` aligns two lists of ``(name, value)`` pairs by
        name and fills the side missing a name with ``None``."""
        cases = [
            # same names on both sides
            ([('AA', [0, 0]), ('BB', [1, 1])],
             [('AA', [2, 2]), ('BB', [3, 3])],
             [('AA', [[0, 0], [2, 2]]), ('BB', [[1, 1], [3, 3]])]),
            # extra trailing name on the left
            ([('AA', [0, 0]), ('BB', [1, 1]), ('CC', [10, 10])],
             [('AA', [2, 2]), ('BB', [3, 3])],
             [('AA', [[0, 0], [2, 2]]),
              ('BB', [[1, 1], [3, 3]]),
              ('CC', [[10, 10], None])]),
            # extra trailing name on the right
            ([('AA', [0, 0]), ('BB', [1, 1])],
             [('AA', [2, 2]), ('BB', [3, 3]), ('CC', [10, 10])],
             [('AA', [[0, 0], [2, 2]]),
              ('BB', [[1, 1], [3, 3]]),
              ('CC', [None, [10, 10]])]),
            # extra name in the middle of the left list
            ([('AA', [0, 0]), ('CC', [10, 10]), ('BB', [1, 1])],
             [('AA', [2, 2]), ('BB', [3, 3])],
             [('AA', [[0, 0], [2, 2]]),
              ('CC', [[10, 10], None]),
              ('BB', [[1, 1], [3, 3]])]),
            # extra name in the middle of the right list
            ([('AA', [0, 0]), ('BB', [1, 1])],
             [('AA', [2, 2]), ('CC', [10, 10]), ('BB', [3, 3])],
             [('AA', [[0, 0], [2, 2]]),
              ('CC', [None, [10, 10]]),
              ('BB', [[1, 1], [3, 3]])]),
        ]
        for res1, res2, exp in cases:
            res = merge_results([res1, res2])
            self.assertEqual(exp, res)
コード例 #20
0
class TestOnnxrtSideBySide(ExtTestCase):

    def setUp(self):
        """Disable the ``skl2onnx`` logger before every test."""
        getLogger('skl2onnx').disabled = True

    @unittest.skipIf(convert_kernel is None, reason="not enough recent version")
    def test_kernel_ker12_def(self):
        """Constant kernel plus constant-scaled RBF: the ONNX output ``Y``
        must equal the kernel evaluated on ``Xtest_``."""
        constant_term = CK(0.1, (1e-3, 1e3))
        rbf_term = CK(0.1, (1e-3, 1e3)) * RBF(
            length_scale=1, length_scale_bounds=(1e-3, 1e3))
        kernel = Sum(constant_term, rbf_term)

        converted = convert_kernel(
            kernel, 'X', output_names=['Y'], dtype=numpy.float32)
        input_types = [('X', FloatTensorType([None, None]))]
        output_types = [('Y', FloatTensorType([None, None]))]
        model_onnx = converted.to_onnx(inputs=input_types,
                                       outputs=output_types)

        runtime = OnnxInference(model_onnx.SerializeToString())
        feeds = {'X': Xtest_.astype(numpy.float32)}
        got = runtime.run(feeds)['Y']
        expected = kernel(Xtest_)
        self.assertEqualArray(got, expected)

    @unittest.skipIf(convert_kernel is None, reason="not enough recent version")
    def test_kernel_ker2_def(self):
        """Sum of two constant-scaled RBF kernels on the default runtime;
        a second run with ``intermediate=True`` must return an
        ``OrderedDict`` holding more than 30 results."""
        wide_rbf = CK(0.1, (1e-3, 1e3)) * RBF(
            length_scale=10, length_scale_bounds=(1e-3, 1e3))
        narrow_rbf = CK(0.1, (1e-3, 1e3)) * RBF(
            length_scale=1, length_scale_bounds=(1e-3, 1e3))
        kernel = Sum(wide_rbf, narrow_rbf)

        converted = convert_kernel(
            kernel, 'X', output_names=['Y'], dtype=numpy.float32)
        input_types = [('X', FloatTensorType([None, None]))]
        output_types = [('Y', FloatTensorType([None, None]))]
        model_onnx = converted.to_onnx(inputs=input_types,
                                       outputs=output_types)
        runtime = OnnxInference(model_onnx.SerializeToString())

        # Final output only.
        got = runtime.run({'X': Xtest_.astype(numpy.float32)})['Y']
        expected = kernel(Xtest_)
        self.assertEqualArray(got, expected)

        # Every intermediate result.
        everything = runtime.run({'X': Xtest_.astype(numpy.float32)},
                                 intermediate=True)
        self.assertGreater(len(everything), 30)
        self.assertIsInstance(everything, OrderedDict)

    @unittest.skipIf(convert_kernel is None, reason="not enough recent version")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_ker2_def_ort(self):
        """Sum of two constant-scaled RBF kernels on runtime
        ``onnxruntime2``, compared with the kernel to 5 decimals."""
        wide_rbf = CK(0.1, (1e-3, 1e3)) * RBF(
            length_scale=10, length_scale_bounds=(1e-3, 1e3))
        narrow_rbf = CK(0.1, (1e-3, 1e3)) * RBF(
            length_scale=1, length_scale_bounds=(1e-3, 1e3))
        kernel = Sum(wide_rbf, narrow_rbf)

        converted = convert_kernel(
            kernel, 'X', output_names=['Y'], dtype=numpy.float32)
        input_types = [('X', FloatTensorType([None, None]))]
        output_types = [('Y', FloatTensorType([None, None]))]
        model_onnx = converted.to_onnx(inputs=input_types,
                                       outputs=output_types)

        runtime = OnnxInference(model_onnx.SerializeToString(),
                                runtime="onnxruntime2")
        got = runtime.run({'X': Xtest_.astype(numpy.float32)})['Y']
        expected = kernel(Xtest_)
        self.assertEqualArray(got, expected, decimal=5)

    @unittest.skipIf(convert_kernel is None, reason="not enough recent version")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_ker2_def_ort1(self):
        """
        Sum of two constant-scaled RBF kernels on runtime ``onnxruntime1``
        with intermediate results and verbose logging, followed by
        side-by-side comparisons between the default runtime,
        ``onnxruntime1`` and ``onnxruntime2``.
        """
        ker = Sum(
            CK(0.1, (1e-3, 1e3)) * RBF(length_scale=10,
                                       length_scale_bounds=(1e-3, 1e3)),
            CK(0.1, (1e-3, 1e3)) * RBF(length_scale=1,
                                       length_scale_bounds=(1e-3, 1e3))
        )
        onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=numpy.float32)
        model_onnx = onx.to_onnx(
            inputs=[('X', FloatTensorType([None, None]))],
            outputs=[('Y', FloatTensorType([None, None]))])
        sess = OnnxInference(model_onnx.SerializeToString(),
                             runtime="onnxruntime1")

        # Lines logged by the runtime through ``fLOG`` end up in this list.
        rows = []

        def myprint(*args, **kwargs):
            # Joins the positional arguments with spaces; keyword
            # arguments are accepted but ignored.
            rows.append(" ".join(map(str, args)))

        res = sess.run({'X': Xtest_.astype(numpy.float32)},
                       intermediate=True, verbose=1, fLOG=myprint)
        self.assertGreater(len(rows), 2)
        m1 = res['Y']
        self.assertNotEmpty(m1)
        self.assertGreater(len(res), 2)
        # The comparison with the scikit-learn kernel is disabled here:
        # m2 = ker(Xtest_)
        # self.assertEqualArray(m1, m2, decimal=5)

        # Default runtime against onnxruntime1: the result must be a list
        # of dictionaries whose first two items hold keys
        # 'step', 'metric' and 'cmp'.
        cpu = OnnxInference(model_onnx.SerializeToString())
        sbs = side_by_side_by_values(
            [cpu, sess], inputs={'X': Xtest_.astype(numpy.float32)})
        self.assertGreater(len(sbs), 2)
        self.assertIsInstance(sbs, list)
        self.assertIsInstance(sbs[0], dict)
        self.assertIn('step', sbs[0])
        self.assertIn('step', sbs[1])
        self.assertIn('metric', sbs[0])
        self.assertIn('metric', sbs[1])
        self.assertIn('cmp', sbs[0])
        self.assertIn('cmp', sbs[1])

        # Three runtimes sharing the same ``inputs`` argument.
        sess3 = OnnxInference(model_onnx.SerializeToString(),
                              runtime="onnxruntime2")
        sbs = side_by_side_by_values(
            [cpu, sess, sess3], inputs={'X': Xtest_.astype(numpy.float32)})
        self.assertNotEmpty(sbs)

        # Same comparison with the inputs given as (runtime, inputs) pairs.
        inputs = {'X': Xtest_.astype(numpy.float32)}
        sbs = side_by_side_by_values(
            [(cpu, inputs), (sess, inputs), (sess3, inputs)])
        self.assertNotEmpty(sbs)