Esempio n. 1
0
    def test_create_asv_benchmark_rf(self):
        """
        Calls ``create_asv_benchmark`` for ``RandomForestRegressor``, then
        appends a ``setup_cache`` call to every file found under
        ``benches``, runs it with ``run_script`` and fails if unexpected
        lines remain on the standard error.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        temp = get_temp_folder(__file__, "temp_create_asv_benchmark_rf")
        runtimes = ('scikit-learn', 'python', 'onnxruntime1')
        created = create_asv_benchmark(
            location=temp, verbose=1, fLOG=fLOG, runtime=runtimes,
            exc=False, execute=True, models={'RandomForestRegressor'})
        self.assertNotEmpty(created)

        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        # Standard error lines containing one of these fragments are ignored.
        tolerated = ("Warning", "No module named 'mlprodict'", "Traceback ")
        expected_file = "bench_RandomForestReg_default_b_reg_nest100.py"
        verif = False
        visited = []
        for root, _, filenames in os.walk(os.path.join(temp, 'benches')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                fLOG("process '{}'".format(filename))
                fullname = os.path.join(root, filename)
                with open(fullname, 'r', encoding='utf-8') as f:
                    content = f.read()
                # The first class declared in the file is the one
                # instantiated by the code appended to the script.
                class_name = class_pattern.findall(content)[0]
                content += "\n\ncl = %s()\ncl.setup_cache()\n" % class_name
                visited.append(fullname)
                with open(fullname, 'w', encoding='utf-8') as f:
                    f.write(content)
                _out, err = run_script(fullname, wait=True)
                # Empty lines and lines starting with a space are dropped
                # along with the tolerated messages.
                kept = [
                    line for line in err.split('\n')
                    if line and line[0] != ' '
                    and not any(frag in line for frag in tolerated)
                ]
                err = "\n".join(kept).strip(' \n\r')
                if err:
                    raise RuntimeError(
                        "Issue with '{}'\n{}".format(fullname, err))
                if (filename.endswith(expected_file) and
                        compare_module_version(
                            sklearn.__version__, "0.21") >= 0):
                    if "random_state=42" not in content:
                        raise AssertionError(content)
                    verif = True
        # The test only succeeds if the expected file was actually checked.
        if not verif:
            raise AssertionError(
                "Visited files\n{}".format("\n".join(visited)))
Esempio n. 2
0
 def test_python(self):
     """
     Runs ``flog_fake_classes.py`` with ``run_script`` and checks the
     returned pair: the first item must not be None, the second must be.
     """
     fLOG(__file__, self._testMethodName,
          OutputPrint=__name__ == "__main__")
     here = os.path.dirname(__file__)
     script = os.path.join(
         here, "..", "..", "src", "pyquickhelper", "loghelper",
         "flog_fake_classes.py")
     out, err = run_script(script)
     assert out is not None
     assert err is None
     # The first returned object is released explicitly.
     out.__exit__(None, None, None)
    def test_create_asv_benchmark_tiny_same(self):
        """
        Calls ``create_asv_benchmark`` with ``env='same'`` for a small set
        of models, then appends a ``setup_cache`` call to every file found
        under ``benches``, runs it with ``run_script`` and fails if
        unexpected lines remain on the standard error.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        temp = get_temp_folder(
            __file__, "temp_create_asv_benchmark_all_tiny_same")
        skipped = {'DictVectorizer', 'FeatureHasher'}  # 'CountVectorizer'
        selected = {'SelectFromModel', 'NMF', 'LatentDirichletAllocation'}
        created = create_asv_benchmark(
            location=temp, verbose=1, fLOG=fLOG, skip_models=skipped,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True, models=selected, env='same')
        self.assertNotEmpty(created)

        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        # Standard error lines containing one of these fragments are ignored.
        tolerated = ("Warning", "No module named 'mlprodict'", "Traceback ")
        # Either import statement is accepted in the NMF benchmark.
        nmf_imports = ("from sklearn.decomposition.nmf import NMF",
                       "from sklearn.decomposition import NMF")
        for root, _, filenames in os.walk(os.path.join(temp, 'benches')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                fLOG("process '{}'".format(filename))
                fullname = os.path.join(root, filename)
                with open(fullname, 'r', encoding='utf-8') as f:
                    content = f.read()
                # The first class declared in the file is the one
                # instantiated by the code appended to the script.
                class_name = class_pattern.findall(content)[0]
                content += "\n\ncl = %s()\ncl.setup_cache()\n" % class_name
                with open(fullname, 'w', encoding='utf-8') as f:
                    f.write(content)
                _out, err = run_script(fullname, wait=True)
                # Empty lines and lines starting with a space are dropped
                # along with the tolerated messages.
                kept = [
                    line for line in err.split('\n')
                    if line and line[0] != ' '
                    and not any(frag in line for frag in tolerated)
                ]
                err = "\n".join(kept).strip(' \n\r')
                if err:
                    raise RuntimeError(
                        "Issue with '{}'\n{}".format(fullname, err))
                if (filename.endswith("bench_NMF_default_num_tr_pos.py") and
                        compare_module_version(
                            sklearn.__version__, "0.22") >= 0):
                    if not any(stmt in content for stmt in nmf_imports):
                        raise AssertionError(
                            "Unable to find 'import NMF' in\n{}".format(
                                content))
    def test_create_asv_benchmark_all(self):
        """
        Calls ``create_asv_benchmark`` without restricting the models and
        inspects the file written for the liblinear logistic regression.
        When the module is run as a script, it also runs ``setup_cache``
        on a subset of the files found under ``benches``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        temp = get_temp_folder(__file__, "temp_create_asv_benchmark_all")
        skipped = {'DictVectorizer', 'FeatureHasher'}  # 'CountVectorizer'
        created = create_asv_benchmark(
            location=temp, verbose=1, fLOG=fLOG, skip_models=skipped,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True)
        self.assertGreater(len(created), 2)

        logreg_file = os.path.join(
            temp, 'benches', 'linear_model', 'LogisticRegression',
            'bench_LogReg_liblinear_b_cl_solverliblinear_onnx.py')
        self.assertExists(logreg_file)
        with open(logreg_file, "r", encoding="utf-8") as f:
            content = f.read()
        for fragment in (
                "class LogReg_liblinear_b_cl_solverliblinear_onnx_benchClassifier(",
                "solver='liblinear'",
                "return onnx_optimisations(onx)"):
            self.assertIn(fragment, content)
        # Any of the three import statements is accepted; the assertion is
        # only evaluated on the last one when the first two are missing.
        private_import = (
            "from sklearn.linear_model._logistic import LogisticRegression")
        legacy_import = (
            "from sklearn.linear_model.logistic import LogisticRegression")
        public_import = "from sklearn.linear_model import LogisticRegression"
        if private_import not in content and legacy_import not in content:
            self.assertIn(public_import, content)

        if __name__ != "__main__":
            return

        fLOG("[] checks setup_cache")
        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        checked = []
        folder = os.path.join(temp, 'benches')
        # Only files whose name contains one of these fragments are run.
        subsets_test = [
            'Stacking', 'ovariance', 'bench_LogReg_liblinear', 'Latent'
        ]
        for root, _, filenames in os.walk(folder):
            for filename in filenames:
                if '__init__' in filename or 'chain' in filename.lower():
                    continue
                if not any(sub in filename for sub in subsets_test):
                    continue
                checked.append(filename)
                fLOG("process '{}'".format(filename))
                fullname = os.path.join(root, filename)
                with open(fullname, 'r', encoding='utf-8') as f:
                    content = f.read()
                # The first class declared in the file is the one
                # instantiated by the code appended to the script.
                class_name = class_pattern.findall(content)[0]
                content += "\n\ncl = %s()\ncl.setup_cache()\n" % class_name
                with open(fullname, 'w', encoding='utf-8') as f:
                    f.write(content)
                _out, err = run_script(fullname, wait=True)
                # Empty lines, lines starting with a space and warnings
                # are dropped from the standard error.
                kept = [
                    line for line in err.split('\n')
                    if line and line[0] != ' ' and "Warning" not in line
                ]
                err = "\n".join(kept).strip(' \n\r')
                if err:
                    raise RuntimeError(
                        "Issue with '{}'\n{}".format(fullname, err))
        if not checked:
            raise AssertionError("Nothing found in '{}'.".format(folder))
    def test_create_asv_benchmark_logreg(self):
        """
        Calls ``create_asv_benchmark`` for ``LogisticRegression``, then
        appends a ``setup_cache`` call to every file found under
        ``benches``, runs it with ``run_script`` and fails if unexpected
        lines remain on the standard error. Two specific files get their
        content inspected as well.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        temp = get_temp_folder(__file__, "temp_create_asv_benchmark_logreg")
        runtimes = ('scikit-learn', 'python', 'onnxruntime1')
        created = create_asv_benchmark(
            location=temp, verbose=3, fLOG=fLOG, runtime=runtimes,
            exc=False, execute=True, models={'LogisticRegression'})
        if len(created) < 6:
            listing = "\n".join(sorted(created))
            raise AssertionError(
                "Number of created files is too small.\n{}".format(listing))

        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        # Standard error lines containing one of these fragments are ignored.
        tolerated = ("Warning", "No module named 'mlprodict'", "Traceback ")
        proba_file = "bench_LogReg_liblinear_m_cl_solverliblinear.py"
        decision_file = (
            "bench_LogReg_liblinear_dec_b_cl_dec_solverliblinear.py")
        verif = 0
        visited = []
        for root, _, filenames in os.walk(os.path.join(temp, 'benches')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                fLOG("process '{}'".format(filename))
                fullname = os.path.join(root, filename)
                with open(fullname, 'r', encoding='utf-8') as f:
                    content = f.read()
                # The first class declared in the file is the one
                # instantiated by the code appended to the script.
                class_name = class_pattern.findall(content)[0]
                content += "\n\ncl = %s()\ncl.setup_cache()\n" % class_name
                visited.append(fullname)
                with open(fullname, 'w', encoding='utf-8') as f:
                    f.write(content)
                _out, err = run_script(fullname, wait=True)
                # Empty lines and lines starting with a space are dropped
                # along with the tolerated messages.
                kept = [
                    line for line in err.split('\n')
                    if line and line[0] != ' '
                    and not any(frag in line for frag in tolerated)
                ]
                err = "\n".join(kept).strip(' \n\r')
                if err:
                    raise RuntimeError(
                        "Issue with '{}'\n{}".format(fullname, err))

                if (filename.endswith(proba_file) and
                        compare_module_version(
                            sklearn.__version__, "0.21") >= 0):
                    # The dictionary form of the option must be absent
                    # while 'nozipmap' and predict_proba must be present.
                    if ("{LogisticRegression: {'zipmap': False}}" in content
                            or "'nozipmap'" not in content
                            or 'predict_proba' not in content):
                        raise AssertionError(content)
                    verif += 1
                if (filename.endswith(decision_file) and
                        compare_module_version(
                            sklearn.__version__, "0.21") >= 0):
                    # Same logic for 'raw_scores' and decision_function.
                    if ("{LogisticRegression: {'raw_scores': True}}" in content
                            or "'raw_scores'" not in content
                            or 'decision_function' not in content):
                        raise AssertionError(content)
                    verif += 1

        # At least one of the two inspected files must have been visited.
        if not verif:
            raise AssertionError(
                "Visited files\n{}".format("\n".join(visited)))