Esempio n. 1
0
 def test_unzip_and_convert_metadata(self):
     """
     Unzips ``results2.zip`` into a folder holding a generated
     benchmark, exports the content of ``results`` with
     ``export_asv_json`` and checks the distinct values found for
     ``par_scenario`` and ``par_problem``. The exported rows are
     finally written into ``df.xlsx``.
     """
     archive = os.path.join(TestAsvJsonText.data, 'results2.zip')
     folder = get_temp_folder(__file__, 'temp_unzip_and_convert_metadata')
     create_asv_benchmark(
         location=folder, models={'LogisticRegression', 'LinearRegression'})
     unzip_files(archive, folder)
     results = os.path.join(folder, 'results')
     configuration = os.path.join(folder, 'asv.conf.json')
     rows = export_asv_json(results, baseline="skl", conf=configuration)

     # Distinct values are gathered in a single pass over the rows.
     problems = set()
     scenarios = set()
     for row in rows:
         if 'par_problem' in row:
             problems.add(row['par_problem'])
         if 'par_scenario' in row:
             scenarios.add(row['par_scenario'])

     self.assertEqual(scenarios, {'default', 'liblinear'})
     self.assertEqual(problems, {
         'm-cl', '~m-reg-64', 'b-cl', 'm-reg', 'b-reg', '~b-cl-64',
         '~b-reg-64'
     })

     destination = os.path.join(folder, "df.xlsx")
     pandas.DataFrame(rows).to_excel(destination)
    def test_create_asv_benchmark_pyspy_compiled(self):
        """
        Creates the benchmark for *AdaBoostRegressor* with
        ``add_pyspy=True`` and checks that the expected file found
        under ``pyspy`` contains ``setup_profile``.
        """
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(
            __file__, "temp_create_asv_benchmark_pyspy_compiled")
        created = create_asv_benchmark(
            location=folder, verbose=0,
            runtime=('python', 'python_compiled'),
            exc=False, execute=True,
            models={'AdaBoostRegressor'}, add_pyspy=True)
        self.assertNotEmpty(created)

        opset = get_opset_number_from_onnx()
        found = False
        visited = []
        for root, _, filenames in os.walk(os.path.join(folder, 'pyspy')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                visited.append(filename)
                script = os.path.join(root, filename)
                with open(script, 'r', encoding='utf-8') as f:
                    content = f.read()
                # Only the file whose name ends with the expected suffix
                # is inspected, and only for scikit-learn >= 0.21.
                is_target = (
                    filename.endswith(
                        "bench_AdaBoostReg_default_b_reg_nest10_1_4_%d_float_.py"
                        % opset)
                    and compare_module_version(
                        sklearn.__version__, "0.21") >= 0)
                if not is_target:
                    continue
                if "setup_profile" not in content:
                    raise AssertionError(content)
                found = True
        if not found:
            listing = "\n".join(visited)
            raise AssertionError("Visited files\n{}".format(listing))
    def test_create_asv_benchmark_pyspy(self):
        """
        Creates the benchmark for *DecisionTreeClassifier* with
        ``add_pyspy=True`` and checks that the expected file found
        under ``pyspy`` contains ``setup_profile``.
        """
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(__file__, "temp_create_asv_benchmark_pyspy")
        created = create_asv_benchmark(
            location=folder, verbose=0,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True,
            models={'DecisionTreeClassifier'}, add_pyspy=True)
        self.assertNotEmpty(created)

        opset = TARGET_OPSET
        found = False
        visited = []
        for root, _, filenames in os.walk(os.path.join(folder, 'pyspy')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                visited.append(filename)
                script = os.path.join(root, filename)
                with open(script, 'r', encoding='utf-8') as f:
                    content = f.read()
                # Only the file whose name ends with the expected suffix
                # is inspected, and only for scikit-learn >= 0.21.
                is_target = (
                    filename.endswith(
                        "bench_DecisionTreeClas_default_b_cl_1_4_%d_float_nozipmap.py"
                        % opset)
                    and compare_module_version(
                        sklearn.__version__, "0.21") >= 0)
                if not is_target:
                    continue
                if "setup_profile" not in content:
                    raise AssertionError(content)
                found = True
        if not found:
            listing = "\n".join(visited)
            raise AssertionError("Visited files\n{}".format(listing))
    def test_create_asv_benchmark_pyspy_knn(self):
        """
        Creates the benchmark for *KNeighborsClassifier* with
        ``add_pyspy=True`` and checks that the expected file found
        under ``pyspy`` contains ``setup_profile``.
        """
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(
            __file__, "temp_create_asv_benchmark_pyspy_knn")
        created = create_asv_benchmark(
            location=folder, verbose=0,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True,
            models={'KNeighborsClassifier'}, add_pyspy=True)
        self.assertNotEmpty(created)

        found = False
        target_opset = TARGET_OPSET
        visited = []
        for root, _, filenames in os.walk(os.path.join(folder, 'pyspy')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                visited.append(filename)
                script = os.path.join(root, filename)
                with open(script, 'r', encoding='utf-8') as f:
                    content = f.read()
                # Only the file whose name ends with the expected suffix
                # is inspected, and only for scikit-learn >= 0.21.
                is_target = (
                    filename.endswith(
                        "bench_KNNClas_default_k3_b_cl_64_algorithmbrute_n_neighbors3"
                        "_10000_20_%d_double_optcdist-zm0.py" % target_opset)
                    and compare_module_version(
                        sklearn.__version__, "0.21") >= 0)
                if not is_target:
                    continue
                if "setup_profile" not in content:
                    raise AssertionError(content)
                found = True
        if not found:
            listing = "\n".join(visited)
            raise AssertionError("Visited files\n{}".format(listing))
Esempio n. 5
0
    def test_create_asv_benchmark_flat(self):
        """
        Creates the benchmark for two linear models with ``flat=True``
        and checks the content of the file generated for
        *LogisticRegression* (liblinear, onnx) directly under
        ``benches``.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(__file__, "temp_create_asv_benchmark_flat")
        created = create_asv_benchmark(
            location=folder,
            models={'LogisticRegression', 'LinearRegression'},
            verbose=5, fLOG=fLOG, flat=True, execute=True)
        self.assertGreater(len(created), 2)

        bench = os.path.join(
            folder, 'benches',
            'bench_LogReg_liblinear_b_cl_solverliblinear_onnx.py')
        self.assertExists(bench)
        with open(bench, "r", encoding="utf-8") as f:
            content = f.read()

        for fragment in (
                "class LogReg_liblinear_b_cl_solverliblinear_onnx_benchClassifier(",
                "solver='liblinear'",
                "return onnx_optimisations(onx)"):
            self.assertIn(fragment, content)

        # The import statement may come from the public module or
        # from the submodule ``logistic``.
        import_found = (
            "from sklearn.linear_model.logistic import LogisticRegression" in content or
            "from sklearn.linear_model import LogisticRegression" in content)
        if 'LogisticRegression' in content and not import_found:
            raise AssertionError(
                "Unable to find 'import LogisticRegression in \n{}".format(content))

        for fragment in ("par_optimonnx = True",
                         "par_scenario = ",
                         "par_problem = "):
            self.assertIn(fragment, content)
Esempio n. 6
0
    def test_create_asv_benchmark_noflat_ext(self):
        """
        Creates the benchmark with ``flat=False`` for two scikit-learn
        models, *XGBRegressor* and *LGBMRegressor*, then checks the
        expected files exist in their subfolders of ``benches`` and
        that the two files under ``_externals`` contain the expected
        import statement.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        folder = get_temp_folder(
            __file__, "temp_create_asv_benchmark_noflat__ext")
        requested = {
            'LogisticRegression', 'BernoulliNB', 'XGBRegressor', 'LGBMRegressor'}
        created = create_asv_benchmark(
            location=folder, models=requested,
            verbose=5, fLOG=fLOG, flat=False, execute=True)
        self.assertGreater(len(created), 2)

        # (path below 'benches', import statement to find or None)
        expectations = [
            (('linear_model', 'LogisticRegression',
              'bench_LogReg_liblinear_b_cl_solverliblinear.py'), None),
            (('naive_bayes', 'BernoulliNB',
              'bench_BernoulliNB_default_b_cl.py'), None),
            (('_externals', 'XGBRegressor',
              'bench_XGBReg_default_b_reg_nest100.py'),
             "from xgboost import XGBRegressor"),
            (('_externals', 'LGBMRegressor',
              'bench_LGBMReg_default_b_reg_nest100.py'),
             "from lightgbm import LGBMRegressor"),
        ]
        for parts, statement in expectations:
            name = os.path.join(folder, 'benches', *parts)
            self.assertExists(name)
            if statement is None:
                continue
            with open(name, "r", encoding="utf-8") as f:
                content = f.read()
            self.assertIn(statement, content)
Esempio n. 7
0
    def test_create_asv_benchmark_noflat_vc(self):
        """
        Creates the benchmark for *VotingClassifier* with ``flat=False``
        and checks one generated file.

        The inspected file is the first one, in sorted order, of
        ``benches/ensemble/VotingClassifier`` whose name does not
        contain ``__init__``. It must define a class whose name starts
        with ``VotingClas_`` and, for each of *LogisticRegression* and
        *VotingClassifier* it mentions, it must contain one of the
        accepted import statements.

        :raises AssertionError: if no file is found or if an
            expected import statement is missing
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(__file__, "temp_create_asv_benchmark_noflat_vc")
        created = create_asv_benchmark(
            location=temp, models={'VotingClassifier'},
            verbose=5, fLOG=fLOG, flat=False, execute=True)
        self.assertGreater(len(created), 2)

        folder = os.path.join(temp, 'benches', 'ensemble', 'VotingClassifier')
        # os.listdir returns names in arbitrary order, sorting makes
        # the choice of the inspected file reproducible.
        names = sorted(
            name for name in os.listdir(folder) if '__init__' not in name)
        if not names:
            # Explicit failure instead of an IndexError on names[0].
            raise AssertionError(
                "No benchmark file found in '{}'.".format(folder))
        full_name = os.path.join(folder, names[0])
        self.assertExists(full_name)
        with open(full_name, "r", encoding="utf-8") as f:
            content = f.read()
        self.assertIn("class VotingClas_", content)

        if 'LogisticRegression' in content:
            if ("from sklearn.linear_model.logistic import LogisticRegression" not in content and
                    "from sklearn.linear_model import LogisticRegression" not in content):
                raise AssertionError(
                    "Unable to find 'import LogisticRegression in \n{}".format(content))
        if 'VotingClassifier' in content:
            if ("from sklearn.ensemble.voting import VotingClassifier" not in content and
                    "from sklearn.ensemble import VotingClassifier" not in content):
                # The message names the class which is really missing.
                raise AssertionError(
                    "Unable to find 'import VotingClassifier' in \n{}".format(content))
Esempio n. 8
0
 def test_create_asv_benchmark_gpr(self):
     """
     Creates the benchmark for *GaussianProcessRegressor* with
     ``flat=False`` and checks that ``create_asv_benchmark`` returns
     more than two items.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     folder = get_temp_folder(__file__, "temp_create_asv_benchmark_gpr")
     options = dict(
         location=folder,
         models={'GaussianProcessRegressor'},
         verbose=5,
         fLOG=fLOG,
         flat=False,
         execute=True)
     created = create_asv_benchmark(**options)
     self.assertGreater(len(created), 2)
Esempio n. 9
0
    def test_create_asv_benchmark_rf(self):
        """
        Creates the benchmark for *RandomForestRegressor*, appends a
        call to ``setup_cache`` to every generated file under
        ``benches``, runs each of them with ``run_script`` and raises
        if the ``err`` output still contains lines once the filtered
        ones are removed. The expected file must also contain
        ``random_state=42``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(__file__, "temp_create_asv_benchmark_rf")
        created = create_asv_benchmark(
            location=folder, verbose=1, fLOG=fLOG,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True,
            models={'RandomForestRegressor'})
        self.assertNotEmpty(created)

        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        # A line of the err output containing one of these is ignored.
        ignored = ("Warning", "No module named 'mlprodict'", "Traceback ")
        checked = False
        visited = []
        for root, _, filenames in os.walk(os.path.join(folder, 'benches')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                fLOG("process '{}'".format(filename))
                script = os.path.join(root, filename)
                with open(script, 'r', encoding='utf-8') as f:
                    content = f.read()

                # The first class found in the file is instantiated
                # at the end of the script to call setup_cache.
                first_class = class_pattern.findall(content)[0]
                content += "\n\ncl = %s()\ncl.setup_cache()\n" % first_class
                visited.append(script)
                with open(script, 'w', encoding='utf-8') as f:
                    f.write(content)

                _out, err = run_script(script, wait=True)
                # Empty lines and lines starting with a space are
                # dropped as well.
                kept = [
                    line for line in err.split('\n')
                    if line and line[0] != ' ' and
                    not any(marker in line for marker in ignored)]
                err = "\n".join(kept).strip(' \n\r')
                if err:
                    raise RuntimeError(
                        "Issue with '{}'\n{}".format(script, err))

                is_target = (
                    filename.endswith(
                        "bench_RandomForestReg_default_b_reg_nest100.py")
                    and compare_module_version(
                        sklearn.__version__, "0.21") >= 0)
                if not is_target:
                    continue
                if "random_state=42" not in content:
                    raise AssertionError(content)
                checked = True
        if not checked:
            listing = "\n".join(visited)
            raise AssertionError("Visited files\n{}".format(listing))
Esempio n. 10
0
    def test_create_asv_benchmark_knnr(self):
        """
        Creates the benchmark for *KNeighborsRegressor* with
        ``flat=False`` and checks the expected file exists and contains
        a class whose name starts with ``KNNReg_`` as well as the
        string ``['cdist'],``.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        folder = get_temp_folder(__file__, "temp_create_asv_benchmark_knnr")
        created = create_asv_benchmark(
            location=folder,
            models={'KNeighborsRegressor'},
            verbose=5,
            fLOG=fLOG,
            flat=False,
            execute=True)
        self.assertGreater(len(created), 2)

        bench = os.path.join(
            folder, "benches", "neighbors", "KNeighborsRegressor",
            "bench_KNNReg_default_k3_b_reg_algorithmbrute_n_neighbors3.py")
        self.assertExists(bench)
        with open(bench, "r", encoding="utf-8") as f:
            content = f.read()
        for fragment in ("class KNNReg_", "['cdist'],"):
            self.assertIn(fragment, content)
    def test_create_asv_benchmark_tiny_same(self):
        """
        Creates the benchmark for three models with ``env='same'``,
        appends a call to ``setup_cache`` to every generated file under
        ``benches``, runs each of them with ``run_script`` and raises
        if the ``err`` output still contains lines once the filtered
        ones are removed. The file generated for *NMF* must also
        contain one of the accepted import statements.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(
            __file__, "temp_create_asv_benchmark_all_tiny_same")
        skipped = {
            'DictVectorizer', 'FeatureHasher',  # 'CountVectorizer'
        }
        selected = {'SelectFromModel', 'NMF', 'LatentDirichletAllocation'}
        created = create_asv_benchmark(
            location=folder, verbose=1, fLOG=fLOG, skip_models=skipped,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True, models=selected, env='same')
        self.assertNotEmpty(created)

        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        # A line of the err output containing one of these is ignored.
        ignored = ("Warning", "No module named 'mlprodict'", "Traceback ")
        for root, _, filenames in os.walk(os.path.join(folder, 'benches')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                fLOG("process '{}'".format(filename))
                script = os.path.join(root, filename)
                with open(script, 'r', encoding='utf-8') as f:
                    content = f.read()

                # The first class found in the file is instantiated
                # at the end of the script to call setup_cache.
                first_class = class_pattern.findall(content)[0]
                content += "\n\ncl = %s()\ncl.setup_cache()\n" % first_class
                with open(script, 'w', encoding='utf-8') as f:
                    f.write(content)

                _out, err = run_script(script, wait=True)
                # Empty lines and lines starting with a space are
                # dropped as well.
                kept = [
                    line for line in err.split('\n')
                    if line and line[0] != ' ' and
                    not any(marker in line for marker in ignored)]
                err = "\n".join(kept).strip(' \n\r')
                if err:
                    raise RuntimeError(
                        "Issue with '{}'\n{}".format(script, err))

                is_target = (
                    filename.endswith("bench_NMF_default_num_tr_pos.py") and
                    compare_module_version(sklearn.__version__, "0.22") >= 0)
                if not is_target:
                    continue
                import_found = (
                    "from sklearn.decomposition.nmf import NMF" in content or
                    "from sklearn.decomposition import NMF" in content)
                if not import_found:
                    raise AssertionError(
                        "Unable to find 'import NMF' in\n{}".format(content))
Esempio n. 12
0
    def test_create_asv_benchmark_text(self):
        """
        Creates the benchmark for *HashingVectorizer* with
        ``flat=False`` and checks the first listed file of
        ``benches/feature_extraction/HashingVectorizer`` (``__init__``
        excluded) defines a class whose name starts with
        ``HashingVectorizer_`` and imports *HashingVectorizer*.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(__file__, "temp_create_asv_benchmark_text")
        created = create_asv_benchmark(
            location=temp,
            models={'HashingVectorizer'},
            verbose=5,
            fLOG=fLOG,
            flat=False,
            execute=True)
        self.assertGreater(len(created), 2)

        folder = os.path.join(
            temp, 'benches', 'feature_extraction', 'HashingVectorizer')
        candidates = []
        for filename in os.listdir(folder):
            if '__init__' not in filename:
                candidates.append(filename)
        bench = os.path.join(folder, candidates[0])
        self.assertExists(bench)
        with open(bench, "r", encoding="utf-8") as f:
            content = f.read()
        for fragment in (
                "class HashingVectorizer_",
                "from sklearn.feature_extraction.text import HashingVectorizer"):
            self.assertIn(fragment, content)
Esempio n. 13
0
    def test_create_asv_benchmark_calibrated(self):
        """
        Creates the benchmark for *CalibratedClassifierCV* with
        ``flat=False`` and checks one generated file.

        The inspected file is the first one, in sorted order, of
        ``benches/calibration/CalibratedClassifierCV`` whose name does
        not contain ``__init__``. It must define a class whose name
        starts with ``CalibratedClasCV_`` and import
        *CalibratedClassifierCV*. If it mentions *SGDClassifier*, it
        must contain one of the accepted import statements for it.

        :raises AssertionError: if no file is found or if an
            expected import statement is missing
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(
            __file__, "temp_create_asv_benchmark_calibrated")
        created = create_asv_benchmark(
            location=temp, models={'CalibratedClassifierCV'},
            verbose=5, fLOG=fLOG, flat=False, execute=True)
        self.assertGreater(len(created), 2)

        folder = os.path.join(
            temp, 'benches', 'calibration', 'CalibratedClassifierCV')
        # os.listdir returns names in arbitrary order, sorting makes
        # the choice of the inspected file reproducible.
        names = sorted(
            name for name in os.listdir(folder) if '__init__' not in name)
        if not names:
            # Explicit failure instead of an IndexError on names[0].
            raise AssertionError(
                "No benchmark file found in '{}'.".format(folder))
        full_name = os.path.join(folder, names[0])
        self.assertExists(full_name)
        with open(full_name, "r", encoding="utf-8") as f:
            content = f.read()
        self.assertIn("class CalibratedClasCV_", content)
        self.assertIn(
            "from sklearn.calibration import CalibratedClassifierCV", content)

        # The substring test is case sensitive: the class is spelled
        # 'SGDClassifier', any other casing never matches and would
        # silently skip the check.
        if 'SGDClassifier' in content:
            # The class may be imported from the public module or from
            # the submodule implementing it (its name changed across
            # scikit-learn versions).
            accepted = (
                "from sklearn.linear_model import SGDClassifier",
                "from sklearn.linear_model.stochastic_gradient import SGDClassifier",
                "from sklearn.linear_model._stochastic_gradient import SGDClassifier",
            )
            if not any(statement in content for statement in accepted):
                raise AssertionError(
                    "Unable to find 'import SGDClassifier' in\n{}".format(
                        content))
    def test_create_asv_benchmark_hist_gbc(self):
        """
        Creates the benchmark for *HistGradientBoostingClassifier* and
        checks the generated files under ``benches``.

        No path may contain ``_hist_gradient_boosting``. For
        scikit-learn >= 0.21, the expected file must contain
        ``random_state=42``, an import from
        ``sklearn.ensemble._hist_gradient_boosting.gradient_boosting``
        and the definition of ``par_full_test_name``.

        :raises AssertionError: if a check fails or if the expected
            file was not found, in that case the message lists the
            visited files
        """
        self.assertNotEmpty(mlprodict)
        temp = get_temp_folder(__file__, "temp_create_asv_benchmark_hist_gbc")
        created = create_asv_benchmark(
            location=temp,
            verbose=0,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False,
            execute=True,
            models={'HistGradientBoostingClassifier'})
        self.assertNotEmpty(created)

        verif = False
        allnames = []
        for path, _, files in os.walk(os.path.join(temp, 'benches')):
            for zoo in files:
                if '__init__' in zoo:
                    continue
                fullname = os.path.join(path, zoo)
                # Every visited file is recorded, otherwise the final
                # error message has nothing to show.
                allnames.append(fullname)
                if "_hist_gradient_boosting" in fullname:
                    raise AssertionError(fullname)
                with open(fullname, 'r', encoding='utf-8') as f:
                    content = f.read()
                if (zoo.endswith("bench_HGBClas_default_b_cl_mxit100.py")
                        and compare_module_version(sklearn.__version__,
                                                   "0.21") >= 0):
                    if "random_state=42" not in content:
                        raise AssertionError(content)
                    if "from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import" not in content:
                        raise AssertionError(content)
                    if "par_full_test_name = 'bench" not in content:
                        raise AssertionError(content)
                    verif = True
        if not verif:
            raise AssertionError("Visited files\n{}".format(
                "\n".join(allnames)))
    def test_create_asv_benchmark_all(self):
        """
        Creates the benchmark without restricting ``models`` (only
        ``skip_models`` is given) and checks the content of the file
        generated for *LogisticRegression* (liblinear, onnx).

        When the module is run as a script, a call to ``setup_cache``
        is also appended to a subset of the generated files, each of
        them is run with ``run_script`` and an exception is raised if
        the ``err`` output still contains lines after filtering.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        temp = get_temp_folder(__file__, "temp_create_asv_benchmark_all")
        skipped = {
            'DictVectorizer',
            'FeatureHasher',  # 'CountVectorizer'
        }
        created = create_asv_benchmark(
            location=temp, verbose=1, fLOG=fLOG, skip_models=skipped,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True)
        self.assertGreater(len(created), 2)

        bench = os.path.join(
            temp, 'benches', 'linear_model', 'LogisticRegression',
            'bench_LogReg_liblinear_b_cl_solverliblinear_onnx.py')
        self.assertExists(bench)
        with open(bench, "r", encoding="utf-8") as f:
            content = f.read()
        for fragment in (
                "class LogReg_liblinear_b_cl_solverliblinear_onnx_benchClassifier(",
                "solver='liblinear'",
                "return onnx_optimisations(onx)"):
            self.assertIn(fragment, content)

        # One of these import statements is enough, the assertion on
        # the last one only runs when the others are missing.
        logreg_imports = (
            "from sklearn.linear_model._logistic import LogisticRegression",
            "from sklearn.linear_model.logistic import LogisticRegression",
            "from sklearn.linear_model import LogisticRegression",
        )
        if not any(stmt in content for stmt in logreg_imports[:-1]):
            self.assertIn(logreg_imports[-1], content)

        if __name__ != "__main__":
            return

        fLOG("[] checks setup_cache")
        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        checked = []
        folder = os.path.join(temp, 'benches')
        subsets_test = [
            'Stacking', 'ovariance', 'bench_LogReg_liblinear', 'Latent'
        ]
        for root, _, filenames in os.walk(folder):
            for filename in filenames:
                if '__init__' in filename or 'chain' in filename.lower():
                    continue
                # Only files whose name contains one of the substrings
                # in subsets_test are executed.
                if not any(tag in filename for tag in subsets_test):
                    continue
                checked.append(filename)
                fLOG("process '{}'".format(filename))
                script = os.path.join(root, filename)
                with open(script, 'r', encoding='utf-8') as f:
                    content = f.read()
                first_class = class_pattern.findall(content)[0]
                content += "\n\ncl = %s()\ncl.setup_cache()\n" % first_class
                with open(script, 'w', encoding='utf-8') as f:
                    f.write(content)
                _out, err = run_script(script, wait=True)
                kept = [
                    line for line in err.split('\n')
                    if line and line[0] != ' ' and "Warning" not in line]
                err = "\n".join(kept).strip(' \n\r')
                if err:
                    raise RuntimeError(
                        "Issue with '{}'\n{}".format(script, err))
        if not checked:
            raise AssertionError("Nothing found in '{}'.".format(folder))
    def test_create_asv_benchmark_logreg(self):
        """
        Creates the benchmark for *LogisticRegression*, appends a call
        to ``setup_cache`` to every generated file under ``benches``,
        runs each of them with ``run_script`` and raises if the ``err``
        output still contains lines once the filtered ones are removed.

        Two files get additional checks on their content (options and
        method name). The test fails if none of them was found.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        self.assertNotEmpty(mlprodict)
        folder = get_temp_folder(__file__, "temp_create_asv_benchmark_logreg")
        created = create_asv_benchmark(
            location=folder, verbose=3, fLOG=fLOG,
            runtime=('scikit-learn', 'python', 'onnxruntime1'),
            exc=False, execute=True,
            models={'LogisticRegression'})
        if len(created) < 6:
            listing = "\n".join(sorted(created))
            raise AssertionError(
                "Number of created files is too small.\n{}".format(listing))

        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        # A line of the err output containing one of these is ignored.
        ignored = ("Warning", "No module named 'mlprodict'", "Traceback ")
        # (file suffix, forbidden string, required option, required method)
        expectations = (
            ("bench_LogReg_liblinear_m_cl_solverliblinear.py",
             "{LogisticRegression: {'zipmap': False}}",
             "'nozipmap'", 'predict_proba'),
            ("bench_LogReg_liblinear_dec_b_cl_dec_solverliblinear.py",
             "{LogisticRegression: {'raw_scores': True}}",
             "'raw_scores'", 'decision_function'),
        )
        matched = 0
        visited = []
        for root, _, filenames in os.walk(os.path.join(folder, 'benches')):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                fLOG("process '{}'".format(filename))
                script = os.path.join(root, filename)
                with open(script, 'r', encoding='utf-8') as f:
                    content = f.read()

                # The first class found in the file is instantiated
                # at the end of the script to call setup_cache.
                first_class = class_pattern.findall(content)[0]
                content += "\n\ncl = %s()\ncl.setup_cache()\n" % first_class
                visited.append(script)
                with open(script, 'w', encoding='utf-8') as f:
                    f.write(content)

                _out, err = run_script(script, wait=True)
                # Empty lines and lines starting with a space are
                # dropped as well.
                kept = [
                    line for line in err.split('\n')
                    if line and line[0] != ' ' and
                    not any(marker in line for marker in ignored)]
                err = "\n".join(kept).strip(' \n\r')
                if err:
                    raise RuntimeError(
                        "Issue with '{}'\n{}".format(script, err))

                for suffix, forbidden, option, method in expectations:
                    is_target = (
                        filename.endswith(suffix) and
                        compare_module_version(
                            sklearn.__version__, "0.21") >= 0)
                    if not is_target:
                        continue
                    if (forbidden in content or option not in content or
                            method not in content):
                        raise AssertionError(content)
                    matched += 1

        if matched == 0:
            listing = "\n".join(visited)
            raise AssertionError("Visited files\n{}".format(listing))