def test_unzip_and_convert_metadata(self):
    """Export unzipped ASV results and check their metadata values.

    A benchmark layout is created in a temporary folder, ``results2.zip``
    is unzipped into it, and ``export_asv_json`` is run on the ``results``
    sub-folder.  The distinct ``par_scenario`` and ``par_problem`` values
    found in the exported rows are compared to the expected sets, then the
    rows are dumped to ``df.xlsx`` through a pandas DataFrame.
    """
    archive = os.path.join(TestAsvJsonText.data, 'results2.zip')
    temp = get_temp_folder(__file__, 'temp_unzip_and_convert_metadata')
    create_asv_benchmark(
        location=temp, models={'LogisticRegression', 'LinearRegression'})
    unzip_files(archive, temp)
    results_dir = os.path.join(temp, 'results')
    conf = os.path.join(temp, 'asv.conf.json')
    exp = export_asv_json(results_dir, baseline="skl", conf=conf)

    # One single pass over the rows: ``exp`` is reused below to build
    # the DataFrame.
    problems = set()
    scenarios = set()
    for row in exp:
        if 'par_problem' in row:
            problems.add(row['par_problem'])
        if 'par_scenario' in row:
            scenarios.add(row['par_scenario'])

    self.assertEqual(scenarios, {'default', 'liblinear'})
    self.assertEqual(problems, {
        'm-cl', '~m-reg-64', 'b-cl', 'm-reg',
        'b-reg', '~b-cl-64', '~b-reg-64'
    })

    out = os.path.join(temp, "df.xlsx")
    pandas.DataFrame(exp).to_excel(out)
def test_create_asv_benchmark_pyspy_compiled(self):
    """Check the pyspy script generated for AdaBoostRegressor.

    The benchmark is created with the ``python`` and ``python_compiled``
    runtimes and ``add_pyspy=True``.  Every file below ``<temp>/pyspy``
    (``__init__`` files excluded) is read; the one whose name ends with
    the expected suffix must contain ``setup_profile``.  The test fails
    if no such file was validated.
    """
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(
        __file__, "temp_create_asv_benchmark_pyspy_compiled")
    created = create_asv_benchmark(
        location=temp, verbose=0, runtime=('python', 'python_compiled'),
        exc=False, execute=True, models={'AdaBoostRegressor'},
        add_pyspy=True)
    self.assertNotEmpty(created)

    ops = get_opset_number_from_onnx()
    expected_suffix = (
        "bench_AdaBoostReg_default_b_reg_nest10_1_4_%d_float_.py" % ops)
    validated = False
    visited = []
    for root, _, filenames in os.walk(os.path.join(temp, 'pyspy')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            visited.append(filename)
            with open(os.path.join(root, filename), 'r',
                      encoding='utf-8') as f:
                content = f.read()
            if not filename.endswith(expected_suffix):
                continue
            if compare_module_version(sklearn.__version__, "0.21") < 0:
                continue
            if "setup_profile" not in content:
                raise AssertionError(content)
            validated = True

    if not validated:
        # Lists what was seen to help understand why nothing matched.
        raise AssertionError("Visited files\n{}".format(
            "\n".join(visited)))
def test_create_asv_benchmark_pyspy(self):
    """Check the pyspy script generated for DecisionTreeClassifier.

    The benchmark is created with ``add_pyspy=True``; every file below
    ``<temp>/pyspy`` (``__init__`` files excluded) is read and the one
    whose name ends with the expected suffix, built from ``TARGET_OPSET``,
    must contain ``setup_profile``.  The test fails if no such file was
    validated.
    """
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_pyspy")
    created = create_asv_benchmark(
        location=temp, verbose=0,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True, models={'DecisionTreeClassifier'},
        add_pyspy=True)
    self.assertNotEmpty(created)

    ops = TARGET_OPSET
    expected_suffix = (
        "bench_DecisionTreeClas_default_b_cl_1_4_%d_float_nozipmap.py"
        % ops)
    validated = False
    visited = []
    for root, _, filenames in os.walk(os.path.join(temp, 'pyspy')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            visited.append(filename)
            with open(os.path.join(root, filename), 'r',
                      encoding='utf-8') as f:
                content = f.read()
            if not filename.endswith(expected_suffix):
                continue
            if compare_module_version(sklearn.__version__, "0.21") < 0:
                continue
            if "setup_profile" not in content:
                raise AssertionError(content)
            validated = True

    if not validated:
        # Lists what was seen to help understand why nothing matched.
        raise AssertionError("Visited files\n{}".format(
            "\n".join(visited)))
def test_create_asv_benchmark_pyspy_knn(self):
    """Check the pyspy script generated for KNeighborsClassifier.

    The benchmark is created with ``add_pyspy=True``; every file below
    ``<temp>/pyspy`` (``__init__`` files excluded) is read and the one
    whose name ends with the expected suffix, built from ``TARGET_OPSET``,
    must contain ``setup_profile``.  The test fails if no such file was
    validated.
    """
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_pyspy_knn")
    created = create_asv_benchmark(
        location=temp, verbose=0,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True, models={'KNeighborsClassifier'},
        add_pyspy=True)
    self.assertNotEmpty(created)

    validated = False
    target_opset = TARGET_OPSET
    expected_suffix = (
        "bench_KNNClas_default_k3_b_cl_64_algorithmbrute_n_neighbors3"
        "_10000_20_%d_double_optcdist-zm0.py" % target_opset)
    visited = []
    for root, _, filenames in os.walk(os.path.join(temp, 'pyspy')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            visited.append(filename)
            with open(os.path.join(root, filename), 'r',
                      encoding='utf-8') as f:
                content = f.read()
            if not filename.endswith(expected_suffix):
                continue
            if compare_module_version(sklearn.__version__, "0.21") < 0:
                continue
            if "setup_profile" not in content:
                raise AssertionError(content)
            validated = True

    if not validated:
        # Lists what was seen to help understand why nothing matched.
        raise AssertionError("Visited files\n{}".format(
            "\n".join(visited)))
def test_create_asv_benchmark_flat(self):
    """Check the flat layout generated for two linear models.

    With ``flat=True`` the LogisticRegression/liblinear benchmark is
    expected directly below ``<temp>/benches``.  The generated script is
    read and checked for its class name, solver, optimisation call,
    LogisticRegression import and ``par_*`` attributes.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_flat")
    created = create_asv_benchmark(
        location=temp, models={'LogisticRegression', 'LinearRegression'},
        verbose=5, fLOG=fLOG, flat=True, execute=True)
    self.assertGreater(len(created), 2)

    name = os.path.join(
        temp, 'benches',
        'bench_LogReg_liblinear_b_cl_solverliblinear_onnx.py')
    self.assertExists(name)
    with open(name, "r", encoding="utf-8") as f:
        content = f.read()

    for fragment in (
            "class LogReg_liblinear_b_cl_solverliblinear_onnx_benchClassifier(",
            "solver='liblinear'",
            "return onnx_optimisations(onx)"):
        self.assertIn(fragment, content)

    # The import path of LogisticRegression depends on the scikit-learn
    # version, either form is accepted.
    accepted_imports = (
        "from sklearn.linear_model.logistic import LogisticRegression",
        "from sklearn.linear_model import LogisticRegression",
    )
    if ('LogisticRegression' in content and
            not any(stmt in content for stmt in accepted_imports)):
        raise AssertionError(
            "Unable to find 'import LogisticRegression in \n{}".format(
                content))

    for fragment in ("par_optimonnx = True", "par_scenario = ",
                     "par_problem = "):
        self.assertIn(fragment, content)
def test_create_asv_benchmark_noflat_ext(self):
    """Check the nested layout, including models from external packages.

    With ``flat=False`` each benchmark is expected in a sub-folder of
    ``<temp>/benches``; XGBRegressor and LGBMRegressor are expected below
    ``_externals`` and their scripts must import the model from
    ``xgboost`` / ``lightgbm``.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(
        __file__, "temp_create_asv_benchmark_noflat__ext")
    created = create_asv_benchmark(
        location=temp,
        models={'LogisticRegression', 'BernoulliNB',
                'XGBRegressor', 'LGBMRegressor'},
        verbose=5, fLOG=fLOG, flat=False, execute=True)
    self.assertGreater(len(created), 2)

    # (path below 'benches', import statement expected in the file or None)
    expectations = [
        (('linear_model', 'LogisticRegression',
          'bench_LogReg_liblinear_b_cl_solverliblinear.py'), None),
        (('naive_bayes', 'BernoulliNB',
          'bench_BernoulliNB_default_b_cl.py'), None),
        (('_externals', 'XGBRegressor',
          'bench_XGBReg_default_b_reg_nest100.py'),
         "from xgboost import XGBRegressor"),
        (('_externals', 'LGBMRegressor',
          'bench_LGBMReg_default_b_reg_nest100.py'),
         "from lightgbm import LGBMRegressor"),
    ]
    for parts, import_line in expectations:
        name = os.path.join(temp, 'benches', *parts)
        self.assertExists(name)
        if import_line is None:
            continue
        with open(name, "r", encoding="utf-8") as f:
            content = f.read()
        self.assertIn(import_line, content)
def test_create_asv_benchmark_noflat_vc(self):
    """Check the benchmark script generated for VotingClassifier.

    The first generated file found in
    ``<temp>/benches/ensemble/VotingClassifier`` (``__init__`` files
    excluded) is read.  It must define a class starting with
    ``VotingClas_`` and, when it mentions LogisticRegression or
    VotingClassifier, it must import them through one of the accepted
    module paths.

    :raises AssertionError: if no benchmark file was generated or if an
        expected import statement is missing
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_noflat_vc")
    created = create_asv_benchmark(
        location=temp, models={'VotingClassifier'}, verbose=5, fLOG=fLOG,
        flat=False, execute=True)
    self.assertGreater(len(created), 2)

    folder = os.path.join(temp, 'benches', 'ensemble', 'VotingClassifier')
    names = [name for name in os.listdir(folder) if '__init__' not in name]
    if not names:
        # Explicit failure instead of an IndexError on names[0].
        raise AssertionError(
            "No benchmark file found in '{}'.".format(folder))
    full_name = os.path.join(folder, names[0])
    self.assertExists(full_name)
    with open(full_name, "r", encoding="utf-8") as f:
        content = f.read()
    self.assertIn("class VotingClas_", content)

    # The module exposing each class depends on the scikit-learn
    # version, both forms are accepted.
    if 'LogisticRegression' in content:
        if ("from sklearn.linear_model.logistic import LogisticRegression"
                not in content and
                "from sklearn.linear_model import LogisticRegression"
                not in content):
            raise AssertionError(
                "Unable to find 'import LogisticRegression in \n{}".format(
                    content))
    if 'VotingClassifier' in content:
        if ("from sklearn.ensemble.voting import VotingClassifier"
                not in content and
                "from sklearn.ensemble import VotingClassifier"
                not in content):
            # The message used to name LogisticRegression (copy/paste).
            raise AssertionError(
                "Unable to find 'import VotingClassifier in \n{}".format(
                    content))
def test_create_asv_benchmark_gpr(self):
    """Create the benchmark for GaussianProcessRegressor.

    Only checks that ``create_asv_benchmark`` reports more than two
    created items.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_gpr")
    options = dict(
        location=temp, models={'GaussianProcessRegressor'},
        verbose=5, fLOG=fLOG, flat=False, execute=True)
    created = create_asv_benchmark(**options)
    self.assertGreater(len(created), 2)
def test_create_asv_benchmark_rf(self):
    """Run ``setup_cache`` on every script made for RandomForestRegressor.

    Each file below ``<temp>/benches`` (``__init__`` files excluded) is
    extended with two lines instantiating its first class and calling
    ``setup_cache``, then executed with ``run_script``.  Lines of the
    error output which are indented, or which mention a warning, a
    traceback header or the missing ``mlprodict`` module are ignored; any
    remaining line makes the test fail.  The default regression benchmark
    must also contain ``random_state=42``.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_rf")
    created = create_asv_benchmark(
        location=temp, verbose=1, fLOG=fLOG,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True, models={'RandomForestRegressor'})
    self.assertNotEmpty(created)

    class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
    ignored_markers = (
        "Warning", "No module named 'mlprodict'", "Traceback ")
    target = "bench_RandomForestReg_default_b_reg_nest100.py"
    validated = False
    visited = []
    for root, _, filenames in os.walk(os.path.join(temp, 'benches')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            fLOG("process '{}'".format(filename))
            fullname = os.path.join(root, filename)
            with open(fullname, 'r', encoding='utf-8') as f:
                content = f.read()

            # Appends a call to setup_cache for the first class found.
            class_name = class_pattern.findall(content)[0]
            content += "\n\ncl = %s()\ncl.setup_cache()\n" % class_name
            visited.append(fullname)
            with open(fullname, 'w', encoding='utf-8') as f:
                f.write(content)

            _, stderr = run_script(fullname, wait=True)
            kept = [
                line for line in stderr.split('\n')
                if line and line[0] != ' ' and
                not any(marker in line for marker in ignored_markers)]
            message = "\n".join(kept).strip(' \n\r')
            if message:
                raise RuntimeError("Issue with '{}'\n{}".format(
                    fullname, message))

            if not filename.endswith(target):
                continue
            if compare_module_version(sklearn.__version__, "0.21") < 0:
                continue
            if "random_state=42" not in content:
                raise AssertionError(content)
            validated = True

    if not validated:
        raise AssertionError("Visited files\n{}".format(
            "\n".join(visited)))
def test_create_asv_benchmark_knnr(self):
    """Check the benchmark script generated for KNeighborsRegressor.

    The brute-force, three neighbours script is expected below
    ``<temp>/benches/neighbors/KNeighborsRegressor``; it must define a
    class starting with ``KNNReg_`` and contain ``['cdist'],``.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_knnr")
    created = create_asv_benchmark(
        location=temp, models={'KNeighborsRegressor'}, verbose=5,
        fLOG=fLOG, flat=False, execute=True)
    self.assertGreater(len(created), 2)

    path_parts = (
        "benches", "neighbors", "KNeighborsRegressor",
        "bench_KNNReg_default_k3_b_reg_algorithmbrute_n_neighbors3.py")
    full_name = os.path.join(temp, *path_parts)
    self.assertExists(full_name)
    with open(full_name, "r", encoding="utf-8") as f:
        content = f.read()
    for fragment in ("class KNNReg_", "['cdist'],"):
        self.assertIn(fragment, content)
def test_create_asv_benchmark_tiny_same(self):
    """Run ``setup_cache`` on scripts created with ``env='same'``.

    The benchmark is restricted to SelectFromModel, NMF and
    LatentDirichletAllocation.  Each file below ``<temp>/benches``
    (``__init__`` files excluded) is extended with two lines instantiating
    its first class and calling ``setup_cache``, then executed with
    ``run_script``.  Lines of the error output which are indented, or
    which mention a warning, a traceback header or the missing
    ``mlprodict`` module are ignored; any remaining line makes the test
    fail.  The NMF script must also import ``NMF`` from one of the
    accepted modules.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(
        __file__, "temp_create_asv_benchmark_all_tiny_same")
    created = create_asv_benchmark(
        location=temp, verbose=1, fLOG=fLOG,
        skip_models={
            'DictVectorizer',
            'FeatureHasher',
            # 'CountVectorizer'
        },
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True,
        models={'SelectFromModel', 'NMF', 'LatentDirichletAllocation'},
        env='same')
    self.assertNotEmpty(created)

    class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
    ignored_markers = (
        "Warning", "No module named 'mlprodict'", "Traceback ")
    nmf_imports = (
        "from sklearn.decomposition.nmf import NMF",
        "from sklearn.decomposition import NMF",
    )
    for root, _, filenames in os.walk(os.path.join(temp, 'benches')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            fLOG("process '{}'".format(filename))
            fullname = os.path.join(root, filename)
            with open(fullname, 'r', encoding='utf-8') as f:
                content = f.read()

            # Appends a call to setup_cache for the first class found.
            class_name = class_pattern.findall(content)[0]
            content += "\n\ncl = %s()\ncl.setup_cache()\n" % class_name
            with open(fullname, 'w', encoding='utf-8') as f:
                f.write(content)

            _, stderr = run_script(fullname, wait=True)
            kept = [
                line for line in stderr.split('\n')
                if line and line[0] != ' ' and
                not any(marker in line for marker in ignored_markers)]
            message = "\n".join(kept).strip(' \n\r')
            if message:
                raise RuntimeError(
                    "Issue with '{}'\n{}".format(fullname, message))

            if not filename.endswith("bench_NMF_default_num_tr_pos.py"):
                continue
            if compare_module_version(sklearn.__version__, "0.22") < 0:
                continue
            if not any(stmt in content for stmt in nmf_imports):
                raise AssertionError(
                    "Unable to find 'import NMF' in\n{}".format(content))
def test_create_asv_benchmark_text(self):
    """Check the benchmark script generated for HashingVectorizer.

    The first generated file found in
    ``<temp>/benches/feature_extraction/HashingVectorizer`` (``__init__``
    files excluded) must define a class starting with
    ``HashingVectorizer_`` and import the model from
    ``sklearn.feature_extraction.text``.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_text")
    created = create_asv_benchmark(
        location=temp, models={'HashingVectorizer'}, verbose=5,
        fLOG=fLOG, flat=False, execute=True)
    self.assertGreater(len(created), 2)

    folder_parts = ('benches', 'feature_extraction', 'HashingVectorizer')
    candidates = [
        entry for entry in os.listdir(os.path.join(temp, *folder_parts))
        if '__init__' not in entry]
    full_name = os.path.join(temp, *(folder_parts + (candidates[0],)))
    self.assertExists(full_name)
    with open(full_name, "r", encoding="utf-8") as f:
        content = f.read()

    for fragment in (
            "class HashingVectorizer_",
            "from sklearn.feature_extraction.text import HashingVectorizer"):
        self.assertIn(fragment, content)
def test_create_asv_benchmark_calibrated(self):
    """Check the benchmark script generated for CalibratedClassifierCV.

    The first generated file found in
    ``<temp>/benches/calibration/CalibratedClassifierCV`` (``__init__``
    files excluded) must define a class starting with
    ``CalibratedClasCV_`` and import ``CalibratedClassifierCV`` from
    ``sklearn.calibration``.  When the script mentions ``SGDClassifier``,
    it must also import it.

    :raises AssertionError: if no benchmark file was generated or if an
        expected statement is missing
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(
        __file__, "temp_create_asv_benchmark_calibrated")
    created = create_asv_benchmark(
        location=temp, models={'CalibratedClassifierCV'}, verbose=5,
        fLOG=fLOG, flat=False, execute=True)
    self.assertGreater(len(created), 2)

    folder = os.path.join(
        temp, 'benches', 'calibration', 'CalibratedClassifierCV')
    names = [name for name in os.listdir(folder) if '__init__' not in name]
    if not names:
        # Explicit failure instead of an IndexError on names[0].
        raise AssertionError(
            "No benchmark file found in '{}'.".format(folder))
    full_name = os.path.join(folder, names[0])
    self.assertExists(full_name)
    with open(full_name, "r", encoding="utf-8") as f:
        content = f.read()
    self.assertIn("class CalibratedClasCV_", content)
    self.assertIn(
        "from sklearn.calibration import CalibratedClassifierCV", content)

    # The guard used to look for 'SGDclassifier' (lower case 'c') which
    # never matches the class name, so the import was never verified.
    if 'SGDClassifier' in content:
        # The module exposing SGDClassifier depends on the scikit-learn
        # version, any of these forms is accepted.
        sgd_imports = (
            "from sklearn.linear_model import SGDClassifier",
            "from sklearn.linear_model._stochastic_gradient "
            "import SGDClassifier",
            "from sklearn.linear_model.stochastic_gradient "
            "import SGDClassifier",
        )
        if not any(stmt in content for stmt in sgd_imports):
            raise AssertionError(
                "Unable to find 'import SGDClassifier' in\n{}".format(
                    content))
def test_create_asv_benchmark_hist_gbc(self):
    """Check the script generated for HistGradientBoostingClassifier.

    No file below ``<temp>/benches`` may have ``_hist_gradient_boosting``
    in its path.  The default classification script must contain
    ``random_state=42``, the import from
    ``sklearn.ensemble._hist_gradient_boosting.gradient_boosting`` and a
    ``par_full_test_name`` starting with ``bench``.

    :raises AssertionError: if a check fails or if the expected script
        was not found; in the latter case the message lists the visited
        files
    """
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_hist_gbc")
    created = create_asv_benchmark(
        location=temp, verbose=0,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True,
        models={'HistGradientBoostingClassifier'})
    self.assertNotEmpty(created)

    required = (
        "random_state=42",
        "from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import",
        "par_full_test_name = 'bench",
    )
    verif = False
    allnames = []
    for path, _, files in os.walk(os.path.join(temp, 'benches')):
        for zoo in files:
            if '__init__' in zoo:
                continue
            fullname = os.path.join(path, zoo)
            # Recorded so that the final failure message is informative
            # (the list used to stay empty).
            allnames.append(fullname)
            if "_hist_gradient_boosting" in fullname:
                raise AssertionError(fullname)
            with open(fullname, 'r', encoding='utf-8') as f:
                content = f.read()
            if (zoo.endswith("bench_HGBClas_default_b_cl_mxit100.py") and
                    compare_module_version(
                        sklearn.__version__, "0.21") >= 0):
                for fragment in required:
                    if fragment not in content:
                        raise AssertionError(content)
                verif = True
    if not verif:
        raise AssertionError("Visited files\n{}".format(
            "\n".join(allnames)))
def test_create_asv_benchmark_all(self):
    """Create the benchmark for all models but two skipped ones.

    The LogisticRegression/liblinear script is checked for its class
    name, solver, optimisation call and LogisticRegression import.  When
    the file is run as a script (``__name__ == "__main__"``), a subset of
    the generated files is also extended with a call to ``setup_cache``
    and executed with ``run_script``; any line of the error output which
    is neither indented nor a warning makes the test fail.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_all")
    created = create_asv_benchmark(
        location=temp, verbose=1, fLOG=fLOG,
        skip_models={
            'DictVectorizer',
            'FeatureHasher',
            # 'CountVectorizer'
        },
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True)
    self.assertGreater(len(created), 2)

    name = os.path.join(
        temp, 'benches', 'linear_model', 'LogisticRegression',
        'bench_LogReg_liblinear_b_cl_solverliblinear_onnx.py')
    self.assertExists(name)
    with open(name, "r", encoding="utf-8") as f:
        content = f.read()
    for fragment in (
            "class LogReg_liblinear_b_cl_solverliblinear_onnx_benchClassifier(",
            "solver='liblinear'",
            "return onnx_optimisations(onx)"):
        self.assertIn(fragment, content)

    # The import path depends on the scikit-learn version: the private
    # or the former module is accepted, otherwise the public import is
    # required.
    version_specific = (
        "from sklearn.linear_model._logistic import LogisticRegression",
        "from sklearn.linear_model.logistic import LogisticRegression",
    )
    if not any(stmt in content for stmt in version_specific):
        self.assertIn(
            "from sklearn.linear_model import LogisticRegression",
            content)

    if __name__ == "__main__":
        fLOG("[] checks setup_cache")
        class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
        checked = []
        folder = os.path.join(temp, 'benches')
        subsets_test = [
            'Stacking', 'ovariance', 'bench_LogReg_liblinear', 'Latent'
        ]
        for root, _, filenames in os.walk(folder):
            for filename in filenames:
                if '__init__' in filename:
                    continue
                if 'chain' in filename.lower():
                    continue
                if not any(key in filename for key in subsets_test):
                    continue
                checked.append(filename)
                fLOG("process '{}'".format(filename))
                fullname = os.path.join(root, filename)
                with open(fullname, 'r', encoding='utf-8') as f:
                    content = f.read()

                # Appends a call to setup_cache for the first class found.
                class_name = class_pattern.findall(content)[0]
                content += (
                    "\n\ncl = %s()\ncl.setup_cache()\n" % class_name)
                with open(fullname, 'w', encoding='utf-8') as f:
                    f.write(content)

                _, stderr = run_script(fullname, wait=True)
                kept = [
                    line for line in stderr.split('\n')
                    if line and line[0] != ' ' and "Warning" not in line]
                message = "\n".join(kept).strip(' \n\r')
                if message:
                    raise RuntimeError("Issue with '{}'\n{}".format(
                        fullname, message))
        if not checked:
            raise AssertionError("Nothing found in '{}'.".format(folder))
def test_create_asv_benchmark_logreg(self):
    """Run ``setup_cache`` on every script made for LogisticRegression.

    At least six items must be created.  Each file below
    ``<temp>/benches`` (``__init__`` files excluded) is extended with two
    lines instantiating its first class and calling ``setup_cache``, then
    executed with ``run_script``.  Lines of the error output which are
    indented, or which mention a warning, a traceback header or the
    missing ``mlprodict`` module are ignored; any remaining line makes the
    test fail.  Two specific scripts are also checked for the way their
    conversion options and prediction method are written; at least one of
    them must have been validated.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_logreg")
    created = create_asv_benchmark(
        location=temp, verbose=3, fLOG=fLOG,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True, models={'LogisticRegression'})
    if len(created) < 6:
        raise AssertionError(
            "Number of created files is too small.\n{}".format(
                "\n".join(sorted(created))))

    class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
    ignored_markers = (
        "Warning", "No module named 'mlprodict'", "Traceback ")
    # (file suffix, fragment which must be absent, fragments required)
    content_checks = (
        ("bench_LogReg_liblinear_m_cl_solverliblinear.py",
         "{LogisticRegression: {'zipmap': False}}",
         ("'nozipmap'", 'predict_proba')),
        ("bench_LogReg_liblinear_dec_b_cl_dec_solverliblinear.py",
         "{LogisticRegression: {'raw_scores': True}}",
         ("'raw_scores'", 'decision_function')),
    )
    validated = 0
    visited = []
    for root, _, filenames in os.walk(os.path.join(temp, 'benches')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            fLOG("process '{}'".format(filename))
            fullname = os.path.join(root, filename)
            with open(fullname, 'r', encoding='utf-8') as f:
                content = f.read()

            # Appends a call to setup_cache for the first class found.
            class_name = class_pattern.findall(content)[0]
            content += "\n\ncl = %s()\ncl.setup_cache()\n" % class_name
            visited.append(fullname)
            with open(fullname, 'w', encoding='utf-8') as f:
                f.write(content)

            _, stderr = run_script(fullname, wait=True)
            kept = [
                line for line in stderr.split('\n')
                if line and line[0] != ' ' and
                not any(marker in line for marker in ignored_markers)]
            message = "\n".join(kept).strip(' \n\r')
            if message:
                raise RuntimeError("Issue with '{}'\n{}".format(
                    fullname, message))

            for suffix, forbidden, required in content_checks:
                if not filename.endswith(suffix):
                    continue
                if compare_module_version(sklearn.__version__, "0.21") < 0:
                    continue
                if forbidden in content:
                    raise AssertionError(content)
                for fragment in required:
                    if fragment not in content:
                        raise AssertionError(content)
                validated += 1

    if not validated:
        raise AssertionError("Visited files\n{}".format(
            "\n".join(visited)))