def test_validate_pyrt_ort(self):
    """Validate LinearRegression with both the python and onnxruntime1
    runtimes, export the reports and check the summary contents."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    # silence skl2onnx logging during the conversion loop
    getLogger('skl2onnx').disabled = True
    verbose = 1 if __name__ == "__main__" else 0
    temp = get_temp_folder(__file__, "temp_validate_pyrt_ort")
    results = list(enumerate_validated_operator_opsets(
        verbose, models={"LinearRegression"}, fLOG=fLOG,
        runtime=['python', 'onnxruntime1'], debug=False,
        filter_exp=lambda m, p: '-64' not in p,
        benchmark=True, n_features=[None, 10]))
    self.assertGreater(len(results), 1)
    report = DataFrame(results)
    self.assertGreater(report.shape[1], 1)
    fLOG("output results")
    report.to_csv(os.path.join(temp, "sklearn_opsets_report.csv"),
                  index=False)
    report.to_excel(os.path.join(temp, "sklearn_opsets_report.xlsx"),
                    index=False)
    summary = summary_report(report)
    summary.to_excel(os.path.join(temp, "sklearn_opsets_summary.xlsx"),
                     index=False)
    # both requested runtimes must appear in the summary
    self.assertEqual(set(summary['runtime']), {'python', 'onnxruntime1'})
    # n_features=None falls back to the dataset default of 4 features
    self.assertEqual(set(summary['n_features']), {4, 10})
def test_validate_sklearn_operators_all_onnxruntime(self):
    """Run the onnxruntime2 validation over many operators, capped at
    ~30 rows to keep the test duration reasonable, and export the
    report and its summary.

    The unreachable ``if False:`` debug branch of the original (which
    only ran LogisticRegression) has been removed; behavior is
    unchanged because that branch could never execute.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    logger = getLogger('skl2onnx')
    logger.disabled = True
    verbose = 1 if __name__ == "__main__" else 0
    temp = get_temp_folder(
        __file__, "temp_validate_sklearn_operators_all_onnxruntime2")
    rows = []
    for row in enumerate_validated_operator_opsets(
            verbose, debug=None, fLOG=fLOG, runtime='onnxruntime2',
            benchmark=False, dump_folder=temp,
            filter_exp=lambda m, s: m not in {
                AdaBoostRegressor, GaussianProcessClassifier}):
        rows.append(row)
        if len(rows) > 30:
            # stop early, 30 rows are enough for the assertions below
            break
    self.assertGreater(len(rows), 1)
    df = DataFrame(rows)
    self.assertGreater(df.shape[1], 1)
    fLOG("output results")
    df.to_csv(os.path.join(temp, "sklearn_opsets_report.csv"), index=False)
    df.to_excel(os.path.join(
        temp, "sklearn_opsets_report.xlsx"), index=False)
    piv = summary_report(df)
    piv.to_excel(os.path.join(
        temp, "sklearn_opsets_summary.xlsx"), index=False)
def test_validate_sklearn_operators_benchmark(self):
    """Benchmark LinearRegression validation and verify the benchmark
    columns appear in the report and in its summary."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    getLogger('skl2onnx').disabled = True
    verbose = 1 if __name__ == "__main__" else 0
    temp = get_temp_folder(__file__,
                           "temp_validate_sklearn_operators_benchmark")
    rows = list(enumerate_validated_operator_opsets(
        verbose, models={"LinearRegression"}, opset_min=10,
        benchmark=True, fLOG=fLOG))
    self.assertGreater(len(rows), 1)
    df = DataFrame(rows)
    # benchmark mode adds lambda-* and time-ratio-* columns
    for suffix in ('skl', 'batch'):
        self.assertIn('lambda-' + suffix, df.columns)
    for size in ('1', '10'):
        self.assertIn('time-ratio-N=' + size, df.columns)
    self.assertGreater(df.shape[1], 1)
    self.assertGreater(df.loc[0, "tostring_time"], 0)
    piv = summary_report(df)
    self.assertGreater(piv.shape[1], 1)
    self.assertIn('RT/SKL-N=1', piv.columns)
    self.assertNotIn('RT/SKL-N=10', piv.columns)
    self.assertIn('N=10', piv.columns)
    fLOG("output results")
    df.to_excel(os.path.join(temp, "sklearn_opsets_report.xlsx"),
                index=False)
    piv.to_excel(os.path.join(temp, "sklearn_opsets_summary.xlsx"),
                 index=False)
def test_rt_KMeans_python(self):
    """Validate KMeans with the python runtime at opset 11 and render
    the summary to RST with ``df2rst``.

    Fix: the ``except`` clause used ``raise e``, which extends the
    traceback; a bare ``raise`` re-raises the active exception with its
    original traceback intact.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    logger = getLogger('skl2onnx')
    logger.disabled = True
    verbose = 2 if __name__ == "__main__" else 0
    debug = False
    buffer = []

    def myprint(*args, **kwargs):
        # collect log lines so the test can check something was printed
        buffer.append(" ".join(map(str, args)))

    rows = list(enumerate_validated_operator_opsets(
        verbose, models={"KMeans"}, opset_min=11, opset_max=11,
        fLOG=myprint, runtime='python', debug=debug))
    self.assertGreater(len(rows), 1)
    self.assertIn('skl_nop', rows[-1])
    keys = set()
    for row in rows:
        keys.update(set(row))
    self.assertIn('onx_size', keys)
    piv = summary_report(DataFrame(rows))
    opset = [c for c in piv.columns if 'opset' in c]
    self.assertTrue('opset11' in opset or 'opset10' in opset)
    self.assertGreater(len(buffer), 1 if debug else 0)
    common, subsets = split_columns_subsets(piv)
    try:
        conv = df2rst(piv, split_col_common=common,  # pylint: disable=E1123
                      split_col_subsets=subsets)
        self.assertIn('| KMeans |', conv)
    except TypeError as e:
        # older df2rst versions lack split_col_common; skip rendering then
        if "got an unexpected keyword argument 'split_col_common'" in str(e):
            return
        raise
def test_rt_tfidftransformer_onnxruntime1(self):
    """Validate TfidfTransformer with the onnxruntime1 runtime."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    getLogger('skl2onnx').disabled = True
    verbose = 1 if __name__ == "__main__" else 0
    debug = False
    buffer = []

    def myprint(*args, **kwargs):
        # capture log output instead of printing it
        buffer.append(" ".join(map(str, args)))

    rows = list(enumerate_validated_operator_opsets(
        verbose, models={"TfidfTransformer"}, fLOG=myprint,
        runtime='onnxruntime1', debug=debug,
        filter_exp=lambda m, p: True))
    self.assertGreater(len(rows), 1)
    self.assertIn('skl_nop', rows[0])
    self.assertIn('onx_size', rows[-1])
    piv = summary_report(DataFrame(rows))
    self.assertGreater(piv.shape[0], 1)
def test_validate_sklearn_one_hot_encoder(self):
    """Validate OneHotEncoder, round-trip the report through CSV and
    check the resulting summary."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    getLogger('skl2onnx').disabled = True
    verbose = 1 if __name__ == "__main__" else 0
    temp = get_temp_folder(__file__, "temp_validate_one_hot_encoder")
    rows = list(enumerate_validated_operator_opsets(
        verbose, models={"OneHotEncoder"},
        filter_exp=lambda m, p: True, debug=False,
        fLOG=fLOG, dump_folder=temp, benchmark=True))
    self.assertGreater(len(rows), 1)
    report = DataFrame(rows)
    self.assertGreater(report.shape[1], 1)
    fLOG("output results")
    data = os.path.join(temp, "sklearn_opsets_report.csv")
    report.to_csv(data, index=False)
    # reload from disk so the summary is built from the serialized report
    reloaded = read_csv(data)
    piv = summary_report(reloaded)
    self.assertGreater(piv.shape[0], 1)
    self.assertGreater(piv.shape[1], 10)
    self.assertIn('OneHotEncoder', set(piv['name']))
    piv.to_csv(os.path.join(temp, "sklearn_opsets_summary.csv"),
               index=False)
    piv.to_excel(os.path.join(temp, "sklearn_opsets_summary.xlsx"),
                 index=False)
def test_validate_pyrt_ort2(self):
    """Plot the benchmark comparison between the python and
    onnxruntime1 runtimes for LinearRegression.

    Fix: the original ``verbose = 0 if __name__ == "__main__" else 0``
    evaluated to 0 in both branches; the dead conditional is replaced
    by the constant.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    logger = getLogger('skl2onnx')
    logger.disabled = True
    verbose = 0
    rows = list(enumerate_validated_operator_opsets(
        verbose, models={"LinearRegression"}, fLOG=fLOG,
        runtime=['python', 'onnxruntime1'], debug=False,
        filter_exp=lambda m, p: '-64' not in p,
        benchmark=True, n_features=[None, 10]))
    df = DataFrame(rows)
    piv = summary_report(df)
    # matplotlib is imported lazily so the module loads without it
    import matplotlib.pyplot as plt
    fig, ax = plot_validate_benchmark(piv)
    if __name__ == "__main__":
        plt.show()
    plt.clf()
    self.assertNotEmpty(fig)
    self.assertNotEmpty(ax)
def test_rt_OneVsRestClassifier_python(self):
    """Validate OneVsRestClassifier across opsets 9-11 and render the
    summary with ``df2rst``."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    getLogger('skl2onnx').disabled = True
    verbose = 1 if __name__ == "__main__" else 0
    debug = False
    buffer = []

    def myprint(*args, **kwargs):
        # capture log output instead of printing it
        buffer.append(" ".join(map(str, args)))

    # NOTE(review): the filter below is always True ('True or ...');
    # kept byte-identical to preserve behavior
    rows = list(enumerate_validated_operator_opsets(
        verbose, models={"OneVsRestClassifier"}, opset_min=9,
        opset_max=11, fLOG=myprint, benchmark=True, runtime='python',
        debug=debug, filter_exp=lambda m, p: True or 'm-cl' in p))
    self.assertGreater(len(rows), 1)
    self.assertIn('skl_nop', rows[0])
    self.assertIn('onx_size', rows[-1])
    piv = summary_report(DataFrame(rows))
    self.assertGreater(piv.shape[0], 1)
    self.assertGreater(piv.shape[0], 2)
    common, subsets = split_columns_subsets(piv)
    rst = df2rst(
        piv, number_format=2,
        replacements={'nan': '', 'ERR: 4convert': ''},
        split_row=lambda index, dp=piv: build_key_split(
            dp.loc[index, "name"], index),
        split_col_common=common,
        split_col_subsets=subsets,
        filter_rows=filter_rows,
        column_size={'problem': 25},
        label_pattern=".. _lpy-{section}:")
    self.assertIn("opset9 | RT/SKL-N=1", rst)
def test_validate_Voting(self):
    """Validate the voting models and check the VotingRegressor rows
    of the summary all carry an opset10 value."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    getLogger('skl2onnx').disabled = True
    verbose = 1 if __name__ == "__main__" else 0
    rows = list(enumerate_validated_operator_opsets(
        verbose,
        models={"VotingRegressor", "VotingClassifier",
                'LinearRegression'},
        opset_min=9, fLOG=fLOG, runtime='python', debug=False))
    self.assertGreater(len(rows), 4)
    piv = summary_report(DataFrame(rows))
    reg = piv[piv.name == 'VotingRegressor']
    self.assertGreater(reg.shape[0], 1)
    # every VotingRegressor row must have a non-NaN opset10 entry
    nonan = reg['opset10'].dropna()
    self.assertEqual(nonan.shape[0], reg.shape[0])
def test_validate_summary(self):
    """Build a summary from the stored validation report in the data
    folder and export it to CSV and Excel."""
    this = os.path.abspath(os.path.dirname(__file__))
    data = os.path.join(this, "data", "sklearn_opsets_report.csv")
    report = read_csv(data)
    piv = summary_report(report)
    self.assertGreater(piv.shape[0], 1)
    self.assertGreater(piv.shape[1], 10)
    self.assertIn('LogisticRegression', set(piv['name']))
    temp = get_temp_folder(__file__, "temp_validate_summary")
    fLOG("output results")
    piv.to_csv(os.path.join(temp, "sklearn_opsets_summary.csv"),
               index=False)
    piv.to_excel(os.path.join(temp, "sklearn_opsets_summary.xlsx"),
                 index=False)
def test_rt_GaussianProcessRegressor_python_fit(self):
    """Validate fitted single-output GaussianProcessRegressor and check
    which optimisation labels were recorded."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    logger = getLogger('skl2onnx')
    logger.disabled = True
    verbose = 4 if __name__ == "__main__" else 0
    buffer = []

    def myprint(*args, **kwargs):
        # capture log output instead of printing it
        buffer.append(" ".join(map(str, args)))

    debug = False
    rows = list(enumerate_validated_operator_opsets(
        verbose, models={"GaussianProcessRegressor"}, fLOG=myprint,
        runtime='python', debug=debug,
        filter_exp=lambda m, s: "nofit" not in s and "multi" not in s))
    self.assertGreater(len(rows), 6)
    self.assertGreater(len(buffer), 1 if debug else 0)
    optim_values = [row.get('optim', '') for row in rows]
    # scikit-learn renamed gaussian_process.gpr to ._gpr; accept both
    expcl = "<class 'sklearn.gaussian_process.gpr.GaussianProcessRegressor'>={'optim': 'cdist'}"
    expcl2 = "<class 'sklearn.gaussian_process._gpr.GaussianProcessRegressor'>={'optim': 'cdist'}"
    expected = [
        {'', 'onnx/' + expcl, expcl, 'onnx'},
        {'', 'onnx/' + expcl, expcl},
        {'', 'onnx/' + expcl2, expcl2, 'onnx'},
        {'', 'onnx/' + expcl2, expcl2},
        {'', 'onnx'},
        {'', expcl},
        {'', expcl2},
    ]
    self.assertIn(set(optim_values), expected)
    piv = summary_report(DataFrame(rows))
    short = 'cdist'
    # NOTE(review): the long expcl2 form is deliberately kept in this
    # list, matching the original expectations
    expected = [{'', 'onnx/' + short, short, 'onnx'},
                {'', 'onnx/' + short, short},
                {'', 'cdist', 'onnx'},
                {'', short},
                {'', expcl2},
                {'', 'onnx'}]
    self.assertIn(set(piv['optim']), expected)
def test_validate_sklearn_operators_benchmark_all(self):
    """Benchmark a small sample of operators on onnxruntime1 with
    version columns enabled, then export report and summary."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    getLogger('skl2onnx').disabled = True
    verbose = 11 if __name__ == "__main__" else 0
    temp = get_temp_folder(
        __file__, "temp_validate_sklearn_operators_benchmark_all")
    rows = []
    for row in enumerate_validated_operator_opsets(
            verbose, opset_min=10, benchmark=True, fLOG=fLOG,
            runtime="onnxruntime1", versions=True):
        rows.append(row)
        if len(rows) > 10:
            # stop early to keep the test short
            break
    self.assertGreater(len(rows), 1)
    df = DataFrame(rows)
    # benchmark mode adds lambda-* and time-ratio-* columns
    for suffix in ('skl', 'batch'):
        self.assertIn('lambda-' + suffix, df.columns)
    self.assertIn('time-ratio-N=1', df.columns)
    self.assertGreater(df.shape[1], 1)
    self.assertGreater(df.loc[0, "tostring_time"], 0)
    piv = summary_report(df)
    self.assertGreater(piv.shape[1], 1)
    self.assertIn('RT/SKL-N=1', piv.columns)
    self.assertNotIn('RT/SKL-N=10', piv.columns)
    # self.assertIn('N=10', piv.columns)
    fLOG("output results")
    # versions=True adds version columns such as v_numpy
    self.assertIn('v_numpy', df.columns)
    df.to_excel(os.path.join(temp, "sklearn_opsets_report.xlsx"),
                index=False)
    piv.to_excel(os.path.join(temp, "sklearn_opsets_summary.xlsx"),
                 index=False)
    self.assertIn('v_numpy', piv.columns)
def test_rt_GaussianProcessRegressor_python_optim(self):
    """Check optimisation labels for GaussianProcessRegressor
    restricted to the ``b-reg`` problem with the ``rbf`` scenario.

    Idiom fix: the index-mutating ``for i in range(len(exp))`` loop is
    replaced with an equivalent list comprehension.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    logger = getLogger('skl2onnx')
    logger.disabled = True
    verbose = 1 if __name__ == "__main__" else 0
    buffer = []

    def myprint(*args, **kwargs):
        # collect log lines so the test can check something was printed
        buffer.append(" ".join(map(str, args)))

    debug = True
    rows = list(enumerate_validated_operator_opsets(
        verbose, models={"GaussianProcessRegressor"}, fLOG=myprint,
        runtime='python', debug=debug,
        filter_scenario=lambda m, p, s, e, e2:
            p == "b-reg" and s == "rbf"))
    self.assertGreater(len(rows), 1)
    self.assertGreater(len(buffer), 1 if debug else 0)
    opt = set(_.get('optim', '') for _ in rows)
    expcl = "<class 'sklearn.gaussian_process.gpr.GaussianProcessRegressor'>={'optim': 'cdist'}"
    exp = [{'', 'onnx/' + expcl, expcl, 'onnx'},
           {'', 'onnx/' + expcl, expcl},
           {expcl}]
    # normalize old/new scikit-learn module paths (._gpr vs .gpr)
    exp = [set(e.replace("._gpr", ".gpr") for e in s) for s in exp]
    opt = set(e.replace("._gpr", ".gpr") for e in opt)
    self.assertIn(opt, exp)
    piv = summary_report(DataFrame(rows))
    opt = set(piv['optim'])
    expcl = "cdist"
    exp = [{'', 'onnx/' + expcl, expcl, 'onnx'},
           {'', 'onnx/' + expcl},
           {expcl}]
    self.assertIn(opt, exp)