def test_cli_convert_validater_switch(self):
    """Check ``convert_validate`` with ``--use_double switch``.

    A logistic regression fitted on iris is pickled next to a CSV file
    holding the test features.  The converter is called once directly and
    once through the command line with ``--use_double switch``; the log
    captured from the command line must mention the ``predict_proba`` step.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    clr = LogisticRegression()
    clr.fit(X_train, y_train)

    temp = get_temp_folder(__file__, "temp_cli_convert_validate_switch")
    data = os.path.join(temp, "data.csv")
    pandas.DataFrame(X_test).to_csv(data, index=False)
    pkl = os.path.join(temp, "model.pkl")
    with open(pkl, "wb") as f:
        pickle.dump(clr, f)

    # The direct call used to be stored in ``res`` and immediately
    # overwritten; check it the same way the float64 GPR test does.
    direct = convert_validate(pkl=pkl, data=data, verbose=0,
                              method="predict,predict_proba",
                              name="output_label,output_probability")
    self.assertNotEmpty(direct)

    st = BufferedPrint()
    args = ["convert_validate", "--pkl", pkl, '--data', data,
            '--method', "predict,predict_proba",
            '--name', "output_label,output_probability",
            '--verbose', '1', '--use_double', 'switch']
    main(args, fLOG=st.fprint)
    res = str(st)
    self.assertIn(
        "[convert_validate] compute predictions with method 'predict_proba'",
        res)
def test_cli_validate_kmeans(self):
    """Benchmark KMeans with ``validate_runtime`` and post-process the results.

    The command must log the model name and write the raw CSV, the summary
    CSV and the graph.  The raw results are then merged with a copy of
    themselves and summarised; the summary must keep one row per merged row
    and expose the ``N=10-base`` column.
    """
    folder = get_temp_folder(__file__, "temp_validate_runtime_kmeans")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")
    graph_png = os.path.join(folder, 'gr.png')

    cli_args = [
        "validate_runtime", "--n_features", "4,50", "-nu", "3",
        "-re", "3", "-o", "11", "-op", "11", "-v", "2",
        "--out_raw", raw_csv, "--out_summary", summary_csv,
        "-b", "1", "--runtime", "python_compiled,onnxruntime1",
        "--models", "KMeans", "--out_graph", graph_png, "--dtype", "32",
    ]
    log = BufferedPrint()
    main(args=cli_args, fLOG=log.fprint)

    self.assertIn('KMeans', str(log))
    for produced in (raw_csv, summary_csv, graph_png):
        self.assertExists(produced)

    # Merge the benchmark with a copy of itself under two prefixes.
    raw = pandas.read_csv(raw_csv)
    merged = merge_benchmark(
        {'r1-': raw, 'r2-': raw.copy()}, baseline='r1-onnxruntime1')
    base_columns = sorted(
        col for col in merged.columns if col.endswith('-base'))
    report = summary_report(merged, add_cols=base_columns)

    self.assertEqual(merged.shape[0], report.shape[0])
    self.assertIn('N=10-base', report.columns)
    report.to_excel(os.path.join(folder, "merged.xlsx"), index=False)
def test_onnx_stats(self):
    """Run ``onnx_stats`` on a converted logistic regression.

    The model is pickled, converted with ``convert_validate`` into an ONNX
    file, and the ``onnx_stats`` command output must contain ``ninits: 0``.
    """
    iris = load_iris()
    train_x, test_x, train_y, _ = train_test_split(
        iris.data, iris.target, random_state=11)
    model = LogisticRegression()
    model.fit(train_x, train_y)

    folder = get_temp_folder(__file__, "temp_onnx_stats")
    csv_path = os.path.join(folder, "data.csv")
    pandas.DataFrame(test_x).to_csv(csv_path, index=False)
    pickle_path = os.path.join(folder, "model.pkl")
    with open(pickle_path, "wb") as stream:
        pickle.dump(model, stream)

    onnx_path = os.path.join(folder, 'outolr.onnx')
    convert_validate(pkl=pickle_path, data=csv_path, verbose=0,
                     method="predict,predict_proba",
                     outonnx=onnx_path,
                     name="output_label,output_probability")

    log = BufferedPrint()
    main(args=["onnx_stats", "--name", onnx_path], fLOG=log.fprint)
    self.assertIn("ninits: 0", str(log))
def test_cli_validate_model_lightgbm(self):
    """Validate ``LGBMClassifier`` with dumps enabled (``-dum 1 -du <folder>``).

    The log must mention the model, both CSV outputs must exist, and one of
    two expected dump files (error dump or regular dump) must be present in
    the dump folder.

    Raises:
        FileNotFoundError: neither expected dump file was written.
    """
    # NOTE(review): another definition with this exact name exists in this
    # file; if both belong to the same class only the last one runs - confirm.
    folder = get_temp_folder(__file__, "temp_validate_model_lgbm_csv")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")

    log = BufferedPrint()
    cli_args = [
        "validate_runtime", "--out_raw", raw_csv,
        "--out_summary", summary_csv, "--models", "LGBMClassifier",
        '-o', '10', '-op', '10', '-v', '2', '-b', '1',
        '-dum', '1', '-du', folder,
    ]
    main(args=cli_args, fLOG=log.fprint)

    self.assertIn('LGBMClassifier', str(log))
    self.assertExists(raw_csv)
    self.assertExists(summary_csv)

    error_dump = os.path.join(
        folder, "dump-ERROR-python-LGBMClassifier-default-b-cl--op10-nf4.pkl")
    regular_dump = os.path.join(
        folder, "dump-i-python-LGBMClassifier-default-b-cl-lightgbm.sklearn"
                ".LGBMClassifierzipmapFalse-op10-nf4.pkl")
    if os.path.exists(error_dump) or os.path.exists(regular_dump):
        return

    # Neither dump exists: list the folder content to ease debugging.
    found = os.listdir(folder)
    raise FileNotFoundError(
        "Unable to find '{}' or '{}' in\n{}.".format(
            error_dump, regular_dump, '\n'.join(found)))
def test_cli_validate_model_process_csv(self):
    """Validate two linear models with the ``-se 1`` switch and CSV outputs.

    The captured log must contain ``Linear`` and both CSV files must exist.
    """
    folder = get_temp_folder(__file__, "temp_validate_model_process_csv")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")

    cli_args = [
        "validate_runtime",
        "--out_raw", raw_csv,
        "--out_summary", summary_csv,
        "--models", "LogisticRegression,LinearRegression",
        '-o', '10', '-op', '11', '-v', '3', '-b', '1',
        '-se', '1',
        # '-d', '1',
    ]
    log = BufferedPrint()
    main(args=cli_args, fLOG=log.fprint)

    self.assertIn('Linear', str(log))
    self.assertExists(raw_csv)
    self.assertExists(summary_csv)
def test_cli_csv_perm(self):
    """Run ``einsum_test`` with ``--perm 1`` and check the CSV output.

    The output file must exist and the captured log must contain ``wrote``.
    """
    folder = get_temp_folder(__file__, "temp_cli_csv_perm")
    csv_path = os.path.join(folder, "res.csv")
    log = BufferedPrint()
    cli_args = [
        "einsum_test", "--equation", "abc,cd->ad",
        "--output", csv_path, "--shape", "(5,5,5);(5,5)",
        "--verbose", "0", "--perm", "1",
    ]
    main(args=cli_args, fLOG=log.fprint)
    self.assertExists(csv_path)
    self.assertIn("wrote", str(log))
def test_cli_validate_model_dump(self):
    """Validate several models with dumps enabled and unpack every dump.

    After ``validate_runtime`` has run, every file of the dump folder whose
    name contains ``dump-i-`` is unpickled and split into four sibling
    files: the ONNX bytes (``.onnx``), the test inputs (``.data.pkl``), the
    predictions (``.ypred.pkl``) and the scikit-learn model (``.skl.pkl``).
    At least one such dump must exist.
    """
    interactive = __name__ == "__main__"
    fLOG(OutputPrint=interactive)

    folder = get_temp_folder(__file__, "temp_validate_model_dump")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")
    graph_png = os.path.join(folder, 'benchmark.png')
    log = BufferedPrint()

    model_list = ','.join([
        "LinearRegression",
        "LogisticRegression",
        "DecisionTreeRegressor",
        # "DecisionTreeClassifier",
    ])
    # Alternative model list kept disabled:
    # 'KMeans', 'LGBMClassifier', 'LGBMRegressor', 'LinearSVC',
    # 'LogisticRegression', 'MLPClassifier', 'MLPRegressor',
    # 'RandomForestClassifier', 'Perceptron', 'RandomForestClassifier',
    # 'Ridge', 'SGDRegressor', 'RandomForestRegressor'

    cli_args = [
        "validate_runtime", "--out_raw", raw_csv,
        "--out_summary", summary_csv, "--models", model_list,
        '-r', "python,onnxruntime1",
        '-o', '10', '-op', '10', '-v', '1', '-b', '1',
        '-dum', '1', '-du', folder, '-n', '20,100,500',
        '--out_graph', graph_png, '--dtype', '32',
    ]
    # Log the equivalent shell command before running it.
    fLOG("python -m mlprodict " + " ".join(cli_args))
    main(args=cli_args, fLOG=fLOG if interactive else log.fprint)

    dumps = [entry for entry in os.listdir(folder) if "dump-i-" in entry]
    self.assertNotEmpty(dumps)

    pickled_parts = (
        ('.data.pkl', 'Xort_test'),
        ('.ypred.pkl', 'ypred'),
        ('.skl.pkl', 'skl_model'),
    )
    for position, dump_name in enumerate(dumps, start=1):
        fLOG("{}/{}: {}".format(position, len(dumps), dump_name))
        dump_path = os.path.join(folder, dump_name)
        with open(dump_path, 'rb') as stream:
            content = pickle.load(stream)
        stem = os.path.splitext(dump_path)[0]
        with open(stem + '.onnx', 'wb') as stream:
            stream.write(content['onnx_bytes'])
        for suffix, key in pickled_parts:
            with open(stem + suffix, 'wb') as stream:
                pickle.dump(content[key], stream)
def test_cli_validate_bench_doc(self):
    """Run ``benchmark_doc`` for ``LinearRegression`` with ``python_compiled``.

    The captured log must contain ``Linear`` and both output files
    (``raw.xlsx`` and ``sum.csv``) must exist.
    """
    folder = get_temp_folder(__file__, "temp_bench_doc")
    raw_xlsx = os.path.join(folder, "raw.xlsx")
    summary_csv = os.path.join(folder, "sum.csv")
    log = BufferedPrint()
    cli_args = [
        "benchmark_doc", "-o", raw_xlsx, "-ou", summary_csv,
        "-w", "LinearRegression", '-d', folder,
        '-r', 'python_compiled',
    ]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn('Linear', str(log))
    self.assertExists(raw_xlsx)
    self.assertExists(summary_csv)
def test_cli_asv2csv(self):
    """Convert unzipped asv results into a CSV file with ``asv2csv``.

    The first run must produce ``data.csv`` with shape ``(168, 66)``.  A
    second run uses an output name containing the literal ``<date>`` and
    only checks that the command does not raise.
    """
    folder = get_temp_folder(__file__, "temp_asv2csv")
    archive = os.path.join(TestCliAsvBench.data, 'results.zip')
    unzip_files(archive, folder)
    results = os.path.join(folder, 'results')
    log = BufferedPrint()

    csv_path = os.path.join(folder, "data.csv")
    main(args=["asv2csv", "-f", results, "-o", csv_path], fLOG=log.fprint)
    self.assertExists(csv_path)
    table = pandas.read_csv(csv_path)
    self.assertEqual(table.shape, (168, 66))

    dated_path = os.path.join(folder, "data<date>.csv")
    main(args=["asv2csv", "-f", results, "-o", dated_path],
         fLOG=log.fprint)
def test_cli_plot_onnx(self):
    """Run ``plot_onnx`` on ``fft2d_any.onnx`` for several output formats.

    For each of ``simple``, ``dot``, ``io`` and ``raw`` the command must
    write its output file.
    """
    folder = get_temp_folder(__file__, "temp_cli_plot_onnx")
    model_path = os.path.join(
        folder, "..", "..", "ut_tools", "data", "fft2d_any.onnx")
    self.assertExists(model_path)

    for fmt in ('simple', 'dot', 'io', 'raw'):
        with self.subTest(fmt=fmt):
            target = os.path.join(folder, "code_%s.py" % fmt)
            log = BufferedPrint()
            cli_args = [
                "plot_onnx", "--filename", model_path,
                '--format', fmt, "--output", target,
                "--verbose", "1",
            ]
            main(args=cli_args, fLOG=log.fprint)
            self.assertExists(target)
def test_cli_validate_model_csv_bug(self):
    """Validate ``AgglomerativeClustering`` with verbosity 1.

    The captured log must mention the model and both CSV files must exist.
    """
    # NOTE(review): another definition with this exact name exists in this
    # file; if both belong to the same class only the last one runs - confirm.
    folder = get_temp_folder(__file__, "temp_validate_model_csv_bug")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")
    log = BufferedPrint()
    cli_args = [
        "validate_runtime", "--out_raw", raw_csv,
        "--out_summary", summary_csv,
        "--models", "AgglomerativeClustering",
        '-o', '10', '-op', '10', '-v', '1', '-b', '1',
    ]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn('AgglomerativeClustering', str(log))
    self.assertExists(raw_csv)
    self.assertExists(summary_csv)
def test_cli_validate_model_lightgbm(self):
    """Validate ``LGBMClassifier`` with verbosity 1 and CSV outputs.

    The captured log must mention the model and both CSV files must exist.
    """
    # NOTE(review): another definition with this exact name exists in this
    # file; if both belong to the same class only the last one runs - confirm.
    folder = get_temp_folder(__file__, "temp_validate_model_lgbm_csv")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")
    log = BufferedPrint()
    cli_args = [
        "validate_runtime", "--out_raw", raw_csv,
        "--out_summary", summary_csv, "--models", "LGBMClassifier",
        '-o', '10', '-op', '10', '-v', '1', '-b', '1',
    ]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn('LGBMClassifier', str(log))
    self.assertExists(raw_csv)
    self.assertExists(summary_csv)
def test_cli_onnx_code_numpy(self):
    """Export ``fft2d_any.onnx`` as numpy code with ``onnx_code``.

    The generated file must exist and contain ``def numpy_``.
    """
    folder = get_temp_folder(__file__, "temp_cli_onnx_code_numpy")
    model_path = os.path.join(
        folder, "..", "..", "ut_tools", "data", "fft2d_any.onnx")
    self.assertExists(model_path)

    generated = os.path.join(folder, "code_numpy.py")
    log = BufferedPrint()
    cli_args = [
        "onnx_code", "--filename", model_path,
        '--format', 'numpy', "--output", generated,
        "--verbose", "1",
    ]
    main(args=cli_args, fLOG=log.fprint)
    self.assertExists(generated)

    with open(generated, "r", encoding='utf-8') as stream:
        code = stream.read()
    self.assertIn("def numpy_", code)
def test_cli_validate_model(self):
    """Validate two linear models with explicit timing settings (``-t``).

    The captured log must contain ``Linear`` and both Excel outputs must
    exist.
    """
    folder = get_temp_folder(__file__, "temp_validate_model")
    raw_xlsx = os.path.join(folder, "raw.xlsx")
    summary_xlsx = os.path.join(folder, "sum.xlsx")
    # JSON passed verbatim to the ``-t`` option.
    timings = '{"1":{"number":10,"repeat":10},"10":{"number":5,"repeat":5}}'
    cli_args = [
        "validate_runtime", "--out_raw", raw_xlsx,
        "--out_summary", summary_xlsx,
        "--models", "LogisticRegression,LinearRegression",
        '-o', '10', '-op', '10', '-v', '1', '-b', '1',
        '-t', timings,
    ]
    log = BufferedPrint()
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn('Linear', str(log))
    self.assertExists(raw_xlsx)
    self.assertExists(summary_xlsx)
def test_cli_asv_bench_model(self):
    """Generate an asv benchmark layout with ``asv_bench`` and inspect it.

    The log must contain ``Lin``; the benchmark script, ``asv.conf.json``
    and ``tools/flask_serve.py`` must exist; the configuration file must
    contain the version ``"1.1.1"`` given through ``--matrix``.
    """
    folder = get_temp_folder(__file__, "temp_asv_bench")
    log = BufferedPrint()
    cli_args = [
        "asv_bench", "-l", folder, "-o", '10',
        '-m', "LogisticRegression,LinearRegression",
        '-v', '2', '--flat', '1',
        '--matrix', '{"onnxruntime":["1.1.1","1.1.2"]}',
    ]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn('Lin', str(log))

    bench_script = "bench_LogReg_liblinear_b_cl_solverliblinear_onnx.py"
    config_path = os.path.join(folder, 'asv.conf.json')
    self.assertExists(os.path.join(folder, 'benches', bench_script))
    self.assertExists(config_path)
    self.assertExists(os.path.join(folder, 'tools', 'flask_serve.py'))

    with open(config_path, "r") as stream:
        config_text = stream.read()
    self.assertIn('"1.1.1"', config_text)
def test_cli_convert_validater_float64_gpr(self):
    """Convert a Gaussian process regressor in float64 with the cdist option.

    The converter is called directly first; the test stops silently when it
    raises a ``RuntimeError`` mentioning
    ``requested version 10 < 11 schema version``.  Otherwise the same
    conversion goes through the command line with ``--outonnx`` and the
    saved model must contain a ``CDist`` node.

    Raises:
        RuntimeError: any other runtime error raised by the direct call.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    clr = GaussianProcessRegressor()
    clr.fit(X_train, y_train)

    temp = get_temp_folder(__file__, "temp_cli_convert_validate_float64_gpr")
    monx = os.path.join(temp, "gpr.onnx")
    data = os.path.join(temp, "data.csv")
    pandas.DataFrame(X_test).to_csv(data, index=False)
    pkl = os.path.join(temp, "model.pkl")
    with open(pkl, "wb") as f:
        pickle.dump(clr, f)

    try:
        res = convert_validate(
            pkl=pkl, data=data, verbose=0,
            method="predict", name="GPmean", use_double='float64',
            options="{GaussianProcessRegressor:{'optim':'cdist'}}")
    except RuntimeError as e:
        if "requested version 10 < 11 schema version" in str(e):
            # Presumably an opset/schema mismatch of the installed runtime:
            # nothing more can be checked.
            return
        # Bare raise re-raises the active exception with its traceback intact.
        raise
    self.assertNotEmpty(res)

    st = BufferedPrint()
    args = [
        "convert_validate", "--pkl", pkl, '--data', data,
        '--method', "predict", '--name', "GPmean",
        '--verbose', '1', '--use_double', 'float64',
        '--options', "{GaussianProcessRegressor:{'optim':'cdist'}}",
        '--outonnx', monx,
    ]
    main(args, fLOG=st.fprint)
    self.assertExists(monx)
    with open(monx, 'rb') as f:
        model = onnx.load(f)
    self.assertIn('CDist', str(model))
def test_cli_benchmark_replay(self):
    """Dump a KMeans validation and replay it with ``benchmark_replay``.

    ``validate_runtime`` runs with ``--dump_all 1`` so that the dump folder
    is populated; ``benchmark_replay`` then reads that folder and must
    write ``res.xlsx``.  The captured log must contain ``'folder'``.
    """
    temp = get_temp_folder(__file__, "temp_benchmark_replay")
    # The output path and the log buffer used to be created twice in a row.
    out1 = os.path.join(temp, "raw.csv")
    st = BufferedPrint()

    main(args=["validate_runtime", "--n_features", "4", "-nu", "3",
               "-re", "3", "-o", "11", "-op", "11", "-v", "2",
               "--out_raw", out1, "-b", "0",
               "--runtime", "python_compiled",
               "--models", "KMeans", "--dtype", "32",
               "--dump_all", '1', '--dump_folder', temp],
         fLOG=st.fprint)

    out = os.path.join(temp, "res.xlsx")
    main(args=["benchmark_replay", "--folder", temp,
               "--out", out, '--verbose', '2'],
         fLOG=st.fprint)
    res = str(st)
    self.assertExists(out)
    self.assertIn("'folder'", res)
def test_cli_validate_model_rfbug_410(self):
    """Validate ``RandomForestRegressor`` on two runtimes with a graph.

    The captured log must contain the model name and ``time_kwargs``; the
    raw and summary Excel files and the graph must exist.
    """
    folder = get_temp_folder(__file__, "temp_validate_model_rfbug410")
    raw_xlsx = os.path.join(folder, "raw.xlsx")
    summary_xlsx = os.path.join(folder, "sum.xlsx")
    graph_png = os.path.join(folder, 'gr.png')
    cli_args = [
        "validate_runtime", "--out_raw", raw_xlsx,
        "--out_summary", summary_xlsx,
        '-o', '11', '-op', '11', '-v', '2', '-b', '1',
        '--runtime', 'python_compiled,onnxruntime1',
        '--models', 'RandomForestRegressor',
        '--n_features', '4,10', '--out_graph', graph_png,
        '--dtype', '32',
    ]
    log = BufferedPrint()
    main(args=cli_args, fLOG=log.fprint)

    output = str(log)
    for expected in ('RandomForestRegressor', 'time_kwargs'):
        self.assertIn(expected, output)
    for produced in (raw_xlsx, summary_xlsx, graph_png):
        self.assertExists(produced)
def test_cli_plot_onnx_tree(self):
    """Run ``plot_onnx`` with the ``tree`` and ``mat`` formats on a tree model.

    A decision tree regressor fitted on generated regression data is
    converted to ONNX and saved; each format must produce its output file.
    """
    folder = get_temp_folder(__file__, "temp_cli_plot_onnx_tree")
    X, y = make_regression(n_features=2)  # pylint: disable=W0632
    regressor = DecisionTreeRegressor()
    regressor.fit(X, y)
    onnx_model = to_onnx(regressor, X.astype(numpy.float32),
                         target_opset=__max_supported_opsets__)

    model_path = os.path.join(folder, "tree.onnx")
    with open(model_path, "wb") as stream:
        stream.write(onnx_model.SerializeToString())
    self.assertExists(model_path)

    for fmt in ('tree', 'mat'):
        with self.subTest(fmt=fmt):
            target = os.path.join(folder, "code_%s.py" % fmt)
            log = BufferedPrint()
            cli_args = [
                "plot_onnx", "--filename", model_path,
                '--format', fmt, "--output", target,
                "--verbose", "1",
            ]
            main(args=cli_args, fLOG=log.fprint)
            self.assertExists(target)
def test_cli_validate_model_csv_bug(self):
    """Check that validating ``AgglomerativeClustering`` silently fails.

    With verbosity 0 the command must raise ``RuntimeError`` with the
    message ``No result produced by the benchmark.``, log nothing, write
    the raw CSV file and not write the summary file.
    """
    # NOTE(review): another definition with this exact name exists in this
    # file; if both belong to the same class only the last one runs - confirm.
    folder = get_temp_folder(__file__, "temp_validate_model_csv_bug")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")
    log = BufferedPrint()

    def run_cli():
        # Deferred so that assertRaise can catch the expected exception.
        return main(args=[
            "validate_runtime", "--out_raw", raw_csv,
            "--out_summary", summary_csv,
            "--models", "AgglomerativeClustering",
            '-o', '10', '-op', '10', '-v', '0', '-b', '1',
        ], fLOG=log.fprint)

    self.assertRaise(run_cli, RuntimeError,
                     "No result produced by the benchmark.")
    self.assertEmpty(str(log))
    self.assertExists(raw_csv)
    self.assertNotExists(summary_csv)
def test_cli_benchmark_replay_help(self):
    """Check that ``benchmark_replay --help`` prints the command name."""
    log = BufferedPrint()
    cli_args = ["benchmark_replay", "--help"]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn('benchmark_replay', str(log))
def test_cli_onnx_code(self):
    """Check that ``onnx_code --help`` mentions ``verbose``."""
    # NOTE(review): another definition with this exact name exists in this
    # file; if both belong to the same class only the last one runs - confirm.
    log = BufferedPrint()
    cli_args = ["onnx_code", "--help"]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn("verbose", str(log))
def test_cli_validate(self):
    """Check that ``validate_runtime --help`` mentions ``verbose``."""
    log = BufferedPrint()
    cli_args = ["validate_runtime", "--help"]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn("verbose", str(log))
def test_cli_validate_bench_doc_help(self):
    """Check that ``benchmark_doc --help`` mentions ``verbose``."""
    log = BufferedPrint()
    cli_args = ["benchmark_doc", "--help"]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn("verbose", str(log))
def test_cli_einsum(self):
    """Check that ``einsum_test --help`` mentions ``verbose``."""
    log = BufferedPrint()
    cli_args = ["einsum_test", "--help"]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn("verbose", str(log))
def test_cli_latency(self):
    """Check that ``latency --help`` prints the command name."""
    log = BufferedPrint()
    cli_args = ["latency", "--help"]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn("latency", str(log))
def test_cli_onnx_stats(self):
    """Check that ``onnx_stats --help`` mentions ``optim``."""
    log = BufferedPrint()
    cli_args = ["onnx_stats", "--help"]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn("optim", str(log))
def test_cli_asv_bench(self):
    """Check that ``asv_bench --help`` mentions ``verbose``."""
    log = BufferedPrint()
    cli_args = ["asv_bench", "--help"]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn("verbose", str(log))
def test_cli_onnx_code(self):
    """Run ``dynamic_doc --verbose 1`` and look for ``Abs`` in its output.

    The check only applies when the command logged something; an empty log
    is accepted.
    """
    # NOTE(review): another definition with this exact name exists in this
    # file; if both belong to the same class only the last one runs - confirm.
    log = BufferedPrint()
    cli_args = ["dynamic_doc", '--verbose', '1']
    main(args=cli_args, fLOG=log.fprint)
    output = str(log)
    if output:
        self.assertIn("Abs", output)
def test_cli_onnx_code_help(self):
    """Check that ``dynamic_doc --help`` prints text containing ``Generates``."""
    log = BufferedPrint()
    cli_args = ["dynamic_doc", "--help"]
    main(args=cli_args, fLOG=log.fprint)
    self.assertIn("Generates", str(log))