def test_cli_convert_validater_switch(self):
        """Check ``convert_validate`` through the CLI with ``--use_double switch``.

        A logistic regression fitted on iris is pickled next to a CSV
        holding the held-out features. The conversion is run once through
        the Python function and once through ``main``; the log captured
        from the command line must mention the ``predict_proba`` step.
        """
        iris = load_iris()
        X, y = iris.data, iris.target
        X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
        clr = LogisticRegression()
        clr.fit(X_train, y_train)

        temp = get_temp_folder(__file__, "temp_cli_convert_validate_switch")
        data = os.path.join(temp, "data.csv")
        pandas.DataFrame(X_test).to_csv(data, index=False)
        pkl = os.path.join(temp, "model.pkl")
        with open(pkl, "wb") as f:
            pickle.dump(clr, f)

        # Direct call to the function: it only has to run without raising,
        # its return value is not inspected by this test.
        convert_validate(pkl=pkl, data=data, verbose=0,
                         method="predict,predict_proba",
                         name="output_label,output_probability")

        # Same conversion through the command line entry point, with the
        # log captured in a buffer.
        st = BufferedPrint()
        args = ["convert_validate", "--pkl", pkl, '--data', data,
                '--method', "predict,predict_proba",
                '--name', "output_label,output_probability",
                '--verbose', '1', '--use_double', 'switch']
        main(args, fLOG=st.fprint)
        res = str(st)
        self.assertIn(
            "[convert_validate] compute predictions with method 'predict_proba'", res)
Example #2
0
 def test_cli_validate_kmeans(self):
     """Benchmark KMeans with ``validate_runtime`` and merge the raw results.

     The command must mention ``KMeans`` in its log and produce the raw
     CSV, the summary CSV and the graph. The raw results are then merged
     with a copy of themselves and summarised; the summary is written to
     ``merged.xlsx``.
     """
     temp = get_temp_folder(__file__, "temp_validate_runtime_kmeans")
     raw_csv = os.path.join(temp, "raw.csv")
     summary_csv = os.path.join(temp, "sum.csv")
     graph_png = os.path.join(temp, 'gr.png')

     log = BufferedPrint()
     cli_args = [
         "validate_runtime", "--n_features", "4,50", "-nu", "3", "-re", "3",
         "-o", "11", "-op", "11", "-v", "2", "--out_raw", raw_csv,
         "--out_summary", summary_csv, "-b", "1", "--runtime",
         "python_compiled,onnxruntime1", "--models", "KMeans",
         "--out_graph", graph_png, "--dtype", "32",
     ]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn('KMeans', str(log))
     for produced in (raw_csv, summary_csv, graph_png):
         self.assertExists(produced)

     # Merge the raw benchmark with a copy of itself under two prefixes,
     # using the first prefix + onnxruntime1 as the baseline.
     raw = pandas.read_csv(raw_csv)
     merged = merge_benchmark({'r1-': raw, 'r2-': raw.copy()},
                              baseline='r1-onnxruntime1')
     base_cols = sorted(
         col for col in merged.columns if col.endswith('-base'))
     suma = summary_report(merged, add_cols=base_cols)
     self.assertEqual(merged.shape[0], suma.shape[0])
     self.assertIn('N=10-base', suma.columns)
     suma.to_excel(os.path.join(temp, "merged.xlsx"), index=False)
Example #3
0
    def test_onnx_stats(self):
        """Run ``onnx_stats`` on a converted logistic regression.

        The model is fitted on iris, pickled and converted with
        ``convert_validate`` (``outonnx`` pointing at ``outolr.onnx``);
        the output of the ``onnx_stats`` command must contain
        ``ninits: 0``.
        """
        dataset = load_iris()
        X_train, X_test, y_train, _ = train_test_split(
            dataset.data, dataset.target, random_state=11)
        model = LogisticRegression()
        model.fit(X_train, y_train)

        temp = get_temp_folder(__file__, "temp_onnx_stats")
        csv_path = os.path.join(temp, "data.csv")
        pandas.DataFrame(X_test).to_csv(csv_path, index=False)
        pkl_path = os.path.join(temp, "model.pkl")
        with open(pkl_path, "wb") as stream:
            pickle.dump(model, stream)

        onnx_path = os.path.join(temp, 'outolr.onnx')
        convert_validate(
            pkl=pkl_path, data=csv_path, verbose=0,
            method="predict,predict_proba", outonnx=onnx_path,
            name="output_label,output_probability")

        log = BufferedPrint()
        main(args=["onnx_stats", "--name", onnx_path], fLOG=log.fprint)
        self.assertIn("ninits: 0", str(log))
Example #4
0
 def test_cli_validate_model_lightgbm(self):
     """Run ``validate_runtime`` on LGBMClassifier with dumping enabled.

     Besides the raw and summary CSV files, at least one of two expected
     dump files must be present in the temporary folder.

     Raises:
         FileNotFoundError: if neither expected dump file exists; the
             message lists the content of the folder.
     """
     temp = get_temp_folder(__file__, "temp_validate_model_lgbm_csv")
     raw_csv = os.path.join(temp, "raw.csv")
     summary_csv = os.path.join(temp, "sum.csv")
     log = BufferedPrint()
     cli_args = [
         "validate_runtime", "--out_raw", raw_csv,
         "--out_summary", summary_csv,
         "--models", "LGBMClassifier", '-o', '10', '-op', '10', '-v', '2',
         '-b', '1', '-dum', '1', '-du', temp,
     ]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn('LGBMClassifier', str(log))
     self.assertExists(raw_csv)
     self.assertExists(summary_csv)

     # Either dump name is accepted by this test.
     exp1 = os.path.join(
         temp,
         "dump-ERROR-python-LGBMClassifier-default-b-cl--op10-nf4.pkl")
     exp2 = os.path.join(
         temp, "dump-i-python-LGBMClassifier-default-b-cl-lightgbm.sklearn"
         ".LGBMClassifierzipmapFalse-op10-nf4.pkl")
     if not (os.path.exists(exp1) or os.path.exists(exp2)):
         found = os.listdir(temp)
         raise FileNotFoundError(
             "Unable to find '{}' or '{}' in\n{}.".format(
                 exp1, exp2, '\n'.join(found)))
Example #5
0
 def test_cli_validate_model_process_csv(self):
     """Run ``validate_runtime`` on two linear models with ``-se 1``.

     The captured log must contain ``Linear`` and both CSV outputs must
     exist afterwards.
     """
     temp = get_temp_folder(__file__, "temp_validate_model_process_csv")
     raw_csv = os.path.join(temp, "raw.csv")
     summary_csv = os.path.join(temp, "sum.csv")
     log = BufferedPrint()
     cli_args = [
         "validate_runtime",
         "--out_raw", raw_csv,
         "--out_summary", summary_csv,
         "--models", "LogisticRegression,LinearRegression",
         '-o', '10',
         '-op', '11',
         '-v', '3',
         '-b', '1',
         '-se', '1',
         # '-d', '1',
     ]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn('Linear', str(log))
     for produced in (raw_csv, summary_csv):
         self.assertExists(produced)
Example #6
0
 def test_cli_csv_perm(self):
     """Run ``einsum_test`` with ``--perm 1`` and check a CSV is written.

     The output file must exist and the captured log must contain
     ``wrote``.
     """
     temp = get_temp_folder(__file__, "temp_cli_csv_perm")
     csv_out = os.path.join(temp, "res.csv")
     log = BufferedPrint()
     cli_args = [
         "einsum_test", "--equation", "abc,cd->ad",
         "--output", csv_out, "--shape", "(5,5,5);(5,5)",
         "--verbose", "0", "--perm", "1",
     ]
     main(args=cli_args, fLOG=log.fprint)
     self.assertExists(csv_out)
     self.assertIn("wrote", str(log))
Example #7
0
 def test_cli_validate_model_dump(self):
     """Run ``validate_runtime`` with dumps and unpack every ``dump-i-`` file.

     After the command has run, each dump whose name contains
     ``dump-i-`` is unpickled and split into four sibling files: the
     ONNX bytes, the test data, the predictions and the scikit-learn
     model.
     """
     fLOG(OutputPrint=__name__ == "__main__")
     temp = get_temp_folder(__file__, "temp_validate_model_dump")
     raw_csv = os.path.join(temp, "raw.csv")
     summary_csv = os.path.join(temp, "sum.csv")
     graph = os.path.join(temp, 'benchmark.png')
     log = BufferedPrint()
     # Models currently benchmarked. Left disabled: DecisionTreeClassifier,
     # and a wider list (KMeans, LGBMClassifier, LGBMRegressor, LinearSVC,
     # LogisticRegression, MLPClassifier, MLPRegressor,
     # RandomForestClassifier, Perceptron, Ridge, SGDRegressor,
     # RandomForestRegressor).
     models = ','.join([
         "LinearRegression",
         "LogisticRegression",
         "DecisionTreeRegressor",
     ])
     args = [
         "validate_runtime", "--out_raw", raw_csv,
         "--out_summary", summary_csv,
         "--models", models, '-r', "python,onnxruntime1", '-o', '10', '-op',
         '10', '-v', '1', '-b', '1', '-dum', '1', '-du', temp, '-n',
         '20,100,500', '--out_graph', graph, '--dtype', '32',
     ]
     # Log the equivalent shell command.
     fLOG("python -m mlprodict " + " ".join(args))
     # Log through fLOG when run as a script, otherwise into the buffer.
     logger = fLOG if __name__ == "__main__" else log.fprint
     main(args=args, fLOG=logger)

     dumps = [entry for entry in os.listdir(temp) if "dump-i-" in entry]
     self.assertNotEmpty(dumps)
     pickled_parts = (
         ('.data.pkl', 'Xort_test'),
         ('.ypred.pkl', 'ypred'),
         ('.skl.pkl', 'skl_model'),
     )
     for position, dump_name in enumerate(dumps, 1):
         fLOG("{}/{}: {}".format(position, len(dumps), dump_name))
         fullname = os.path.join(temp, dump_name)
         # The dumps were produced by the command above.
         with open(fullname, 'rb') as stream:
             content = pickle.load(stream)
         root = os.path.splitext(fullname)[0]
         with open(root + '.onnx', 'wb') as stream:
             stream.write(content['onnx_bytes'])
         for suffix, key in pickled_parts:
             with open(root + suffix, 'wb') as stream:
                 pickle.dump(content[key], stream)
 def test_cli_validate_bench_doc(self):
     """Run ``benchmark_doc`` on LinearRegression with ``python_compiled``.

     The captured log must contain ``Linear`` and both output files
     (``raw.xlsx`` and ``sum.csv``) must exist.
     """
     temp = get_temp_folder(__file__, "temp_bench_doc")
     raw_xlsx = os.path.join(temp, "raw.xlsx")
     summary_csv = os.path.join(temp, "sum.csv")
     log = BufferedPrint()
     cli_args = [
         "benchmark_doc", "-o", raw_xlsx, "-ou", summary_csv,
         "-w", "LinearRegression",
         '-d', temp, '-r', 'python_compiled',
     ]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn('Linear', str(log))
     for produced in (raw_xlsx, summary_csv):
         self.assertExists(produced)
Example #9
0
    def test_cli_asv2csv(self):
        """Convert unzipped asv results to CSV with the ``asv2csv`` command.

        The first run must create ``data.csv`` with shape ``(168, 66)``.
        A second run targets a file name containing ``<date>``; it only
        has to complete without raising.
        """
        temp = get_temp_folder(__file__, "temp_asv2csv")
        unzip_files(os.path.join(TestCliAsvBench.data, 'results.zip'), temp)
        results = os.path.join(temp, 'results')

        csv_out = os.path.join(temp, "data.csv")
        log = BufferedPrint()
        main(args=["asv2csv", "-f", results, "-o", csv_out], fLOG=log.fprint)
        self.assertExists(csv_out)
        self.assertEqual(pandas.read_csv(csv_out).shape, (168, 66))

        # NOTE(review): ``<date>`` is presumably a placeholder expanded by
        # the command; the resulting file name is not checked here.
        dated_out = os.path.join(temp, "data<date>.csv")
        main(args=["asv2csv", "-f", results, "-o", dated_out],
             fLOG=log.fprint)
Example #10
0
 def test_cli_plot_onnx(self):
     """Run ``plot_onnx`` on ``fft2d_any.onnx`` for several formats.

     Each of the formats ``simple``, ``dot``, ``io`` and ``raw`` is run
     as its own sub-test and must produce its output file.
     """
     temp = get_temp_folder(__file__, "temp_cli_plot_onnx")
     model_path = os.path.join(temp, "..", "..", "ut_tools", "data",
                               "fft2d_any.onnx")
     self.assertExists(model_path)
     for fmt in ('simple', 'dot', 'io', 'raw'):
         with self.subTest(fmt=fmt):
             output = os.path.join(temp, "code_%s.py" % fmt)
             log = BufferedPrint()
             cli_args = [
                 "plot_onnx", "--filename", model_path, '--format', fmt,
                 "--output", output, "--verbose", "1",
             ]
             main(args=cli_args, fLOG=log.fprint)
             self.assertExists(output)
 def test_cli_validate_model_csv_bug(self):
     """Run ``validate_runtime`` on AgglomerativeClustering with CSV outputs.

     The captured log must mention the model name and both CSV files
     must exist.
     """
     temp = get_temp_folder(__file__, "temp_validate_model_csv_bug")
     raw_csv = os.path.join(temp, "raw.csv")
     summary_csv = os.path.join(temp, "sum.csv")
     log = BufferedPrint()
     cli_args = [
         "validate_runtime", "--out_raw", raw_csv,
         "--out_summary", summary_csv,
         "--models", "AgglomerativeClustering", '-o', '10', '-op', '10',
         '-v', '1', '-b', '1',
     ]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn('AgglomerativeClustering', str(log))
     for produced in (raw_csv, summary_csv):
         self.assertExists(produced)
 def test_cli_validate_model_lightgbm(self):
     """Run ``validate_runtime`` on LGBMClassifier with CSV outputs.

     The captured log must mention ``LGBMClassifier`` and both CSV
     files must exist.
     """
     temp = get_temp_folder(__file__, "temp_validate_model_lgbm_csv")
     raw_csv = os.path.join(temp, "raw.csv")
     summary_csv = os.path.join(temp, "sum.csv")
     log = BufferedPrint()
     cli_args = [
         "validate_runtime", "--out_raw", raw_csv,
         "--out_summary", summary_csv,
         "--models", "LGBMClassifier", '-o', '10', '-op', '10', '-v', '1',
         '-b', '1',
     ]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn('LGBMClassifier', str(log))
     for produced in (raw_csv, summary_csv):
         self.assertExists(produced)
Example #13
0
 def test_cli_onnx_code_numpy(self):
     """Export ``fft2d_any.onnx`` with ``onnx_code --format numpy``.

     The generated file must exist and contain the text ``def numpy_``.
     """
     temp = get_temp_folder(__file__, "temp_cli_onnx_code_numpy")
     model_path = os.path.join(temp, "..", "..", "ut_tools", "data",
                               "fft2d_any.onnx")
     self.assertExists(model_path)
     output = os.path.join(temp, "code_numpy.py")
     log = BufferedPrint()
     cli_args = [
         "onnx_code", "--filename", model_path, '--format', 'numpy',
         "--output", output, "--verbose", "1",
     ]
     main(args=cli_args, fLOG=log.fprint)
     self.assertExists(output)
     with open(output, "r", encoding='utf-8') as stream:
         generated = stream.read()
     self.assertIn("def numpy_", generated)
 def test_cli_validate_model(self):
     """Run ``validate_runtime`` with Excel outputs and a ``-t`` JSON option.

     The captured log must contain ``Linear`` and both ``.xlsx`` files
     must exist.
     """
     temp = get_temp_folder(__file__, "temp_validate_model")
     raw_xlsx = os.path.join(temp, "raw.xlsx")
     summary_xlsx = os.path.join(temp, "sum.xlsx")
     log = BufferedPrint()
     cli_args = [
         "validate_runtime", "--out_raw", raw_xlsx,
         "--out_summary", summary_xlsx,
         "--models", "LogisticRegression,LinearRegression", '-o', '10',
         '-op', '10', '-v', '1', '-b', '1', '-t',
         '{"1":{"number":10,"repeat":10},"10":{"number":5,"repeat":5}}',
     ]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn('Linear', str(log))
     for produced in (raw_xlsx, summary_xlsx):
         self.assertExists(produced)
Example #15
0
 def test_cli_asv_bench_model(self):
     """Generate an asv benchmark folder with the ``asv_bench`` command.

     The captured log must contain ``Lin``; the benchmark script for the
     logistic regression, ``asv.conf.json`` and ``tools/flask_serve.py``
     must exist; and the configuration file must contain the version
     string ``"1.1.1"`` passed through ``--matrix``.
     """
     temp = get_temp_folder(__file__, "temp_asv_bench")
     st = BufferedPrint()
     main(args=["asv_bench", "-l", temp,
                "-o", '10', '-m',
                "LogisticRegression,LinearRegression",
                '-v', '2', '--flat', '1',
                '--matrix', '{"onnxruntime":["1.1.1","1.1.2"]}'],
          fLOG=st.fprint)
     res = str(st)
     self.assertIn('Lin', res)
     name = "bench_LogReg_liblinear_b_cl_solverliblinear_onnx.py"
     conf = os.path.join(temp, 'asv.conf.json')
     self.assertExists(os.path.join(temp, 'benches', name))
     self.assertExists(conf)
     self.assertExists(os.path.join(temp, 'tools', 'flask_serve.py'))
     # Explicit encoding: do not depend on the locale's default when
     # reading the JSON configuration.
     with open(conf, "r", encoding='utf-8') as f:
         content = f.read()
     self.assertIn('"1.1.1"', content)
Example #16
0
    def test_cli_convert_validater_float64_gpr(self):
        """Convert a Gaussian process regressor in float64 with the ``cdist`` option.

        The conversion is first run through ``convert_validate``; if it
        fails with the ``requested version 10 < 11 schema version``
        runtime error the test stops silently. Otherwise the same
        conversion is run through the command line with ``--outonnx``
        and the saved model must contain a ``CDist`` node.
        """
        iris = load_iris()
        X, y = iris.data, iris.target
        X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
        clr = GaussianProcessRegressor()
        clr.fit(X_train, y_train)

        temp = get_temp_folder(__file__,
                               "temp_cli_convert_validate_float64_gpr")
        monx = os.path.join(temp, "gpr.onnx")
        data = os.path.join(temp, "data.csv")
        pandas.DataFrame(X_test).to_csv(data, index=False)
        pkl = os.path.join(temp, "model.pkl")
        with open(pkl, "wb") as f:
            pickle.dump(clr, f)

        try:
            res = convert_validate(
                pkl=pkl,
                data=data,
                verbose=0,
                method="predict",
                name="GPmean",
                use_double='float64',
                options="{GaussianProcessRegressor:{'optim':'cdist'}}")
        except RuntimeError as e:
            if "requested version 10 < 11 schema version" in str(e):
                # Tolerated failure: nothing more can be checked.
                return
            # Any other runtime error is a real failure; a bare ``raise``
            # re-raises it with its original traceback.
            raise
        self.assertNotEmpty(res)

        st = BufferedPrint()
        args = [
            "convert_validate", "--pkl", pkl, '--data', data, '--method',
            "predict", '--name', "GPmean", '--verbose', '1', '--use_double',
            'float64', '--options',
            "{GaussianProcessRegressor:{'optim':'cdist'}}", '--outonnx', monx
        ]
        main(args, fLOG=st.fprint)
        self.assertExists(monx)
        with open(monx, 'rb') as f:
            model = onnx.load(f)
        self.assertIn('CDist', str(model))
Example #17
0
 def test_cli_benchmark_replay(self):
     """Dump a KMeans benchmark, then replay it with ``benchmark_replay``.

     ``validate_runtime`` is run with ``--dump_all 1`` into the temporary
     folder; ``benchmark_replay`` is then pointed at that folder. The
     Excel output must exist and the combined log of both commands must
     contain ``'folder'``.
     """
     temp = get_temp_folder(__file__, "temp_benchmark_replay")
     out1 = os.path.join(temp, "raw.csv")
     # One buffer captures the log of both commands.
     st = BufferedPrint()
     main(args=["validate_runtime", "--n_features", "4", "-nu", "3",
                "-re", "3", "-o", "11", "-op", "11", "-v", "2", "--out_raw",
                out1, "-b", "0",
                "--runtime", "python_compiled",
                "--models", "KMeans", "--dtype", "32",
                "--dump_all", '1', '--dump_folder', temp],
          fLOG=st.fprint)
     out = os.path.join(temp, "res.xlsx")
     main(args=["benchmark_replay", "--folder", temp, "--out", out,
                '--verbose', '2'],
          fLOG=st.fprint)
     res = str(st)
     self.assertExists(out)
     self.assertIn("'folder'", res)
 def test_cli_validate_model_rfbug_410(self):
     """Run ``validate_runtime`` on RandomForestRegressor with two runtimes.

     The captured log must contain both the model name and
     ``time_kwargs``; the raw and summary Excel files and the graph must
     exist.
     """
     temp = get_temp_folder(__file__, "temp_validate_model_rfbug410")
     raw_xlsx = os.path.join(temp, "raw.xlsx")
     summary_xlsx = os.path.join(temp, "sum.xlsx")
     graph_png = os.path.join(temp, 'gr.png')
     log = BufferedPrint()
     cli_args = [
         "validate_runtime", "--out_raw", raw_xlsx,
         "--out_summary", summary_xlsx, '-o',
         '11', '-op', '11', '-v', '2', '-b', '1', '--runtime',
         'python_compiled,onnxruntime1', '--models',
         'RandomForestRegressor', '--n_features', '4,10',
         '--out_graph', graph_png,
         '--dtype', '32',
     ]
     main(args=cli_args, fLOG=log.fprint)
     captured = str(log)
     for expected in ('RandomForestRegressor', 'time_kwargs'):
         self.assertIn(expected, captured)
     for produced in (raw_xlsx, summary_xlsx, graph_png):
         self.assertExists(produced)
Example #19
0
    def test_cli_plot_onnx_tree(self):
        """Run ``plot_onnx`` with the ``tree`` and ``mat`` formats.

        A decision tree regressor fitted on a two-feature regression
        problem is converted with ``to_onnx`` and saved; each format is
        run as its own sub-test and must produce its output file.
        """
        temp = get_temp_folder(__file__, "temp_cli_plot_onnx_tree")

        X, y = make_regression(n_features=2)  # pylint: disable=W0632
        regressor = DecisionTreeRegressor()
        regressor.fit(X, y)
        onx = to_onnx(regressor, X.astype(numpy.float32),
                      target_opset=__max_supported_opsets__)

        model_path = os.path.join(temp, "tree.onnx")
        with open(model_path, "wb") as stream:
            stream.write(onx.SerializeToString())
        self.assertExists(model_path)

        for fmt in ('tree', 'mat'):
            with self.subTest(fmt=fmt):
                output = os.path.join(temp, "code_%s.py" % fmt)
                log = BufferedPrint()
                cli_args = [
                    "plot_onnx", "--filename", model_path, '--format', fmt,
                    "--output", output, "--verbose", "1",
                ]
                main(args=cli_args, fLOG=log.fprint)
                self.assertExists(output)
Example #20
0
 def test_cli_validate_model_csv_bug(self):
     """Check ``validate_runtime`` fails on AgglomerativeClustering.

     With verbosity 0 the command is expected to raise ``RuntimeError``
     ("No result produced by the benchmark."), log nothing, write the
     raw CSV and not write the summary CSV.
     """
     temp = get_temp_folder(__file__, "temp_validate_model_csv_bug")
     out1 = os.path.join(temp, "raw.csv")
     out2 = os.path.join(temp, "sum.csv")
     st = BufferedPrint()

     def run_cli():
         # Deferred call handed to assertRaise.
         return main(args=[
             "validate_runtime", "--out_raw", out1, "--out_summary", out2,
             "--models", "AgglomerativeClustering", '-o', '10', '-op', '10',
             '-v', '0', '-b', '1'
         ], fLOG=st.fprint)

     self.assertRaise(run_cli, RuntimeError,
                      "No result produced by the benchmark.")
     self.assertEmpty(str(st))
     self.assertExists(out1)
     self.assertNotExists(out2)
 def test_cli_benchmark_replay_help(self):
     """The help of ``benchmark_replay`` must mention the command name."""
     log = BufferedPrint()
     cli_args = ["benchmark_replay", "--help"]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn('benchmark_replay', str(log))
Example #22
0
 def test_cli_onnx_code(self):
     """The help of ``onnx_code`` must mention ``verbose``."""
     log = BufferedPrint()
     cli_args = ["onnx_code", "--help"]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn("verbose", str(log))
 def test_cli_validate(self):
     """The help of ``validate_runtime`` must mention ``verbose``."""
     log = BufferedPrint()
     cli_args = ["validate_runtime", "--help"]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn("verbose", str(log))
 def test_cli_validate_bench_doc_help(self):
     """The help of ``benchmark_doc`` must mention ``verbose``."""
     log = BufferedPrint()
     cli_args = ["benchmark_doc", "--help"]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn("verbose", str(log))
Example #25
0
 def test_cli_einsum(self):
     """The help of ``einsum_test`` must mention ``verbose``."""
     log = BufferedPrint()
     cli_args = ["einsum_test", "--help"]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn("verbose", str(log))
Example #26
0
 def test_cli_latency(self):
     """The help of ``latency`` must mention the command name."""
     log = BufferedPrint()
     cli_args = ["latency", "--help"]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn("latency", str(log))
Example #27
0
 def test_cli_onnx_stats(self):
     """The help of ``onnx_stats`` must mention ``optim``."""
     log = BufferedPrint()
     cli_args = ["onnx_stats", "--help"]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn("optim", str(log))
Example #28
0
 def test_cli_asv_bench(self):
     """The help of ``asv_bench`` must mention ``verbose``."""
     log = BufferedPrint()
     cli_args = ["asv_bench", "--help"]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn("verbose", str(log))
Example #29
0
 def test_cli_onnx_code(self):
     """Run ``dynamic_doc --verbose 1``; any output must mention ``Abs``.

     An empty log is accepted: the assertion only applies when something
     was printed.
     """
     log = BufferedPrint()
     cli_args = ["dynamic_doc", '--verbose', '1']
     main(args=cli_args, fLOG=log.fprint)
     captured = str(log)
     if captured:
         self.assertIn("Abs", captured)
Example #30
0
 def test_cli_onnx_code_help(self):
     """The help of ``dynamic_doc`` must contain ``Generates``."""
     log = BufferedPrint()
     cli_args = ["dynamic_doc", "--help"]
     main(args=cli_args, fLOG=log.fprint)
     self.assertIn("Generates", str(log))