# NOTE: the import paths and the module-level constants below are assumptions
# added to keep this snippet self-contained; adjust them to the actual
# package and test-data layout.
import os
import shutil

import numpy as np
import onnxruntime as ort
import pytest

from olive.optimization_config import OptimizationConfig  # assumed import path
from olive.optimize import optimize  # assumed import path

# Assumed test fixtures: the MNIST model shipped with the test data (the
# "onnx_mnist" directory is referenced elsewhere in this file) and a CPU-only
# default execution provider.
ONNX_MODEL_PATH = os.path.join(os.path.dirname(__file__), "onnx_mnist",
                               "model.onnx")
DEFAULT_EP = ["CPUExecutionProvider"]


def test_optimize_transformer(transformer_enabled):
    model_path = os.path.join(os.path.dirname(__file__), "other_models",
                              "TFBertForQuestionAnswering.onnx")
    result_path = "transformer_opt_{}".format(transformer_enabled)
    inputs_spec = {
        "attention_mask": [1, 7],
        "input_ids": [1, 7],
        "token_type_ids": [1, 7]
    }
    if transformer_enabled:
        # Extra arguments forwarded to the onnxruntime transformer optimizer.
        transformer_args = "--model_type bert --num_heads 12"
        opt_config = OptimizationConfig(model_path=model_path,
                                        inputs_spec=inputs_spec,
                                        result_path=result_path,
                                        transformer_enabled=transformer_enabled,
                                        transformer_args=transformer_args)
    else:
        opt_config = OptimizationConfig(model_path=model_path,
                                        inputs_spec=inputs_spec,
                                        result_path=result_path)
    optimize(opt_config)
    assert os.path.exists(result_path)
    shutil.rmtree(result_path)
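
# The fixture-style arguments used by the tests in this file (e.g.
# transformer_enabled, concurrency_num) are presumably supplied by pytest
# parametrization in the project's conftest.py, which is not shown here.
# A minimal sketch of that wiring; the value sets are assumptions, not the
# real suite's configuration:
def pytest_generate_tests(metafunc):
    # Boolean feature toggles are swept over both settings.
    for name in ("transformer_enabled", "openmp_enabled",
                 "quantization_enabled"):
        if name in metafunc.fixturenames:
            metafunc.parametrize(name, [True, False])
    if "concurrency_num" in metafunc.fixturenames:
        metafunc.parametrize("concurrency_num", [1, 2, 4])
    if "providers_list" in metafunc.fixturenames:
        metafunc.parametrize("providers_list", [["cpu"]])
    if "dynamic_batching_size" in metafunc.fixturenames:
        metafunc.parametrize("dynamic_batching_size", [1, 4])
    if "sample_input_data_path" in metafunc.fixturenames:
        metafunc.parametrize("sample_input_data_path", [
            os.path.join(os.path.dirname(__file__), "onnx_mnist",
                         "sample_input_data.npz")
        ])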

def test_optimize_openmp(openmp_enabled):
    result_path = "openmp_opt"
    opt_config = OptimizationConfig(model_path=ONNX_MODEL_PATH,
                                    openmp_enabled=openmp_enabled,
                                    result_path=result_path)
    optimize(opt_config)
    shutil.rmtree(result_path)

def test_optimize_concurrency(concurrency_num):
    result_path = "concurrency_opt_{}".format(concurrency_num)
    opt_config = OptimizationConfig(model_path=ONNX_MODEL_PATH,
                                    concurrency_num=concurrency_num,
                                    result_path=result_path)
    optimize(opt_config)
    shutil.rmtree(result_path)

def test_optimization_config_model_path_2():
    # A non-ONNX file must be rejected. pytest.raises (rather than a bare
    # try/except) ensures the test fails if no error is raised at all.
    model_path = os.path.join(os.path.dirname(__file__), "other_models",
                              "pytorch_model.pth")
    with pytest.raises(ValueError) as e:
        OptimizationConfig(model_path=model_path)
    assert str(e.value) == "File ends with .onnx is required for ONNX model"

def test_optimization_config_model_path_1():
    # A nonexistent model file must be rejected with a clear error.
    model_path = "model_not_exist.onnx"
    with pytest.raises(FileNotFoundError) as e:
        OptimizationConfig(model_path=model_path)
    assert str(e.value) == "Can't find the model file, please check the model_path"

def test_optimize_sample_data(sample_input_data_path):
    result_path = "sample_data_opt"
    opt_config = OptimizationConfig(
        model_path=ONNX_MODEL_PATH,
        sample_input_data_path=sample_input_data_path,
        result_path=result_path)
    optimize(opt_config)
    shutil.rmtree(result_path)

def test_optimize_providers(providers_list):
    result_path = "ep_opt_{}".format(providers_list)
    opt_config = OptimizationConfig(model_path=ONNX_MODEL_PATH,
                                    providers_list=providers_list,
                                    result_path=result_path)
    optimize(opt_config)
    assert os.path.exists(result_path)
    shutil.rmtree(result_path)

def test_optimization_config_input_data_1():
    model_path = ONNX_MODEL_PATH
    inputs_spec = {"Input3": [1, 1, 28, 28]}
    opt_config = OptimizationConfig(model_path=model_path,
                                    inputs_spec=inputs_spec)
    # Inputs randomly generated from inputs_spec must be accepted by the model.
    input_dict = opt_config.inference_input_dict
    onnx_session = ort.InferenceSession(opt_config.model_path,
                                        providers=DEFAULT_EP)
    output_names = [o.name for o in onnx_session.get_outputs()]
    onnx_session.run(output_names, input_dict)

def test_optimization_config_input_data_2():
    model_path = ONNX_MODEL_PATH
    sample_input_data_path = os.path.join(os.path.dirname(__file__),
                                          "onnx_mnist",
                                          "sample_input_data.npz")
    inputs_spec = {"Input3": [1, 1, 28, 28]}
    opt_config_1 = OptimizationConfig(model_path=model_path,
                                      inputs_spec=inputs_spec)
    input_dict_1 = opt_config_1.inference_input_dict

    # Save the randomly generated input data into sample_input_data.npz.
    generate_npz_files(output_npz_path=sample_input_data_path,
                       name_list=["Input3"],
                       value_list=[input_dict_1.get("Input3")])

    # Test that input data is generated from sample_input_data_path when it is
    # given, and produces the same model outputs as the original inputs.
    opt_config_2 = OptimizationConfig(
        model_path=model_path,
        sample_input_data_path=sample_input_data_path)
    input_dict_2 = opt_config_2.inference_input_dict

    onnx_session = ort.InferenceSession(model_path, providers=DEFAULT_EP)
    output_names = [o.name for o in onnx_session.get_outputs()]
    result_1 = onnx_session.run(output_names, input_dict_1)
    result_2 = onnx_session.run(output_names, input_dict_2)

    assert len(result_1) == len(result_2)
    for r1, r2 in zip(result_1, result_2):
        # Compare element-wise. The original `r1.all() == r2.all()` only
        # compared two scalar booleans and would pass for almost any outputs.
        assert np.array_equal(r1, r2)
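
# generate_npz_files is a test helper whose real implementation lives outside
# this snippet. A minimal sketch, assuming it simply zips the given names and
# arrays into an .npz archive via numpy:
def generate_npz_files(output_npz_path, name_list, value_list):
    # Store each array under its input name so the archive entries can be
    # mapped back to model inputs when the .npz file is loaded.
    np.savez(output_npz_path, **dict(zip(name_list, value_list)))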

def test_optimize_quantization(quantization_enabled):
    model_path = os.path.join(os.path.dirname(__file__), "other_models",
                              "TFBertForQuestionAnswering.onnx")
    result_path = "quantization_opt_{}".format(quantization_enabled)
    inputs_spec = {
        "attention_mask": [1, 7],
        "input_ids": [1, 7],
        "token_type_ids": [1, 7]
    }
    opt_config = OptimizationConfig(model_path=model_path,
                                    inputs_spec=inputs_spec,
                                    quantization_enabled=quantization_enabled,
                                    result_path=result_path)
    optimize(opt_config)
    assert os.path.exists(result_path)
    shutil.rmtree(result_path)

def test_throughput_tuning(dynamic_batching_size):
    result_path = "throughput_tuning_res"
    model_path = os.path.join(os.path.dirname(__file__), "other_models",
                              "TFBertForQuestionAnswering.onnx")
    # -1 marks the batch dimension as dynamic so throughput tuning can vary it.
    opt_config = OptimizationConfig(
        model_path=model_path,
        inputs_spec={
            "attention_mask": [-1, 7],
            "input_ids": [-1, 7],
            "token_type_ids": [-1, 7]
        },
        throughput_tuning_enabled=True,
        max_latency_percentile=0.95,
        max_latency_ms=100,
        threads_num=1,
        dynamic_batching_size=dynamic_batching_size,
        result_path=result_path,
        min_duration_sec=1)
    optimize(opt_config)
    shutil.rmtree(result_path)
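
# For reference, the latency constraint above means: the 95th percentile of
# measured inference latencies must stay at or below max_latency_ms. A tiny
# illustrative sketch of such a percentile check (not the library's internal
# logic; the helper name is hypothetical):
def meets_latency_target(latencies_ms, percentile=0.95, max_latency_ms=100):
    # np.quantile takes q in [0, 1], so percentile can be passed directly.
    return float(np.quantile(latencies_ms, percentile)) <= max_latency_ms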

def test_optimization_config_model_path_3():
    # The config rewrites model_path to point at the optimized model copy
    # placed under result_path.
    model_path = ONNX_MODEL_PATH
    opt_config = OptimizationConfig(model_path=model_path)
    assert opt_config.model_path == os.path.join(opt_config.result_path,
                                                 "optimized_model.onnx")

def test_optimization_config_providers_3():
    # Requesting an unavailable provider must be rejected. This assumes the
    # CUDA execution provider is not available in the test environment.
    with pytest.raises(ValueError) as e:
        OptimizationConfig(model_path=ONNX_MODEL_PATH,
                           providers_list=["cuda"])
    assert str(e.value) == "No providers available for test"

def test_optimization_config_providers_2():
    # Short provider names are normalized to onnxruntime provider names.
    opt_config = OptimizationConfig(model_path=ONNX_MODEL_PATH,
                                    providers_list=["cpu"])
    assert opt_config.providers_list == ["CPUExecutionProvider"]

def test_optimization_config_providers_1():
    # With no providers_list given, every available provider is a candidate.
    opt_config = OptimizationConfig(model_path=ONNX_MODEL_PATH)
    assert sorted(opt_config.providers_list) == sorted(
        ort.get_available_providers())
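
# To run this suite (assuming pytest plus the parametrization sketched above;
# the file name here is hypothetical):
#
#     pytest test_optimization.py -v
#
# Each test writes its artifacts to a throwaway result_path and cleans it up
# afterwards with shutil.rmtree.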