def test_optimize_concurrency(concurrency_num):
    """Run ``optimize`` on the ONNX model with a given ``concurrency_num``.

    Args:
        concurrency_num: Value forwarded to ``OptimizationConfig`` and
            embedded in the result directory name. Presumably supplied by
            pytest parametrization declared outside this block.
    """
    result_path = "concurrency_opt_{}".format(concurrency_num)
    opt_config = OptimizationConfig(
        model_path=ONNX_MODEL_PATH,
        concurrency_num=concurrency_num,
        result_path=result_path,
    )
    try:
        optimize(opt_config)
        # optimize() is expected to have produced the result directory.
        assert os.path.exists(result_path)
    finally:
        # Clean up even when optimize() or the assertion fails, so a failed
        # run does not leave a stale directory behind for later runs. The
        # existence guard keeps a missing directory from masking the
        # original failure with a FileNotFoundError.
        if os.path.exists(result_path):
            shutil.rmtree(result_path)
def test_optimize_openmp(openmp_enabled):
    """Run ``optimize`` on the ONNX model with ``openmp_enabled`` set.

    Args:
        openmp_enabled: Value forwarded to ``OptimizationConfig``.
            Presumably supplied by pytest parametrization declared outside
            this block.
    """
    # The result directory name is fixed, so every invocation shares it;
    # reliable cleanup matters to keep runs independent.
    result_path = "openmp_opt"
    opt_config = OptimizationConfig(
        model_path=ONNX_MODEL_PATH,
        openmp_enabled=openmp_enabled,
        result_path=result_path,
    )
    try:
        optimize(opt_config)
        # optimize() is expected to have produced the result directory.
        assert os.path.exists(result_path)
    finally:
        # Remove the output even on failure; skip when nothing was created
        # so the original error is the one that surfaces.
        if os.path.exists(result_path):
            shutil.rmtree(result_path)
def test_optimize_transformer(transformer_enabled):
    """Run ``optimize`` on the BERT QA model with or without transformer options.

    When ``transformer_enabled`` is truthy, ``transformer_enabled`` and
    ``transformer_args`` are passed to ``OptimizationConfig``; otherwise the
    config is built without them. In both cases the result directory must
    exist afterwards.

    Args:
        transformer_enabled: Flag selecting the transformer code path; also
            embedded in the result directory name. Presumably supplied by
            pytest parametrization declared outside this block.
    """
    model_path = os.path.join(
        os.path.dirname(__file__),
        "other_models",
        "TFBertForQuestionAnswering.onnx",
    )
    result_path = "transformer_opt_{}".format(transformer_enabled)

    # Arguments shared by both variants of the test.
    config_kwargs = {
        "model_path": model_path,
        "inputs_spec": {
            "attention_mask": [1, 7],
            "input_ids": [1, 7],
            "token_type_ids": [1, 7],
        },
        "result_path": result_path,
    }
    if transformer_enabled:
        # Only pass the transformer options when enabled, so the disabled
        # case keeps relying on OptimizationConfig's own defaults.
        config_kwargs["transformer_enabled"] = transformer_enabled
        config_kwargs["transformer_args"] = "--model_type bert --num_heads 12"

    opt_config = OptimizationConfig(**config_kwargs)
    try:
        optimize(opt_config)
        assert os.path.exists(result_path)
    finally:
        # Remove the output even when optimize() or the assertion fails;
        # skip when nothing was created so the original error surfaces.
        if os.path.exists(result_path):
            shutil.rmtree(result_path)
def test_optimize_sample_data(sample_input_data_path):
    """Run ``optimize`` on the ONNX model using a sample input data file.

    Args:
        sample_input_data_path: Path forwarded to ``OptimizationConfig`` as
            ``sample_input_data_path``. Presumably supplied by pytest
            parametrization declared outside this block.
    """
    result_path = "sample_data_opt"
    opt_config = OptimizationConfig(
        model_path=ONNX_MODEL_PATH,
        sample_input_data_path=sample_input_data_path,
        result_path=result_path,
    )
    try:
        optimize(opt_config)
        # optimize() is expected to have produced the result directory.
        assert os.path.exists(result_path)
    finally:
        # Remove the output even on failure; the fixed directory name would
        # otherwise leak into subsequent runs.
        if os.path.exists(result_path):
            shutil.rmtree(result_path)
def test_optimize_providers(providers_list):
    """Run ``optimize`` on the ONNX model restricted to ``providers_list``.

    Args:
        providers_list: Value forwarded to ``OptimizationConfig`` as
            ``providers_list``; its string form is embedded in the result
            directory name. Presumably supplied by pytest parametrization
            declared outside this block.
    """
    result_path = "ep_opt_{}".format(providers_list)
    opt_config = OptimizationConfig(
        model_path=ONNX_MODEL_PATH,
        providers_list=providers_list,
        result_path=result_path,
    )
    try:
        optimize(opt_config)
        assert os.path.exists(result_path)
    finally:
        # Remove the output even when optimize() or the assertion fails;
        # skip when nothing was created so the original error surfaces.
        if os.path.exists(result_path):
            shutil.rmtree(result_path)
def test_optimize_quantization(quantization_enabled):
    """Run ``optimize`` on the BERT QA model with ``quantization_enabled`` set.

    Args:
        quantization_enabled: Value forwarded to ``OptimizationConfig``; also
            embedded in the result directory name. Presumably supplied by
            pytest parametrization declared outside this block.
    """
    model_path = os.path.join(
        os.path.dirname(__file__),
        "other_models",
        "TFBertForQuestionAnswering.onnx",
    )
    result_path = "quantization_opt_{}".format(quantization_enabled)
    inputs_spec = {
        "attention_mask": [1, 7],
        "input_ids": [1, 7],
        "token_type_ids": [1, 7],
    }
    opt_config = OptimizationConfig(
        model_path=model_path,
        inputs_spec=inputs_spec,
        quantization_enabled=quantization_enabled,
        result_path=result_path,
    )
    try:
        optimize(opt_config)
        assert os.path.exists(result_path)
    finally:
        # Remove the output even when optimize() or the assertion fails;
        # skip when nothing was created so the original error surfaces.
        if os.path.exists(result_path):
            shutil.rmtree(result_path)
def test_throughput_tuning(dynamic_batching_size):
    """Run ``optimize`` on the BERT QA model with throughput tuning enabled.

    Args:
        dynamic_batching_size: Value forwarded to ``OptimizationConfig`` as
            ``dynamic_batching_size``. Presumably supplied by pytest
            parametrization declared outside this block.
    """
    result_path = "throughput_tuning_res"
    model_path = os.path.join(
        os.path.dirname(__file__),
        "other_models",
        "TFBertForQuestionAnswering.onnx",
    )
    opt_config = OptimizationConfig(
        model_path=model_path,
        # The leading dimension is -1 here, unlike the fixed [1, 7] shapes
        # used by the other BERT tests in this file.
        inputs_spec={
            "attention_mask": [-1, 7],
            "input_ids": [-1, 7],
            "token_type_ids": [-1, 7],
        },
        throughput_tuning_enabled=True,
        max_latency_percentile=0.95,
        max_latency_ms=100,
        threads_num=1,
        dynamic_batching_size=dynamic_batching_size,
        result_path=result_path,
        min_duration_sec=1,
    )
    try:
        optimize(opt_config)
        # optimize() is expected to have produced the result directory.
        assert os.path.exists(result_path)
    finally:
        # Remove the output even on failure; the fixed directory name would
        # otherwise leak into subsequent runs.
        if os.path.exists(result_path):
            shutil.rmtree(result_path)