def test_pytorch_model_0_gpu_onnxruntime(self):
    if 'CUDAExecutionProvider' not in onnxruntime.get_available_providers():
        print("skip test_pytorch_model_0_gpu_onnxruntime since no gpu found")
        return

    input = _get_test_model_path('bert_pytorch_0')
    output = 'temp.onnx'
    optimize_by_onnxruntime(input, use_gpu=True, optimized_model_path=output)

    # Load the optimized model, then remove the temporary file.
    model = ModelProto()
    with open(output, "rb") as f:
        model.ParseFromString(f.read())
    os.remove(output)

    # With use_gpu=True, Gelu and the preceding bias Add are fused into FastGelu.
    bert_model = OnnxModel(model)
    expected_node_count = {
        'EmbedLayerNormalization': 1,
        'Attention': 12,
        'SkipLayerNormalization': 24,
        'Gelu': 0,
        'FastGelu': 12,
        'BiasGelu': 0
    }
    self.verify_node_count(bert_model, expected_node_count, 'test_pytorch_model_0_gpu_onnxruntime')
def test_pytorch_model_0_cpu_onnxruntime(self):
    input = BERT_TEST_MODELS['bert_pytorch_0']
    output = 'temp.onnx'
    optimize_by_onnxruntime(input, use_gpu=False, optimized_model_path=output)

    # Load the optimized model, then remove the temporary file.
    model = ModelProto()
    with open(output, "rb") as f:
        model.ParseFromString(f.read())
    os.remove(output)

    # With use_gpu=False, the bias Add and Gelu are fused into BiasGelu rather than FastGelu.
    bert_model = OnnxModel(model)
    expected_node_count = {
        'EmbedLayerNormalization': 1,
        'Attention': 12,
        'SkipLayerNormalization': 24,
        'Gelu': 0,
        'FastGelu': 0,
        'BiasGelu': 12
    }
    self.verify_node_count(bert_model, expected_node_count)
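# A minimal standalone sketch of the pattern the two tests above exercise:
# run onnxruntime's offline graph optimization on a BERT ONNX file, reload
# the optimized graph, and count the operator types to see which fusions
# were applied. The path 'bert_model.onnx' is a placeholder, not a file
# shipped with these tests; the optimize_by_onnxruntime call mirrors the
# ones used above.
def count_fused_ops_example():
    from collections import Counter

    from onnx import ModelProto

    optimize_by_onnxruntime('bert_model.onnx',
                            use_gpu=False,
                            optimized_model_path='bert_model_ort.onnx')

    model = ModelProto()
    with open('bert_model_ort.onnx', "rb") as f:
        model.ParseFromString(f.read())

    # Fused operators like Attention, EmbedLayerNormalization and BiasGelu
    # appearing in the counts indicate that fusion succeeded.
    op_counts = Counter(node.op_type for node in model.graph.node)
    print(op_counts)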
def optimize_onnx_model_by_ort(onnx_model_path, ort_model_path, use_gpu, overwrite, model_fusion_statistics):
    if overwrite or not os.path.exists(ort_model_path):
        from optimizer import optimize_by_onnxruntime, get_fusion_statistics

        # Use onnxruntime to optimize model, which will be saved to *_ort.onnx
        opt_model = optimize_by_onnxruntime(onnx_model_path,
                                            use_gpu=use_gpu,
                                            optimized_model_path=ort_model_path,
                                            opt_level=99)
        model_fusion_statistics[ort_model_path] = get_fusion_statistics(ort_model_path)
    else:
        logger.info(f"Skip optimization since model exists: {ort_model_path}")
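# A hedged usage sketch for optimize_onnx_model_by_ort. The model paths are
# placeholders; the fusion statistics are written into the caller-supplied
# dict as a side effect, keyed by the optimized model path, as in the
# function above.
def run_ort_optimization_example():
    model_fusion_statistics = {}
    optimize_onnx_model_by_ort(onnx_model_path='bert_model.onnx',
                               ort_model_path='bert_model_ort.onnx',
                               use_gpu=False,
                               overwrite=True,
                               model_fusion_statistics=model_fusion_statistics)
    for path, stats in model_fusion_statistics.items():
        logger.info(f"{path}: {stats}")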