def test_pytorch_model_0_gpu_onnxruntime(self):
    if 'CUDAExecutionProvider' not in onnxruntime.get_available_providers():
        print("skip test_pytorch_model_0_gpu_onnxruntime since no gpu found")
        return

    input = BERT_TEST_MODELS['bert_pytorch_0']
    output = 'temp.onnx'
    optimize_by_onnxruntime(input, use_gpu=True, optimized_model_path=output)

    # Load the optimized model, then remove the temporary file.
    model = ModelProto()
    with open(output, "rb") as f:
        model.ParseFromString(f.read())
    os.remove(output)
    bert_model = OnnxModel(model)

    # Expected fused operators after GPU optimization.
    expected_node_count = {
        'EmbedLayerNormalization': 1,
        'Attention': 12,
        'SkipLayerNormalization': 24,
        'Gelu': 0,
        'FastGelu': 12,
        'BiasGelu': 0
    }
    self.verify_node_count(bert_model, expected_node_count)
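# Note: a minimal sketch of what optimize_by_onnxruntime is assumed to do in the tests
# above -- let ONNX Runtime apply its graph optimizations and serialize the optimized
# model to disk. The real helper's option handling may differ; this is an illustration,
# not its actual implementation.
def _optimize_by_onnxruntime_sketch(onnx_model_path, use_gpu=False, optimized_model_path=None):
    import onnxruntime

    sess_options = onnxruntime.SessionOptions()
    # Enable all graph optimizations, including operator fusions.
    sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
    # Ask ONNX Runtime to write the optimized graph to this path.
    sess_options.optimized_model_filepath = optimized_model_path
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if use_gpu else ['CPUExecutionProvider']
    # Creating the session triggers optimization and saves the optimized model.
    onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=providers)
    return optimized_model_path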
def test_pytorch_model_0_cpu_onnxruntime(self):
    input = BERT_TEST_MODELS['bert_pytorch_0']
    output = 'temp.onnx'
    optimize_by_onnxruntime(input, use_gpu=False, optimized_model_path=output)

    # Load the optimized model, then remove the temporary file.
    model = ModelProto()
    with open(output, "rb") as f:
        model.ParseFromString(f.read())
    os.remove(output)
    bert_model = OnnxModel(model)

    # Expected fused operators after CPU optimization.
    expected_node_count = {
        'EmbedLayerNormalization': 1,
        'Attention': 12,
        'SkipLayerNormalization': 24,
        'Gelu': 0,
        'FastGelu': 0,
        'BiasGelu': 12
    }
    self.verify_node_count(bert_model, expected_node_count)
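# Note: a minimal sketch of the verify_node_count helper used by both tests above --
# count nodes per operator type in the optimized graph and compare against the expected
# counts. The actual helper may use OnnxModel utilities instead of touching the graph
# protobuf directly.
def _verify_node_count_sketch(self, bert_model, expected_node_count):
    from collections import Counter

    # OnnxModel wraps an onnx ModelProto; count op types directly on its graph.
    actual = Counter(node.op_type for node in bert_model.model.graph.node)
    for op_type, expected in expected_node_count.items():
        self.assertEqual(actual[op_type], expected,
                         f"unexpected number of {op_type} nodes: {actual[op_type]} != {expected}")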
def main():
    args = parse_arguments()

    if args.optimized_model is None:
        optimized_model = optimize_by_onnxruntime(args.baseline_model, args.use_gpu)
    else:
        optimized_model = args.optimized_model

    if args.use_gpu and ('CUDAExecutionProvider' not in onnxruntime.get_available_providers()):
        print(
            "Please install the onnxruntime-gpu package instead of onnxruntime, and use a machine with a GPU for GPU testing."
        )

    if args.output_dir is not None:
        # Create the output directory if it does not exist.
        path = Path(args.output_dir)
        path.mkdir(parents=True, exist_ok=True)

    run_test(args.baseline_model, optimized_model, args.output_dir, args.batch_size,
             args.sequence_length, args.use_gpu, args.samples, args.seed,
             not args.no_openmp, args.verbose, args.rtol, args.atol)
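# Note: a minimal sketch of the command-line interface that main() expects, inferred
# from the attributes read off args above. The defaults and option types here are
# assumptions for illustration, not the real parse_arguments definitions.
def _parse_arguments_sketch():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--baseline_model', required=True, type=str)
    parser.add_argument('--optimized_model', required=False, type=str, default=None)
    parser.add_argument('--output_dir', required=False, type=str, default=None)
    parser.add_argument('--batch_size', required=False, type=int, default=1)
    parser.add_argument('--sequence_length', required=False, type=int, default=128)
    parser.add_argument('--use_gpu', required=False, action='store_true')
    parser.add_argument('--samples', required=False, type=int, default=100)
    parser.add_argument('--seed', required=False, type=int, default=3)
    parser.add_argument('--no_openmp', required=False, action='store_true')
    parser.add_argument('--verbose', required=False, action='store_true')
    parser.add_argument('--rtol', required=False, type=float, default=1e-3)
    parser.add_argument('--atol', required=False, type=float, default=1e-4)
    return parser.parse_args()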