def parse_arguments():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-m",
        "--models",
        required=False,
        nargs="+",
        type=str,
        default=["bert-base-cased", "roberta-base", "gpt2"],
        choices=list(MODELS.keys()),
        help="Pre-trained models in the list: " + ", ".join(MODELS.keys()),
    )

    parser.add_argument(
        "--model_source",
        required=False,
        type=str,
        default="pt",
        choices=["pt", "tf"],
        help="Export onnx from pt or tf",
    )

    parser.add_argument(
        "--model_class",
        required=False,
        type=str,
        default=None,
        choices=list(MODEL_CLASSES),
        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES),
    )

    parser.add_argument(
        "-e",
        "--engines",
        required=False,
        nargs="+",
        type=str,
        default=["onnxruntime"],
        choices=["onnxruntime", "torch", "torchscript", "tensorflow"],
        help="Engines to benchmark",
    )

    parser.add_argument(
        "-c",
        "--cache_dir",
        required=False,
        type=str,
        default=os.path.join(".", "cache_models"),
        help="Directory to cache pre-trained models",
    )

    parser.add_argument(
        "--onnx_dir",
        required=False,
        type=str,
        default=os.path.join(".", "onnx_models"),
        help="Directory to store onnx models",
    )

    parser.add_argument("-g", "--use_gpu", required=False, action="store_true", help="Run on GPU device")

    parser.add_argument(
        "--provider",
        required=False,
        type=str,
        default=None,
        help="Execution provider to use",
    )

    parser.add_argument(
        "-p",
        "--precision",
        type=Precision,
        default=Precision.FLOAT32,
        choices=list(Precision),
        help="Precision of model to run. fp32 for full precision, fp16 for half precision, and int8 for quantization",
    )

    parser.add_argument("--verbose", required=False, action="store_true", help="Print more information")

    parser.add_argument(
        "--overwrite",
        required=False,
        action="store_true",
        help="Overwrite existing models",
    )

    parser.add_argument(
        "-o",
        "--optimizer_info",
        type=OptimizerInfo,
        default=OptimizerInfo.BYSCRIPT,
        choices=list(OptimizerInfo),
        help="Optimizer info: use optimizer.py to optimize the onnx model as default. Can also choose from by_ort and no_opt",
    )

    parser.add_argument(
        "-v",
        "--validate_onnx",
        required=False,
        action="store_true",
        help="Validate ONNX model",
    )

    parser.add_argument(
        "-f",
        "--fusion_csv",
        required=False,
        default=None,
        help="CSV file for saving summary results of graph optimization.",
    )

    parser.add_argument(
        "-d",
        "--detail_csv",
        required=False,
        default=None,
        help="CSV file for saving detail results.",
    )

    parser.add_argument(
        "-r",
        "--result_csv",
        required=False,
        default=None,
        help="CSV file for saving summary results.",
    )

    parser.add_argument(
        "-i",
        "--input_counts",
        required=False,
        nargs="+",
        default=[1],
        type=int,
        choices=[1, 2, 3],
        help="Number of ONNX model inputs. Please use 1 for fair comparison with Torch or TorchScript.",
    )

    parser.add_argument(
        "-t",
        "--test_times",
        required=False,
        default=100,
        type=int,
        help="Number of repeat times to get average inference latency.",
    )

    parser.add_argument("-b", "--batch_sizes", nargs="+", type=int, default=[1])

    parser.add_argument(
        "-s",
        "--sequence_lengths",
        nargs="+",
        type=int,
        default=[4, 8, 16, 32, 64, 128, 256],
    )

    parser.add_argument(
        "--disable_ort_io_binding",
        required=False,
        action="store_true",
        help="Disable running ONNX Runtime with bound inputs and outputs.",
    )
    parser.set_defaults(disable_ort_io_binding=False)

    parser.add_argument(
        "-n",
        "--num_threads",
        required=False,
        nargs="+",
        type=int,
        default=[0],
        help="Threads to use",
    )

    parser.add_argument(
        "--force_num_layers",
        required=False,
        type=int,
        default=None,
        help="Manually set the model's layer number",
    )

    FusionOptions.add_arguments(parser)

    args = parser.parse_args()
    return args
def _parse_arguments():
    parser = argparse.ArgumentParser(
        description='Graph optimization tool for ONNX Runtime. It transforms an ONNX graph to use optimized operators for Transformer models.')

    parser.add_argument('--input', required=True, type=str, help="input onnx model path")

    parser.add_argument('--output', required=True, type=str, help="optimized onnx model path")

    parser.add_argument('--model_type',
                        required=False,
                        type=str.lower,
                        default="bert",
                        choices=list(MODEL_TYPES.keys()),
                        help="Model type selected in the list: " + ", ".join(MODEL_TYPES.keys()))

    parser.add_argument(
        '--num_heads',
        required=False,
        type=int,
        default=0,
        help="number of attention heads, e.g. 12 for bert-base and 16 for bert-large. "
        "Default is 0 to detect automatically for BERT. For other model types, this parameter needs to be set correctly.")

    parser.add_argument(
        '--hidden_size',
        required=False,
        type=int,
        default=0,
        help="hidden size, e.g. 768 for bert-base and 1024 for bert-large. "
        "Default is 0 to detect automatically for BERT. For other model types, this parameter needs to be set correctly.")

    parser.add_argument(
        '--input_int32',
        required=False,
        action='store_true',
        help="Use int32 (instead of int64) inputs. It can avoid an unnecessary data cast when EmbedLayerNormalization is fused for BERT.")
    parser.set_defaults(input_int32=False)

    parser.add_argument(
        '--float16',
        required=False,
        action='store_true',
        help="Convert all weights and nodes from float32 to float16. It has a potential loss in precision compared to mixed precision conversion (see convert_float_to_float16).")
    parser.set_defaults(float16=False)

    FusionOptions.add_arguments(parser)

    parser.add_argument('--verbose', required=False, action='store_true', help="show debug information.")
    parser.set_defaults(verbose=False)

    parser.add_argument(
        '--use_gpu',
        required=False,
        action='store_true',
        help="Use GPU for inference. Set this flag if your model is intended for GPU when opt_level > 1.")
    parser.set_defaults(use_gpu=False)

    parser.add_argument('--only_onnxruntime',
                        required=False,
                        action='store_true',
                        help="optimize by onnxruntime only, with no graph fusion in Python")
    parser.set_defaults(only_onnxruntime=False)

    parser.add_argument(
        '--opt_level',
        required=False,
        type=int,
        choices=[0, 1, 2, 99],
        default=None,
        help="onnxruntime optimization level. 0 will disable onnxruntime graph optimization. "
        "The recommended value is 1. When opt_level > 1 is used, the optimized model for GPU might not run on CPU. "
        "Levels 2 and 99 are intended for --only_onnxruntime.")

    parser.add_argument('--use_external_data_format',
                        required=False,
                        action='store_true',
                        help="use external data format to store large models (>2GB)")
    parser.set_defaults(use_external_data_format=False)

    args = parser.parse_args()
    return args
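# --- Example (not part of the script above) --------------------------------
# A small, self-contained illustration of the `type=str.lower` idiom used by
# --model_type: argparse applies the converter *before* the `choices` check,
# so the flag becomes case-insensitive while the canonical choice set stays
# lowercase. _DEMO_MODEL_TYPES is an assumed stand-in for the real
# MODEL_TYPES mapping.
import argparse

_DEMO_MODEL_TYPES = {"bert": None, "gpt2": None, "bart": None}  # values elided


def _demo_case_insensitive_choice():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_type",
        type=str.lower,
        default="bert",
        choices=list(_DEMO_MODEL_TYPES.keys()),
    )
    args = parser.parse_args(["--model_type", "GPT2"])
    assert args.model_type == "gpt2"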
# Variant of _parse_arguments with fixed num_heads/hidden_size defaults instead of automatic detection.
def _parse_arguments():
    parser = argparse.ArgumentParser(
        description='Graph optimization tool for ONNX Runtime. It transforms an ONNX graph to use optimized operators for Transformer models.')

    parser.add_argument('--input', required=True, type=str, help="input onnx model path")

    parser.add_argument('--output', required=True, type=str, help="optimized onnx model path")

    parser.add_argument('--model_type',
                        required=False,
                        type=str.lower,
                        default="bert",
                        choices=list(MODEL_TYPES.keys()),
                        help="Model type selected in the list: " + ", ".join(MODEL_TYPES.keys()))

    parser.add_argument(
        '--num_heads',
        required=False,
        type=int,
        default=12,
        help="number of attention heads. 12 for bert-base model and 16 for bert-large. For BERT, set it to 0 to detect automatically.")

    parser.add_argument(
        '--hidden_size',
        required=False,
        type=int,
        default=768,
        help="bert model hidden size. 768 for bert-base model and 1024 for bert-large. For BERT, set it to 0 to detect automatically.")

    parser.add_argument('--input_int32',
                        required=False,
                        action='store_true',
                        help="Use int32 (instead of int64) tensors as input to avoid unnecessary data casts.")
    parser.set_defaults(input_int32=False)

    parser.add_argument(
        '--float16',
        required=False,
        action='store_true',
        help="If your target device is a V100 or T4 GPU, try this to convert float32 to float16 for best performance (with potential loss in precision).")
    parser.set_defaults(float16=False)

    FusionOptions.add_arguments(parser)

    parser.add_argument('--verbose', required=False, action='store_true', help="show debug information.")
    parser.set_defaults(verbose=False)

    parser.add_argument('--use_gpu',
                        required=False,
                        action='store_true',
                        help="use GPU for inference. Set this flag if your model is intended for GPU and opt_level > 1.")
    parser.set_defaults(use_gpu=False)

    parser.add_argument('--only_onnxruntime',
                        required=False,
                        action='store_true',
                        help="optimize by onnxruntime only, with no graph fusion in Python")
    parser.set_defaults(only_onnxruntime=False)

    parser.add_argument(
        '--opt_level',
        required=False,
        type=int,
        choices=[0, 1, 2, 99],
        default=None,
        help="onnxruntime optimization level. 0 will disable onnxruntime graph optimization. Graph fusion in Python is not impacted by this setting.")

    parser.add_argument('--use_external_data_format',
                        required=False,
                        action='store_true',
                        help="use external data format")
    parser.set_defaults(use_external_data_format=False)

    args = parser.parse_args()
    return args
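# --- Example (not part of the scripts above) -------------------------------
# A hedged sketch of how the parsed arguments are typically forwarded to
# optimize_model(). This is an assumed, simplified wiring for orientation,
# not the script's actual entry point (which also configures logging and
# handles flags such as --input_int32 and --verbose).
from onnxruntime.transformers import optimizer
from onnxruntime.transformers.fusion_options import FusionOptions


def _demo_main():
    args = _parse_arguments()

    # Collect the fusion flags registered via FusionOptions.add_arguments(parser).
    optimization_options = FusionOptions.parse(args)

    opt_model = optimizer.optimize_model(
        args.input,
        model_type=args.model_type,
        num_heads=args.num_heads,
        hidden_size=args.hidden_size,
        optimization_options=optimization_options,
        opt_level=args.opt_level,
        use_gpu=args.use_gpu,
        only_onnxruntime=args.only_onnxruntime,
    )

    if args.float16:
        # Whole-model fp32 -> fp16 cast; see the --float16 help text above.
        opt_model.convert_float_to_float16()

    opt_model.save_model_to_file(args.output, use_external_data_format=args.use_external_data_format)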