from pathlib import Path


def optimize(onnx_model_path: Path) -> Path:
    """
    Load the model at the given path and let onnxruntime apply every graph
    optimization it can, writing the transformed graph back to disk.

    Args:
        onnx_model_path: filepath where the model binary description is stored

    Returns:
        Path where the optimized model binary description has been saved
    """
    from onnxruntime import InferenceSession, SessionOptions

    # Generate the output name by appending the suffix "-optimized".
    opt_model_path = generate_identified_filename(onnx_model_path, "-optimized")
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()

    # Creating the session triggers the optimizations and serializes the
    # optimized graph to `optimized_model_filepath`.
    _ = InferenceSession(onnx_model_path.as_posix(), sess_option)

    print(f"Optimized model has been written at {opt_model_path}: \N{heavy check mark}")
    print("/!\\ Optimized model contains hardware-specific operators which might not be portable. /!\\")

    return opt_model_path
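# `generate_identified_filename` is called above but not defined in this
# snippet. A minimal sketch, assuming it only inserts the identifier between
# the file stem and its extension (model.onnx -> model-optimized.onnx):
def generate_identified_filename(onnx_model_path: Path, identifier: str) -> Path:
    return onnx_model_path.parent.joinpath(
        onnx_model_path.stem + identifier).with_suffix(onnx_model_path.suffix)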
from pathlib import Path

from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions


def optimize_model(model_path: Path, opt_model_path: Path):
    """
    Generate a model with graph optimizations (constant folding, etc.) applied.

    :param model_path: path to the original onnx model
    :param opt_model_path: path where the optimized onnx model is written
    """
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()
    sess_option.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
    # Building the session runs the optimizations and writes the result to
    # disk; nothing is returned.
    _ = InferenceSession(model_path.as_posix(), sess_option,
                         providers=["CPUExecutionProvider"])
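# Usage sketch; "model.onnx" and "model-opt.onnx" are hypothetical paths used
# for illustration only:
optimize_model(Path("model.onnx"), Path("model-opt.onnx"))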
from pathlib import Path

import onnx
from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions


def optimize_model(model_path: Path):
    """
    Generate a model with graph optimizations (constant folding, etc.) applied.

    :param model_path: path to the original onnx model
    :return: the optimized onnx model, loaded back as an onnx ModelProto
    """
    opt_model_path = generate_identified_filename(model_path, "-opt")
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()
    sess_option.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
    # Building the session writes the optimized graph to opt_model_path.
    _ = InferenceSession(model_path.as_posix(), sess_option)
    optimized_model = onnx.load(opt_model_path.as_posix())
    return optimized_model
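# Usage sketch for the variant above; "model.onnx" is a hypothetical path.
# The returned value is an onnx ModelProto, so the optimized graph can be
# inspected directly:
optimized = optimize_model(Path("model.onnx"))
print(len(optimized.graph.node), "nodes after ORT_ENABLE_BASIC")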
import os
from collections import OrderedDict

import onnx
from onnxruntime import InferenceSession, SessionOptions
from onnxruntime.capi._pybind_state import (
    OrtValue as C_OrtValue, SessionIOBinding)

provider = 'CPUExecutionProvider'
print(f"provider = {provider!r}")

####################################
# We load the graph.

with open(filename, 'rb') as f:
    onx = onnx.load(f)

###############################
# Creation of the session.

so = SessionOptions()
so.enable_profiling = True
so.optimized_model_filepath = os.path.split(filename)[-1] + ".optimized.onnx"
sess = InferenceSession(onx.SerializeToString(), so, providers=[provider])
bind = SessionIOBinding(sess._sess)
print("graph_optimization_level:", so.graph_optimization_level)

#####################################
# Create random data.

feed = random_feed(sess, batch)

#####################################
# Move the data to CPU or GPU.

feed_ort_value = OrderedDict(
    (name, (C_OrtValue.ortvalue_from_numpy(v, ort_device), v.dtype))
    for name, v in feed.items())

outputs = [o.name for o in sess.get_outputs()]
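#####################################
# `random_feed` is used above but not defined in this snippet. A minimal
# sketch, assuming it builds one random numpy array per graph input and
# substitutes `batch` for any symbolic dimension:

import numpy as np


def random_feed(sess, batch):
    """Return {input_name: random array} for every input of the session."""
    feed = {}
    for inp in sess.get_inputs():
        shape = tuple(d if isinstance(d, int) else batch for d in inp.shape)
        # inp.type is a string such as 'tensor(float)' or 'tensor(int64)'.
        if "float" in inp.type:
            feed[inp.name] = np.random.rand(*shape).astype(np.float32)
        else:
            feed[inp.name] = np.random.randint(0, 2, size=shape).astype(np.int64)
    return feed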
###############################
# Creation of one session per graph optimization level.

from tqdm import tqdm  # progress bar over the four optimization levels
from onnxruntime import GraphOptimizationLevel

data = []
files = []
legend = []

for graph_opt, name_opt in tqdm([
        (GraphOptimizationLevel.ORT_DISABLE_ALL, "ORT_DISABLE_ALL"),
        (GraphOptimizationLevel.ORT_ENABLE_BASIC, "ORT_ENABLE_BASIC"),
        (GraphOptimizationLevel.ORT_ENABLE_EXTENDED, "ORT_ENABLE_EXTENDED"),
        (GraphOptimizationLevel.ORT_ENABLE_ALL, "ORT_ENABLE_ALL")]):

    so = SessionOptions()
    so.graph_optimization_level = graph_opt
    so.optimized_model_filepath = (
        os.path.split(filename)[-1] + f".optimized.{name_opt}.onnx")
    files.append(so.optimized_model_filepath)
    legend.append(name_opt)

    sess = InferenceSession(onx.SerializeToString(), so, providers=[provider])
    bind = SessionIOBinding(sess._sess)

    # Create random data.
    feed = random_feed(sess, batch)

    # Move the data to CPU or GPU.
    feed_ort_value = OrderedDict(
        (name, (C_OrtValue.ortvalue_from_numpy(v, ort_device), v.dtype))
        for name, v in feed.items())

    outputs = [o.name for o in sess.get_outputs()]
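#####################################
# To compare how aggressively each level rewrote the graph, one can reload
# the files saved above and count the remaining nodes (a sketch reusing
# `files` and `legend` from the loop):

for name_opt, path in zip(legend, files):
    model = onnx.load(path)
    print(f"{name_opt:>20}: {len(model.graph.node)} nodes")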