def pytorch_constant_folding(m):
    """Constant folding needed by Pytorch exported models.

    It should be done before using onnx optimizers since the dynamic shape
    structure may affect the optimizations.

    :param m: the original model input
    :return: the new model after preprocessing
    """
    logging.info("Working on Pytorch constant folding.")
    replacing.replace_shape_with_constant(m.graph)

    # constant folding
    m = modhelper.inference_shapes(m)
    while constant_folding.constant_folding(m.graph):
        logging.debug("After constant folding jobs.")
        other.topological_sort(m.graph)
        while len(m.graph.value_info) != 0:
            m.graph.value_info.pop()
        m = modhelper.inference_shapes(m)
        replacing.replace_shape_with_constant(m.graph)
    other.topological_sort(m.graph)

    m = torch_pattern_match(m)
    m = optimizer.optimize(m, ['eliminate_deadend'])
    return m
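# A minimal usage sketch for pytorch_constant_folding above. Assumptions: the
# helper modules (replacing, constant_folding, modhelper, other, optimizer)
# come from the same converter package, and the input path is hypothetical.
def _example_pytorch_constant_folding():
    import onnx
    m = onnx.load("exported_from_pytorch.onnx")  # hypothetical input file
    m = pytorch_constant_folding(m)              # fold Shape-driven constants
    onnx.save(m, "exported_from_pytorch.folded.onnx")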
def create_svm(m, optimize_model):
    batch_size = 1
    model_name = f"svm{m}"
    with tf.Session() as sess:
        x = tf.placeholder(tf.float32, shape=(m,), name='x')
        y = tf.placeholder(tf.float32, shape=(1,), name='y')
        w = tf.placeholder(tf.float32, shape=(m,), name='W')
        input_shapes = {"x:0": x.shape, "W:0": w.shape, "y:0": y.shape}
        mu = tf.constant(1, dtype=tf.float32, name="mu")
        h = tf.reduce_sum(w * x)
        c = y * h
        ny = 0 - y
        p = tf.cast((c > y), dtype=ny.dtype) * ny
        g = p * x
        w = tf.subtract(w, mu * g, name="W_out")
        sess.run(tf.initialize_all_variables())
        input_names = ['x:0', 'y:0']
        output_names = ['W_out:0']
        onnx_graph = tf2onnx.tfonnx.process_tf_graph(
            sess.graph, input_names=input_names, output_names=output_names)
        model_proto = onnx_graph.make_model(model_name)
        model_proto = optimizer.optimize(model_proto, ['eliminate_identity'])
        if optimize_model:
            model_proto, check = simplify(model_proto, input_shapes=input_shapes)
            assert check
        with open(f"./{model_name}.onnx", "wb") as f:
            f.write(model_proto.SerializeToString())
def make_model(self, graph_doc, optimize=False, graph_name="tf2onnx", **kwargs):
    """
    Create final ModelProto for onnx from internal graph.
    Args:
        graph_doc: text for the doc string of the model
        optimize: optimize graph via onnx
    """
    graph = self.make_graph(graph_doc, graph_name)
    if "producer_name" not in kwargs:
        # update rather than replace, so other caller-supplied kwargs survive
        kwargs["producer_name"] = "tf2onnx"
        kwargs["producer_version"] = __version__
    if "opset_imports" not in kwargs:
        opsets = []
        imp = OperatorSetIdProto()
        imp.version = self._opset
        opsets.append(imp)
        if self._extra_opset is not None:
            opsets.extend(self._extra_opset)
        kwargs["opset_imports"] = opsets
    model_proto = helper.make_model(graph, **kwargs)

    # optimize the model proto.
    # TODO: this is disabled by default because of bugs in fuse_consecutive_transposes
    if optimize:
        model_proto = optimizer.optimize(model_proto)
    return model_proto
def create_logistic(m, optimize_model):
    batch_size = 1
    model_name = f"logistic{m}"
    with tf.Session() as sess:
        x = tf.placeholder(tf.float32, shape=(m,), name='x')
        y = tf.placeholder(tf.float32, shape=(1,), name='y')
        w = tf.placeholder(tf.float32, shape=(m,), name='W')
        input_shapes = {"x:0": x.shape, "W:0": w.shape, "y:0": y.shape}
        mu = tf.constant(1, dtype=tf.float32, name="mu")
        h = tf.reduce_sum(tf.multiply(w, x))
        h = tf.math.sigmoid(h)
        d = tf.subtract(h, y)
        g = tf.multiply(d, x)
        g = tf.multiply(mu, g)
        w = tf.subtract(w, g, name='w_out')
        input_names = ['x:0', 'y:0']
        output_names = ['w_out:0']
        onnx_graph = tf2onnx.tfonnx.process_tf_graph(
            sess.graph, input_names=input_names, output_names=output_names)
        model_proto = onnx_graph.make_model(model_name)
        model_proto = optimizer.optimize(model_proto, ['eliminate_identity'])
        if optimize_model:
            model_proto, check = simplify(model_proto, input_shapes=input_shapes)
            assert check
        with open(f"./{model_name}.onnx", "wb") as f:
            f.write(model_proto.SerializeToString())
def main(input_file: str, output_file: str):
    import pickle

    import onnx
    from onnx import optimizer
    from skl2onnx import convert_sklearn
    from skl2onnx.common.data_types import FloatTensorType

    print("Loading model...")
    with open(input_file, mode="rb") as f:
        model = pickle.load(f)
    initial_type = [("float_input", FloatTensorType([400]))]
    onnx_model = convert_sklearn(model, initial_types=initial_type)

    print("Validating model... ", end="")
    onnx.checker.check_model(onnx_model)
    print("Pass.")

    print("Optimizing model...")
    passes = [
        "extract_constant_to_initializer",
        "eliminate_unused_initializer"
    ]
    optimized_model = optimizer.optimize(onnx_model, passes)
    onnx.save(optimized_model, output_file)
    print("Finished.")
def polish_model(model, internals=True, extras=True, checking=True):
    """polish_model enhanced for inference"""
    if checking:
        check_model(model)
    strip_doc_string(model)
    if internals:
        passes = optimizer.get_available_passes()
        passes = list(
            filter(lambda name: not name.startswith('split_'), passes))
        # logger.debug('builtin optimizations to perform in ONNX:\n\t%s', passes)
        model = optimizer.optimize(model, passes=passes)
    if extras:
        for optimize in (
                optimize_model_skip_op_for_inference,
                optimize_model_strip_initializer,
                optimize_model_cast,
                optimize_model_slice,
        ):
            model = optimize(model)
    model = infer_shapes(model)
    if checking:
        check_model(model)
    return model
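# Illustrative call of polish_model above; a sketch, assuming "model.onnx" is
# any inference-ready ONNX file (the path is hypothetical).
def _example_polish_model():
    import onnx
    model = onnx.load("model.onnx")
    # run only the builtin optimizer passes, skipping the extra rewrites
    model = polish_model(model, internals=True, extras=False, checking=True)
    onnx.save(model, "model.polished.onnx")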
def main(input_file: str, output_file: str):
    from tempfile import NamedTemporaryFile

    import onnx
    import torch.onnx
    from onnx import optimizer

    from nn_models import resnet20

    print("Loading model...")
    model = resnet20()
    cpu = torch.device("cpu")
    checkpoint = torch.load(input_file, map_location=cpu)
    model.load_state_dict(checkpoint["net"])

    with NamedTemporaryFile() as temp_file:
        print("Tracing model...")
        input_array = torch.ones(1, 1, 40, 100)
        torch.onnx.export(model, input_array, temp_file,
                          keep_initializers_as_inputs=True)
        temp_file.seek(0)

        print("Validating model... ", end="")
        onnx_model = onnx.load(temp_file)
        onnx.checker.check_model(onnx_model)
        print("Pass.")

    print("Optimizing model...")
    passes = [
        "extract_constant_to_initializer",
        "eliminate_unused_initializer"
    ]
    optimized_model = optimizer.optimize(onnx_model, passes)
    onnx.save(optimized_model, output_file)
    print("Finished.")
def runPytorch(img):
    # alternatives: MobileFaceNetVerifyAgeGender(), IR_SE_FaceNet()
    torchModel = MobileFaceNet_DEX_c3()
    model_dict = torchModel.state_dict()
    torchWeights = torch.load(modelFileName,
                              map_location=lambda storage, loc: storage)
    updated_dict, match_layers, mismatch_layers = weight_filler(torchWeights,
                                                                model_dict)
    print("The mismatch layers: %s" % mismatch_layers)
    model_dict.update(updated_dict)
    torchModel.load_state_dict(model_dict)
    torchModel.eval().cpu()
    y = torchModel.forward(torch.from_numpy(img).cpu())
    print(y.abs().sum())

    onxFileName = "nameAgeGenderFaceBlur.onnx"
    dummy_input = torch.randn(4, 3, modelWidthHeight, modelWidthHeight)
    torch.onnx.export(torchModel.cpu(), dummy_input.cpu(), onxFileName,
                      verbose=True)

    om = onnx.load(onxFileName)
    om = infer_shapes(om)
    om = optimize(om)
    onnx.save(om, onxFileName)
    a = om.graph
    helper.printable_graph(a)
    v = a.value_info
def onnx2trt_infer(
        onnx_model_filename: str,
        input_values: 'Sequence[np.ndarray]',
        batch_size: int = 1,
        workspace_size: int = (1024 * 1024 * 16),
) -> 'Sequence[np.ndarray]':
    r"""infer model with 'onnx_tensorrt' backend"""
    import onnx
    import onnx.optimizer as optimizer
    import onnx_tensorrt.backend as backend
    from onnx.utils import polish_model

    model = onnx.load(onnx_model_filename)
    passes = optimizer.get_available_passes()
    passes = list(filter(lambda name: not name.startswith('split_'), passes))
    logger.debug('optimizations to perform in ONNX:\n\t%s', passes)
    model = optimizer.optimize(model, passes=passes)
    model = polish_model(model)
    onnx.save(model,
              onnx_model_filename.rpartition('.onnx')[0] + '.optimized.onnx')

    engine = backend.prepare(
        model,
        device='CUDA',
        max_batch_size=batch_size,
        max_workspace_size=workspace_size,
    )
    return engine.run(input_values)
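# Usage sketch for onnx2trt_infer above (requires the onnx-tensorrt package
# and a CUDA device; the model path and input shape are illustrative):
def _example_onnx2trt_infer():
    import numpy as np
    outputs = onnx2trt_infer(
        "model.onnx",
        [np.random.rand(1, 3, 224, 224).astype(np.float32)],
        batch_size=1,
    )
    print([o.shape for o in outputs])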
def postprocess(m):
    """Inference the shape and prepare for export.

    :param m: the original model input
    :return: the new model after preprocessing
    """
    m = onnx.utils.polish_model(m)
    eliminating.eliminate_single_input_Concat(m.graph)
    eliminating.eliminate_nop_Maxpool_and_AveragePool(m.graph)
    m = onnx.utils.polish_model(m)
    replacing.replace_depthwise_1x1_with_bn(m.graph)
    m = onnx.utils.polish_model(m)

    # removing transpose
    m = removing_transpose.eliminate_transposes(m)
    m = onnx.utils.polish_model(m)
    removing_transpose.remove_trivial_transpose(m.graph)
    removing_transpose.fuse_Transpose_into_Gemm_weight(m.graph)

    # fuse some nodes
    fusing.fuse_mul_and_add_into_bn(m.graph)
    m = onnx.utils.polish_model(m)
    fusing.fuse_mul_and_add_into_gemm(m.graph)
    m = onnx.utils.polish_model(m)
    fusing.fuse_conv_and_add_into_conv(m.graph)
    replacing.replace_mul_to_bn(m.graph)
    replacing.replace_add_to_bn(m.graph)

    other.add_output_to_value_info(m.graph)
    m = optimizer.optimize(m, ['eliminate_deadend'])
    m.producer_name = 'kneron_formatter'
    return m
def export_onnx_model():
    """Export onnx model."""
    import onnx
    from onnx import optimizer

    onnx_file = "output/image_color.onnx"
    weight_file = "output/ImageColor.pth"

    # 1. Load model
    print("Loading model ...")
    model = get_model()
    model_load(model, weight_file)
    model.eval()

    # 2. Model export
    print("Export model ...")
    dummy_input = torch.randn(1, 3, 512, 512)
    input_names = ["input"]
    output_names = ["noise_level", "output"]
    # variable length axes
    dynamic_axes = {
        'input': {0: 'batch_size', 1: 'channel', 2: 'height', 3: 'width'},
        'output': {0: 'batch_size', 1: 'channel', 2: 'height', 3: 'width'}
    }
    torch.onnx.export(model, dummy_input, onnx_file,
                      input_names=input_names,
                      output_names=output_names,
                      verbose=True,
                      opset_version=11,
                      keep_initializers_as_inputs=True,
                      export_params=True,
                      dynamic_axes=dynamic_axes)

    # 3. Optimize model
    print('Checking model ...')
    model = onnx.load(onnx_file)
    onnx.checker.check_model(model)

    print("Optimizing model ...")
    passes = [
        "extract_constant_to_initializer",
        "eliminate_unused_initializer"
    ]
    optimized_model = optimizer.optimize(model, passes)
    onnx.save(optimized_model, onnx_file)
def _export_via_onnx(model, inputs):
    def _check_val(module):
        assert not module.training

    model.apply(_check_val)

    # Export the model to ONNX
    with torch.no_grad():
        with io.BytesIO() as f:
            torch.onnx.export(
                model,
                inputs,
                f,
                # verbose=True,  # NOTE: uncomment this for debugging
                export_params=True,
            )
            onnx_model = onnx.load_from_string(f.getvalue())
    # A fuller export call could also pin opset_version, do_constant_folding,
    # input_names/output_names, and dynamic_axes for variable-length batching.

    # Apply ONNX's optimization
    all_passes = optimizer.get_available_passes()
    passes = ["fuse_bn_into_conv"]
    assert all(p in all_passes for p in passes)
    onnx_model = optimizer.optimize(onnx_model, passes)

    # Convert the ONNX model to a TensorFlow model. Note: without strict=False,
    # prepare() can fail with KeyError: 'pyfunc_0';
    # see https://github.com/onnx/onnx-tensorflow/issues/167
    tf_rep = prepare(onnx_model, strict=False)
    print(tf_rep.inputs)   # input nodes to the model
    print('-----')
    print(tf_rep.outputs)  # output nodes from the model
    print('-----')
    # print(tf_rep.tensor_dict)  # all nodes in the model

    # debug: run the same input through onnx and tf to compare, e.g.
    # output_onnx_tf = tf_rep.run(to_numpy(img))
    # export onnx --> tf.graph.pb via tf_rep.export_graph('reid_tf_graph.pb')
    return tf_rep
def torch_to_onnx(model: nn.Module,
                  activation: nn.Module = None,
                  save_path: str = '../exported-models/',
                  model_fname: str = 'onnx-model',
                  input_shape: tuple = (1, 3, 224, 224),
                  input_name: str = 'input_image',
                  output_names: Union[str, list] = 'output',
                  **export_args) -> None:
    """
    Export a `nn.Module` -> ONNX

    This function exports the model with support for batching,
    checks that the export was done properly, and polishes the
    model up (removes unnecessary fluff added during conversion)

    Key Arguments
    =============
    * activation:  If not None, append this to the end of your model.
                   Typically a `nn.Softmax(-1)` or `nn.Sigmoid()`
    * input_shape: Shape of the inputs to the model
    """
    save_path = Path(save_path)
    if isinstance(output_names, str):
        output_names = [output_names]
    if activation:
        model = nn.Sequential(*[model, activation])
    model.eval()
    x = torch.randn(input_shape, requires_grad=True)
    x = x.cuda() if torch.cuda.is_available() else x
    model(x)
    dynamic_batch = {0: 'batch'}
    dynamic_axes = {input_name: dynamic_batch}
    for out in output_names:
        dynamic_axes[out] = dynamic_batch
    torch.onnx._export(model, x, f"{save_path/model_fname}.onnx",
                       export_params=True, verbose=False,
                       input_names=[input_name], output_names=output_names,
                       dynamic_axes=dynamic_axes,
                       keep_initializers_as_inputs=True, **export_args)
    print(f"Loading, polishing, and optimising exported model from "
          f"{save_path/model_fname}.onnx")
    onnx_model = onnx.load(f'{save_path/model_fname}.onnx')
    model = onnx.utils.polish_model(onnx_model)
    # onnx.checker.check_model(model)

    # remove unused parts of the model (note: optimize the polished model,
    # not the raw export)
    passes = [
        "extract_constant_to_initializer",
        "eliminate_unused_initializer"
    ]
    optimized_model = optimizer.optimize(model, passes)

    onnx.save(optimized_model, f'{save_path/model_fname}.onnx')
    print('Exported successfully')
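# Illustrative call of torch_to_onnx above with a torchvision classifier
# (the model choice and file name are assumptions, not from the source):
def _example_torch_to_onnx():
    import torchvision
    model = torchvision.models.resnet18(pretrained=False)
    torch_to_onnx(model,
                  activation=nn.Softmax(-1),
                  save_path='.',
                  model_fname='resnet18-onnx',
                  input_shape=(1, 3, 224, 224))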
def optimize():
    import os
    import sys

    import onnx
    from onnx import optimizer

    file = sys.argv[2]
    base = os.path.splitext(file)[0]  # splitext returns (root, ext); keep the root
    onnx_model = onnx.load(file)
    passes = optimizer.get_available_passes()
    optimized_model = optimizer.optimize(onnx_model, passes)
    onnx.save(optimized_model, base + '.optimized.onnx')
def preprocess(model_proto):
    """The most commonly used functions before other processing.

    :param model_proto: the original model input
    :return: the new model after preprocessing

    It includes:

    - inference shapes
    - optimize model by ONNX library
    - give names to the nodes
    - replace initializer with Constant node
    - replace -1 batch size with 1
    - eliminate dropout and identity
    - eliminate no children inputs
    - topological sort

    The optimizations provided by ONNX:

    - eliminate_identity
    - eliminate_nop_dropout
    - eliminate_nop_transpose
    - eliminate_nop_pad
    - eliminate_unused_initializer
    - eliminate_deadend
    - fuse_consecutive_squeezes
    - fuse_consecutive_transposes
    - fuse_add_bias_into_conv
    - fuse_transpose_into_gemm
    - fuse_matmul_add_bias_into_gemm
    - fuse_bn_into_conv
    - fuse_pad_into_conv
    """
    m = onnx.utils.polish_model(model_proto)
    passes = ['extract_constant_to_initializer',
              'fuse_bn_into_conv',
              'eliminate_nop_dropout',
              'eliminate_deadend',
              'fuse_matmul_add_bias_into_gemm',
              'fuse_pad_into_conv']
    m = optimizer.optimize(m, passes)
    g = m.graph
    other.add_name_to_node(g)
    replacing.replace_initializer_with_Constant(g)
    eliminating.eliminate_Identify_and_Dropout(g)
    eliminating.eliminate_trivial_maxpool(g)
    eliminating.eliminate_no_children_input(g)
    other.format_value_info_shape(g)
    other.topological_sort(g)
    m = other.inference_shapes(m)
    g = m.graph
    replacing.replace_split_with_slices(g)
    other.topological_sort(g)
    return m
def optimize():
    model = onnx.load(onnxfile)
    onnx.checker.check_model(model)
    print('Checked.')
    passes = [
        "extract_constant_to_initializer",
        "eliminate_unused_initializer"
    ]
    optimized_model = optimizer.optimize(model, passes)
    print('Optimized.')
    onnx.save(optimized_model, onnxfile)
def getGraph(onnx_path, with_opt=False):
    model = onnx.load(onnx_path)
    if with_opt:
        opt_passes = ['eliminate_nop_pad', 'eliminate_identity']
        model = optimizer.optimize(model, opt_passes)
    model = shape_inference.infer_shapes(model)
    model_graph = model.graph
    graph = Graph.from_onnx(model_graph)
    graph = graph.transformed(transformers)
    graph.channel_dims = {}
    return graph
def _make_onnx_model(cls, tf_graph, opset, producer_name,
                     ignore_unimplemented, optimizer_passes):
    opset = cls._process_opset(opset)
    onnx_graph = cls.tensorflow_graph_to_onnx_graph(
        tf_graph, opset, ignore_unimplemented)
    opset_imports = [make_opsetid(item[0], item[1]) for item in opset]
    onnx_model = make_model(onnx_graph,
                            producer_name=producer_name,
                            opset_imports=opset_imports)
    if isinstance(optimizer_passes, (list, tuple)) and optimizer_passes:
        onnx_model = optimize(onnx_model, optimizer_passes)
    return onnx_model
def convert2onnx(vDstModelPrefix, vEpoch):
    sym = '%s-symbol.json' % vDstModelPrefix
    params = '%s-%04d.params' % (vDstModelPrefix, vEpoch)
    input_shape = (1, 3, 960, 960)
    onnxFileName = params[0:-6] + "onnx"  # swap the "params" suffix for "onnx"
    converted_model_path = onnx_mxnet.export_model(
        sym, params, [input_shape], np.float32, onnxFileName)

    from onnx.shape_inference import infer_shapes
    from onnx.optimizer import optimize
    om = onnx.load(onnxFileName)
    om = infer_shapes(om)
    om = optimize(om)
    onnx.save(om, onnxFileName)
def export_onnx(
        cls,
        module: Module,
        input_shape: Tuple[int, ...],
        export_path: str,
        input_t: Optional[Union[Tensor, QuantTensor]] = None,
        **kwargs):
    """
    * input_shape : tuple describing the shape of network input e.g. (1, 1, 28, 28)
    * export_path : ONNX filename to export to
    * input_t : if specified, do an initial forward pass with this value.
                This may be necessary for QuantTensor caching.
    * torch_onnx_kwargs : will be passed as kwargs to torch.onnx.export
    """
    def set_export_handler(m: Module):
        if hasattr(m, 'export_handler') and m.export_handler is None:
            handler = cls.handler_from_module(m)
            m.export_handler = handler()

    if onnx is None or opt is None:
        raise ModuleNotFoundError("Installation of ONNX is required.")
    cls.solve_keep_initializers_as_inputs(kwargs)
    cls.solve_enable_onnx_checker(kwargs)
    with torch.no_grad():
        module = module.eval()
        module.apply(set_export_handler)
        if input_t is None:
            input_t = torch.empty(input_shape, dtype=torch.float)
        # do a forward pass with the dummy input to e.g. store input/output shapes
        cls.cache_inp_out(module, input_t)
        # override any given input_t to make sure it's a standard PyTorch tensor
        input_t = torch.empty(input_shape, dtype=torch.float)
        # enable export mode, this triggers collecting export values into handlers
        module.apply(lambda m: _set_export_mode(m, enabled=True))
        # temporarily disable input caching to avoid collecting empty debug values
        module.apply(lambda m: _override_inp_caching_mode(m, enabled=False))
        # perform export pass
        torch.onnx.export(module, input_t, export_path, **kwargs)
        # restore the model to previous properties
        module.apply(lambda m: _restore_inp_caching_mode(m))
        module.apply(lambda m: _set_export_mode(m, enabled=False))
        # do some cleanup on the exported ONNX model
        model = onnx.load(export_path)
        model = opt.optimize(model, cls.onnx_passes)
        model = cls.apply_model_transforms(model)
        onnx.save(model, export_path)
def optimize_onnx(onnx_model):
    passes = ['nop',
              'extract_constant_to_initializer',
              'eliminate_identity',
              'eliminate_nop_pad',
              'eliminate_nop_transpose',
              'eliminate_unused_initializer',
              'fuse_add_bias_into_conv',
              'fuse_bn_into_conv',
              'fuse_consecutive_squeezes',
              'fuse_consecutive_transposes',
              'fuse_transpose_into_gemm']
    if onnx_ver_int >= 10400:
        passes += ['eliminate_nop_dropout',
                   'fuse_consecutive_concats',
                   'fuse_matmul_add_bias_into_gemm',
                   'fuse_pad_into_conv']
    from onnx import optimizer
    all_passes = optimizer.get_available_passes()
    for p in passes:
        assert p in all_passes
    return optimizer.optimize(onnx_model, passes)
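# The version gate above relies on a module-level onnx_ver_int. A plausible
# construction (an assumption, not shown in the source) packs a plain x.y.z
# version string into one integer, e.g. "1.4.0" -> 10400:
import onnx
_ver = onnx.__version__.split(".")[:3]
onnx_ver_int = int(_ver[0]) * 10000 + int(_ver[1]) * 100 + int(_ver[2])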
def optim_onnx(model: onnx.ModelProto, verbose=False):
    """Optimize ONNX network"""
    logger.info("Begin Simplify ONNX Model ...")
    passes = [
        'eliminate_deadend',
        'eliminate_identity',
        'extract_constant_to_initializer',
        'eliminate_unused_initializer',
        'fuse_add_bias_into_conv',
        'fuse_bn_into_conv',
        'fuse_matmul_add_bias_into_gemm'
    ]
    model = optimizer.optimize(model, passes)
    if verbose:
        for m in onnx.helper.printable_graph(model.graph).split("\n"):
            logger.debug(m)
    return model
def optimize_onnx_graph(onnx_model_path):
    onnx_model = onnx.load(onnx_model_path)
    onnx_model = optimize(onnx_model, ['extract_constant_to_initializer',
                                       'eliminate_unused_initializer'])
    inputs = onnx_model.graph.input
    name_to_input = {}
    for graph_input in inputs:
        name_to_input[graph_input.name] = graph_input
    for initializer in onnx_model.graph.initializer:
        if initializer.name in name_to_input:
            inputs.remove(name_to_input[initializer.name])
    onnx.save(onnx_model, onnx_model_path)
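# Why the input-pruning loop above: under the pre-IR-4 convention every
# initializer also appears in graph.input, and extract_constant_to_initializer
# adds new initializers, so the duplicate entries must be stripped to leave
# only the real runtime inputs. A small sanity check (a sketch, not from the
# source):
def _assert_no_initializer_inputs(onnx_model):
    init_names = {init.name for init in onnx_model.graph.initializer}
    input_names = {inp.name for inp in onnx_model.graph.input}
    assert not (init_names & input_names), "initializers still listed as inputs"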
def create_reco(m, n, k, optimize_model):
    batch_size = 1
    model_name = f"reco{m}_{n}_{k}"
    with tf.Session() as sess:
        x1 = tf.placeholder(tf.float32, shape=(k, 1), name='x1')
        x2 = tf.placeholder(tf.float32, shape=(k, 1), name='x2')
        r1 = tf.placeholder(tf.float32, shape=(m, 1), name='r1')
        y1 = tf.placeholder(tf.float32, shape=(m, 1), name='y1')
        r2 = tf.placeholder(tf.float32, shape=(n, 1), name='r2')
        y2 = tf.placeholder(tf.float32, shape=(n, 1), name='y2')
        w1 = tf.placeholder(tf.float32, shape=(m, k), name='w1')
        w2 = tf.placeholder(tf.float32, shape=(n, k), name='w2')
        input_shapes = {"x1:0": x1.shape, "w1:0": w1.shape,
                        "y1:0": y1.shape, "r1:0": r1.shape,
                        "x2:0": x2.shape, "w2:0": w2.shape,
                        "y2:0": y2.shape, "r2:0": r2.shape}
        mu = tf.constant(1, dtype=tf.float32, name="mu")
        h1_sum = tf.matmul(w1, x2)
        h1 = h1_sum * r1
        h2_sum = tf.matmul(w2, x1)
        h2 = h2_sum * r2
        d1 = h1 - y1
        d2 = h2 - y2
        g1 = d1 * tf.transpose(x2)
        g2 = d2 * tf.transpose(x1)
        w1_out = tf.subtract(w1, mu * g1, name="w1_out")
        w2_out = tf.subtract(w2, mu * g2, name="w2_out")
        sess.run(tf.initialize_all_variables())
        input_names = ['x1:0', 'y1:0', 'r1:0', 'x2:0', 'y2:0', 'r2:0']
        output_names = ['w1_out:0', 'w2_out:0']
        onnx_graph = tf2onnx.tfonnx.process_tf_graph(
            sess.graph, input_names=input_names, output_names=output_names)
        model_proto = onnx_graph.make_model(model_name)
        model_proto = optimizer.optimize(model_proto, ['eliminate_identity'])
        if optimize_model:
            model_proto, check = simplify(model_proto, input_shapes=input_shapes)
            assert check
        with open(f"./{model_name}.onnx", "wb") as f:
            f.write(model_proto.SerializeToString())
def tensorflow_optimization(m):
    """Optimizations for tf models that can be used in most cases.

    :param m: the original model input
    :return: the new model after preprocessing

    It includes:

    - eliminate consecutive Cast
    - eliminate cast after input
    - eliminate shape change after input
    - eliminate Reshape cast
    - eliminate Squeeze before Reshape
    - fuse Transpose into Constant
    - replace Shape with Constant
    """
    eliminating.eliminate_consecutive_Cast(m.graph)
    fusing.fuse_Transpose_into_Constant(m.graph)
    fusing.fuse_MatMul_and_Add_into_Gemm(m.graph)
    eliminating.eliminate_Cast_after_input(m.graph)
    other.topological_sort(m.graph)
    m = onnx.utils.polish_model(m)

    # constant folding
    replacing.replace_shape_with_constant(m.graph)
    m = other.inference_shapes(m)
    while constant_folding.constant_folding(m.graph):
        logging.debug("After constant folding jobs.")
        other.topological_sort(m.graph)
        while len(m.graph.value_info) != 0:
            m.graph.value_info.pop()
        m = other.inference_shapes(m)
        replacing.replace_shape_with_constant(m.graph)
    other.topological_sort(m.graph)

    m = tf_pattern_match(m)
    m = optimizer.optimize(m, ['eliminate_deadend'])
    eliminating.eliminate_consecutive_reshape(m.graph)
    eliminating.eliminate_Squeeze_before_Reshape(m.graph)
    other.topological_sort(m.graph)
    return m
def optim_onnx(onnx_path, verbose=True):
    model = onnx.load(onnx_path)
    print("Begin Simplify ONNX Model ...")
    passes = [
        "eliminate_deadend",
        "eliminate_identity",
        "extract_constant_to_initializer",
        "eliminate_unused_initializer",
        "fuse_add_bias_into_conv",
        "fuse_bn_into_conv",
        "fuse_matmul_add_bias_into_gemm"
    ]
    model = optimizer.optimize(model, passes)
    # model = shape_inference.infer_shapes(model)
    # model = onnxsim.simplify(model)
    if verbose:
        for m in onnx.helper.printable_graph(model.graph).split("\n"):
            print(m)
    return model
def optim_onnx(onnx_path, verbose=True):
    """Optimize ONNX network"""
    model = onnx.load(onnx_path)
    print("Begin Simplify ONNX Model ...")
    passes = [
        'eliminate_deadend',
        'eliminate_identity',
        'extract_constant_to_initializer',
        'eliminate_unused_initializer',
        'fuse_add_bias_into_conv',
        'fuse_bn_into_conv',
        'fuse_matmul_add_bias_into_gemm'
    ]
    model = optimizer.optimize(model, passes)
    if verbose:
        for m in onnx.helper.printable_graph(model.graph).split("\n"):
            print(m)
    return model
def export_onnx_model(model, inputs, passes):
    """
    Trace and export a model to onnx format.
    Modified from https://github.com/facebookresearch/detectron2/

    Args:
        model (nn.Module):
        inputs (tuple[args]): the model will be called by `model(*inputs)`
        passes (None or list[str]): the optimization passes for the ONNX model

    Returns:
        an onnx model
    """
    assert isinstance(model, torch.nn.Module)

    # make sure all modules are in eval mode, onnx may change the training
    # state of the module if the states are not consistent
    def _check_eval(module):
        assert not module.training

    model.apply(_check_eval)

    # Export the model to ONNX
    with torch.no_grad():
        with io.BytesIO() as f:
            torch.onnx.export(
                model,
                inputs,
                f,
                operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK,
                # verbose=True,  # NOTE: uncomment this for debugging
                # export_params=True,
            )
            onnx_model = onnx.load_from_string(f.getvalue())

    # Apply ONNX's optimization
    if passes is not None:
        all_passes = optimizer.get_available_passes()
        assert all(p in all_passes for p in passes), \
            "Only {} are supported".format(all_passes)
        onnx_model = optimizer.optimize(onnx_model, passes)
    return onnx_model
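# Hedged usage sketch for export_onnx_model above with a toy module (all
# names and shapes below are illustrative, not from the source):
def _example_export_onnx_model():
    toy = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
    inputs = (torch.randn(1, 3, 32, 32),)
    onnx_model = export_onnx_model(toy, inputs, passes=["eliminate_identity"])
    onnx.save(onnx_model, "toy.onnx")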
def export_finn_onnx(module, input_shape, export_path, input_t=None,
                     torch_onnx_kwargs={}):
    """Export given module with Brevitas layers to FINN-ONNX with some cleanup.

    * input_shape : tuple describing the shape of network input e.g. (1, 1, 28, 28)
    * export_path : ONNX filename to export to
    * input_t : if specified, do an initial forward pass with this value.
                This may be necessary for QuantTensor caching.
    * torch_onnx_kwargs : will be passed as kwargs to torch.onnx.export
    """
    if onnx is None or opt is None:
        raise ModuleNotFoundError("Installation of ONNX is required.")

    with torch.no_grad():
        # TODO maybe consider a deepcopy of the module first?
        module = module.eval()
        if input_t is None:
            input_t = torch.empty(input_shape, dtype=torch.float)
        # do a forward pass with the dummy input to e.g. store per-layer input
        # and output shapes
        output_t = module.forward(input_t)
        # override any given input_t to make sure it's a standard PyTorch tensor
        input_t = torch.empty(input_shape, dtype=torch.float)
        # enable export mode and call export
        _prepare_for_finn_onnx_export(module, enable_export=True)
        torch.onnx.export(module, input_t, export_path, **torch_onnx_kwargs)
        # restore the model to non-export mode to keep it clean
        _prepare_for_finn_onnx_export(module, enable_export=False)
        # do some cleanup on the exported ONNX model
        model = onnx.load(export_path)
        onnx_passes = [
            # use initializers instead of Constant nodes for fixed params
            "extract_constant_to_initializer",
            # remove unused graph inputs (e.g. zero_hw_sentinel) & initializers
            "eliminate_unused_initializer"
        ]
        model = opt.optimize(model, onnx_passes)
        model = _move_quant_attributes_into_annotations(model)
        model = _move_domain_attributes_into_domain(model)
        onnx.save(model, export_path)
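# Usage sketch for export_finn_onnx above (assumes a Brevitas-quantized
# network is passed in; the module and file name are illustrative):
def _example_export_finn_onnx(my_quant_net):
    export_finn_onnx(my_quant_net,
                     input_shape=(1, 1, 28, 28),
                     export_path="finn_net.onnx")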
def main():
    if len(sys.argv) != 3:
        print("Usage: python onnx_optimizer.py model.onnx model_opt.onnx")
        sys.exit(0)
    in_path = sys.argv[1]
    out_path = sys.argv[2]

    original_model = onnx.load(in_path)
    print("Start optimize ONNX model for inference:")
    passes = [
        'eliminate_identity',
        'fuse_consecutive_squeezes',
        'fuse_consecutive_transposes',
        'eliminate_nop_pad',
        'eliminate_nop_transpose',
        'eliminate_unused_initializer',
        'extract_constant_to_initializer',
        'fuse_add_bias_into_conv',
        'fuse_bn_into_conv',
        'fuse_transpose_into_gemm'
    ]
    for i in range(len(passes)):
        print(i, ".", passes[i])
    optimized_model = optimizer.optimize(original_model, passes)
    onnx.save_model(optimized_model, out_path)
    print("Optimize Finished!")
    print("Please check new model in:", out_path)