def onnx2trt_infer(
        onnx_model_filename: str,
        input_values: 'Sequence[np.ndarray]',
        batch_size: int = 1,
        workspace_size: int = (1024 * 1024 * 16),
) -> 'Sequence[np.ndarray]':
    r"""infer model with 'onnx_tensorrt' backend"""
    import onnx
    import onnx.optimizer as optimizer
    import onnx_tensorrt.backend as backend
    from onnx.utils import polish_model

    model = onnx.load(onnx_model_filename)
    passes = optimizer.get_available_passes()
    passes = list(filter(lambda name: not name.startswith('split_'), passes))
    logger.debug('optimizations to perform in ONNX:\n\t%s', passes)
    model = optimizer.optimize(model, passes=passes)
    model = polish_model(model)
    onnx.save(model,
              onnx_model_filename.rpartition('.onnx')[0] + '.optimized.onnx')

    engine = backend.prepare(
        model,
        device='CUDA',
        max_batch_size=batch_size,
        max_workspace_size=workspace_size,
    )
    return engine.run(input_values)
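A minimal driver sketch for the helper above; the model filename, input shape, and the module-level `logger`/`np` bindings are assumptions, not part of the original snippet:

import logging

import numpy as np

logger = logging.getLogger(__name__)  # the snippet assumes a module-level logger

# hypothetical input: one 1x3x224x224 float32 image batch
dummy_input = np.random.rand(1, 3, 224, 224).astype(np.float32)
outputs = onnx2trt_infer('model.onnx', [dummy_input], batch_size=1)
print([o.shape for o in outputs])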
def polish_model(model, internals=True, extras=True, checking=True):
    """polish_model enhanced for inference"""
    if checking:
        check_model(model)
    strip_doc_string(model)
    if internals:
        passes = optimizer.get_available_passes()
        passes = list(
            filter(lambda name: not name.startswith('split_'), passes))
        # logger.debug('builtin optimizations to perform in ONNX:\n\t%s', passes)
        model = optimizer.optimize(model, passes=passes)
    if extras:
        for optimize in (
                optimize_model_skip_op_for_inference,
                optimize_model_strip_initializer,
                optimize_model_cast,
                optimize_model_slice,
        ):
            model = optimize(model)
    model = infer_shapes(model)
    if checking:
        check_model(model)
    return model
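The helpers named above are not imported in the snippet; a plausible preamble, assuming they resolve to the stock onnx APIs (the `optimize_model_*` extras are project-local and not shown):

from onnx import optimizer
from onnx.checker import check_model
from onnx.helper import strip_doc_string
from onnx.shape_inference import infer_shapes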
def optimize():
    import os
    import sys

    import onnx
    from onnx import optimizer

    file = sys.argv[2]
    base, _ = os.path.splitext(file)  # splitext returns (root, ext); keep the root
    onnx_model = onnx.load(file)
    passes = optimizer.get_available_passes()
    optimized_model = optimizer.optimize(onnx_model, passes)
    onnx.save(optimized_model, base + '.optimized.onnx')
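Reading the model path from `sys.argv[2]` suggests a subcommand-style CLI where `argv[1]` selects the action; a hedged entry point (the invocation in the comment is hypothetical):

import sys

if __name__ == '__main__':
    # hypothetical invocation:  python tool.py optimize model.onnx
    if len(sys.argv) > 2 and sys.argv[1] == 'optimize':
        optimize()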
def _export_via_onnx(model, inputs):
    def _check_val(module):
        assert not module.training

    model.apply(_check_val)

    # Export the model to ONNX
    with torch.no_grad():
        with io.BytesIO() as f:
            torch.onnx.export(
                model,
                inputs,
                f,
                # verbose=True,  # NOTE: uncomment this for debugging
                export_params=True,
            )
            onnx_model = onnx.load_from_string(f.getvalue())

    # torch.onnx.export(model,                  # model being run
    #                   inputs,                 # model input (or a tuple for multiple inputs)
    #                   "reid_test.onnx",       # where to save the model (can be a file or file-like object)
    #                   export_params=True,     # store the trained parameter weights inside the model file
    #                   opset_version=10,       # the ONNX version to export the model to
    #                   do_constant_folding=True,  # whether to execute constant folding for optimization
    #                   input_names=['input'],  # the model's input names
    #                   output_names=['output'],  # the model's output names
    #                   dynamic_axes={'input': {0: 'batch_size'},  # variable length axes
    #                                 'output': {0: 'batch_size'}})

    # Apply ONNX's optimization
    all_passes = optimizer.get_available_passes()
    passes = ["fuse_bn_into_conv"]
    assert all(p in all_passes for p in passes)
    onnx_model = optimizer.optimize(onnx_model, passes)

    # Convert the ONNX model to a TensorFlow model
    tf_rep = prepare(onnx_model, strict=False)  # import the ONNX model into TensorFlow
    print(tf_rep.inputs)  # input nodes to the model
    print('-----')
    print(tf_rep.outputs)  # output nodes from the model
    print('-----')
    # print(tf_rep.tensor_dict)  # all nodes in the model

    # Install onnx-tensorflow from GitHub and call prepare(onnx_model, strict=False);
    # without strict=False the conversion fails with KeyError: 'pyfunc_0'.
    # Reference: https://github.com/onnx/onnx-tensorflow/issues/167

    # Debug: run the same input through ONNX and TF to compare outputs.
    # output_onnx_tf = tf_rep.run(to_numpy(img))
    # print('output_onnx_tf = {}'.format(output_onnx_tf))

    # onnx --> tf.graph.pb
    # tf_pb_path = 'reid_tf_graph.pb'
    # tf_rep.export_graph(tf_pb_path)

    return tf_rep
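A usage sketch, under the assumption that `prepare` comes from onnx_tf.backend and that `io`, `torch`, `onnx`, and `optimizer` are imported at module level; the toy network is illustrative only:

import torch

net = torch.nn.Sequential(torch.nn.Conv2d(3, 4, 3),
                          torch.nn.BatchNorm2d(4)).eval()
dummy = torch.randn(1, 3, 16, 16)
tf_rep = _export_via_onnx(net, dummy)
print(tf_rep.run(dummy.numpy()))  # compare against net(dummy) to validate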
def optimize_onnx(onnx_model):
    passes = ['nop',
              'extract_constant_to_initializer',
              'eliminate_identity',
              'eliminate_nop_pad',
              'eliminate_nop_transpose',
              'eliminate_unused_initializer',
              'fuse_add_bias_into_conv',
              'fuse_bn_into_conv',
              'fuse_consecutive_squeezes',
              'fuse_consecutive_transposes',
              'fuse_transpose_into_gemm']
    if onnx_ver_int >= 10400:
        passes += ['eliminate_nop_dropout',
                   'fuse_consecutive_concats',
                   'fuse_matmul_add_bias_into_gemm',
                   'fuse_pad_into_conv']
    from onnx import optimizer
    all_passes = optimizer.get_available_passes()
    for p in passes:
        assert p in all_passes
    return optimizer.optimize(onnx_model, passes)
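`onnx_ver_int` is computed elsewhere; since 10400 gates passes introduced in onnx 1.4.0, a plausible construction is the one below (the name and encoding are assumptions):

import onnx

# '1.4.1' -> 1*10000 + 4*100 + 1 = 10401, so `>= 10400` reads "onnx 1.4.0 or newer"
major, minor, patch = (int(v) for v in onnx.__version__.split('.')[:3])
onnx_ver_int = major * 10000 + minor * 100 + patch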
def export_onnx_model(model, inputs, passes):
    """
    Trace and export a model to onnx format. Modified from
    https://github.com/facebookresearch/detectron2/

    Args:
        model (nn.Module):
        inputs (tuple[args]): the model will be called by `model(*inputs)`
        passes (None or list[str]): the optimization passes for the ONNX model

    Returns:
        an onnx model
    """
    assert isinstance(model, torch.nn.Module)

    # make sure all modules are in eval mode, onnx may change the training
    # state of the module if the states are not consistent
    def _check_eval(module):
        assert not module.training

    model.apply(_check_eval)

    # Export the model to ONNX
    with torch.no_grad():
        with io.BytesIO() as f:
            torch.onnx.export(
                model,
                inputs,
                f,
                operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK,
                # verbose=True,  # NOTE: uncomment this for debugging
                # export_params=True,
            )
            onnx_model = onnx.load_from_string(f.getvalue())

    # Apply ONNX's optimization
    if passes is not None:
        all_passes = optimizer.get_available_passes()
        assert all(p in all_passes for p in passes), "Only {} are supported".format(
            all_passes
        )
        onnx_model = optimizer.optimize(onnx_model, passes)
    return onnx_model
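A usage sketch; the toy module and output path are illustrative, and `io`, `optimizer`, and `OperatorExportTypes` are assumed to be imported at module level as in the original project:

import onnx
import torch

net = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3),
                          torch.nn.BatchNorm2d(8)).eval()
inputs = (torch.randn(1, 3, 32, 32),)
onnx_model = export_onnx_model(net, inputs, passes=["fuse_bn_into_conv"])
onnx.save(onnx_model, 'toy.onnx')  # hypothetical output path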
def pytorch_to_keras(model, args, input_shapes=None,
                     change_ordering=False, verbose=False, name_policy=None,
                     use_optimizer=False, do_constant_folding=False):
    """
    Convert a PyTorch model to Keras by way of ONNX.

    Args:
        model: pytorch model
        args: pytorch model arguments
        input_shapes: keras input shapes (used for each InputLayer)
        change_ordering: change CHW to HWC
        verbose: verbose output
        name_policy: use short names, a random suffix, or keep original names
            for keras layers
        use_optimizer: True to run all available ONNX optimizer passes, or an
            iterable of pass names to run
        do_constant_folding: fold constants during the ONNX export

    Returns:
        model: created keras model.
    """
    logger = logging.getLogger('pytorch2keras')
    if verbose:
        logging.basicConfig(level=logging.DEBUG)

    logger.info('Converter is called.')

    if name_policy:
        logger.warning('Name policy isn\'t supported now.')

    if input_shapes:
        logger.warning('Custom shapes aren\'t supported now.')

    if input_shapes and not isinstance(input_shapes, list):
        input_shapes = [input_shapes]

    if not isinstance(args, list):
        args = [args]
    args = tuple(args)

    dummy_output = model(*args)
    if isinstance(dummy_output, torch.autograd.Variable):
        dummy_output = [dummy_output]

    input_names = ['input_{0}'.format(i) for i in range(len(args))]
    output_names = ['output_{0}'.format(i) for i in range(len(dummy_output))]

    logger.debug('Input_names:')
    logger.debug(input_names)
    logger.debug('Output_names:')
    logger.debug(output_names)

    stream = io.BytesIO()
    torch.onnx.export(model, args, stream,
                      do_constant_folding=do_constant_folding,
                      verbose=verbose,
                      input_names=input_names,
                      output_names=output_names)
    stream.seek(0)
    onnx_model = onnx.load(stream)

    if use_optimizer:
        if use_optimizer is True:
            optimizer2run = optimizer.get_available_passes()
        else:
            use_optimizer = set(use_optimizer)
            optimizer2run = [
                x for x in optimizer.get_available_passes() if x in use_optimizer
            ]
        logger.info("Running optimizer:\n%s", "\n".join(optimizer2run))
        onnx_model = optimizer.optimize(onnx_model, optimizer2run)

    k_model = onnx_to_keras(onnx_model=onnx_model, input_names=input_names,
                            input_shapes=input_shapes, name_policy=name_policy,
                            verbose=verbose, change_ordering=change_ordering)
    return k_model
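A conversion sketch, assuming pytorch2keras and its onnx2keras dependency are installed; the toy model and the pass selection are illustrative only:

import torch

net = torch.nn.Sequential(torch.nn.Linear(8, 4), torch.nn.ReLU()).eval()
dummy = torch.randn(1, 8)
k_model = pytorch_to_keras(net, dummy,
                           use_optimizer=['eliminate_identity',
                                          'eliminate_nop_transpose'])
k_model.summary()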
import onnx
from onnx import optimizer

# Preprocessing: load the model to be optimized.
model_path = 'espnetv2fusion.onnx'
original_model = onnx.load(model_path)

all_passes = optimizer.get_available_passes()
# print("Available optimization passes:")
# for p in all_passes:
#     print(p)
# print()

passes = [
    'fuse_consecutive_transposes',
    'eliminate_deadend',
    'eliminate_identity',
    'eliminate_nop_dropout',
    'eliminate_nop_monotone_argmax',
    'eliminate_nop_pad',
    'eliminate_nop_transpose',
    'eliminate_unused_initializer',
    'extract_constant_to_initializer',
    'fuse_add_bias_into_conv',
    # 'fuse_bn_into_conv',
    'fuse_consecutive_concats',
    'fuse_consecutive_log_softmax',
    'fuse_consecutive_reduce_unsqueeze',
    'fuse_consecutive_squeezes',
    'fuse_consecutive_transposes',
    'fuse_matmul_add_bias_into_gemm',
]
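The snippet is truncated after the pass list; an assumed continuation that applies the passes and saves the result (the output filename is a guess):

# assumed continuation, not part of the original snippet
optimized_model = optimizer.optimize(original_model, passes)
onnx.save(optimized_model, 'espnetv2fusion.optimized.onnx')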
def main(self, onnx_filename, bundle_dir=None, checker=False, optimize=False):
    dnncModule = sys.modules.get('deepC.dnnc')
    if dnncModule is None:
        print("ERROR (DNNC): could not find dnnc module. Please make sure "
              "dnnc is imported before calling ", __name__)
        return

    print("reading onnx model from file ", onnx_filename)

    self._bundleDir = bundle_dir
    if self._bundleDir is None:
        self._bundleDir = os.path.dirname(onnx_filename)

    model = onnx.load(onnx_filename)
    print("Model info:\n  ir_version : ", model.ir_version,
          "\n  doc        : ", model.doc_string)

    if optimize:
        print("  Optimization enabled.")
        from onnx import optimizer
        for opt_pass in optimizer.get_available_passes():
            print('    running optimization step : {}'.format(
                opt_pass.replace("_", " ")))
            try:
                model = optimizer.optimize(model, [opt_pass])
            except Exception as e:
                print("    optimization failed. " + str(e) +
                      "\n    Abandoning and trying next.")
        print("  optimization done.")

    if checker:
        try:
            print("running ONNX model shape inference engine and verification")
            onnx.checker.check_model(model)
            from onnx import shape_inference
            model = shape_inference.infer_shapes(model)
            onnx.checker.check_model(model)
        except Exception as e:
            print("  failed. moving to next step. " + str(e))

    graph = model.graph

    self._dcGraph = dnnc.Graph()
    self._dcGraph.setName(graph.name)

    for node in graph.node:
        dcNode = self.addOPNode(node)

    for terminal in graph.input:
        dcTerm = self.createTermNode(terminal)
        if dcTerm is not None and len(dcTerm) == 3:
            self._dcGraph.addInput(dcTerm[0], dcTerm[1], dcTerm[2])

    for terminal in graph.output:
        dcTerm = self.createTermNode(terminal)
        if dcTerm is not None and len(dcTerm) == 3:
            self._dcGraph.addOutput(dcTerm[0], dcTerm[1], dcTerm[2])

    for param in graph.initializer:
        self.addParams(param)

    try:
        print("running DNNC graph sanity check.")
        if not self._dcGraph.sanityCheck():
            print("  FAILED. Please check your model.")
    except Exception as e:
        print("  FAILED.\n" + str(e))

    return self._dcGraph
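A hedged driver for the method above; the class that owns `main` is not shown in the snippet, so `OnnxParser` is a hypothetical stand-in for it:

import deepC.dnnc as dnnc  # must be imported before main() runs, per the guard above

parser = OnnxParser()  # hypothetical: stand-in for the class that defines main()
dc_graph = parser.main('model.onnx', checker=True, optimize=True)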