import logging

logger = logging.getLogger(__name__)


def onnx2trt_infer(
        onnx_model_filename: str,
        input_values: 'Sequence[np.ndarray]',
        batch_size: int = 1,
        workspace_size: int = (1024 * 1024 * 16),
) -> 'Sequence[np.ndarray]':
    r"""Infer an ONNX model with the 'onnx_tensorrt' backend."""

    # these imports require onnx < 1.9 (onnx.optimizer was removed in 1.9)
    # and the onnx-tensorrt package
    import onnx
    import onnx.optimizer as optimizer
    import onnx_tensorrt.backend as backend
    from onnx.utils import polish_model

    model = onnx.load(onnx_model_filename)

    # run every available optimization pass except the graph-splitting ones
    passes = optimizer.get_available_passes()
    passes = list(filter(lambda name: not name.startswith('split_'), passes))
    logger.debug('optimizations to perform in ONNX:\n\t%s', passes)
    model = optimizer.optimize(model, passes=passes)
    model = polish_model(model)
    onnx.save(model,
              onnx_model_filename.rpartition('.onnx')[0] + '.optimized.onnx')

    # build a TensorRT engine and run it on the given inputs
    engine = backend.prepare(
        model,
        device='CUDA',
        max_batch_size=batch_size,
        max_workspace_size=workspace_size,
    )
    return engine.run(input_values)
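A minimal usage sketch for onnx2trt_infer; the model path and the 1x3x224x224 float32 input shape are hypothetical and must match the actual model:

import numpy as np

dummy_input = np.random.rand(1, 3, 224, 224).astype(np.float32)  # hypothetical shape
outputs = onnx2trt_infer('model.onnx', [dummy_input], batch_size=1)
print([output.shape for output in outputs])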
import os

import onnx
import onnx.optimizer as optimizer
import onnx.utils as utils


def onnx_to_pico_cnn(onnx_model, model_name):
    # Set the input batch size to 1; leave the output batch dim symbolic.
    onnx_model.graph.input[0].type.tensor_type.shape.dim[0].dim_value = 1
    onnx_model.graph.output[0].type.tensor_type.shape.dim[0].dim_param = "?"

    # Zero the dropout ratio so Dropout nodes become no-ops for inference.
    for op in onnx_model.graph.node:
        if op.op_type == "Dropout":
            op.attribute[0].f = 0.0

    onnx_model = onnx.shape_inference.infer_shapes(onnx_model)
    onnx.checker.check_model(onnx_model)

    print("Running model optimization")
    # TODO: There are more optimizations available
    optimized_model = optimizer.optimize(onnx_model, ["eliminate_nop_dropout"])
    optimized_model = utils.polish_model(optimized_model)

    try:
        os.makedirs("./polished_models")
        print("Created directory for polished models.")
    except FileExistsError:
        pass
    onnx.save(
        optimized_model,
        os.path.join("./polished_models",
                     "{}_polished.onnx".format(model_name)))

    # Backend is pico-cnn's ONNX backend, assumed to be imported by the
    # enclosing module.
    backend_model = Backend.prepare(optimized_model, model_name)

    return 0
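A minimal call sketch, assuming a single-input model saved as lenet.onnx (both the file name and the model name are hypothetical):

import onnx

model = onnx.load("lenet.onnx")  # hypothetical model file
onnx_to_pico_cnn(model, "lenet")
# writes ./polished_models/lenet_polished.onnx and prepares the pico-cnn backend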
import logging
import shutil


def convert(onnx_model_filename,
            save_dir,
            model_basename='model.py',
            model_func_name='inference',
            embed_params=False,
            onnx_opset_version=9,
            onnx_opset_pedantic=True,
            onnx_skip_version_conversion=False,
            debug=False,
            **kwargs):
    """
    Convert an ONNX model to Paddle fluid Python code and a desc pb.
    """

    import onnx
    from onnx.checker import ValidationError
    from onnx.checker import check_model
    from onnx.utils import polish_model
    from onnx.version_converter import convert_version

    from .onnx_utils import DEFAULT_OP_DOMAIN
    from .onnx_utils import graph_ops, graph_weights
    from .onnx_utils import inferred_model_value_info
    from .onnx_utils import optimize_model_skip_op_for_inference
    from .onnx_utils import optimize_model_strip_initializer
    from .onnx_utils import optimize_model_cast, optimize_model_slice
    from .writer import Program, Writer
    from .writer import make_var_name

    logger = logging.getLogger('convert')

    # prepare the ONNX model
    logger.info('loading model: %s ...', onnx_model_filename)
    onnx_model = onnx.load(onnx_model_filename)
    try:
        logger.info('checking model ...')
        check_model(onnx_model)
        if onnx_skip_version_conversion:  # WORKAROUND: RuntimeError: No Adapter For OP
            logger.debug('assumed opset version: %d', onnx_opset_version)
            logger.warning(
                'opset conversion skipped (onnx_skip_version_conversion is ON)')
        else:
            logger.debug('using opset version: %d', onnx_opset_version)
            onnx_model = convert_version(onnx_model, onnx_opset_version)
        onnx_model = polish_model(onnx_model)
    except ValidationError as e:
        if onnx_opset_pedantic:  # strict mode
            raise e
        # with onnx_opset_pedantic OFF, continue on a best-effort basis
        logger.warning('onnx_opset_pedantic is OFF, '
                       'the ONNX model sanity checking error is suppressed')
        logger.warning('value_info inference may be incomplete')

    # onnx model optimization
    logger.info('model has %d ops', len(onnx_model.graph.node))
    logger.info('optimizing model ...')
    onnx_model = optimize_model_skip_op_for_inference(onnx_model)
    onnx_model = optimize_model_strip_initializer(onnx_model)
    onnx_model = optimize_model_cast(onnx_model)
    onnx_model = optimize_model_slice(onnx_model)

    # prepare filesystem
    shutil.rmtree(save_dir, ignore_errors=True)
    shutil.os.makedirs(save_dir, exist_ok=True)
    logger.info('folder %s cleared', save_dir)

    # DEBUG: dump the optimized, shape-inferred model next to the input file
    if debug:
        model = onnx.shape_inference.infer_shapes(onnx_model)
        debug_model_filename, _ = shutil.os.path.splitext(onnx_model_filename)
        onnx.save(model, debug_model_filename + '.optimized_and_inferred.onnx')

    # I/O instances
    onnx_graph = onnx_model.graph
    fluid_program = Program()
    fluid_writer = Writer()

    # model components
    # graph_name = onnx_graph.name
    graph_inputs = [value.name for value in onnx_graph.input]
    graph_outputs = [value.name for value in onnx_graph.output]
    graph_params = []
    graph_value_infos = inferred_model_value_info(onnx_model)

    # prepare additional value_info for weights
    for name, weight in graph_weights(onnx_graph):
        value_info = graph_value_infos[name]
        value_info['embeded_as'] = []
        value_info['get_weight'] = (lambda w: lambda: w.tolist())(
            weight)  # lazy getter

    logger.info('conversion started')

    # op set conversion
    # topo = 'backward' if embed_params else 'forward'
    topo = 'forward'
    for name, domain, op_type, inputs, outputs, attrs in graph_ops(
            onnx_graph, topo=topo):
        logger.debug('translating op %s %s::%s ...', name, domain, op_type)
        if domain == DEFAULT_OP_DOMAIN:
            domain = ''
        try:
            fluid_writer.emit_op(
                fluid_program,
                name,
                domain,
                op_type,
                inputs,
                outputs,
                attrs,
                graph_value_infos,
                embed_params=embed_params,
            )
        except BaseException as e:
            logger.fatal('conversion failed for:\n\t%s -> %s::%s -> %s',
                         inputs, domain, op_type, outputs)
            raise e
    op_codes = fluid_program.codes
    fluid_program.codes = []
    logger.info('%d ops in, %d ops out', len(onnx_graph.node),
                len(fluid_program.op_descs))

    # weight writer
    for name, weight in graph_weights(onnx_graph):
        graph_params.append(name)
        value_info = graph_value_infos[name]
        var_names = value_info.get('embeded_as', [])
        if var_names:
            if len(var_names) > 1:
                logger.info(
                    'weight %s is shared between ops, more disk space will be consumed',
                    name)
            logger.debug('saving weight %s(%s[%d], %dB) as %s ...', name,
                         weight.dtype, weight.size, weight.nbytes, var_names)
            for var_name in var_names:  # multiple references
                fluid_writer.write_weight(
                    weight, shutil.os.path.join(save_dir, var_name))
        else:
            logger.debug('saving weight %s(%s[%d], %dB) to %s ...', name,
                         weight.dtype, weight.size, weight.nbytes,
                         make_var_name(name))
            fluid_writer.write_weight(
                weight, shutil.os.path.join(save_dir, make_var_name(name)))
        fluid_writer.emit_param(fluid_program, name, value_info)
    param_codes = fluid_program.codes
    fluid_program.codes = []
    logger.info('%d weights converted', len(graph_params))

    # input writer
    external_inputs = []
    for name in graph_inputs:
        if name not in graph_params:
            value_info = graph_value_infos[name]
            assert value_info['external']
            external_inputs.append(name)
    fluid_writer.emit_inputs(
        fluid_program,
        external_inputs,
        graph_value_infos,
        remove_batch=False)  # TODO:
    input_codes = fluid_program.codes
    fluid_program.codes = []
    logger.info('%d inputs converted', len(external_inputs))

    # output writer
    external_outputs = []
    for name in graph_outputs:
        if name not in graph_params:
            value_info = graph_value_infos[name]
            assert value_info['external']
            external_outputs.append(name)
    fluid_writer.emit_outputs(fluid_program, external_outputs)
    output_codes = [''] + fluid_program.codes  # add an empty line
    fluid_program.codes = []
    logger.info('%d outputs converted', len(external_outputs))

    # code generation
    header_codes = fluid_writer.header_code(
        model_func_name, 'From: {}'.format(onnx_model_filename))
    code_filename = shutil.os.path.join(save_dir, model_basename)
    fluid_writer.write_code_file(code_filename, header_codes, input_codes,
                                 param_codes, op_codes, output_codes)
    logger.info('code saved to %s, factory function: %s', code_filename,
                model_func_name)

    # desc generation
    desc_filename = shutil.os.path.join(save_dir, '__model__')
    fluid_writer.write_desc_file(
        desc_filename,
        op_descs=fluid_program.op_descs,
        var_descs=fluid_program.var_descs,
    )
    logger.info('program saved to %s', desc_filename)

    logger.info('conversion finished')
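A minimal driver sketch for convert(), assuming it lives in an onnx2fluid-style package (with onnx_utils and writer as sibling modules); the input model and output directory are hypothetical:

import logging

logging.basicConfig(level=logging.INFO)

convert(
    'model.onnx',  # hypothetical input model
    'fluid_out/',  # output directory, cleared and recreated
    model_func_name='inference',
    onnx_opset_version=9,
)
# fluid_out/ then contains model.py (generated code) and __model__ (the desc pb)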
import logging

import onnx
from onnx.checker import check_model
from onnx.utils import polish_model
from onnx.version_converter import convert_version

# NOTE: the original import list was truncated here; these names are
# reconstructed from the calls below and are assumed to come from the
# onnx_utils helper module used by convert() above
from onnx_utils import (
    graph_ops,
    inferred_model_value_info,
    optimize_model_cast,
    optimize_model_skip_op_for_inference,
    optimize_model_slice,
    optimize_model_strip_initializer,
    print_pb_structure,
)

logger = logging.getLogger(__name__)

model = onnx.load('../examples/t1.onnx')
print_pb_structure(model, loop_iterative=False)

check_model(model)
model = convert_version(model, 9)
model = optimize_model_skip_op_for_inference(model)
model = optimize_model_strip_initializer(model)
model = optimize_model_cast(model)
model = optimize_model_slice(model)
model = polish_model(model)
onnx.save(model, '/tmp/optimized.onnx')

graph = model.graph
value_info = inferred_model_value_info(model)

name = graph.name
inputs = [value.name for value in graph.input]
outputs = [value.name for value in graph.output]
weights = []

logger.info('ops:')
for name, domain, op_type, _, _, attrs in graph_ops(graph, topo='forward'):
    logger.info('%s %s::%s: %s', name, domain, op_type, attrs)
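All of these snippets lean on onnx.utils.polish_model, which only exists in older onnx releases (it was removed along with onnx.optimizer in onnx 1.9). For those versions it behaves roughly like the sketch below: validate, strip doc strings, infer shapes, run the default optimizer passes, and validate again. This is a reconstruction from the old API, not the verbatim implementation.

import onnx
import onnx.helper
import onnx.optimizer


def polish_model_sketch(model):
    # rough equivalent of onnx.utils.polish_model from onnx < 1.9 (assumption)
    onnx.checker.check_model(model)
    onnx.helper.strip_doc_string(model)  # drops doc_string fields in place
    model = onnx.shape_inference.infer_shapes(model)
    model = onnx.optimizer.optimize(model)  # default pass list
    onnx.checker.check_model(model)
    return model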