def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version,
                              include_initializers):
    device_option = get_device_option(Device(device))

    # Run the ONNX optimizer twice to split the model into an init graph
    # (weights) and a predict graph (computation).
    init_model = ModelProto()
    init_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), init=True))
    cls._inplace_rewrite(init_model.graph)

    predict_model = ModelProto()
    predict_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))
    cls._inplace_rewrite(predict_model.graph)

    init_net = caffe2_pb2.NetDef()
    predict_net = caffe2_pb2.NetDef()

    init_net.name = onnx_model.graph.name + '_init'
    predict_net.name = onnx_model.graph.name + '_predict'

    if include_initializers:
        init_net.op.extend(cls._create_tensor_filling_op(tp)
                           for tp in onnx_model.graph.initializer)

    # Seed the name generator so generated blob names cannot collide.
    dummy_name(cls._all_names_in_graph(init_model.graph) |
               cls._all_names_in_graph(predict_model.graph))

    for net, model in ((init_net, init_model), (predict_net, predict_model)):
        net.device_option.CopyFrom(device_option)
        for node in model.graph.node:
            net.op.extend(cls._onnx_node_to_caffe2_op(node, opset_version))
        net.external_output.extend(
            value_info.name for value_info in model.graph.output)
        net.external_input.extend(
            value_info.name for value_info in model.graph.input)

    return init_net, predict_net
def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version,
                              include_initializers):
    device_option = get_device_option(Device(device))

    init_model = ModelProto()
    init_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), init=True))

    pred_model = ModelProto()
    pred_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))

    init_net = caffe2_pb2.NetDef()
    pred_net = caffe2_pb2.NetDef()

    init_net.name = onnx_model.graph.name + '_init'
    pred_net.name = onnx_model.graph.name + '_predict'

    if include_initializers:
        init_net.op.extend(cls._create_tensor_filling_op(tp)
                           for tp in onnx_model.graph.initializer)

    dummy_name(cls._all_names_in_graph(init_model.graph) |
               cls._all_names_in_graph(pred_model.graph))

    # Convert node by node, collecting failures instead of aborting on the
    # first unsupported op.
    success = True
    for net, model in ((init_net, init_model), (pred_net, pred_model)):
        net.device_option.CopyFrom(device_option)
        for node in model.graph.node:
            try:
                c2ops = cls._onnx_node_to_caffe2_op(
                    init_model, pred_model, node, opset_version)
            except Exception as e:
                success = False
                print('ONNX FATAL:', e)
                continue
            (init_net if include_initializers else net).op.extend(
                c2ops.init_ops)
            net.op.extend(c2ops.ops)
            net.external_input.extend(c2ops.interface_blobs)
        net.external_output.extend(
            value_info.name for value_info in model.graph.output)
        net.external_input.extend(
            value_info.name for value_info in model.graph.input)

    if not success:
        raise RuntimeError('ONNX conversion failed')

    return init_net, pred_net
def _load_onnx(self, path):
    _logger.info("loading the ONNX model from: {}".format(path))
    try:
        start = time.time()
        if isinstance(path, onnx.ModelProto):
            onnx_model = path
        else:
            onnx_model = ModelProto()
            with open(path, 'rb') as f:
                content = f.read()
                onnx_model.ParseFromString(content)
        end = time.time()
        seconds = end - start
        _logger.info("Loaded ONNX model in {:.3f} seconds.".format(seconds))
        # Check that the IR is well formed
        # onnx.checker.check_model(onnx_model)
        # onnx IR version
        _logger.info("ONNX IR_version {}".format(onnx_model.ir_version))
    except Exception as ex:
        _logger.error("Error occurred when loading onnx model file: " +
                      str(ex))
        raise ex
    _logger.info("ONNX Graph producer: {} version {}".format(
        onnx_model.producer_name, onnx_model.producer_version))
    _logger.info("ONNX Graph input count: {}".format(
        len(onnx_model.graph.input)))
    return onnx_model.graph
def onnx_to_caffe2(onnx_model, output, init_net_output):
    onnx_model_proto = ModelProto()
    onnx_model_proto.ParseFromString(onnx_model.read())

    init_net, predict_net = c2.onnx_graph_to_caffe2_net(onnx_model_proto)
    init_net_output.write(init_net.SerializeToString())
    output.write(predict_net.SerializeToString())
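
A minimal usage sketch for onnx_to_caffe2 above; the file names are placeholders, and the function expects already-opened binary file objects.

# Hypothetical file names; onnx_to_caffe2 takes open binary file objects.
with open("model.onnx", "rb") as f_in, \
        open("predict_net.pb", "wb") as f_pred, \
        open("init_net.pb", "wb") as f_init:
    onnx_to_caffe2(f_in, f_pred, f_init)
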
def optimize_model(input, model_type, gpu_only, num_heads, hidden_size,
                   sequence_length, input_int32, float16):
    (optimizer_class, producer, run_onnxruntime) = MODEL_CLASSES[model_type]

    input_model_path = input
    if run_onnxruntime:
        input_model_path = optimize_by_onnxruntime(input_model_path, gpu_only)
        logger.info(
            "Use OnnxRuntime to optimize and save the optimized model to {}".
            format(input_model_path))

    model = ModelProto()
    with open(input_model_path, "rb") as f:
        model.ParseFromString(f.read())

    if model.producer_name and producer != model.producer_name:
        logger.warning(
            f"Model producer not matched: Expect {producer}, "
            f"Got {model.producer_name} {model.producer_version}. "
            "Please specify correct --model_type parameter.")

    bert_model = optimizer_class(model, num_heads, hidden_size,
                                 sequence_length, input_int32, float16,
                                 gpu_only)
    bert_model.optimize()

    return bert_model
def test_pytorch_model_0_gpu_onnxruntime(self):
    if 'CUDAExecutionProvider' not in onnxruntime.get_available_providers():
        print("skip test_pytorch_model_0_gpu_onnxruntime since no gpu found")
        return

    input = _get_test_model_path('bert_pytorch_0')
    output = 'temp.onnx'
    optimize_by_onnxruntime(input, use_gpu=True, optimized_model_path=output)

    model = ModelProto()
    with open(output, "rb") as f:
        model.ParseFromString(f.read())
    os.remove(output)

    bert_model = OnnxModel(model)
    expected_node_count = {
        'EmbedLayerNormalization': 1,
        'Attention': 12,
        'SkipLayerNormalization': 24,
        'Gelu': 0,
        'FastGelu': 12,
        'BiasGelu': 0
    }
    self.verify_node_count(bert_model, expected_node_count,
                           'test_pytorch_model_0_gpu_onnxruntime')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    bert_model = TinyBertOnnxModel(model, False)

    if args.float16:
        bert_model.convert_model_float32_to_float16()

    bert_model.update_graph()
    bert_model.remove_unused_constant()

    print("opset version", bert_model.model.opset_import[0].version)

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())

    p = Path(args.output)
    data_path = p.parent
    batch_size = 1
    sequence_length = SEQ_LEN
    generate_test_data(args.output, data_path, batch_size, sequence_length,
                       use_cpu=not args.float16)
def get_bert_inputs(
    onnx_file: str,
    input_ids_name: Optional[str] = None,
    segment_ids_name: Optional[str] = None,
    input_mask_name: Optional[str] = None,
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]:
    """Find graph inputs for BERT model.

    First, we will deduce inputs from the EmbedLayerNormalization node.
    If not found, we will guess the meaning of graph inputs based on naming.

    Args:
        onnx_file (str): onnx model path
        input_ids_name (str, optional): Name of graph input for input IDs. Defaults to None.
        segment_ids_name (str, optional): Name of graph input for segment IDs. Defaults to None.
        input_mask_name (str, optional): Name of graph input for attention mask. Defaults to None.

    Returns:
        Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]:
            input tensors of input_ids, segment_ids and input_mask
    """
    model = ModelProto()
    with open(onnx_file, "rb") as file:
        model.ParseFromString(file.read())

    onnx_model = OnnxModel(model)
    return find_bert_inputs(onnx_model, input_ids_name, segment_ids_name,
                            input_mask_name)
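
A hedged call sketch for get_bert_inputs; "bert.onnx" is a placeholder path, and when the name arguments are left as None the function deduces the inputs itself. Any of the three results may be None if the corresponding input cannot be identified.

# "bert.onnx" is a hypothetical path; names are auto-deduced when None.
input_ids, segment_ids, input_mask = get_bert_inputs("bert.onnx")
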
def main():  # type: () -> None
    parser = argparse.ArgumentParser(description="ONNX net drawer")
    parser.add_argument(
        "--input",
        type=Text,
        required=True,
        help="The input protobuf file.",
    )
    parser.add_argument(
        "--output",
        type=Text,
        required=True,
        help="The output dot file.",
    )
    parser.add_argument(
        "--rankdir",
        type=Text,
        default='LR',
        help="The rank direction of the pydot graph.",
    )
    parser.add_argument(
        "--embed_docstring",
        action="store_true",
        help="Embed docstring as javascript alert. Useful for SVG format.",
    )
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, 'rb') as fid:
        content = fid.read()
        model.ParseFromString(content)

    pydot_graph = GetPydotGraph(
        model.graph,
        name=model.graph.name,
        rankdir=args.rankdir,
        node_producer=GetOpNodeProducer(
            embed_docstring=args.embed_docstring,
            **OP_STYLE
        ),
    )
    pydot_graph.write_dot(args.output)
def optimize_model(input, model_type, num_heads, hidden_size, opt_level=99,
                   optimization_options=None):
    (optimizer_class, producer, run_onnxruntime) = MODEL_CLASSES[model_type]

    input_model_path = input
    if run_onnxruntime and opt_level > 0:
        input_model_path = optimize_by_onnxruntime(input_model_path,
                                                   use_gpu=False,
                                                   opt_level=opt_level)
        logger.info(
            "Use OnnxRuntime to optimize and save the optimized model to {}".
            format(input_model_path))

    model = ModelProto()
    with open(input_model_path, "rb") as f:
        model.ParseFromString(f.read())

    if model.producer_name and producer != model.producer_name:
        logger.warning(
            f"Model producer not matched: Expect {producer}, "
            f"Got {model.producer_name} {model.producer_version}. "
            "Please specify correct --model_type parameter.")

    if optimization_options is None:
        optimization_options = BertOptimizationOptions(model_type)

    bert_model = optimizer_class(model, num_heads, hidden_size)
    bert_model.optimize(optimization_options)

    return bert_model
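
A hedged usage sketch for optimize_model above; the path and the 'bert' model type are illustrative, and the result is written out the same way other snippets in this section do.

# Illustrative arguments; 'bert' must be a key of MODEL_CLASSES.
optimized = optimize_model("bert.onnx", "bert", num_heads=12, hidden_size=768)
with open("bert_optimized.onnx", "wb") as out:
    out.write(optimized.model.SerializeToString())
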
def test_caffe2_to_onnx(self):
    caffe2_net = tempfile.NamedTemporaryFile()
    caffe2_init_net = tempfile.NamedTemporaryFile()
    output = tempfile.NamedTemporaryFile()

    model = ModelHelper(name='caffe2-to-onnx-test')
    brew.relu(model, ["X"], "Y")
    caffe2_net.write(model.net.Proto().SerializeToString())
    caffe2_net.flush()

    init_model = ModelHelper(name='caffe2-to-onnx-init-test')
    init_model.net.GivenTensorFill(
        [], 'X', shape=[2, 2],
        values=np.zeros((2, 2)).flatten().astype(float))
    caffe2_init_net.write(init_model.net.Proto().SerializeToString())
    caffe2_init_net.flush()

    self._run_command(
        caffe2_to_onnx,
        [
            caffe2_net.name,
            '--caffe2-init-net', caffe2_init_net.name,
            '--output', output.name,
        ],
        catch_exceptions=False,
    )

    onnx_model = ModelProto()
    onnx_model.ParseFromString(output.read())
    self.assertEqual(len(onnx_model.graph.node), 1)
    self.assertEqual(onnx_model.graph.node[0].op_type, 'Relu')
    self.assertEqual(len(onnx_model.graph.initializer), 1)
    self.assertEqual(onnx_model.graph.initializer[0].name,
                     onnx_model.graph.input[0].name)
def test_caffe2_to_onnx_value_info(self):
    caffe2_net = tempfile.NamedTemporaryFile()
    output = tempfile.NamedTemporaryFile()

    model = ModelHelper(name='caffe2-to-onnx-test')
    brew.relu(model, ["X"], "Y")
    caffe2_net.write(model.net.Proto().SerializeToString())
    caffe2_net.flush()

    args = [caffe2_net.name, '--output', output.name]
    self.assertRaisesRegexp(Exception, 'value info', self._run_command,
                            caffe2_to_onnx, args)

    args.extend([
        '--value-info',
        json.dumps({'X': (TensorProto.FLOAT, (2, 2))}),
    ])
    result = self._run_command(caffe2_to_onnx, args)

    onnx_model = ModelProto()
    onnx_model.ParseFromString(output.read())
    self.assertEqual(len(onnx_model.graph.node), 1)
    self.assertEqual(onnx_model.graph.node[0].op_type, 'Relu')
    self.assertEqual(len(onnx_model.graph.initializer), 0)
def parse_onnx(fname):
    with open(fname, "rb") as f:
        data = f.read()
    model = ModelProto()
    model.ParseFromString(data)

    outputs = {}
    g = Graph()

    # Initializers become constant nodes.
    for node in model.graph.initializer:
        n = Node()
        n.op_type = "Const"
        n.name = node.name
        g.add(n)
        outputs[n.name] = n

    # Wire each node to the producers of its inputs.
    for node in model.graph.node:
        n = Node(node)
        for name in node.output:
            outputs[name] = n
        for name in node.input:
            o = outputs.get(name)
            if o:
                n.add_input(o)
        g.add(n)

    # Mark the nodes that produce graph outputs.
    for node in model.graph.output:
        o = outputs.get(node.name)
        if o:
            g.add_output(o)
    return g
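
A one-line usage sketch with a placeholder path; Graph and Node are assumed to be defined in the enclosing module.

g = parse_onnx("model.onnx")  # hypothetical file name
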
def run_node(cls, node, inputs, device='CPU', outputs_info=None):
    inputs_info = [(x.dtype, x.shape) for x in inputs]
    input_value_infos = [
        helper.make_tensor_value_info(x, NP_TYPE_TO_TENSOR_TYPE[t], shape)
        for x, (t, shape) in zip(node.input, inputs_info)
    ]
    if outputs_info:
        # Guard the output value infos as well: zipping against a None
        # outputs_info would raise a TypeError.
        output_value_infos = [
            helper.make_tensor_value_info(x, NP_TYPE_TO_TENSOR_TYPE[t], shape)
            for x, (t, shape) in zip(node.output, outputs_info)
        ]
        graph = helper.make_graph([node], "test", input_value_infos, [])
        orig_model = helper.make_model(graph, producer_name='onnx-test')
        orig_model_str = orig_model.SerializeToString()
        inferred_model_str = onnx.shape_inference.infer_shapes(orig_model_str)
        inferred_model = ModelProto()
        inferred_model.ParseFromString(inferred_model_str)
        # Allow shape inference to not return anything, but if it
        # does then check that it's correct
        if inferred_model.graph.value_info:
            assert (list(inferred_model.graph.value_info) ==
                    output_value_infos)
    raise BackendIsNotSupposedToImplementIt(
        "This is the dummy backend test that doesn't verify the results "
        "but does run the shape inference")
def main(): args = get_args() with open(args.input, "rb") as f: data = f.read() model = ModelProto() model.ParseFromString(data) if args.check: onnx.checker.check_model(model) if args.stats: ops = collections.Counter() for node in model.graph.node: ops[node.op_type] += 1 print(ops, "\n\n") if args.meta: fields = [ "ir_version", "producer_name", "producer_version", "name", "opset_import" ] for name in fields: value = getattr(model, name, None) if value: print("{} = {}".format(name, value)) for i in model.metadata_props: print("meta.{} = {}", i.key, i.value) print(helper.printable_graph(model.graph)) if args.pbtxt: with open(args.pbtxt, "w") as f: f.write(str(model.graph))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    parser.add_argument('--output_optimized_model', required=False,
                        action='store_true')
    parser.set_defaults(output_optimized_model=False)
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    bert_model = TinyGpt2Model(model)
    bert_model.update_graph()
    bert_model.remove_unused_constant()

    print("opset version", bert_model.model.opset_import[0].version)

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())

    p = Path(args.output)
    data_path = p.parent
    generate_test_data(args.output, data_path, batch_size=1, use_cpu=True,
                       output_optimized_model=args.output_optimized_model)
def main():
    args = parse_arguments()
    setup_logging(args.verbose)

    output_names = (None if args.output_names is None
                    else args.output_names.split(";"))

    model = ModelProto()
    with open(args.input, "rb") as input_file:
        model.ParseFromString(input_file.read())
    onnx_model = OnnxModel(model)

    optimizer = BertOnnxModelShapeOptimizer(onnx_model)
    optimizer.optimize(
        args.output,
        args.input_ids,
        args.segment_ids,
        args.input_mask,
        args.enable_shape_opt,
        args.enable_reshape_opt,
        output_names,
        args.batch_size,
        args.sequence_length,
        args.verbose,
    )
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    parser.add_argument(
        '--framework', required=True, type=str,
        help="Original framework. Only support TensorFlow and PyTorch")

    # model parameters
    parser.add_argument('--num_heads', required=False, type=int, default=12,
                        help="number of attention heads")
    parser.add_argument('--hidden_size', required=False, type=int,
                        default=768)
    parser.add_argument('--sequence_length', required=False, type=int,
                        default=128)

    # Use int32 (instead of int64) tensor as input to avoid unnecessary data
    # type cast.
    parser.add_argument('--input_int32', required=False, action='store_true')
    parser.set_defaults(input_int32=False)

    # For NVidia GPU with Tensor Core like V100 and T4, half-precision float
    # brings better performance.
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)

    parser.add_argument('--gpu_only', required=False, action='store_true')
    parser.set_defaults(gpu_only=False)

    parser.add_argument('--verbose', required=False, action='store_true')
    parser.set_defaults(verbose=False)

    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    if args.framework.lower() == 'tensorflow':
        bert_model = BertOnnxModelTF(model, args.num_heads, args.hidden_size,
                                     args.sequence_length, args.input_int32,
                                     args.float16, args.gpu_only,
                                     args.verbose)
    elif args.framework.lower() == 'pytorch':
        bert_model = BertOnnxModel(model, args.num_heads, args.hidden_size,
                                   args.sequence_length, args.input_int32,
                                   args.float16, args.gpu_only, args.verbose)
    else:
        # Bail out early; otherwise bert_model would be undefined below.
        print("Unsupported framework: " + args.framework)
        return

    bert_model.optimize()

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())
def _optimized(self, graph, opts):
    orig_model = helper.make_model(graph, producer_name='onnx-test')
    orig_model_str = orig_model.SerializeToString()
    optimized_model_str = onnx.optimizer.optimize(orig_model_str, opts)
    optimized_model = ModelProto()
    optimized_model.ParseFromString(optimized_model_str)
    checker.check_model(optimized_model)
    return optimized_model
def _optimized(self, graph):
    orig_model = helper.make_model(graph,
                                   producer_name='onnx-to-caffe2-test')
    orig_model_str = orig_model.SerializeToString()
    optimized_model_str = c2.Caffe2Backend.optimize_onnx(orig_model_str)
    optimized_model = ModelProto()
    optimized_model.ParseFromString(optimized_model_str)
    return optimized_model
def create_caffe2_predictor(onnx_file_path):
    with open(onnx_file_path, 'rb') as onnx_model:
        onnx_model_proto = ModelProto()
        onnx_model_proto.ParseFromString(onnx_model.read())

    init_net, predict_net = c2.onnx_graph_to_caffe2_net(onnx_model_proto)
    predictor = workspace.Predictor(init_net, predict_net)
    return predictor
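
A minimal usage sketch, assuming a hypothetical "model.onnx" with a single image-like input; Caffe2's Predictor.run accepts a list of input arrays in graph-input order.

import numpy as np

# "model.onnx" and the input shape are placeholders.
predictor = create_caffe2_predictor("model.onnx")
x = np.random.rand(1, 3, 224, 224).astype(np.float32)
outputs = predictor.run([x])
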
def main(): args = get_args() with open(args.input, "rb") as f: data = f.read() model = ModelProto() model.ParseFromString(data) if args.stats: ops = collections.Counter() for node in model.graph.node: ops[node.op_type] += 1 print(ops, "\n\n") if args.meta: fields = [ "ir_version", "producer_name", "producer_version", "name", "opset_import" ] for name in fields: value = getattr(model, name, None) if value: print("{} = {}".format(name, value)) for i in model.metadata_props: print("meta.{} = {}", i.key, i.value) print(helper.printable_graph(model.graph)) if args.check: onnx.checker.check_model(model) inferred_model = shape_inference.infer_shapes(model) onnx.checker.check_model(inferred_model) if args.pbtxt: with open(args.pbtxt, "w") as f: f.write(str(model.graph)) if args.dot: with open(args.dot, "w") as f: f.write("digraph graphname {\n") for node in model.graph.node: output_name = node.name name = node.name color = "" if node.op_type.startswith("_"): color = ' color="yellow"' if node.op_type == "CELL": color = ' color="red"' f.write('"{}" [label="{},{}"{}];\n'.format( output_name, node.op_type, name, color)) for input_name in node.input: parts = input_name.split(":") input_name = re.sub(r"^\^", "", parts[0]) f.write(' "{}" -> "{}";\n'.format(input_name, output_name)) f.write("}\n")
def main():
    parser = argparse.ArgumentParser(description="ONNX net drawer")
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="The input protobuf file.",
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="The output dot file.",
    )
    parser.add_argument(
        "--rankdir",
        type=str,
        default='LR',
        help="The rank direction of the pydot graph.",
    )
    parser.add_argument(
        "--embed_docstring",
        action="store_true",
        help="Embed docstring as javascript alert. Useful for SVG format.",
    )
    parser.add_argument(
        "--marked",
        type=int,
        default=0,
        help="0: original, 1: marked",
    )
    parser.add_argument(
        "--marked_list",
        type=str,
        default="",
        help="if 2_3_4, means node 2,3,4 will be marked",
    )
    args = parser.parse_args()

    if args.marked:
        marked_list = [int(e) for e in args.marked_list.split('_')]
    else:
        marked_list = []

    model = ModelProto()
    with open(args.input, 'rb') as fid:
        content = fid.read()
        model.ParseFromString(content)

    pydot_graph = GetPydotGraph(
        model.graph,
        name=model.graph.name,
        rankdir=args.rankdir,
        node_producer=GetOpNodeProducer(
            embed_docstring=args.embed_docstring,
            # **OP_STYLE
        ),
        marked_list=marked_list,
    )
    pydot_graph.write_dot(args.output)
def model_proto_from_zip(zip_path, external_tensor_storage):
    model_proto = ModelProto()
    with zipfile.ZipFile(zip_path, 'r') as z:
        for n in z.namelist():
            f = z.open(n)
            if n.endswith(".onnx"):
                model_proto.ParseFromString(f.read())
            else:
                external_tensor_storage.name_to_tensor_data[n] = f.read()
    return model_proto
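
A minimal usage sketch; the real caller would pass an external tensor storage object from its framework, but the function only touches a name_to_tensor_data dict, so a hypothetical stand-in suffices here.

# _Storage and "model.zip" are illustrative stand-ins.
class _Storage:
    def __init__(self):
        self.name_to_tensor_data = {}

storage = _Storage()
model = model_proto_from_zip("model.zip", storage)
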
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)

    # model parameters
    parser.add_argument('--num_heads', required=False, type=int, default=12,
                        help="number of attention heads")
    parser.add_argument('--hidden_size', required=False, type=int,
                        default=768)
    parser.add_argument('--sequence_length', required=False, type=int,
                        default=128)

    # Use int32 (instead of int64) tensor as input to avoid unnecessary
    # data type cast.
    parser.add_argument('--input_int32', required=False, action='store_true')
    parser.set_defaults(input_int32=False)

    # For NVidia GPU with Tensor Core like V100 and T4, half-precision float
    # brings better performance.
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)

    parser.add_argument('--verbose', required=False, action='store_true')
    parser.set_defaults(verbose=False)

    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    bert_model = BertOnnxModel(model, args.num_heads, args.hidden_size,
                               args.sequence_length)

    bert_model.fuse_layer_norm()
    bert_model.fuse_gelu()
    bert_model.fuse_reshape()
    bert_model.fuse_attention(args.verbose)
    bert_model.fuse_embed_layer(args.verbose)

    if bert_model.embed_node is None:
        print("Failed to fuse embedding layer.")
        return

    if args.input_int32:
        bert_model.change_input_to_int32()
    else:
        bert_model.cast_input_to_int32()

    if args.float16:
        bert_model.convert_model_float32_to_float16()

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())
def test_version_exists(self):
    model = ModelProto()
    # When we create it, graph should not have a version string.
    self.assertFalse(model.HasField('ir_version'))
    # We should touch the version so it is annotated with the current
    # ir version of the running ONNX.
    model.ir_version = IR_VERSION
    model_string = model.SerializeToString()
    model.ParseFromString(model_string)
    self.assertTrue(model.HasField('ir_version'))
    # Check if the version is correct.
    self.assertEqual(model.ir_version, IR_VERSION)
def build_engine_from_onnx(onnx_path, engine_name, batch_size, TRT_LOGGER):
    model = ModelProto()
    with open(onnx_path, "rb") as f:
        model.ParseFromString(f.read())

    # Read the (C, H, W) input shape from the first graph input.
    d0 = model.graph.input[0].type.tensor_type.shape.dim[1].dim_value
    d1 = model.graph.input[0].type.tensor_type.shape.dim[2].dim_value
    d2 = model.graph.input[0].type.tensor_type.shape.dim[3].dim_value
    shape = [batch_size, d0, d1, d2]

    engine = eng.build_engine(TRT_LOGGER, onnx_path, shape=shape)
    eng.save_engine(engine, engine_name)
    return engine
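
A hedged usage sketch; the logger construction is standard TensorRT, while the file names are placeholders and eng is the helper module used above.

import tensorrt as trt

# "model.onnx" and "model.plan" are hypothetical paths.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
engine = build_engine_from_onnx("model.onnx", "model.plan",
                                batch_size=1, TRT_LOGGER=TRT_LOGGER)
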
def run(self, onnx_model):
    model = ModelProto()
    content = onnx_model
    model.ParseFromString(content)
    pydot_graph = self.GetPydotGraph(
        model.graph,
        name=model.graph.name,
        rankdir='TD',
        node_producer=self.GetOpNodeProducer(**OP_STYLE),
    )
    return pydot_graph.create(format='png')
def main(args):
    engine_name = args.plan_file
    onnx_path = args.onnx_file
    batch_size = 1

    model = ModelProto()
    with open(onnx_path, "rb") as f:
        model.ParseFromString(f.read())

    d0 = model.graph.input[0].type.tensor_type.shape.dim[1].dim_value
    d1 = model.graph.input[0].type.tensor_type.shape.dim[2].dim_value
    d2 = model.graph.input[0].type.tensor_type.shape.dim[3].dim_value
    shape = [batch_size, d0, d1, d2]

    engine = eng.build_engine(onnx_path, shape=shape)
    eng.save_engine(engine, engine_name)
def main():
    args = parse_arguments()
    setup_logging(args.verbose)

    exclude_names = (set() if args.exclude is None
                     else set(args.exclude.split(';')))

    model = ModelProto()
    with open(args.input, "rb") as input_file:
        model.ParseFromString(input_file.read())

    convert_initializers(model, exclude_names, args.sparsity_threshold,
                         args.tolerance)

    with open(args.output, "wb") as output_file:
        s = model.SerializeToString()
        output_file.write(s)