def optimize_model(input, model_type, gpu_only, num_heads, hidden_size, sequence_length, input_int32, float16):
    """Load an ONNX model, optionally let OnnxRuntime pre-optimize it, then
    run the model-type-specific optimizer and return the optimizer instance.

    `input` is the path of the ONNX model file; the remaining parameters are
    forwarded to the optimizer class selected via MODEL_CLASSES[model_type].
    """
    optimizer_class, producer, run_onnxruntime = MODEL_CLASSES[model_type]

    model_path = input
    if run_onnxruntime:
        # OnnxRuntime saves its optimized graph to a new file; continue from it.
        model_path = optimize_by_onnxruntime(model_path, gpu_only)
        logger.info(
            "Use OnnxRuntime to optimize and save the optimized model to {}".
            format(model_path))

    model = ModelProto()
    with open(model_path, "rb") as model_file:
        model.ParseFromString(model_file.read())

    # Warn when the model was produced by a different exporter than the one
    # the chosen --model_type expects.
    if model.producer_name and producer != model.producer_name:
        logger.warning(
            f"Model producer not matched: Expect {producer}, Got {model.producer_name} {model.producer_version}. Please specify correct --model_type parameter."
        )

    bert_model = optimizer_class(model, num_heads, hidden_size,
                                 sequence_length, input_int32, float16,
                                 gpu_only)
    bert_model.optimize()
    return bert_model
def main():
    """CLI entry point: run TinyBertOnnxModel over an input model, optionally
    convert it to float16, save it, and generate test data next to the output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    bert_model = TinyBertOnnxModel(model, False)

    if args.float16:
        bert_model.convert_model_float32_to_float16()

    bert_model.update_graph()
    bert_model.remove_unused_constant()

    # Bug fix: message said "opset verion".
    print("opset version", bert_model.model.opset_import[0].version)

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())

    p = Path(args.output)
    data_path = p.parent
    batch_size = 1
    sequence_length = SEQ_LEN

    # NOTE(review): use_cpu is tied to float16 — presumably float16 models
    # target GPU execution; confirm against generate_test_data.
    generate_test_data(args.output,
                       data_path,
                       batch_size,
                       sequence_length,
                       use_cpu=not args.float16)
def main():
    """CLI entry point: run TinyGpt2Model over an input model, save the
    result, and generate test data next to the output file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    parser.add_argument('--output_optimized_model',
                        required=False,
                        action='store_true')
    parser.set_defaults(output_optimized_model=False)
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    bert_model = TinyGpt2Model(model)
    bert_model.update_graph()
    bert_model.remove_unused_constant()

    # Bug fix: message said "opset verion".
    print("opset version", bert_model.model.opset_import[0].version)

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())

    p = Path(args.output)
    data_path = p.parent
    generate_test_data(args.output,
                       data_path,
                       batch_size=1,
                       use_cpu=True,
                       output_optimized_model=args.output_optimized_model)
def expand_out_dim(
    model: ModelProto,
    dim_idx: int,
    inplace: bool = False,
) -> ModelProto:
    """Inserts an extra dimension with extent 1 to each output in the graph.

    Inserts an Unsqueeze node for each output. It can be used as a utility
    before merging graphs, for example when the second one expects a batch
    dimension.

    Arguments:
        model (ModelProto): Model
        dim_idx (int): Index of the dimension to be inserted.
                       A negative value means counting dimensions from the back.
        inplace (bool): If True, mutates the model directly.
                        Otherwise, a copy will be created
    """
    # isinstance instead of an exact type() comparison, so ModelProto
    # subclasses are accepted as well (backward compatible: everything that
    # passed before still passes).
    if not isinstance(model, ModelProto):
        raise ValueError("model argument is not an ONNX model")

    if not inplace:
        m = ModelProto()
        m.CopyFrom(model)
        model = m

    expand_out_dim_graph(
        model.graph,
        dim_idx,
        inplace=True  # No need to create a copy, since it's a new model
    )
    return model
def test_caffe2_to_onnx_value_info(self):
    """caffe2_to_onnx must fail without value info for the external input X
    and succeed once --value-info is supplied."""
    caffe2_net = tempfile.NamedTemporaryFile()
    output = tempfile.NamedTemporaryFile()

    model = ModelHelper(name='caffe2-to-onnx-test')
    brew.relu(model, ["X"], "Y")
    caffe2_net.write(model.net.Proto().SerializeToString())
    caffe2_net.flush()

    args = [caffe2_net.name, '--output', output.name]
    # Bug fix: assertRaisesRegexp is a deprecated alias removed in
    # Python 3.12; use assertRaisesRegex.
    self.assertRaisesRegex(Exception, 'value info', self._run_command,
                           caffe2_to_onnx, args)

    args.extend([
        '--value-info',
        json.dumps({
            'X': (TensorProto.FLOAT, (2, 2)),
        })
    ])
    # Removed unused `result` binding.
    self._run_command(caffe2_to_onnx, args)

    onnx_model = ModelProto()
    onnx_model.ParseFromString(output.read())
    self.assertEqual(len(onnx_model.graph.node), 1)
    self.assertEqual(onnx_model.graph.node[0].op_type, 'Relu')
    self.assertEqual(len(onnx_model.graph.initializer), 0)
def _load_onnx(self, path):
    """Load an ONNX model and return its graph.

    `path` may be either a file path or an already-parsed onnx.ModelProto.
    Raises whatever exception occurs during loading, after logging it.
    """
    try:
        start = time.time()
        if isinstance(path, onnx.ModelProto):
            # Bug fix: the original logged "..." + path before this check,
            # which raised TypeError (str + ModelProto) for in-memory models.
            _logger.info("loading the ONNX model from an in-memory ModelProto")
            onnx_model = path
        else:
            _logger.info("loading the ONNX model from: " + path)
            onnx_model = ModelProto()
            with open(path, 'rb') as f:
                content = f.read()
            onnx_model.ParseFromString(content)
        end = time.time()
        seconds = end - start
        _logger.info(
            "Loaded ONNX model in {:.3f} seconds.".format(seconds))
        # Check that the IR is well formed
        # onnx.checker.check_model(onnx_model)
        # onnx IR version
        _logger.info("ONNX IR_version {}".format(onnx_model.ir_version))
    except Exception as ex:
        # Log and re-raise: caller decides how to handle the failure.
        _logger.error("Error occurred when loading onnx model file: " +
                      str(ex))
        raise ex
    _logger.info("ONNX Graph producer: {} version {}".format(
        onnx_model.producer_name, onnx_model.producer_version))
    # NOTE(review): message says "total len" but this is the input count.
    _logger.info("ONNX Graph total len: {}".format(
        len(onnx_model.graph.input)))
    return onnx_model.graph
def parse_onnx(fname):
    """Parse an ONNX file into the local Graph/Node representation.

    Initializers become "Const" nodes; edges are wired by matching every
    node's input names against previously recorded producer outputs.
    """
    with open(fname, "rb") as onnx_file:
        raw = onnx_file.read()
    model = ModelProto()
    model.ParseFromString(raw)

    producers = {}  # output name -> Node that produces it
    g = Graph()

    for init in model.graph.initializer:
        const_node = Node()
        const_node.op_type = "Const"
        const_node.name = init.name
        g.add(const_node)
        producers[const_node.name] = const_node

    for proto_node in model.graph.node:
        n = Node(proto_node)
        for out_name in proto_node.output:
            producers[out_name] = n
        for in_name in proto_node.input:
            src = producers.get(in_name)
            if src:
                n.add_input(src)
        g.add(n)

    for graph_output in model.graph.output:
        src = producers.get(graph_output.name)
        if src:
            g.add_output(src)

    return g
def test_caffe2_to_onnx(self):
    """End-to-end caffe2_to_onnx conversion of a single-Relu net plus an
    init net providing the X tensor."""
    caffe2_net = tempfile.NamedTemporaryFile()
    caffe2_init_net = tempfile.NamedTemporaryFile()
    output = tempfile.NamedTemporaryFile()

    model = ModelHelper(name='caffe2-to-onnx-test')
    brew.relu(model, ["X"], "Y")
    caffe2_net.write(model.net.Proto().SerializeToString())
    caffe2_net.flush()

    init_model = ModelHelper(name='caffe2-to-onnx-init-test')
    init_model.net.GivenTensorFill(
        [],
        'X',
        shape=[2, 2],
        values=np.zeros((2, 2)).flatten().astype(float))
    caffe2_init_net.write(init_model.net.Proto().SerializeToString())
    caffe2_init_net.flush()

    cli_args = [
        caffe2_net.name,
        '--caffe2-init-net',
        caffe2_init_net.name,
        '--output',
        output.name,
    ]
    self._run_command(caffe2_to_onnx, cli_args, catch_exceptions=False)

    onnx_model = ModelProto()
    onnx_model.ParseFromString(output.read())
    self.assertEqual(len(onnx_model.graph.node), 1)
    self.assertEqual(onnx_model.graph.node[0].op_type, 'Relu')
    self.assertEqual(len(onnx_model.graph.initializer), 1)
    self.assertEqual(onnx_model.graph.initializer[0].name,
                     onnx_model.graph.input[0].name)
def optimize_model(input, model_type, num_heads, hidden_size, opt_level=99, optimization_options=None):
    """Optimize a transformer ONNX model and return the optimizer instance.

    When the model type is configured for it and opt_level > 0, OnnxRuntime
    first applies its own graph optimizations; the model-type-specific
    optimizer then runs with `optimization_options` (defaulting to
    BertOptimizationOptions for the given model type).
    """
    optimizer_class, producer, run_onnxruntime = MODEL_CLASSES[model_type]

    model_path = input
    if run_onnxruntime and opt_level > 0:
        model_path = optimize_by_onnxruntime(model_path,
                                             use_gpu=False,
                                             opt_level=opt_level)
        logger.info(
            "Use OnnxRuntime to optimize and save the optimized model to {}".
            format(model_path))

    model = ModelProto()
    with open(model_path, "rb") as model_file:
        model.ParseFromString(model_file.read())

    # Warn if the exporter recorded in the model disagrees with --model_type.
    if model.producer_name and producer != model.producer_name:
        logger.warning(
            f"Model producer not matched: Expect {producer}, Got {model.producer_name} {model.producer_version}. Please specify correct --model_type parameter."
        )

    if optimization_options is None:
        optimization_options = BertOptimizationOptions(model_type)

    bert_model = optimizer_class(model, num_heads, hidden_size)
    bert_model.optimize(optimization_options)
    return bert_model
def test_pytorch_model_0_gpu_onnxruntime(self):
    """After GPU OnnxRuntime optimization, bert_pytorch_0 must contain the
    expected counts of fused nodes. Skipped when no CUDA provider exists."""
    providers = onnxruntime.get_available_providers()
    if 'CUDAExecutionProvider' not in providers:
        print("skip test_pytorch_model_0_gpu_onnxruntime since no gpu found")
        return

    input = _get_test_model_path('bert_pytorch_0')
    output = 'temp.onnx'
    optimize_by_onnxruntime(input, use_gpu=True, optimized_model_path=output)

    model = ModelProto()
    with open(output, "rb") as optimized_file:
        model.ParseFromString(optimized_file.read())
    # Temp file no longer needed once parsed into memory.
    os.remove(output)

    expected_node_count = {
        'EmbedLayerNormalization': 1,
        'Attention': 12,
        'SkipLayerNormalization': 24,
        'Gelu': 0,
        'FastGelu': 12,
        'BiasGelu': 0
    }
    self.verify_node_count(OnnxModel(model), expected_node_count,
                           'test_pytorch_model_0_gpu_onnxruntime')
def onnx_to_caffe2(onnx_model, output, init_net_output):
    """Convert a serialized ONNX model (readable stream) to Caffe2 nets,
    writing the init net and predict net to the given output streams."""
    proto = ModelProto()
    proto.ParseFromString(onnx_model.read())
    init_net, predict_net = c2.onnx_graph_to_caffe2_net(proto)
    init_net_output.write(init_net.SerializeToString())
    output.write(predict_net.SerializeToString())
def test_expand_out_dim(self) -> None:
    """Expanding output dimensions keeps output names and element types but
    inserts a singleton dimension at the requested index."""
    m1 = _load_model(m1_def)

    def _verify(original: ModelProto, expanded: ModelProto,
                dim_idx: int) -> None:
        for new_out, old_out in zip(expanded.graph.output,
                                    original.graph.output):
            self.assertEqual(new_out.name, old_out.name)
            self.assertEqual(new_out.type.tensor_type.elem_type,
                             old_out.type.tensor_type.elem_type)
            want_shape = _get_shape(old_out)
            want_shape.insert(dim_idx, 1)
            self.assertEqual(_get_shape(new_out), want_shape)

    # Positive and negative insertion indices.
    for dim_idx in (0, 2, -1, -3):
        _verify(m1, compose.expand_out_dim(m1, dim_idx), dim_idx)

    # In-place variant: mutate a copy and compare against the original.
    m2 = ModelProto()
    m2.CopyFrom(m1)
    compose.expand_out_dim(m2, 0, inplace=True)
    _verify(m1, m2, 0)
def main():
    """Entry point: load the input model and run BertOnnxModelShapeOptimizer
    with the parsed command-line options."""
    args = parse_arguments()
    setup_logging(args.verbose)

    # ';'-separated list of output names, or None to keep the default.
    output_names = None
    if args.output_names is not None:
        output_names = args.output_names.split(";")

    model = ModelProto()
    with open(args.input, "rb") as input_file:
        model.ParseFromString(input_file.read())

    optimizer = BertOnnxModelShapeOptimizer(OnnxModel(model))
    optimizer.optimize(
        args.output,
        args.input_ids,
        args.segment_ids,
        args.input_mask,
        args.enable_shape_opt,
        args.enable_reshape_opt,
        output_names,
        args.batch_size,
        args.sequence_length,
        args.verbose,
    )
def main():
    """CLI entry point: inspect an ONNX model — optional checker run, op-type
    stats, metadata dump, printable graph, and pbtxt output."""
    args = get_args()

    with open(args.input, "rb") as f:
        data = f.read()
    model = ModelProto()
    model.ParseFromString(data)

    if args.check:
        onnx.checker.check_model(model)

    if args.stats:
        ops = collections.Counter()
        for node in model.graph.node:
            ops[node.op_type] += 1
        print(ops, "\n\n")

    if args.meta:
        fields = [
            "ir_version", "producer_name", "producer_version", "name",
            "opset_import"
        ]
        for name in fields:
            value = getattr(model, name, None)
            if value:
                print("{} = {}".format(name, value))
        for i in model.metadata_props:
            # Bug fix: the format string was passed as a positional print
            # argument instead of being formatted with the values.
            print("meta.{} = {}".format(i.key, i.value))

    print(helper.printable_graph(model.graph))

    if args.pbtxt:
        with open(args.pbtxt, "w") as f:
            f.write(str(model.graph))
def run_node(cls, node, inputs, device='CPU', outputs_info=None):
    """Dummy backend run_node: builds a single-node model, runs shape
    inference over it when outputs_info is given, and checks any inferred
    value_info against the expected outputs. Always raises
    BackendIsNotSupposedToImplementIt — results are never computed.
    """
    inputs_info = [(x.dtype, x.shape) for x in inputs]
    input_value_infos = [
        helper.make_tensor_value_info(name, NP_TYPE_TO_TENSOR_TYPE[dtype],
                                      shape)
        for name, (dtype, shape) in zip(node.input, inputs_info)
    ]
    output_value_infos = [
        helper.make_tensor_value_info(name, NP_TYPE_TO_TENSOR_TYPE[dtype],
                                      shape)
        for name, (dtype, shape) in zip(node.output, outputs_info)
    ]
    if outputs_info:
        graph = helper.make_graph([node], "test", input_value_infos, [])
        orig_model = helper.make_model(graph, producer_name='onnx-test')
        inferred_model_str = onnx.shape_inference.infer_shapes(
            orig_model.SerializeToString())
        inferred_model = ModelProto()
        inferred_model.ParseFromString(inferred_model_str)
        # Allow shape inference to not return anything, but if it
        # does then check that it's correct
        if inferred_model.graph.value_info:
            assert list(inferred_model.graph.value_info) == output_value_infos
    raise BackendIsNotSupposedToImplementIt(
        "This is the dummy backend test that doesn't verify the results but does run the shape inference"
    )
def make_model(graph: GraphProto, **kwargs: Any) -> ModelProto:
    """Construct a ModelProto

    Arguments:
        graph (GraphProto): *make_graph* returns
        **kwargs: any attribute to add to the returned instance
    Returns:
        ModelProto
    """
    model = ModelProto()
    # Touch model.ir_version so it is stored as the version from which it is
    # generated.
    model.ir_version = IR_VERSION
    model.graph.CopyFrom(graph)

    opset_imports: Optional[Sequence[OperatorSetIdProto]] = kwargs.pop(
        'opset_imports', None)  # type: ignore
    if opset_imports is None:
        # Default import at the current opset version.
        default_import = model.opset_import.add()
        default_import.version = defs.onnx_opset_version()
    else:
        model.opset_import.extend(opset_imports)

    functions: Optional[Sequence[FunctionProto]] = kwargs.pop(
        'functions', None)  # type: ignore
    if functions is not None:
        model.functions.extend(functions)

    for field, value in kwargs.items():
        # TODO: Does this work with repeated fields?
        setattr(model, field, value)
    return model
def get_bert_inputs(
    onnx_file: str,
    input_ids_name: Optional[str] = None,
    segment_ids_name: Optional[str] = None,
    input_mask_name: Optional[str] = None,
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]:
    """Find graph inputs for BERT model.

    First, we will deduce inputs from EmbedLayerNormalization node.
    If not found, we will guess the meaning of graph inputs based on naming.

    Args:
        onnx_file (str): onnx model path
        input_ids_name (str, optional): Name of graph input for input IDs.
            Defaults to None.
        segment_ids_name (str, optional): Name of graph input for segment IDs.
            Defaults to None.
        input_mask_name (str, optional): Name of graph input for attention
            mask. Defaults to None.

    Returns:
        Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]:
            input tensors of input_ids, segment_ids and input_mask
    """
    model = ModelProto()
    with open(onnx_file, "rb") as model_file:
        model.ParseFromString(model_file.read())
    return find_bert_inputs(OnnxModel(model), input_ids_name,
                            segment_ids_name, input_mask_name)
def main():  # type: () -> None
    """CLI: render an ONNX model's graph to a pydot .dot file."""
    parser = argparse.ArgumentParser(description="ONNX net drawer")
    parser.add_argument("--input",
                        type=Text,
                        required=True,
                        help="The input protobuf file.")
    parser.add_argument("--output",
                        type=Text,
                        required=True,
                        help="The output protobuf file.")
    parser.add_argument("--rankdir",
                        type=Text,
                        default='LR',
                        help="The rank direction of the pydot graph.")
    parser.add_argument(
        "--embed_docstring",
        action="store_true",
        help="Embed docstring as javascript alert. Useful for SVG format.")
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, 'rb') as model_file:
        model.ParseFromString(model_file.read())

    node_producer = GetOpNodeProducer(embed_docstring=args.embed_docstring,
                                      **OP_STYLE)
    pydot_graph = GetPydotGraph(model.graph,
                                name=model.graph.name,
                                rankdir=args.rankdir,
                                node_producer=node_producer)
    pydot_graph.write_dot(args.output)
def main():
    """CLI entry point: optimize a BERT ONNX model exported from TensorFlow
    or PyTorch and write the optimized model to --output."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    parser.add_argument(
        '--framework',
        required=True,
        type=str,
        help="Original framework. Only support TensorFlow and PyTorch")
    # model parameters
    parser.add_argument('--num_heads',
                        required=False,
                        type=int,
                        default=12,
                        help="number of attention heads")
    parser.add_argument('--hidden_size', required=False, type=int, default=768)
    parser.add_argument('--sequence_length',
                        required=False,
                        type=int,
                        default=128)
    # Use int32 (instead of int64) tensor as input to avoid unnecessary data
    # type cast.
    parser.add_argument('--input_int32', required=False, action='store_true')
    parser.set_defaults(input_int32=False)
    # For NVidia GPU with Tensor Core like V100 and T4, half-precision float
    # brings better performance.
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)
    parser.add_argument('--gpu_only', required=False, action='store_true')
    parser.set_defaults(gpu_only=False)
    parser.add_argument('--verbose', required=False, action='store_true')
    parser.set_defaults(verbose=False)
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    framework = args.framework.lower()
    if framework == 'tensorflow':
        bert_model = BertOnnxModelTF(model, args.num_heads, args.hidden_size,
                                     args.sequence_length, args.input_int32,
                                     args.float16, args.gpu_only, args.verbose)
    elif framework == 'pytorch':
        bert_model = BertOnnxModel(model, args.num_heads, args.hidden_size,
                                   args.sequence_length, args.input_int32,
                                   args.float16, args.gpu_only, args.verbose)
    else:
        # Bug fix: the original fell through after this print and crashed
        # with NameError on bert_model; bail out instead.
        print("Unsupported framework:" + args.framework)
        return

    bert_model.optimize()

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())
def _optimized(self, graph, opts):
    """Run the ONNX optimizer passes `opts` over `graph`, validate the
    result with the checker, and return the optimized ModelProto."""
    original = helper.make_model(graph, producer_name='onnx-test')
    optimized_bytes = onnx.optimizer.optimize(original.SerializeToString(),
                                              opts)
    optimized = ModelProto()
    optimized.ParseFromString(optimized_bytes)
    checker.check_model(optimized)
    return optimized
def _optimized(self, graph):
    """Run Caffe2Backend.optimize_onnx over `graph` and return the
    resulting ModelProto."""
    original = helper.make_model(graph, producer_name='onnx-to-caffe2-test')
    optimized_bytes = c2.Caffe2Backend.optimize_onnx(
        original.SerializeToString())
    optimized = ModelProto()
    optimized.ParseFromString(optimized_bytes)
    return optimized
def create_caffe2_predictor(onnx_file_path):
    """Load an ONNX model file and build a Caffe2 workspace Predictor
    from its converted init/predict nets."""
    proto = ModelProto()
    with open(onnx_file_path, 'rb') as onnx_file:
        proto.ParseFromString(onnx_file.read())
    init_net, predict_net = c2.onnx_graph_to_caffe2_net(proto)
    return workspace.Predictor(init_net, predict_net)
def main():
    """CLI: render an ONNX model's graph to a .dot file, optionally marking
    a '_'-separated list of node indices."""
    parser = argparse.ArgumentParser(description="ONNX net drawer")
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="The input protobuf file.",
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="The output protobuf file.",
    )
    parser.add_argument(
        "--rankdir",
        type=str,
        default='LR',
        help="The rank direction of the pydot graph.",
    )
    parser.add_argument(
        "--embed_docstring",
        action="store_true",
        help="Embed docstring as javascript alert. Useful for SVG format.",
    )
    parser.add_argument(
        "--marked",
        type=int,
        default=0,
        help="0: original, 1: marked",
    )
    parser.add_argument(
        "--marked_list",
        type=str,
        default="",
        help="if 2_3_4, means node 2,3,4 will be marked",
    )
    args = parser.parse_args()

    if args.marked:
        # Bug fix: skip empty fragments so "--marked 1" with an empty or
        # trailing-separator --marked_list (e.g. "" or "2_3_") does not
        # crash on int('').
        marked_list = [int(e) for e in args.marked_list.split('_') if e]
    else:
        marked_list = []

    model = ModelProto()
    with open(args.input, 'rb') as fid:
        content = fid.read()
    model.ParseFromString(content)
    pydot_graph = GetPydotGraph(
        model.graph,
        name=model.graph.name,
        rankdir=args.rankdir,
        node_producer=GetOpNodeProducer(embed_docstring=args.embed_docstring,
                                        #**OP_STYLE
                                        ),
        marked_list=marked_list,
    )
    pydot_graph.write_dot(args.output)
def main():
    """CLI entry point: inspect an ONNX model — op-type stats, metadata,
    checker + shape inference, pbtxt dump, and a graphviz .dot rendering."""
    args = get_args()

    with open(args.input, "rb") as f:
        data = f.read()
    model = ModelProto()
    model.ParseFromString(data)

    if args.stats:
        ops = collections.Counter()
        for node in model.graph.node:
            ops[node.op_type] += 1
        print(ops, "\n\n")

    if args.meta:
        fields = [
            "ir_version", "producer_name", "producer_version", "name",
            "opset_import"
        ]
        for name in fields:
            value = getattr(model, name, None)
            if value:
                print("{} = {}".format(name, value))
        for i in model.metadata_props:
            # Bug fix: the format string was passed as a positional print
            # argument instead of being formatted with the values.
            print("meta.{} = {}".format(i.key, i.value))

    print(helper.printable_graph(model.graph))

    if args.check:
        onnx.checker.check_model(model)
        inferred_model = shape_inference.infer_shapes(model)
        onnx.checker.check_model(inferred_model)

    if args.pbtxt:
        with open(args.pbtxt, "w") as f:
            f.write(str(model.graph))

    if args.dot:
        with open(args.dot, "w") as f:
            f.write("digraph graphname {\n")
            for node in model.graph.node:
                output_name = node.name
                name = node.name
                color = ""
                if node.op_type.startswith("_"):
                    color = ' color="yellow"'
                if node.op_type == "CELL":
                    color = ' color="red"'
                f.write('"{}" [label="{},{}"{}];\n'.format(
                    output_name, node.op_type, name, color))
                for input_name in node.input:
                    # Strip the ":port" suffix and any leading '^' marker
                    # (presumably TF-style control-dependency names — confirm)
                    # before drawing the edge.
                    parts = input_name.split(":")
                    input_name = re.sub(r"^\^", "", parts[0])
                    f.write('  "{}" -> "{}";\n'.format(
                        input_name, output_name))
            f.write("}\n")
def make_model(graph, **kwargs):
    """Build a ModelProto around `graph`; remaining kwargs are set as
    model fields."""
    model = ModelProto()
    # Touch model.ir_version so it is stored as the version from which it is
    # generated.
    model.ir_version = IR_VERSION
    model.graph.CopyFrom(graph)
    for field, value in kwargs.items():
        setattr(model, field, value)
    return model
def model_proto_from_zip(zip_path, external_tensor_storage):
    """Load a ModelProto plus its external tensor data from a zip archive.

    The member whose name ends with ".onnx" is parsed into the returned
    ModelProto; every other member's raw bytes are stored under its name in
    external_tensor_storage.name_to_tensor_data.
    """
    model_proto = ModelProto()
    with zipfile.ZipFile(zip_path, 'r') as z:
        for member_name in z.namelist():
            # Bug fix: the original called z.open() without closing the
            # returned file handle; use a context manager instead.
            with z.open(member_name) as member:
                data = member.read()
            if member_name.endswith(".onnx"):
                model_proto.ParseFromString(data)
            else:
                external_tensor_storage.name_to_tensor_data[member_name] = data
    return model_proto
def test_version_exists(self):  # type: () -> None
    """ir_version must be unset on a fresh model and survive a
    serialize/parse round trip once assigned."""
    model = ModelProto()
    # A freshly created model has no version field yet.
    self.assertFalse(model.HasField('ir_version'))

    # Touch the field so it carries the running ONNX IR version.
    model.ir_version = IR_VERSION
    round_tripped = model.SerializeToString()
    model.ParseFromString(round_tripped)
    self.assertTrue(model.HasField('ir_version'))
    # The value must come back unchanged.
    self.assertEqual(model.ir_version, IR_VERSION)
def main():
    """CLI entry point: apply the BERT fusion passes to an ONNX model and
    write the fused model to --output."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    # model parameters
    parser.add_argument('--num_heads',
                        required=False,
                        type=int,
                        default=12,
                        help="number of attention heads")
    parser.add_argument('--hidden_size', required=False, type=int, default=768)
    parser.add_argument('--sequence_length',
                        required=False,
                        type=int,
                        default=128)
    # Use int32 (instead of int64) tensor as input to avoid unnecessary data type cast.
    parser.add_argument('--input_int32', required=False, action='store_true')
    parser.set_defaults(input_int32=False)
    # For NVidia GPU with Tensor Core like V100 and T4, half-precision float brings better performance.
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)
    parser.add_argument('--verbose', required=False, action='store_true')
    parser.set_defaults(verbose=False)
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as model_file:
        model.ParseFromString(model_file.read())

    bert_model = BertOnnxModel(model, args.num_heads, args.hidden_size,
                               args.sequence_length)

    # NOTE(review): fusion pass order is preserved exactly as written —
    # presumably later fusions depend on earlier ones; confirm before
    # reordering.
    bert_model.fuse_layer_norm()
    bert_model.fuse_gelu()
    bert_model.fuse_reshape()
    bert_model.fuse_attention(args.verbose)
    bert_model.fuse_embed_layer(args.verbose)

    if bert_model.embed_node is None:
        print("Failed to fuse embedding layer.")
        return

    if args.input_int32:
        bert_model.change_input_to_int32()
    else:
        bert_model.cast_input_to_int32()

    if args.float16:
        bert_model.convert_model_float32_to_float16()

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())
def build_engine_from_onnx(onnx_path, engine_name, batch_size, TRT_LOGGER):
    """Read the first graph input's trailing three dimensions from the ONNX
    file, build a TensorRT engine with [batch_size, d0, d1, d2], save it to
    engine_name, and return the engine."""
    model = ModelProto()
    with open(onnx_path, "rb") as model_file:
        model.ParseFromString(model_file.read())

    dims = model.graph.input[0].type.tensor_type.shape.dim
    # Dimension 0 is assumed to be the batch axis and is replaced by
    # batch_size.
    shape = [
        batch_size, dims[1].dim_value, dims[2].dim_value, dims[3].dim_value
    ]

    engine = eng.build_engine(TRT_LOGGER, onnx_path, shape=shape)
    eng.save_engine(engine, engine_name)
    return engine
def run(self, onnx_model):
    """Render serialized ONNX model bytes as a PNG image via pydot."""
    proto = ModelProto()
    proto.ParseFromString(onnx_model)
    dot_graph = self.GetPydotGraph(
        proto.graph,
        name=proto.graph.name,
        rankdir='TD',
        node_producer=self.GetOpNodeProducer(**OP_STYLE),
    )
    return dot_graph.create(format='png')
def main():
    """Entry point: load a model, run convert_initializers over it (skipping
    the ';'-separated --exclude names), and save the result."""
    args = parse_arguments()
    setup_logging(args.verbose)

    if args.exclude is None:
        exclude_names = set()
    else:
        exclude_names = set(args.exclude.split(';'))

    model = ModelProto()
    with open(args.input, "rb") as input_file:
        model.ParseFromString(input_file.read())

    convert_initializers(model, exclude_names, args.sparsity_threshold,
                         args.tolerance)

    with open(args.output, "wb") as output_file:
        output_file.write(model.SerializeToString())
def make_model(graph, **kwargs):  # type: (GraphProto, **Any) -> ModelProto
    """Construct a ModelProto from a GraphProto.

    The 'opset_imports' kwarg overrides the default opset import; any
    remaining kwargs are set as model fields.
    """
    model = ModelProto()
    # Touch model.ir_version so it is stored as the version from which it is
    # generated.
    model.ir_version = IR_VERSION
    model.graph.CopyFrom(graph)

    opset_imports = kwargs.pop('opset_imports', None)  # type: ignore
    if opset_imports is None:
        # Default import at the current opset version.
        default_import = model.opset_import.add()
        default_import.version = defs.onnx_opset_version()
    else:
        model.opset_import.extend(opset_imports)

    for field, value in kwargs.items():
        # TODO: Does this work with repeated fields?
        setattr(model, field, value)
    return model
def _simple_model(self):  # type: () -> ModelProto
    """Return a minimal ModelProto with only ir_version populated."""
    model = ModelProto()
    model.ir_version = IR_VERSION
    return model