def update_onnx_initializer(onnx_file, ckpt_file, output_file):
    """Update onnx initializer."""
    with open(onnx_file, 'rb') as f:
        data = f.read()
    model = onnx_pb.ModelProto()
    model.ParseFromString(data)
    initializer = model.graph.initializer
    param_dict = load_checkpoint(ckpt_file)

    for i, _ in enumerate(initializer):
        item = initializer[i]
        if item.name not in param_dict:
            print(f"Warning: Cannot find '{item.name}' in checkpoint parameters dictionary")
            continue
        weight = param_dict[item.name].data.asnumpy()
        bin_data = weight.tobytes()
        if len(item.raw_data) != len(bin_data):
            print("Warning: Size of weight from checkpoint is different from original size, ignoring it")
            continue
        item.raw_data = bin_data

    pb_msg = model.SerializeToString()
    with open(output_file, 'wb') as f:
        f.write(pb_msg)

    print(f'Graph name: {model.graph.name}')
    print(f'Initializer length: {len(initializer)}')
    print(f'Checkpoint dict length: {len(param_dict)}')
    print(f'The new weights have been written to file {output_file} successfully')
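A minimal usage sketch. The checkpoint handling (load_checkpoint, .data.asnumpy()) follows the MindSpore parameter API, and the file names here are hypothetical:

# Hypothetical files: rewrite resnet.onnx's weights with values from resnet.ckpt.
update_onnx_initializer('resnet.onnx', 'resnet.ckpt', 'resnet_updated.onnx')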
def clone_model_with_shape_infer(model):
    if model_has_infer_metadata(model):
        cloned_model = onnx_proto.ModelProto()
        cloned_model.CopyFrom(model)
    else:
        cloned_model = save_and_reload_model(model)
    return cloned_model
def main():
    if len(sys.argv) < 3:
        print('Usage: python onnx_to_nnir.py <onnxModel> <nnirOutputFolder>'
              ' [--input_dims n,c,h,w (optional)]'
              ' [--node_type_append 0/1 (optional: appends node type to output tensor name)]')
        sys.exit(1)
    onnxFileName = sys.argv[1]
    outputFolder = sys.argv[2]
    # appends node type to output tensor name.
    node_type_append = 0
    pos = 3  # optional flags start right after the two positional arguments
    while pos < len(sys.argv) and sys.argv[pos][:2] == '--':
        if sys.argv[pos] == '--node_type_append':
            node_type_append = int(sys.argv[pos + 1])
            pos = pos + 2
        elif sys.argv[pos] == '--input_dims':
            # input_dims = sys.argv[pos+1]
            pos = pos + 2
        else:
            print('ERROR: unknown option: ' + sys.argv[pos])
            sys.exit(1)
    print('loading ONNX model from %s ...' % (onnxFileName))
    onnx_model_proto = onnx_pb.ModelProto()
    if not os.path.isfile(onnxFileName):
        print('ERROR: unable to open: ' + onnxFileName)
        sys.exit(1)
    onnx_model_proto.ParseFromString(open(onnxFileName, 'rb').read())
    print('converting to IR model in %s ...' % (outputFolder))
    onnx2ir(onnx_model_proto, outputFolder, node_type_append)
def main(args):
    device_name = 'cpu' if args.gpu is None else 'cuda:' + str(args.gpu)
    device = torch.device(device_name)
    n_joints = 14
    model = LPM(3, 32, n_joints + 1, device, T=args.t)
    if args.checkpoint_name is not None:
        print('Loading checkpoint...')
        path = os.path.join(args.model_dir, args.checkpoint_name)
        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint['state_dict'])
    model = model.to(device)

    print('Exporting ONNX...')
    dummy_images = torch.zeros((3, args.resolution, args.resolution)).to(device)
    dummy_centers = torch.zeros((1, args.resolution, args.resolution)).to(device)
    torch.onnx.export(model,
                      (dummy_images, dummy_images, dummy_images, dummy_images,
                       dummy_images, dummy_centers),
                      args.onnx_name)

    print('Exporting CoreML...')
    model_file = open(args.onnx_name, 'rb')
    model_proto = onnx_pb.ModelProto()
    model_proto.ParseFromString(model_file.read())
    mlmodel = onnx_coreml.convert(model_proto, mode='regressor', image_input_names=['0'])
    mlmodel.save(args.core_ml_name)
    print('Done!')
def augment_graph(self):
    '''
    Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
    model and ensures their outputs are stored as part of the graph output
    :return: augmented ONNX model
    '''
    model = onnx_proto.ModelProto()
    model.CopyFrom(self.model)
    model = onnx.shape_inference.infer_shapes(model)

    added_nodes = []
    added_outputs = []
    tensors, value_infos = self.select_tensors_to_calibrate(model)

    for tensor in tensors:
        # When doing ReduceMax/ReduceMin, ORT can't reduce on a dim with value of 0
        # if 'keepdims' is false. To keep the code simple, we always set keepdims to 1.
        keepdims = 1

        # dim could be:
        #   [dim_param: "batch_size", dim_value: 256, dim_value: 36, dim_value: 64],
        #   [dim_value: 0],
        #   ...
        # Please see the definition of TensorShapeProto:
        # https://github.com/onnx/onnx/blob/master/onnx/onnx.proto#L651
        dim = value_infos[tensor].type.tensor_type.shape.dim
        shape = (1,) if len(dim) == 1 else tuple(1 for i in range(len(dim)))

        # Adding ReduceMin nodes
        reduce_min_name = tensor + '_ReduceMin'
        reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor],
                                                [tensor + '_ReduceMin'],
                                                reduce_min_name,
                                                keepdims=keepdims)
        added_nodes.append(reduce_min_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_min_node.output[0],
                                          TensorProto.FLOAT, shape))

        # Adding ReduceMax nodes
        reduce_max_name = tensor + '_ReduceMax'
        reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor],
                                                [tensor + '_ReduceMax'],
                                                reduce_max_name,
                                                keepdims=keepdims)
        added_nodes.append(reduce_max_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_max_node.output[0],
                                          TensorProto.FLOAT, shape))

    model.graph.node.extend(added_nodes)
    model.graph.output.extend(added_outputs)

    onnx.save(model,
              self.augmented_model_path,
              save_as_external_data=self.use_external_data_format)
    self.augment_model = model
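For reference, onnx.helper.make_node takes the op type, input names, output names, an optional node name, and attributes as keyword arguments. A standalone sketch of the same pattern (the tensor name 'X' is made up):

import onnx
from onnx import helper, TensorProto

# Build the same kind of ReduceMin node outside the class: op type, input
# names, output names, node name, then attributes as keyword arguments.
node = helper.make_node('ReduceMin', ['X'], ['X_ReduceMin'],
                        'X_ReduceMin', keepdims=1)
# Declare the matching graph output; with keepdims=1 a rank-2 input
# reduces to shape (1, 1).
vi = helper.make_tensor_value_info('X_ReduceMin', TensorProto.FLOAT, (1, 1))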
def augment_graph(self):
    '''
    Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
    model and ensures their outputs are stored as part of the graph output
    :return: augmented ONNX model
    '''
    model = onnx_proto.ModelProto()
    model.CopyFrom(self.model)
    model = onnx.shape_inference.infer_shapes(model)
    value_infos = {vi.name: vi for vi in model.graph.value_info}
    value_infos.update({ot.name: ot for ot in model.graph.output})
    value_infos.update({it.name: it for it in model.graph.input})
    initializer = set(init.name for init in model.graph.initializer)

    added_nodes = []
    added_outputs = []
    tensors_to_calibrate = set()
    tensor_type_to_calibrate = set([TensorProto.FLOAT, TensorProto.FLOAT16])

    for node in model.graph.node:
        should_be_calibrated = \
            ((node.op_type in self.calibrate_op_types) and (node.name not in self.black_nodes)) or \
            (node.name in self.white_nodes) or \
            ((not self.calibrate_op_types) and (node.name not in self.black_nodes))
        if should_be_calibrated:
            for tensor_name in itertools.chain(node.input, node.output):
                if tensor_name in value_infos.keys():
                    vi = value_infos[tensor_name]
                    if vi.type.HasField('tensor_type') and \
                       (vi.type.tensor_type.elem_type in tensor_type_to_calibrate) and \
                       (tensor_name not in initializer):
                        tensors_to_calibrate.add(tensor_name)

    # If augmenting all ops, it's possible that some nodes' inputs have a dim with value of 0.
    # ORT can't reduce on a dim with value of 0 if 'keepdims' is false, so set keepdims to 1.
    if self.calibrate_op_types:
        keepdims_value = 0
    else:
        keepdims_value = 1

    for tensor in tensors_to_calibrate:
        # Adding ReduceMin nodes
        reduce_min_name = tensor + '_ReduceMin'
        reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor],
                                                [tensor + '_ReduceMin'],
                                                reduce_min_name,
                                                keepdims=keepdims_value)
        added_nodes.append(reduce_min_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_min_node.output[0], TensorProto.FLOAT, ()))

        # Adding ReduceMax nodes
        reduce_max_name = tensor + '_ReduceMax'
        reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor],
                                                [tensor + '_ReduceMax'],
                                                reduce_max_name,
                                                keepdims=keepdims_value)
        added_nodes.append(reduce_max_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_max_node.output[0], TensorProto.FLOAT, ()))

    model.graph.node.extend(added_nodes)
    model.graph.output.extend(added_outputs)

    return model
def augment_graph(self):
    '''
    Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
    model and ensures their outputs are stored as part of the graph output
    :return: augmented ONNX model
    '''
    model = onnx_proto.ModelProto()
    model.CopyFrom(self.model)
    model = onnx.shape_inference.infer_shapes(model)
    value_infos = {vi.name: vi for vi in model.graph.value_info}
    value_infos.update({ot.name: ot for ot in model.graph.output})
    value_infos.update({it.name: it for it in model.graph.input})
    initializer = set(init.name for init in model.graph.initializer)

    added_nodes = []
    added_outputs = []
    tensors_to_calibrate = set()
    tensor_type_to_calibrate = set([TensorProto.FLOAT, TensorProto.FLOAT16])

    for node in model.graph.node:
        if len(self.op_types_to_calibrate) == 0 or node.op_type in self.op_types_to_calibrate:
            for tensor_name in itertools.chain(node.input, node.output):
                if tensor_name in value_infos.keys():
                    vi = value_infos[tensor_name]
                    if vi.type.HasField('tensor_type') and \
                       (vi.type.tensor_type.elem_type in tensor_type_to_calibrate) and \
                       (tensor_name not in initializer):
                        tensors_to_calibrate.add(tensor_name)

    for tensor in tensors_to_calibrate:
        # Adding ReduceMin nodes
        reduce_min_name = tensor + '_ReduceMin'
        reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor],
                                                [tensor + '_ReduceMin'],
                                                reduce_min_name,
                                                keepdims=0)
        added_nodes.append(reduce_min_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_min_node.output[0], TensorProto.FLOAT, ()))

        # Adding ReduceMax nodes
        reduce_max_name = tensor + '_ReduceMax'
        reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor],
                                                [tensor + '_ReduceMax'],
                                                reduce_max_name,
                                                keepdims=0)
        added_nodes.append(reduce_max_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_max_node.output[0], TensorProto.FLOAT, ()))

    model.graph.node.extend(added_nodes)
    model.graph.output.extend(added_outputs)

    onnx.save(model, self.augmented_model_path)
    self.augment_model = model
def main():
    if len(sys.argv) < 3:
        print('Usage: python onnx_to_nnir.py <onnxModel> <nnirOutputFolder> [--input_dims n,c,h,w (optional)]')
        sys.exit(1)
    onnxFileName = sys.argv[1]
    outputFolder = sys.argv[2]
    print('loading ONNX model from %s ...' % (onnxFileName))
    onnx_model_proto = onnx_pb.ModelProto()
    if not os.path.isfile(onnxFileName):
        print('ERROR: unable to open: ' + onnxFileName)
        sys.exit(1)
    onnx_model_proto.ParseFromString(open(onnxFileName, 'rb').read())
    print('converting to IR model in %s ...' % (outputFolder))
    onnx2ir(onnx_model_proto, outputFolder)
def augment_graph(self):
    '''
    Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
    model and ensures their outputs are stored as part of the graph output
    :return: augmented ONNX model
    '''
    model = onnx_proto.ModelProto()
    model.CopyFrom(self.model)
    model = onnx.shape_inference.infer_shapes(model)

    added_nodes = []
    added_outputs = []
    tensors, value_infos = self.select_tensors_to_calibrate(model)

    for tensor in tensors:
        # Get tensor's shape
        dim = len(value_infos[tensor].type.tensor_type.shape.dim)
        shape = (1,) if dim == 1 else list(1 for i in range(dim))

        # Adding ReduceMin nodes
        reduce_min_name = tensor + '_ReduceMin'
        reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor],
                                                [tensor + '_ReduceMin'],
                                                reduce_min_name)
        added_nodes.append(reduce_min_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_min_node.output[0], TensorProto.FLOAT, shape))

        # Adding ReduceMax nodes
        reduce_max_name = tensor + '_ReduceMax'
        reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor],
                                                [tensor + '_ReduceMax'],
                                                reduce_max_name)
        added_nodes.append(reduce_max_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_max_node.output[0], TensorProto.FLOAT, shape))

    model.graph.node.extend(added_nodes)
    model.graph.output.extend(added_outputs)

    onnx.save(model, self.augmented_model_path)
    self.augment_model = model
def augment_graph(self):
    '''
    Make the outputs of all quantization_candidates op type nodes part of the graph output.
    :return: augmented ONNX model
    '''
    model = onnx_proto.ModelProto()
    model.CopyFrom(self.model)
    model = onnx.shape_inference.infer_shapes(model)

    added_nodes = []
    added_outputs = []
    tensors, value_infos = self.select_tensors_to_calibrate(model)

    for tensor in tensors:
        added_outputs.append(value_infos[tensor])

    model.graph.node.extend(added_nodes)
    model.graph.output.extend(added_outputs)

    onnx.save(model, self.augmented_model_path)
    self.augment_model = model
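To see how such an augmented model might be consumed for calibration, here is a minimal sketch using onnxruntime. The model path 'augmented_model.onnx', the input name 'input', and the input shape are assumptions for illustration:

import numpy as np
import onnxruntime as ort

# Hypothetical: run the augmented model once and collect min/max per exposed tensor.
session = ort.InferenceSession('augmented_model.onnx')
output_names = [o.name for o in session.get_outputs()]
dummy = np.random.rand(1, 3, 224, 224).astype(np.float32)  # made-up input shape
outputs = session.run(output_names, {'input': dummy})
ranges = {name: (float(np.min(v)), float(np.max(v)))
          for name, v in zip(output_names, outputs)}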
def main():
    os.system('rm -rf saved_models && mkdir saved_models')

    files = glob.glob('saved_models/*.onnx') + glob.glob('../yolov3/weights/*.onnx')
    for f in files:
        # 1. ONNX to CoreML
        name = 'saved_models/' + f.split('/')[-1].replace('.onnx', '')

        model_file = open(f, 'rb')
        model_proto = onnx_pb.ModelProto()
        model_proto.ParseFromString(model_file.read())
        coreml_model = convert(model_proto, image_input_names=['0'])
        # coreml_model.save(model_out)

        # 2. Reduce model to FP16, change outputs to DOUBLE and save
        import coremltools

        spec = coreml_model.get_spec()
        for i in range(2):
            spec.description.output[i].type.multiArrayType.dataType = \
                coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value('DOUBLE')
        spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)
        coreml_model = coremltools.models.MLModel(spec)

        num_classes = 80
        num_anchors = 507
        spec.description.output[0].type.multiArrayType.shape.append(num_classes)
        spec.description.output[0].type.multiArrayType.shape.append(num_anchors)
        spec.description.output[1].type.multiArrayType.shape.append(4)
        spec.description.output[1].type.multiArrayType.shape.append(num_anchors)
        coreml_model.save(name + '.mlmodel')
        print(spec.description)

        # 3. Create NMS protobuf
        import numpy as np

        nms_spec = coremltools.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 3

        for i in range(2):
            decoder_output = coreml_model._spec.description.output[i].SerializeToString()
            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)
            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = 'confidence'
        nms_spec.description.output[1].name = 'coordinates'

        output_sizes = [num_classes, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        nms.confidenceInputFeatureName = '133'  # 1x507x80
        nms.coordinatesInputFeatureName = '134'  # 1x507x4
        nms.confidenceOutputFeatureName = 'confidence'
        nms.coordinatesOutputFeatureName = 'coordinates'
        nms.iouThresholdInputFeatureName = 'iouThreshold'
        nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'
        nms.iouThreshold = 0.6
        nms.confidenceThreshold = 0.4
        nms.pickTop.perClass = True
        labels = np.loadtxt('../yolov3/data/coco.names', dtype=str, delimiter='\n')
        nms.stringClassLabels.vector.extend(labels)

        nms_model = coremltools.models.MLModel(nms_spec)
        nms_model.save(name + '_nms.mlmodel')

        # 4. Pipeline models together
        from coremltools.models import datatypes
        # from coremltools.models import neural_network
        from coremltools.models.pipeline import Pipeline

        input_features = [('image', datatypes.Array(3, 416, 416)),
                          ('iouThreshold', datatypes.Double()),
                          ('confidenceThreshold', datatypes.Double())]
        output_features = ['confidence', 'coordinates']
        pipeline = Pipeline(input_features, output_features)

        # Add 3rd dimension of size 1 (apparently not needed, produces error on compile)
        # ssd_output = coreml_model._spec.description.output
        # ssd_output[0].type.multiArrayType.shape[:] = [num_classes, num_anchors, 1]
        # ssd_output[1].type.multiArrayType.shape[:] = [4, num_anchors, 1]

        # And now we can add the two models, in order:
        pipeline.add_model(coreml_model)
        pipeline.add_model(nms_model)

        # Correct datatypes
        pipeline.spec.description.input[0].ParseFromString(
            coreml_model._spec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(
            nms_model._spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(
            nms_model._spec.description.output[1].SerializeToString())

        # Update metadata
        pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch'
        pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3'
        pipeline.spec.description.metadata.author = '*****@*****.**'
        pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3'
        user_defined_metadata = {
            'classes': ','.join(labels),
            'iou_threshold': str(nms.iouThreshold),
            'confidence_threshold': str(nms.confidenceThreshold)
        }
        pipeline.spec.description.metadata.userDefined.update(user_defined_metadata)

        # Save the model
        pipeline.spec.specificationVersion = 3
        final_model = coremltools.models.MLModel(pipeline.spec)
        final_model.save(name + '_pipelined.mlmodel')
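A hedged sketch of how the saved pipeline might be exercised. The model path and sample image are made up, and the image input name is read from the spec because the pipeline code above overwrites it with the converted model's input name:

from PIL import Image
import coremltools

# Hypothetical smoke test of the saved pipeline; path and image are made up.
model = coremltools.models.MLModel('saved_models/yolov3-tiny_pipelined.mlmodel')
input_name = model.get_spec().description.input[0].name  # image input
img = Image.open('sample_416.jpg').resize((416, 416))
out = model.predict({input_name: img,
                     'iouThreshold': 0.6,
                     'confidenceThreshold': 0.4})
print(out['confidence'].shape, out['coordinates'].shape)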
def quantize(model,
             per_channel=True,
             nbits=8,
             quantization_mode=QuantizationMode.IntegerOps,
             static=False,
             asymmetric_input_types=False,
             input_quantization_params=None,
             output_quantization_params=None):
    '''
    Given an onnx model, create a quantized onnx model and save it into a file
    :param model: ModelProto to quantize
    :param per_channel: quantize weights per channel
    :param nbits: number of bits to represent quantized data. Currently only supporting 8-bit types
    :param quantization_mode: Can be one of the QuantizationMode types.
        IntegerOps: the function will use integer ops. Only ConvInteger and MatMulInteger ops
            are supported now.
        QLinearOps: the function will use QLinear ops. Only QLinearConv and QLinearMatMul ops
            are supported now.
    :param static:
        True: The inputs/activations are quantized using static scale and zero point values
            specified through input_quantization_params.
        False: The inputs/activations are quantized using dynamic scale and zero point values
            computed while running the model.
    :param asymmetric_input_types:
        True: Weights are quantized into signed integers and inputs/activations into unsigned integers.
        False: Weights and inputs/activations are quantized into unsigned integers.
    :param input_quantization_params:
        Dictionary to specify the zero point and scale values for inputs to conv and matmul nodes.
        Should be specified when static is set to True.
        The input_quantization_params should be specified in the following format:
            { "input_name": [zero_point, scale] }
        zero_point should be of type np.uint8 and scale should be of type np.float32.
        example:
            {
                'resnet_model/Relu_1:0': [np.uint8(0), np.float32(0.019539741799235344)],
                'resnet_model/Relu_2:0': [np.uint8(0), np.float32(0.011359662748873234)]
            }
    :param output_quantization_params:
        Dictionary to specify the zero point and scale values for outputs of conv and matmul nodes.
        Should be specified in QuantizationMode.QLinearOps mode.
        The output_quantization_params should be specified in the following format:
            { "output_name": [zero_point, scale] }
        zero_point can be of type np.uint8/np.int8 and scale should be of type np.float32.
        example:
            {
                'resnet_model/Relu_3:0': [np.int8(0), np.float32(0.011359662748873234)],
                'resnet_model/Relu_4:0': [np.uint8(0), np.float32(0.011359662748873234)]
            }
    :return: ModelProto with quantization
    '''
    if nbits == 8:
        input_qType = onnx_proto.TensorProto.UINT8
        weight_qType = onnx_proto.TensorProto.INT8 if asymmetric_input_types else onnx_proto.TensorProto.UINT8
        mode = quantization_mode
        copy_model = onnx_proto.ModelProto()
        copy_model.CopyFrom(model)
        quantizer = ONNXQuantizer(copy_model, per_channel, mode, static, weight_qType, input_qType,
                                  input_quantization_params, output_quantization_params)
        quantizer.quantize_model()
        quantizer.model.producer_name = __producer__
        quantizer.model.producer_version = __version__
        return quantizer.model
    else:
        raise ValueError('Unknown value for nbits. only 8 bit quantization is currently supported')
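A minimal usage sketch for this quantize API, assuming the quantize function and QuantizationMode above are importable from the same module; the model paths are hypothetical:

import onnx

# Hypothetical paths: quantize a model with dynamic scales and IntegerOps (the defaults).
model = onnx.load('model.onnx')
quantized_model = quantize(model, per_channel=True, nbits=8,
                           quantization_mode=QuantizationMode.IntegerOps)
onnx.save(quantized_model, 'model_quant.onnx')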
def onnx_to_coreml(onnx_model, output):
    # type: (IO[str], str) -> None
    onnx_model_proto = onnx_pb.ModelProto()
    onnx_model_proto.ParseFromString(onnx_model.read())
    coreml_model = convert(onnx_model_proto)
    coreml_model.save(output)
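The function expects an already-open binary file handle, so a call might look like this (file names hypothetical):

# Hypothetical invocation with made-up file names.
with open('model.onnx', 'rb') as f:
    onnx_to_coreml(f, 'model.mlmodel')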
def quantize(model,
             per_channel=False,
             nbits=8,
             quantization_mode=QuantizationMode.IntegerOps,
             static=False,
             force_fusions=False,
             symmetric_activation=False,
             symmetric_weight=False,
             quantization_params=None,
             nodes_to_quantize=None,
             nodes_to_exclude=None,
             op_types_to_quantize=[]):
    '''
    Given an onnx model, create a quantized onnx model and save it into a file
    :param model: ModelProto to quantize
    :param per_channel: quantize weights per channel
    :param nbits: number of bits to represent quantized data. Currently only supporting 8-bit types
    :param quantization_mode: Can be one of the QuantizationMode types.
        IntegerOps: the function will use integer ops. Only ConvInteger and MatMulInteger ops
            are supported now.
        QLinearOps: the function will use QLinear ops. Only QLinearConv and QLinearMatMul ops
            are supported now.
    :param static:
        True: The inputs/activations are quantized using static scale and zero point values
            specified through quantization_params.
        False: The inputs/activations are quantized using dynamic scale and zero point values
            computed while running the model.
    :param symmetric_activation:
        True: activations are quantized into signed integers.
        False: activations are quantized into unsigned integers.
    :param symmetric_weight:
        True: weights are quantized into signed integers.
        False: weights are quantized into unsigned integers.
    :param quantization_params:
        Dictionary to specify the zero point and scale values for inputs to conv and matmul nodes.
        Should be specified when static is set to True.
        The quantization_params should be specified in the following format:
            { "input_name": [zero_point, scale] }
        zero_point should be of type np.uint8 and scale should be of type np.float32.
        example:
            {
                'resnet_model/Relu_1:0': [np.uint8(0), np.float32(0.019539741799235344)],
                'resnet_model/Relu_2:0': [np.uint8(0), np.float32(0.011359662748873234)]
            }
    :param nodes_to_quantize:
        List of node names to quantize. When this list is not None only the nodes in this list
        are quantized.
        example: [ 'Conv__224', 'Conv__252' ]
    :param nodes_to_exclude:
        List of node names to exclude. The nodes in this list will be excluded from quantization
        when it is not None.
    :param op_types_to_quantize:
        Specify the types of operators to quantize, like ['Conv'] to quantize Conv only.
        It quantizes all supported operators by default.
    :return: ModelProto with quantization
    '''
    logging.warning("onnxruntime.quantization.quantize is deprecated.\n"
                    "Please use quantize_static for static quantization, "
                    "quantize_dynamic for dynamic quantization.")
    if nbits == 8 or nbits == 7:
        mode = quantization_mode
        copy_model = onnx_proto.ModelProto()
        copy_model.CopyFrom(model)

        if not op_types_to_quantize or len(op_types_to_quantize) == 0:
            op_types_to_quantize = list(QLinearOpsRegistry.keys()) if static else list(IntegerOpsRegistry.keys())

        quantizer = ONNXQuantizer(copy_model, per_channel, nbits == 7, mode, static,
                                  symmetric_weight, symmetric_activation, quantization_params,
                                  nodes_to_quantize, nodes_to_exclude, op_types_to_quantize)

        quantizer.quantize_model()
        return quantizer.model.model
    else:
        raise ValueError('Only 8 and 7 bit quantization is currently supported')
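As the deprecation warning suggests, newer onnxruntime releases expose quantize_dynamic and quantize_static instead. A sketch of the dynamic path (the file paths are hypothetical):

from onnxruntime.quantization import quantize_dynamic, QuantType

# Hypothetical paths: the file-based replacement for the deprecated API above.
quantize_dynamic('model.onnx', 'model_quant.onnx', weight_type=QuantType.QUInt8)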
def main():
    os.system('rm -rf saved_models && mkdir saved_models')

    files = glob.glob('saved_models/*.onnx') + glob.glob('../yolov3/weights/*.onnx')
    for f in files:
        # 1. ONNX to CoreML
        name = 'saved_models/' + f.split('/')[-1].replace('.onnx', '')

        # Load the ONNX model
        model = onnx.load(f)

        # Check that the IR is well formed
        print(onnx.checker.check_model(model))

        # Print a human readable representation of the graph
        print(onnx.helper.printable_graph(model.graph))

        model_file = open(f, 'rb')
        model_proto = onnx_pb.ModelProto()
        model_proto.ParseFromString(model_file.read())
        yolov3_model = convert(model_proto,
                               image_input_names=['0'],
                               preprocessing_args={'image_scale': 1. / 255})

        # 2. Reduce model to FP16, change outputs to DOUBLE and save
        import coremltools

        spec = yolov3_model.get_spec()
        for i in range(2):
            spec.description.output[i].type.multiArrayType.dataType = \
                coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value('DOUBLE')
        spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)
        yolov3_model = coremltools.models.MLModel(spec)

        name_out0 = spec.description.output[0].name
        name_out1 = spec.description.output[1].name

        num_classes = 80
        num_anchors = 507  # 507 for yolov3-tiny
        spec.description.output[0].type.multiArrayType.shape.append(num_anchors)
        spec.description.output[0].type.multiArrayType.shape.append(num_classes)
        # spec.description.output[0].type.multiArrayType.shape.append(1)
        spec.description.output[1].type.multiArrayType.shape.append(num_anchors)
        spec.description.output[1].type.multiArrayType.shape.append(4)
        # spec.description.output[1].type.multiArrayType.shape.append(1)

        # rename
        # input_mlmodel = input_tensor.replace(":", "__").replace("/", "__")
        # class_output_mlmodel = class_output_tensor.replace(":", "__").replace("/", "__")
        # bbox_output_mlmodel = bbox_output_tensor.replace(":", "__").replace("/", "__")
        #
        # for i in range(len(spec.neuralNetwork.layers)):
        #     if spec.neuralNetwork.layers[i].input[0] == input_mlmodel:
        #         spec.neuralNetwork.layers[i].input[0] = 'image'
        #     if spec.neuralNetwork.layers[i].output[0] == class_output_mlmodel:
        #         spec.neuralNetwork.layers[i].output[0] = 'scores'
        #     if spec.neuralNetwork.layers[i].output[0] == bbox_output_mlmodel:
        #         spec.neuralNetwork.layers[i].output[0] = 'boxes'

        spec.neuralNetwork.preprocessing[0].featureName = '0'

        yolov3_model.save(name + '.mlmodel')
        # yolov3_model.visualize_spec()
        print(spec.description)

        # 2.5. Try to Predict:
        from PIL import Image
        img = Image.open('../yolov3/data/samples/zidane_416.jpg')
        out = yolov3_model.predict({'0': img}, useCPUOnly=True)
        print(out[name_out0].shape, out[name_out1].shape)

        # 3. Create NMS protobuf
        import numpy as np

        nms_spec = coremltools.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 3

        for i in range(2):
            decoder_output = yolov3_model._spec.description.output[i].SerializeToString()
            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)
            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = 'confidence'
        nms_spec.description.output[1].name = 'coordinates'

        output_sizes = [num_classes, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        nms.confidenceInputFeatureName = name_out0  # 1x507x80
        nms.coordinatesInputFeatureName = name_out1  # 1x507x4
        nms.confidenceOutputFeatureName = 'confidence'
        nms.coordinatesOutputFeatureName = 'coordinates'
        nms.iouThresholdInputFeatureName = 'iouThreshold'
        nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'
        nms.iouThreshold = 0.4
        nms.confidenceThreshold = 0.5
        nms.pickTop.perClass = True
        labels = np.loadtxt('../yolov3/data/coco.names', dtype=str, delimiter='\n')
        nms.stringClassLabels.vector.extend(labels)

        nms_model = coremltools.models.MLModel(nms_spec)
        nms_model.save(name + '_nms.mlmodel')

        # out_nms = nms_model.predict({
        #     '143': out['143'].squeeze().reshape((80, 507)),
        #     '144': out['144'].squeeze().reshape((4, 507))
        # })
        # print(out_nms['confidence'].shape, out_nms['coordinates'].shape)
        #
        # # 3.5 Add Softmax model
        # from coremltools.models import datatypes
        # from coremltools.models import neural_network
        #
        # input_features = [
        #     ("141", datatypes.Array(num_anchors, num_classes, 1)),
        #     ("143", datatypes.Array(num_anchors, 4, 1))
        # ]
        #
        # output_features = [
        #     ("141", datatypes.Array(num_anchors, num_classes, 1)),
        #     ("143", datatypes.Array(num_anchors, 4, 1))
        # ]
        #
        # builder = neural_network.NeuralNetworkBuilder(input_features, output_features)
        # builder.add_softmax(name="softmax_pcls",
        #                     dim=(0, 3, 2, 1),
        #                     input_name="scores",
        #                     output_name="permute_scores_output")
        # softmax_model = coremltools.models.MLModel(builder.spec)
        # softmax_model.save("softmax.mlmodel")

        # 4. Pipeline models together
        from coremltools.models import datatypes
        # from coremltools.models import neural_network
        from coremltools.models.pipeline import Pipeline

        input_features = [('0', datatypes.Array(3, 416, 416)),
                          ('iouThreshold', datatypes.Double()),
                          ('confidenceThreshold', datatypes.Double())]
        output_features = ['confidence', 'coordinates']
        pipeline = Pipeline(input_features, output_features)

        # Add 3rd dimension of size 1 (apparently not needed, produces error on compile)
        yolov3_output = yolov3_model._spec.description.output
        yolov3_output[0].type.multiArrayType.shape[:] = [num_anchors, num_classes, 1]
        yolov3_output[1].type.multiArrayType.shape[:] = [num_anchors, 4, 1]

        nms_input = nms_model._spec.description.input
        for i in range(2):
            nms_input[i].type.multiArrayType.shape[:] = yolov3_output[i].type.multiArrayType.shape[:]

        # And now we can add the two models, in order:
        pipeline.add_model(yolov3_model)
        pipeline.add_model(nms_model)

        # Correct datatypes
        pipeline.spec.description.input[0].ParseFromString(
            yolov3_model._spec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(
            nms_model._spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(
            nms_model._spec.description.output[1].SerializeToString())

        # Update metadata
        pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch'
        pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3'
        pipeline.spec.description.metadata.author = '*****@*****.**'
        pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3'
        user_defined_metadata = {
            'classes': ','.join(labels),
            'iou_threshold': str(nms.iouThreshold),
            'confidence_threshold': str(nms.confidenceThreshold)
        }
        pipeline.spec.description.metadata.userDefined.update(user_defined_metadata)

        # Save the model
        pipeline.spec.specificationVersion = 3
        final_model = coremltools.models.MLModel(pipeline.spec)
        final_model.save(name + '_pipelined.mlmodel')
def main(input_file, output_file):
    num_classes = 8
    model_ft = models.resnet18(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Sequential(nn.Dropout(p=.5),
                                nn.Linear(num_ftrs, num_classes),
                                nn.Sigmoid())
    checkpoint = torch.load(input_file, map_location=lambda storage, loc: storage)
    model_ft.load_state_dict(checkpoint['model'])

    imsize = 224
    dummy_input = torch.randn(1, 3, imsize, imsize, requires_grad=True)
    protofile = output_file + '.proto'
    model_ft.train(False)
    model_ft.eval()
    model_ft.cpu()
    torch.onnx.export(model_ft,
                      dummy_input,
                      protofile,
                      input_names=['0'],
                      output_names=['classification'],
                      verbose=True)
    model = onnx.load(protofile)

    scale = 1.0 / (0.226 * 255.0)
    red_scale = 1.0 / (0.229 * 255.0)
    green_scale = 1.0 / (0.224 * 255.0)
    blue_scale = 1.0 / (0.225 * 255.0)
    args = dict(is_bgr=False,
                red_bias=-(0.485 * 255.0),
                green_bias=-(0.456 * 255.0),
                blue_bias=-(0.406 * 255.0))

    model_file = open(protofile, 'rb')
    model_proto = onnx_pb.ModelProto()
    model_proto.ParseFromString(model_file.read())
    coreml_model = convert(model_proto, image_input_names=['0'], preprocessing_args=args)
    spec = coreml_model.get_spec()

    # get NN portion of the spec
    nn_spec = spec.neuralNetwork
    layers = nn_spec.layers  # this is a list of all the layers
    layers_copy = copy.deepcopy(layers)  # make a copy of the layers, these will be added back later
    del nn_spec.layers[:]  # delete all the layers

    # add a scale layer now
    # since mlmodel is in protobuf format, we can add proto messages directly
    # To look at more examples on how to add other layers: see "builder.py" file in coremltools repo
    scale_layer = nn_spec.layers.add()
    scale_layer.name = 'scale_layer'
    scale_layer.input.append('0')
    scale_layer.output.append('input1_scaled')

    params = scale_layer.scale
    params.scale.floatValue.extend([red_scale, green_scale, blue_scale])  # scale values for RGB
    params.shapeScale.extend([3, 1, 1])  # shape of the scale vector

    # now add back the rest of the layers (which happens to be just one in this case: the crop layer)
    nn_spec.layers.extend(layers_copy)
    # need to also change the input of the crop layer to match the output of the scale layer
    nn_spec.layers[1].input[0] = 'input1_scaled'

    print(spec.description)
    coreml_model = coremltools.models.MLModel(spec)
    coreml_model.save(output_file)
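A hedged smoke test of the saved classifier. The mlmodel path and sample image are made up; the input feature '0' and output 'classification' come from the export arguments above:

from PIL import Image
import coremltools

# Hypothetical check; resize to the 224x224 input the model was exported with.
mlmodel = coremltools.models.MLModel('classifier.mlmodel')  # made-up path
img = Image.open('sample.jpg').resize((224, 224))
print(mlmodel.predict({'0': img})['classification'])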
def augment_graph(self):
    '''
    Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
    model and ensures their outputs are stored as part of the graph output
    :return: augmented ONNX model
    '''
    model = onnx_proto.ModelProto()
    model.CopyFrom(self.model)
    model = onnx.shape_inference.infer_shapes(model)

    added_nodes = []
    added_outputs = []
    tensors, value_infos = self.select_tensors_to_calibrate(model)

    for tensor in tensors:
        # When doing ReduceMax/ReduceMin, keep the dimension if the tensor contains
        # a dim with value of 0, for example:
        #   dim = [ dim_value: 0 ]
        # otherwise, don't keep the dimension.
        dim = value_infos[tensor].type.tensor_type.shape.dim
        keepdims = 0
        shape = ()
        for d in dim:
            # A dimension can be either an integer value or a symbolic variable.
            # A dimension with an integer value of 0 is what we are looking for to keep the dimension.
            # Please see the def of TensorShapeProto:
            # https://github.com/onnx/onnx/blob/master/onnx/onnx.proto#L630
            if d.WhichOneof('value') == 'dim_value' and d.dim_value == 0:
                keepdims = 1
                shape = (1,) if len(dim) == 1 else list(1 for i in range(len(dim)))
                break

        # Adding ReduceMin nodes
        reduce_min_name = tensor + '_ReduceMin'
        reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor],
                                                [tensor + '_ReduceMin'],
                                                reduce_min_name,
                                                keepdims=keepdims)
        added_nodes.append(reduce_min_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_min_node.output[0],
                                          TensorProto.FLOAT, shape))

        # Adding ReduceMax nodes
        reduce_max_name = tensor + '_ReduceMax'
        reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor],
                                                [tensor + '_ReduceMax'],
                                                reduce_max_name,
                                                keepdims=keepdims)
        added_nodes.append(reduce_max_node)
        added_outputs.append(
            helper.make_tensor_value_info(reduce_max_node.output[0],
                                          TensorProto.FLOAT, shape))

    model.graph.node.extend(added_nodes)
    model.graph.output.extend(added_outputs)

    onnx.save(model, self.augmented_model_path)
    self.augment_model = model
import sys

from onnx import onnx_pb
from onnx_coreml import convert

model_in = sys.argv[1]
model_out = sys.argv[2]

model_file = open(model_in, 'rb')
model_proto = onnx_pb.ModelProto()
model_proto.ParseFromString(model_file.read())
coreml_model = convert(model_proto,
                       image_input_names=['inputImage'],
                       image_output_names=['outputImage'])
coreml_model.save(model_out)
#!/usr/bin/env python3
import sys

from google.protobuf import text_format
from onnx import onnx_pb

with open(sys.argv[1]) as f:
    c = f.read()

m = onnx_pb.ModelProto()
text_format.Merge(c, m)

with open(sys.argv[2], 'wb') as f:
    f.write(m.SerializeToString())
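The script above converts a text-format ModelProto to the binary serialization; the reverse direction is a natural companion sketch (file arguments are hypothetical, same argv convention):

#!/usr/bin/env python3
# Hypothetical companion: dump a binary ONNX model back to text proto format.
import sys

from google.protobuf import text_format
from onnx import onnx_pb

m = onnx_pb.ModelProto()
with open(sys.argv[1], 'rb') as f:
    m.ParseFromString(f.read())

with open(sys.argv[2], 'w') as f:
    f.write(text_format.MessageToString(m))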