def convert_pytorch_to_coreml(cfg_file, class_names, state_dict_file, dummy_input, coreml_pipeline_filename):
    """Export a PyTorch YOLO model to a Core ML pipeline with built-in NMS.

    The network is built from ``cfg_file``, loaded with the weights in
    ``state_dict_file``, exported to ONNX, converted to Core ML and finally
    chained with a non-maximum-suppression (NMS) model into one pipeline that
    is saved to ``coreml_pipeline_filename``.

    Intermediate artefacts (the ONNX export, the plain Core ML model and the
    NMS model) are written to fixed paths under ``../../tests/output``
    relative to this source file.

    Args:
        cfg_file: Passed to ``Yolo(cfg_file=...)``.
        class_names: Sequence of class label strings. Its length fixes the
            width of the confidence output; the labels are stored in the NMS
            model and in the pipeline metadata.
        state_dict_file: Checkpoint handed to ``torch.load``.
        dummy_input: Example input forwarded to ``pytorch_to_onnx``.
        coreml_pipeline_filename: Destination of the final pipeline model.
            A bare filename (no directory part) is saved in the current
            working directory.

    Raises:
        AssertionError: If the destination directory does not exist.
    """
    # dirname() is '' for a bare filename; fall back to the current directory
    # so that such a destination is not rejected by the check below.
    _path = os.path.dirname(coreml_pipeline_filename) or os.curdir
    assert os.path.isdir(_path), f"path {_path} does not exist"
    if os.path.exists(coreml_pipeline_filename):
        logger.warning(f'file {coreml_pipeline_filename} exists.')

    # Intermediate files live at fixed locations next to the test outputs.
    HERE = os.path.dirname(os.path.realpath(__file__))
    _onnx_filename = os.path.join(HERE, '../../tests/output/yolo-cars.onnx')
    _coreml_filename = os.path.join(HERE, '../../tests/output/yolo-cars.mlmodel')
    _coreml_nms_filename = os.path.join(HERE, '../../tests/output/yolo-cars-nms.mlmodel')

    # PyTorch -> ONNX -> Core ML
    yolo = Yolo(cfg_file=cfg_file, class_names=class_names, batch_size=1, coreml_mode=True)
    yolo.load_state_dict(torch.load(state_dict_file, map_location=DEVICE))
    pytorch_to_onnx(yolo, dummy_input, _onnx_filename)
    onnx_model = onnx.load(_onnx_filename)
    coreml_model = convert(
        onnx_model,
        minimum_ios_deployment_target="13",
        image_input_names=['image'],
        preprocessing_args={
            "image_scale": 1 / 255.0,
            "red_bias": 0,
            "green_bias": 0,
            "blue_bias": 0,
        })
    yolo_model = coremltools.models.MLModel(coreml_model.get_spec())
    yolo_model.save(_coreml_filename)

    # nms model
    nms_spec = coremltools.proto.Model_pb2.Model()
    nms_spec.specificationVersion = 3

    # boxes: the first YOLO output becomes NMS input 0 and, renamed, output 0
    yolo_boxes = yolo_model._spec.description.output[0].SerializeToString()
    nms_spec.description.input.add()
    nms_spec.description.input[0].ParseFromString(yolo_boxes)
    nms_spec.description.output.add()
    nms_spec.description.output[0].ParseFromString(yolo_boxes)
    nms_spec.description.output[0].name = "coordinates"

    # scores: the second YOLO output becomes NMS input 1 and, renamed, output 1
    yolo_scores = yolo_model._spec.description.output[1].SerializeToString()
    nms_spec.description.input.add()
    nms_spec.description.input[1].ParseFromString(yolo_scores)
    nms_spec.description.output.add()
    nms_spec.description.output[1].ParseFromString(yolo_scores)
    nms_spec.description.output[1].name = "confidence"

    # coordinates: first dimension has bounds 0..-1, second is fixed at 4
    ma_type = nms_spec.description.output[0].type.multiArrayType
    ma_type.shapeRange.sizeRanges.add()
    ma_type.shapeRange.sizeRanges[0].lowerBound = 0
    ma_type.shapeRange.sizeRanges[0].upperBound = -1
    ma_type.shapeRange.sizeRanges.add()
    ma_type.shapeRange.sizeRanges[1].lowerBound = 4
    ma_type.shapeRange.sizeRanges[1].upperBound = 4
    del ma_type.shape[:]

    # confidence: first dimension has bounds 0..-1, second is one per class
    ma_type = nms_spec.description.output[1].type.multiArrayType
    ma_type.shapeRange.sizeRanges.add()
    ma_type.shapeRange.sizeRanges[0].lowerBound = 0
    ma_type.shapeRange.sizeRanges[0].upperBound = -1
    ma_type.shapeRange.sizeRanges.add()
    ma_type.shapeRange.sizeRanges[1].lowerBound = len(class_names)
    ma_type.shapeRange.sizeRanges[1].upperBound = len(class_names)
    del ma_type.shape[:]

    nms = nms_spec.nonMaximumSuppression
    # NOTE(review): assumes the converted YOLO model names its two outputs
    # "boxes" and "scores" - confirm against the exported model.
    nms.coordinatesInputFeatureName = "boxes"
    nms.confidenceInputFeatureName = "scores"
    nms.coordinatesOutputFeatureName = "coordinates"
    nms.confidenceOutputFeatureName = "confidence"
    nms.iouThresholdInputFeatureName = "iouThreshold"
    nms.confidenceThresholdInputFeatureName = "confidenceThreshold"
    default_iou_threshold = 0.5
    nms.iouThreshold = default_iou_threshold
    default_confidence_threshold = 0.7
    nms.confidenceThreshold = default_confidence_threshold
    nms.pickTop.perClass = True
    nms.stringClassLabels.vector.extend(class_names)
    nms_model = coremltools.models.MLModel(nms_spec)
    nms_model.save(_coreml_nms_filename)

    # pipeline
    input_features = [("image", datatypes.Array(3, 416, 416)),
                      ("iouThreshold", datatypes.Double()),
                      ("confidenceThreshold", datatypes.Double())]
    output_features = [
        "coordinates",
        "confidence",
    ]
    pipeline = Pipeline(input_features, output_features)
    pipeline.add_model(yolo_model)
    pipeline.add_model(nms_model)

    # configure in and output of pipeline: copy the feature declarations of
    # the first model's input and of the NMS model's outputs
    pipeline.spec.description.input[0].ParseFromString(
        yolo_model._spec.description.input[0].SerializeToString())
    pipeline.spec.description.output[0].ParseFromString(
        nms_model._spec.description.output[0].SerializeToString())
    pipeline.spec.description.output[1].ParseFromString(
        nms_model._spec.description.output[1].SerializeToString())

    user_defined_metadata = {
        "classes": ",".join(class_names),
        "iou_threshold": str(default_iou_threshold),
        "confidence_threshold": str(default_confidence_threshold),
    }
    pipeline.spec.description.metadata.userDefined.update(
        user_defined_metadata)

    pipeline.spec.specificationVersion = 3
    final_model = coremltools.models.MLModel(pipeline.spec)
    final_model.save(coreml_pipeline_filename)
def main():
    """Convert ONNX YOLOv3 exports to Core ML models with an NMS pipeline.

    For every ``*.onnx`` file found, three files are written to
    ``saved_models/``: the converted network (``<name>.mlmodel``), a
    stand-alone non-maximum-suppression model (``<name>_nms.mlmodel``) and a
    pipeline chaining the two (``<name>_pipelined.mlmodel``).
    """
    import coremltools
    from coremltools.models import datatypes
    from coremltools.models.pipeline import Pipeline

    # Start from an empty output directory.
    os.system('rm -rf saved_models && mkdir saved_models')
    # NOTE: saved_models/ has just been wiped, so in practice the inputs come
    # from the second pattern.
    files = glob.glob('saved_models/*.onnx') + glob.glob(
        '../yolov3/weights/*.onnx')

    for f in files:
        # 1. ONNX to CoreML
        # Output stem: saved_models/<input file name without extension>
        name = os.path.join('saved_models',
                            os.path.splitext(os.path.basename(f))[0])

        model_proto = onnx_pb.ModelProto()
        # Context manager so the file handle is closed once parsed.
        with open(f, 'rb') as model_file:
            model_proto.ParseFromString(model_file.read())
        coreml_model = convert(model_proto, image_input_names=['0'])

        # 2. Reduce model to FP16, change outputs to DOUBLE and save
        spec = coreml_model.get_spec()
        double_type = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value('DOUBLE')
        for i in range(2):
            spec.description.output[i].type.multiArrayType.dataType = double_type
        spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(
            spec)
        coreml_model = coremltools.models.MLModel(spec)

        num_classes = 80
        num_anchors = 507
        # NOTE(review): these shapes are appended to `spec` after the MLModel
        # was built from it; whether the model sees them depends on whether
        # MLModel keeps a reference to `spec` - confirm.
        spec.description.output[0].type.multiArrayType.shape.append(
            num_classes)
        spec.description.output[0].type.multiArrayType.shape.append(
            num_anchors)
        spec.description.output[1].type.multiArrayType.shape.append(4)
        spec.description.output[1].type.multiArrayType.shape.append(
            num_anchors)
        coreml_model.save(name + '.mlmodel')
        print(spec.description)

        # 3. Create NMS protobuf
        nms_spec = coremltools.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 3

        # Both NMS inputs and outputs start as copies of the model outputs.
        for i in range(2):
            decoder_output = coreml_model._spec.description.output[
                i].SerializeToString()
            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)
            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = 'confidence'
        nms_spec.description.output[1].name = 'coordinates'

        # First dimension has bounds 0..-1, second is fixed per output.
        output_sizes = [num_classes, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        # NOTE(review): hard-coded output names of the converted model -
        # presumably specific to one ONNX export; confirm.
        nms.confidenceInputFeatureName = '133'  # 1x507x80
        nms.coordinatesInputFeatureName = '134'  # 1x507x4
        nms.confidenceOutputFeatureName = 'confidence'
        nms.coordinatesOutputFeatureName = 'coordinates'
        nms.iouThresholdInputFeatureName = 'iouThreshold'
        nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'
        nms.iouThreshold = 0.6
        nms.confidenceThreshold = 0.4
        nms.pickTop.perClass = True

        # One label per line. Read as plain text: np.loadtxt with a newline
        # delimiter is rejected by newer NumPy releases.
        with open('../yolov3/data/coco.names') as names_file:
            labels = [line.strip() for line in names_file if line.strip()]
        nms.stringClassLabels.vector.extend(labels)

        nms_model = coremltools.models.MLModel(nms_spec)
        nms_model.save(name + '_nms.mlmodel')

        # 4. Pipeline models together
        input_features = [('image', datatypes.Array(3, 416, 416)),
                          ('iouThreshold', datatypes.Double()),
                          ('confidenceThreshold', datatypes.Double())]
        output_features = ['confidence', 'coordinates']
        pipeline = Pipeline(input_features, output_features)

        # Add the two models, in order.
        pipeline.add_model(coreml_model)
        pipeline.add_model(nms_model)

        # Correct datatypes: copy the feature declarations of the first
        # model's input and of the NMS model's outputs.
        pipeline.spec.description.input[0].ParseFromString(
            coreml_model._spec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(
            nms_model._spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(
            nms_model._spec.description.output[1].SerializeToString())

        # Update metadata
        pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch'
        pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3'
        pipeline.spec.description.metadata.author = '*****@*****.**'
        pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3'

        user_defined_metadata = {
            'classes': ','.join(labels),
            'iou_threshold': str(nms.iouThreshold),
            'confidence_threshold': str(nms.confidenceThreshold),
        }
        pipeline.spec.description.metadata.userDefined.update(
            user_defined_metadata)

        # Save the model
        pipeline.spec.specificationVersion = 3
        final_model = coremltools.models.MLModel(pipeline.spec)
        final_model.save(name + '_pipelined.mlmodel')
def main():
    """Convert ONNX YOLOv3 exports to Core ML, test-predict, and add NMS.

    For every ``*.onnx`` file found, the graph is checked and printed, the
    model is converted to Core ML (``<name>.mlmodel``), run once on a sample
    image, and then combined with a non-maximum-suppression model
    (``<name>_nms.mlmodel``) into a pipeline (``<name>_pipelined.mlmodel``).
    All outputs are written to ``saved_models/``.
    """
    import coremltools
    from coremltools.models import datatypes
    from coremltools.models.pipeline import Pipeline
    from PIL import Image

    # Start from an empty output directory.
    os.system('rm -rf saved_models && mkdir saved_models')
    # NOTE: saved_models/ has just been wiped, so in practice the inputs come
    # from the second pattern.
    files = glob.glob('saved_models/*.onnx') + \
        glob.glob('../yolov3/weights/*.onnx')

    for f in files:
        # 1. ONNX to CoreML
        # Output stem: saved_models/<input file name without extension>
        name = os.path.join('saved_models',
                            os.path.splitext(os.path.basename(f))[0])

        # Load the ONNX model
        model = onnx.load(f)

        # Check that the IR is well formed
        print(onnx.checker.check_model(model))

        # Print a human readable representation of the graph
        print(onnx.helper.printable_graph(model.graph))

        model_proto = onnx_pb.ModelProto()
        # Context manager so the file handle is closed once parsed.
        with open(f, 'rb') as model_file:
            model_proto.ParseFromString(model_file.read())
        yolov3_model = convert(model_proto,
                               image_input_names=['0'],
                               preprocessing_args={'image_scale': 1. / 255})

        # 2. Reduce model to FP16, change outputs to DOUBLE and save
        spec = yolov3_model.get_spec()
        double_type = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value(
            'DOUBLE')
        for i in range(2):
            spec.description.output[i].type.multiArrayType.dataType = double_type
        spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(
            spec)
        yolov3_model = coremltools.models.MLModel(spec)

        # Remember the generated output names; the NMS model refers to them.
        name_out0 = spec.description.output[0].name
        name_out1 = spec.description.output[1].name

        num_classes = 80
        num_anchors = 507  # 507 for yolov3-tiny
        # NOTE(review): these edits are made on `spec` after the MLModel was
        # built from it; whether the model sees them depends on whether
        # MLModel keeps a reference to `spec` - confirm.
        spec.description.output[0].type.multiArrayType.shape.append(
            num_anchors)
        spec.description.output[0].type.multiArrayType.shape.append(
            num_classes)
        spec.description.output[1].type.multiArrayType.shape.append(
            num_anchors)
        spec.description.output[1].type.multiArrayType.shape.append(4)

        spec.neuralNetwork.preprocessing[0].featureName = '0'

        yolov3_model.save(name + '.mlmodel')
        print(spec.description)

        # 2.5. Try to Predict:
        img = Image.open('../yolov3/data/samples/zidane_416.jpg')
        out = yolov3_model.predict({'0': img}, useCPUOnly=True)
        print(out[name_out0].shape, out[name_out1].shape)

        # 3. Create NMS protobuf
        nms_spec = coremltools.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 3

        # Both NMS inputs and outputs start as copies of the model outputs.
        for i in range(2):
            decoder_output = yolov3_model._spec.description.output[
                i].SerializeToString()
            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)
            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = 'confidence'
        nms_spec.description.output[1].name = 'coordinates'

        # First dimension has bounds 0..-1, second is fixed per output.
        output_sizes = [num_classes, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        nms.confidenceInputFeatureName = name_out0  # 1x507x80
        nms.coordinatesInputFeatureName = name_out1  # 1x507x4
        nms.confidenceOutputFeatureName = 'confidence'
        nms.coordinatesOutputFeatureName = 'coordinates'
        nms.iouThresholdInputFeatureName = 'iouThreshold'
        nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'
        nms.iouThreshold = 0.4
        nms.confidenceThreshold = 0.5
        nms.pickTop.perClass = True

        # One label per line. Read as plain text: np.loadtxt with a newline
        # delimiter is rejected by newer NumPy releases.
        with open('../yolov3/data/coco.names') as names_file:
            labels = [line.strip() for line in names_file if line.strip()]
        nms.stringClassLabels.vector.extend(labels)

        nms_model = coremltools.models.MLModel(nms_spec)
        nms_model.save(name + '_nms.mlmodel')

        # 4. Pipeline models together
        input_features = [('0', datatypes.Array(3, 416, 416)),
                          ('iouThreshold', datatypes.Double()),
                          ('confidenceThreshold', datatypes.Double())]
        output_features = ['confidence', 'coordinates']
        pipeline = Pipeline(input_features, output_features)

        # Give both model outputs, and the matching NMS inputs, a trailing
        # dimension of size 1.
        # NOTE(review): an earlier comment here said this was "apparently not
        # needed, produces error on compile" - confirm it is still wanted.
        yolov3_output = yolov3_model._spec.description.output
        yolov3_output[0].type.multiArrayType.shape[:] = [
            num_anchors, num_classes, 1
        ]
        yolov3_output[1].type.multiArrayType.shape[:] = [num_anchors, 4, 1]
        nms_input = nms_model._spec.description.input
        for i in range(2):
            nms_input[i].type.multiArrayType.shape[:] = yolov3_output[
                i].type.multiArrayType.shape[:]

        # Add the two models, in order.
        pipeline.add_model(yolov3_model)
        pipeline.add_model(nms_model)

        # Correct datatypes: copy the feature declarations of the first
        # model's input and of the NMS model's outputs.
        pipeline.spec.description.input[0].ParseFromString(
            yolov3_model._spec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(
            nms_model._spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(
            nms_model._spec.description.output[1].SerializeToString())

        # Update metadata
        pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch'
        pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3'
        pipeline.spec.description.metadata.author = '*****@*****.**'
        pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3'

        user_defined_metadata = {
            'classes': ','.join(labels),
            'iou_threshold': str(nms.iouThreshold),
            'confidence_threshold': str(nms.confidenceThreshold),
        }
        pipeline.spec.description.metadata.userDefined.update(
            user_defined_metadata)

        # Save the model
        pipeline.spec.specificationVersion = 3
        final_model = coremltools.models.MLModel(pipeline.spec)
        final_model.save(name + '_pipelined.mlmodel')
def convert_ssd(exported_graph_path, model_structure, output_path):
    """Convert an exported SSD graph into a Core ML detection pipeline.

    The saved model under ``exported_graph_path`` is frozen and stripped,
    converted with ``tfcoreml``, and chained with a decoder and a
    non-maximum-suppression model. The resulting pipeline is saved as
    ``Model.mlmodel`` inside ``output_path``.

    Args:
        exported_graph_path: Directory containing ``saved_model`` and
            ``labels.json`` (a JSON list of label strings).
        model_structure: Not used by this function; kept so existing callers
            keep working.
        output_path: Directory that receives ``Model.mlmodel``. It is
            created if it does not exist.
    """
    num_anchors = 1917

    saved_model_path = os.path.join(exported_graph_path, 'saved_model')
    coreml_model_path = os.path.join(output_path, 'Model.mlmodel')
    json_labels = os.path.join(exported_graph_path, 'labels.json')

    # Make sure the destination exists before anything is written to it.
    # An empty output_path means the current directory, which needs no mkdir.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    with open(json_labels) as f:
        labels = json.load(f)

    # Strip the model down to something usable by Core ML.
    # Instead of `concat_1`, use `Postprocessor/convert_scores`, because it
    # applies the sigmoid to the class scores.
    frozen_model_path = '.tmp/tmp_frozen_graph.pb'
    input_node = 'Preprocessor/sub'
    bbox_output_node = 'concat'
    class_output_node = 'Postprocessor/convert_scores'
    graph = optimize_graph(saved_model_path, frozen_model_path, [input_node],
                           [bbox_output_node, class_output_node])

    # conversion tensors have a `:0` at the end of the name
    input_tensor = input_node + ':0'
    bbox_output_tensor = bbox_output_node + ':0'
    class_output_tensor = class_output_node + ':0'

    # Convert to Core ML model.
    ssd_model = tfcoreml.convert(
        tf_model_path=frozen_model_path,
        mlmodel_path=coreml_model_path,
        input_name_shape_dict={input_tensor: [1, 300, 300, 3]},
        image_input_names=input_tensor,
        output_feature_names=[bbox_output_tensor, class_output_tensor],
        is_bgr=False,
        red_bias=-1.0,
        green_bias=-1.0,
        blue_bias=-1.0,
        image_scale=2. / 255)

    spec = ssd_model.get_spec()

    # Rename the inputs and outputs to something more readable.
    # NOTE(review): relies on output 0 being the scores and output 1 the
    # boxes - confirm the converter's output ordering.
    spec.description.input[0].name = 'image'
    spec.description.input[0].shortDescription = 'Input image'
    spec.description.output[0].name = 'scores'
    spec.description.output[
        0].shortDescription = 'Predicted class scores for each bounding box'
    spec.description.output[1].name = 'boxes'
    spec.description.output[
        1].shortDescription = 'Predicted coordinates for each bounding box'

    # Names the converter gave the tensors inside the network.
    input_mlmodel = input_tensor.replace(':', '__').replace('/', '__')
    class_output_mlmodel = class_output_tensor.replace(':', '__').replace(
        '/', '__')
    bbox_output_mlmodel = bbox_output_tensor.replace(':', '__').replace(
        '/', '__')

    # Apply the same renames inside the network. Every input/output slot of
    # every layer is checked, so layers without inputs are skipped safely and
    # a match in a slot other than the first is not missed.
    for layer in spec.neuralNetwork.layers:
        for idx, blob in enumerate(list(layer.input)):
            if blob == input_mlmodel:
                layer.input[idx] = 'image'
        for idx, blob in enumerate(list(layer.output)):
            if blob == class_output_mlmodel:
                layer.output[idx] = 'scores'
            elif blob == bbox_output_mlmodel:
                layer.output[idx] = 'boxes'

    spec.neuralNetwork.preprocessing[0].featureName = 'image'

    # For some reason the output shape of the `scores` output is not filled in.
    spec.description.output[0].type.multiArrayType.shape.append(
        len(labels) + 1)
    spec.description.output[0].type.multiArrayType.shape.append(num_anchors)

    # And the `boxes` output shape is (4, 1917, 1) so get rid of that last one.
    del spec.description.output[1].type.multiArrayType.shape[-1]

    # Convert weights to 16-bit floats to make the model smaller.
    spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)

    # Create a new MLModel from the modified spec.
    ssd_model = coremltools.models.MLModel(spec)

    decoder_model = build_decoder(graph, len(labels), num_anchors)
    nms_model = build_nms(decoder_model, labels)

    input_features = [('image', datatypes.Array(3, 300, 300)),
                      ('iouThreshold', datatypes.Double()),
                      ('confidenceThreshold', datatypes.Double())]
    output_features = ['confidence', 'coordinates']
    pipeline = Pipeline(input_features, output_features)

    # We added a dimension of size 1 to the back of the inputs of the decoder
    # model, so we should also add this to the output of the SSD model or else
    # the inputs and outputs do not match and the pipeline is not valid.
    ssd_output = ssd_model._spec.description.output
    ssd_output[0].type.multiArrayType.shape[:] = [
        len(labels) + 1, num_anchors, 1
    ]
    ssd_output[1].type.multiArrayType.shape[:] = [4, num_anchors, 1]

    pipeline.add_model(ssd_model)
    pipeline.add_model(decoder_model)
    pipeline.add_model(nms_model)

    # The `image` input should really be an image, not a multi-array.
    pipeline.spec.description.input[0].ParseFromString(
        ssd_model._spec.description.input[0].SerializeToString())

    # Copy the declarations of the `confidence` and `coordinates` outputs.
    # The Pipeline makes these strings by default.
    pipeline.spec.description.output[0].ParseFromString(
        nms_model._spec.description.output[0].SerializeToString())
    pipeline.spec.description.output[1].ParseFromString(
        nms_model._spec.description.output[1].SerializeToString())

    # Add descriptions to the inputs and outputs.
    pipeline.spec.description.input[
        1].shortDescription = '(optional) IOU Threshold override'
    pipeline.spec.description.input[
        2].shortDescription = '(optional) Confidence Threshold override'
    pipeline.spec.description.output[
        0].shortDescription = u'Boxes \xd7 Class confidence'
    pipeline.spec.description.output[
        1].shortDescription = u'Boxes \xd7 [x, y, width, height] (relative to image size)'

    # Add metadata to the model.
    pipeline.spec.description.metadata.versionString = 'ssd_mobilenet'
    pipeline.spec.description.metadata.shortDescription = 'MobileNet + SSD'
    pipeline.spec.description.metadata.author = 'Converted to Core ML by Cloud Annotations'
    pipeline.spec.description.metadata.license = 'https://github.com/tensorflow/models/blob/master/research/object_detection'

    # Add the list of class labels and the default threshold values too.
    user_defined_metadata = {
        'iou_threshold': str(0.5),
        'confidence_threshold': str(0.5),
        'classes': ','.join(labels)
    }
    pipeline.spec.description.metadata.userDefined.update(
        user_defined_metadata)

    pipeline.spec.specificationVersion = 3

    final_model = coremltools.models.MLModel(pipeline.spec)
    final_model.save(coreml_model_path)
def convert_ssd(exported_graph_path, model_structure, output_path):
    """Build a Core ML SSD pipeline (network + decoder + NMS) from an export.

    Reads ``saved_model`` and ``labels.json`` from ``exported_graph_path``,
    converts the stripped graph with ``tfcoreml``, appends the decoder and
    non-maximum-suppression models, and writes the pipeline to
    ``<output_path>/Model.mlmodel``.

    Args:
        exported_graph_path: Directory holding the ``saved_model`` folder and
            a ``labels.json`` file containing a JSON list of label strings.
        model_structure: Not referenced in the body; retained for interface
            compatibility.
        output_path: Destination directory; created when missing.
    """
    num_anchors = 1917

    saved_model_path = os.path.join(exported_graph_path, 'saved_model')
    coreml_model_path = os.path.join(output_path, 'Model.mlmodel')
    json_labels = os.path.join(exported_graph_path, 'labels.json')

    # The converter and the final save both write into output_path, so make
    # sure it exists. An empty string means the current directory.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    with open(json_labels) as f:
        labels = json.load(f)
    num_scores = len(labels) + 1

    # Strip the model down to something usable by Core ML.
    # Instead of `concat_1`, use `Postprocessor/convert_scores`, because it
    # applies the sigmoid to the class scores.
    frozen_model_path = '.tmp/tmp_frozen_graph.pb'
    input_node = 'Preprocessor/sub'
    bbox_output_node = 'concat'
    class_output_node = 'Postprocessor/convert_scores'
    graph = optimize_graph(saved_model_path, frozen_model_path, [input_node],
                           [bbox_output_node, class_output_node])

    # conversion tensors have a `:0` at the end of the name
    input_tensor = input_node + ':0'
    bbox_output_tensor = bbox_output_node + ':0'
    class_output_tensor = class_output_node + ':0'

    # Convert to Core ML model.
    ssd_model = tfcoreml.convert(
        tf_model_path=frozen_model_path,
        mlmodel_path=coreml_model_path,
        input_name_shape_dict={
            input_tensor: [1, 300, 300, 3]
        },
        image_input_names=input_tensor,
        output_feature_names=[bbox_output_tensor, class_output_tensor],
        is_bgr=False,
        red_bias=-1.0,
        green_bias=-1.0,
        blue_bias=-1.0,
        image_scale=2./255)

    spec = ssd_model.get_spec()

    # Rename the inputs and outputs to something more readable.
    # NOTE(review): assumes output 0 holds the scores and output 1 the boxes;
    # verify the ordering produced by the converter.
    image_in = spec.description.input[0]
    image_in.name = 'image'
    image_in.shortDescription = 'Input image'
    scores_out = spec.description.output[0]
    scores_out.name = 'scores'
    scores_out.shortDescription = 'Predicted class scores for each bounding box'
    boxes_out = spec.description.output[1]
    boxes_out.name = 'boxes'
    boxes_out.shortDescription = 'Predicted coordinates for each bounding box'

    # Tensor names as they appear inside the converted network.
    input_mlmodel = input_tensor.replace(':', '__').replace('/', '__')
    class_output_mlmodel = class_output_tensor.replace(':', '__').replace('/', '__')
    bbox_output_mlmodel = bbox_output_tensor.replace(':', '__').replace('/', '__')

    # Mirror the renames in the layers. All slots are inspected, so a layer
    # with no inputs does not raise and a match past the first slot is
    # renamed as well.
    for layer in spec.neuralNetwork.layers:
        for slot, blob in enumerate(list(layer.input)):
            if blob == input_mlmodel:
                layer.input[slot] = 'image'
        for slot, blob in enumerate(list(layer.output)):
            if blob == class_output_mlmodel:
                layer.output[slot] = 'scores'
            elif blob == bbox_output_mlmodel:
                layer.output[slot] = 'boxes'

    spec.neuralNetwork.preprocessing[0].featureName = 'image'

    # For some reason the output shape of the `scores` output is not filled in.
    spec.description.output[0].type.multiArrayType.shape.append(num_scores)
    spec.description.output[0].type.multiArrayType.shape.append(num_anchors)

    # And the `boxes` output shape is (4, 1917, 1) so get rid of that last one.
    del spec.description.output[1].type.multiArrayType.shape[-1]

    # Convert weights to 16-bit floats to make the model smaller.
    spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)

    # Create a new MLModel from the modified spec.
    ssd_model = coremltools.models.MLModel(spec)

    decoder_model = build_decoder(graph, len(labels), num_anchors)
    nms_model = build_nms(decoder_model, labels)

    input_features = [
        ('image', datatypes.Array(3, 300, 300)),
        ('iouThreshold', datatypes.Double()),
        ('confidenceThreshold', datatypes.Double())
    ]
    output_features = ['confidence', 'coordinates']
    pipeline = Pipeline(input_features, output_features)

    # We added a dimension of size 1 to the back of the inputs of the decoder
    # model, so we should also add this to the output of the SSD model or else
    # the inputs and outputs do not match and the pipeline is not valid.
    ssd_output = ssd_model._spec.description.output
    ssd_output[0].type.multiArrayType.shape[:] = [num_scores, num_anchors, 1]
    ssd_output[1].type.multiArrayType.shape[:] = [4, num_anchors, 1]

    pipeline.add_model(ssd_model)
    pipeline.add_model(decoder_model)
    pipeline.add_model(nms_model)

    description = pipeline.spec.description

    # The `image` input should really be an image, not a multi-array.
    description.input[0].ParseFromString(ssd_model._spec.description.input[0].SerializeToString())

    # Copy the declarations of the `confidence` and `coordinates` outputs.
    # The Pipeline makes these strings by default.
    description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString())
    description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString())

    # Add descriptions to the inputs and outputs.
    description.input[1].shortDescription = '(optional) IOU Threshold override'
    description.input[2].shortDescription = '(optional) Confidence Threshold override'
    description.output[0].shortDescription = u'Boxes \xd7 Class confidence'
    description.output[1].shortDescription = u'Boxes \xd7 [x, y, width, height] (relative to image size)'

    # Add metadata to the model.
    description.metadata.versionString = 'ssd_mobilenet'
    description.metadata.shortDescription = 'MobileNet + SSD'
    description.metadata.author = 'Converted to Core ML by Cloud Annotations'
    description.metadata.license = 'https://github.com/tensorflow/models/blob/master/research/object_detection'

    # Add the list of class labels and the default threshold values too.
    user_defined_metadata = {
        'iou_threshold': str(0.5),
        'confidence_threshold': str(0.5),
        'classes': ','.join(labels)
    }
    description.metadata.userDefined.update(user_defined_metadata)

    pipeline.spec.specificationVersion = 3

    final_model = coremltools.models.MLModel(pipeline.spec)
    final_model.save(coreml_model_path)
def convert_localization(frozen_model, labels_path, output_path, anchors):
    """Convert a frozen SSD localization graph into a Core ML pipeline.

    The frozen graph is converted with ``tfcoreml``, its inputs and outputs
    are renamed and reshaped, and the result is chained with a decoder and a
    non-maximum-suppression model. The pipeline is saved as ``Model.mlmodel``
    inside ``output_path``.

    Args:
        frozen_model: Passed to ``tfcoreml.convert`` as ``tf_model_path``.
        labels_path: Path of a JSON file containing a list of label strings.
        output_path: Destination directory; created if it does not exist.
        anchors: Forwarded unchanged to ``build_decoder``.
    """
    os.makedirs(output_path, exist_ok=True)

    num_anchors = 1917

    with open(labels_path) as f:
        labels = json.load(f)

    # Strip the model down to something usable by Core ML.
    # Instead of `concat_1`, use `Postprocessor/convert_scores`, because it
    # applies the sigmoid to the class scores.
    input_node = "Preprocessor/sub"
    bbox_output_node = "Squeeze"
    class_output_node = "Postprocessor/convert_scores"

    # Convert to Core ML model.
    ssd_model = tfcoreml.convert(
        tf_model_path=frozen_model,
        input_name_shape_dict={input_node: [1, 300, 300, 3]},
        image_input_names=[input_node],
        output_feature_names=[bbox_output_node, class_output_node],
        is_bgr=False,
        red_bias=-1.0,
        green_bias=-1.0,
        blue_bias=-1.0,
        image_scale=2.0 / 255,
        minimum_ios_deployment_target="13",
    )

    spec = ssd_model.get_spec()

    # Rename the inputs and outputs to something more readable.
    spec.description.input[0].name = "image"
    spec.description.input[0].shortDescription = "Input image"
    spec.neuralNetwork.preprocessing[0].featureName = "image"

    # Outputs are matched by name, so their order does not matter.
    for output in spec.description.output:
        if output.name == bbox_output_node:
            output.name = "boxes"
            output.shortDescription = "Predicted coordinates for each bounding box"
            output.type.multiArrayType.shape[:] = [
                4,
                num_anchors,
                1,
            ]
        elif output.name == class_output_node:
            output.name = "scores"
            output.shortDescription = "Predicted class scores for each bounding box"
            output.type.multiArrayType.shape[:] = [
                len(labels) + 1,
                num_anchors,
                1,
            ]

    # Apply the same renames inside the network. Every input/output slot of
    # every layer is checked, so layers without inputs are skipped safely and
    # a match in a slot other than the first is not missed.
    for layer in spec.neuralNetwork.layers:
        for idx, blob in enumerate(list(layer.input)):
            if blob == input_node:
                layer.input[idx] = "image"
        for idx, blob in enumerate(list(layer.output)):
            if blob == class_output_node:
                layer.output[idx] = "scores"
            elif blob == bbox_output_node:
                layer.output[idx] = "boxes"

    for input_ in spec.description.input:
        _convert_multiarray_to_float32(input_)

    for output_ in spec.description.output:
        _convert_multiarray_to_float32(output_)

    # Convert weights to 16-bit floats to make the model smaller.
    spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)

    input_features = [
        ("image", datatypes.Array(3, 300, 300)),
        ("iouThreshold", datatypes.Double()),
        ("confidenceThreshold", datatypes.Double()),
    ]
    output_features = ["confidence", "coordinates"]
    pipeline = Pipeline(input_features, output_features)

    # Create a new MLModel from the modified spec.
    ssd_model = coremltools.models.MLModel(spec)
    decoder_model = build_decoder(anchors, len(labels), num_anchors)
    nms_model = build_nms(decoder_model, labels)

    pipeline.add_model(ssd_model)
    pipeline.add_model(decoder_model)
    pipeline.add_model(nms_model)

    # The `image` input should really be an image, not a multi-array.
    pipeline.spec.description.input[0].ParseFromString(
        ssd_model._spec.description.input[0].SerializeToString())

    # Copy the declarations of the `confidence` and `coordinates` outputs.
    # The Pipeline makes these strings by default.
    pipeline.spec.description.output[0].ParseFromString(
        nms_model._spec.description.output[0].SerializeToString())
    pipeline.spec.description.output[1].ParseFromString(
        nms_model._spec.description.output[1].SerializeToString())

    # Add descriptions to the inputs and outputs.
    pipeline.spec.description.input[
        1].shortDescription = "(optional) IOU Threshold override"
    pipeline.spec.description.input[
        2].shortDescription = "(optional) Confidence Threshold override"
    pipeline.spec.description.output[
        0].shortDescription = u"Boxes \xd7 Class confidence"
    pipeline.spec.description.output[
        1].shortDescription = u"Boxes \xd7 [x, y, width, height] (relative to image size)"

    # Add metadata to the model.
    pipeline.spec.description.metadata.versionString = "ssd_mobilenet"
    pipeline.spec.description.metadata.shortDescription = "MobileNet + SSD"
    pipeline.spec.description.metadata.author = (
        "Converted to Core ML by Cloud Annotations")
    pipeline.spec.description.metadata.license = (
        "https://github.com/tensorflow/models/blob/master/research/object_detection"
    )

    # Add the list of class labels and the default threshold values too.
    user_defined_metadata = {
        "iou_threshold": str(0.5),
        "confidence_threshold": str(0.5),
        "classes": ",".join(labels),
    }
    pipeline.spec.description.metadata.userDefined.update(
        user_defined_metadata)

    pipeline.spec.specificationVersion = 4

    final_model = coremltools.models.MLModel(pipeline.spec)
    final_model.save(os.path.join(output_path, "Model.mlmodel"))