Example #1
0
def convert_pytorch_to_coreml(cfg_file, class_names, state_dict_file,
                              dummy_input, coreml_pipeline_filename):
    """Convert a PyTorch YOLO model into a Core ML detection pipeline.

    The network is exported PyTorch -> ONNX -> Core ML, a Core ML
    non-maximum-suppression (NMS) model is built on top of its two outputs,
    and both are chained in a pipeline saved to ``coreml_pipeline_filename``.

    Args:
        cfg_file: Forwarded to ``Yolo`` as ``cfg_file``.
        class_names: Sequence of class label strings. Used for the NMS
            labels, the width of the confidence output and the metadata.
        state_dict_file: Path handed to ``torch.load`` for the weights.
        dummy_input: Forwarded to ``pytorch_to_onnx`` for the export.
        coreml_pipeline_filename: Destination of the final pipeline model.
            A bare file name is resolved against the current directory.

    Raises:
        AssertionError: If the destination directory does not exist.

    Side effects:
        Besides the pipeline, the intermediate ``yolo-cars.onnx``,
        ``yolo-cars.mlmodel`` and ``yolo-cars-nms.mlmodel`` files are written
        to ``../../tests/output`` relative to this source file.
    """
    # dirname() of a bare file name is '', which isdir() rejects; treat that
    # case as the current directory so relative file names are accepted.
    _path = os.path.dirname(coreml_pipeline_filename) or os.curdir
    assert os.path.isdir(_path), f"path {_path} does not exist"

    if os.path.exists(coreml_pipeline_filename):
        logger.warning(f'file {coreml_pipeline_filename} exists.')

    HERE = os.path.dirname(os.path.realpath(__file__))

    # NOTE: intermediate artifacts go to fixed locations, not temporary files.
    _onnx_filename = os.path.join(HERE, '../../tests/output/yolo-cars.onnx')
    _coreml_filename = os.path.join(HERE,
                                    '../../tests/output/yolo-cars.mlmodel')
    _coreml_nms_filename = os.path.join(
        HERE, '../../tests/output/yolo-cars-nms.mlmodel')

    yolo = Yolo(cfg_file=cfg_file,
                class_names=class_names,
                batch_size=1,
                coreml_mode=True)

    yolo.load_state_dict(torch.load(state_dict_file, map_location=DEVICE))

    pytorch_to_onnx(yolo, dummy_input, _onnx_filename)

    onnx_model = onnx.load(_onnx_filename)
    coreml_model = convert(onnx_model,
                           minimum_ios_deployment_target="13",
                           image_input_names=['image'],
                           preprocessing_args={
                               "image_scale": 1 / 255.0,
                               "red_bias": 0,
                               "green_bias": 0,
                               "blue_bias": 0
                           })

    yolo_model = coremltools.models.MLModel(coreml_model.get_spec())

    yolo_model.save(_coreml_filename)

    # nms model
    nms_spec = coremltools.proto.Model_pb2.Model()
    nms_spec.specificationVersion = 3

    # YOLO output 0 (boxes) and output 1 (scores) become the NMS inputs
    # unchanged; the matching NMS outputs are renamed and get a flexible
    # first dimension with a fixed second dimension of the given width.
    nms_outputs = (("coordinates", 4), ("confidence", len(class_names)))
    for index, (feature_name, width) in enumerate(nms_outputs):
        serialized = yolo_model._spec.description.output[
            index].SerializeToString()

        nms_spec.description.input.add()
        nms_spec.description.input[index].ParseFromString(serialized)
        nms_spec.description.output.add()
        nms_spec.description.output[index].ParseFromString(serialized)
        nms_spec.description.output[index].name = feature_name

        ma_type = nms_spec.description.output[index].type.multiArrayType
        ma_type.shapeRange.sizeRanges.add()
        ma_type.shapeRange.sizeRanges[0].lowerBound = 0
        ma_type.shapeRange.sizeRanges[0].upperBound = -1
        ma_type.shapeRange.sizeRanges.add()
        ma_type.shapeRange.sizeRanges[1].lowerBound = width
        ma_type.shapeRange.sizeRanges[1].upperBound = width
        # The fixed shape copied from the YOLO output no longer applies.
        del ma_type.shape[:]

    nms = nms_spec.nonMaximumSuppression
    nms.coordinatesInputFeatureName = "boxes"
    nms.confidenceInputFeatureName = "scores"
    nms.coordinatesOutputFeatureName = "coordinates"
    nms.confidenceOutputFeatureName = "confidence"
    nms.iouThresholdInputFeatureName = "iouThreshold"
    nms.confidenceThresholdInputFeatureName = "confidenceThreshold"
    default_iou_threshold = 0.5
    nms.iouThreshold = default_iou_threshold
    default_confidence_threshold = 0.7
    nms.confidenceThreshold = default_confidence_threshold
    nms.pickTop.perClass = True
    nms.stringClassLabels.vector.extend(class_names)

    nms_model = coremltools.models.MLModel(nms_spec)
    nms_model.save(_coreml_nms_filename)

    # pipeline
    input_features = [("image", datatypes.Array(3, 416, 416)),
                      ("iouThreshold", datatypes.Double()),
                      ("confidenceThreshold", datatypes.Double())]

    output_features = [
        "coordinates",
        "confidence",
    ]
    pipeline = Pipeline(input_features, output_features)

    pipeline.add_model(yolo_model)
    pipeline.add_model(nms_model)

    # Replace the placeholder declarations created by Pipeline with the
    # real ones: the YOLO model's image input and the NMS model's outputs.
    pipeline.spec.description.input[0].ParseFromString(
        yolo_model._spec.description.input[0].SerializeToString())
    pipeline.spec.description.output[0].ParseFromString(
        nms_model._spec.description.output[0].SerializeToString())
    pipeline.spec.description.output[1].ParseFromString(
        nms_model._spec.description.output[1].SerializeToString())

    user_defined_metadata = {
        "classes": ",".join(class_names),
        "iou_threshold": str(default_iou_threshold),
        "confidence_threshold": str(default_confidence_threshold)
    }
    pipeline.spec.description.metadata.userDefined.update(
        user_defined_metadata)
    pipeline.spec.specificationVersion = 3

    final_model = coremltools.models.MLModel(pipeline.spec)
    final_model.save(coreml_pipeline_filename)
Example #2
0
def main():
    """Convert YOLOv3 ONNX exports into Core ML models with built-in NMS.

    ``saved_models`` is recreated empty. For every ``*.onnx`` file found,
    three files are written there: ``<name>.mlmodel`` (the network with FP16
    weights), ``<name>_nms.mlmodel`` (the non-maximum-suppression stage) and
    ``<name>_pipelined.mlmodel`` (both chained in one pipeline).

    Class labels are read from ``../yolov3/data/coco.names``. Nothing else
    happens when no ONNX file is found.
    """
    import shutil

    # Start from an empty output directory (best effort, like the former
    # ``rm -rf saved_models && mkdir saved_models`` shell call).
    shutil.rmtree('saved_models', ignore_errors=True)
    os.makedirs('saved_models', exist_ok=True)

    # The first pattern cannot match right after the reset; it is kept so
    # the searched locations stay the same.
    files = glob.glob('saved_models/*.onnx') + glob.glob(
        '../yolov3/weights/*.onnx')
    if not files:
        return

    import coremltools
    from coremltools.models import datatypes
    from coremltools.models.pipeline import Pipeline

    # The labels do not depend on the model, so read them once. A plain
    # line read always yields a list, even for a one-line file.
    with open('../yolov3/data/coco.names', encoding='utf-8') as names_file:
        labels = [line.strip() for line in names_file if line.strip()]

    double_type = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType \
        .ArrayDataType.Value('DOUBLE')

    for f in files:
        # 1. ONNX to CoreML
        name = os.path.join('saved_models',
                            os.path.splitext(os.path.basename(f))[0])

        with open(f, 'rb') as model_file:
            model_proto = onnx_pb.ModelProto()
            model_proto.ParseFromString(model_file.read())
        coreml_model = convert(model_proto, image_input_names=['0'])

        # 2. Reduce model to FP16, change outputs to DOUBLE and save
        spec = coreml_model.get_spec()
        for i in range(2):
            spec.description.output[i].type.multiArrayType.dataType = \
                double_type

        spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(
            spec)
        coreml_model = coremltools.models.MLModel(spec)

        # NOTE(review): the spec is edited after the MLModel was built from
        # it, exactly as before; this relies on the model observing these
        # edits - confirm against the coremltools version in use.
        num_classes = 80
        num_anchors = 507
        spec.description.output[0].type.multiArrayType.shape.append(
            num_classes)
        spec.description.output[0].type.multiArrayType.shape.append(
            num_anchors)

        spec.description.output[1].type.multiArrayType.shape.append(4)
        spec.description.output[1].type.multiArrayType.shape.append(
            num_anchors)
        coreml_model.save(name + '.mlmodel')
        print(spec.description)

        # Output 0 carries the class confidences, output 1 the coordinates.
        # Read their names from the spec instead of assuming fixed ONNX
        # tensor ids, so the NMS inputs always match the network outputs.
        confidence_name = spec.description.output[0].name
        coordinates_name = spec.description.output[1].name

        # 3. Create NMS protobuf
        nms_spec = coremltools.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 3

        for i in range(2):
            decoder_output = coreml_model._spec.description.output[
                i].SerializeToString()

            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)

            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = 'confidence'
        nms_spec.description.output[1].name = 'coordinates'

        # Flexible first dimension, fixed second one.
        output_sizes = [num_classes, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        nms.confidenceInputFeatureName = confidence_name
        nms.coordinatesInputFeatureName = coordinates_name
        nms.confidenceOutputFeatureName = 'confidence'
        nms.coordinatesOutputFeatureName = 'coordinates'
        nms.iouThresholdInputFeatureName = 'iouThreshold'
        nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'

        nms.iouThreshold = 0.6
        nms.confidenceThreshold = 0.4
        nms.pickTop.perClass = True
        nms.stringClassLabels.vector.extend(labels)

        nms_model = coremltools.models.MLModel(nms_spec)
        nms_model.save(name + '_nms.mlmodel')

        # 4. Pipeline models together
        input_features = [('image', datatypes.Array(3, 416, 416)),
                          ('iouThreshold', datatypes.Double()),
                          ('confidenceThreshold', datatypes.Double())]

        output_features = ['confidence', 'coordinates']

        pipeline = Pipeline(input_features, output_features)

        # Add the two models, in order: network first, NMS second.
        pipeline.add_model(coreml_model)
        pipeline.add_model(nms_model)

        # Correct datatypes: copy the real declarations of the network
        # input and the NMS outputs over the ones Pipeline created.
        pipeline.spec.description.input[0].ParseFromString(
            coreml_model._spec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(
            nms_model._spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(
            nms_model._spec.description.output[1].SerializeToString())

        # Update metadata
        pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch'
        pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3'
        pipeline.spec.description.metadata.author = '*****@*****.**'
        pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3'

        user_defined_metadata = {
            'classes': ','.join(labels),
            'iou_threshold': str(nms.iouThreshold),
            'confidence_threshold': str(nms.confidenceThreshold)
        }
        pipeline.spec.description.metadata.userDefined.update(
            user_defined_metadata)

        # Save the model
        pipeline.spec.specificationVersion = 3
        final_model = coremltools.models.MLModel(pipeline.spec)
        final_model.save((name + '_pipelined.mlmodel'))
Example #3
0
def main():
    """Convert YOLOv3 ONNX exports into Core ML models with built-in NMS.

    ``saved_models`` is recreated empty. Every ``*.onnx`` file found is
    checked and printed with ``onnx``, converted to Core ML with FP16
    weights, run once on a sample image, and finally chained with a
    non-maximum-suppression model. Three files are written per input:
    ``<name>.mlmodel``, ``<name>_nms.mlmodel`` and
    ``<name>_pipelined.mlmodel``.

    Class labels are read from ``../yolov3/data/coco.names`` and the sample
    image from ``../yolov3/data/samples/zidane_416.jpg``. Nothing else
    happens when no ONNX file is found.
    """
    import shutil

    # Start from an empty output directory (best effort, like the former
    # ``rm -rf saved_models && mkdir saved_models`` shell call).
    shutil.rmtree('saved_models', ignore_errors=True)
    os.makedirs('saved_models', exist_ok=True)

    # The first pattern cannot match right after the reset; it is kept so
    # the searched locations stay the same.
    files = glob.glob('saved_models/*.onnx') + \
        glob.glob('../yolov3/weights/*.onnx')
    if not files:
        return

    import coremltools
    from coremltools.models import datatypes
    from coremltools.models.pipeline import Pipeline
    from PIL import Image

    # The labels do not depend on the model, so read them once. A plain
    # line read always yields a list, even for a one-line file.
    with open('../yolov3/data/coco.names', encoding='utf-8') as names_file:
        labels = [line.strip() for line in names_file if line.strip()]

    double_type = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType \
        .ArrayDataType.Value('DOUBLE')

    for f in files:
        # 1. ONNX to CoreML
        name = os.path.join('saved_models',
                            os.path.splitext(os.path.basename(f))[0])

        # Load the ONNX model
        model = onnx.load(f)

        # Check that the IR is well formed
        print(onnx.checker.check_model(model))

        # Print a human readable representation of the graph
        print(onnx.helper.printable_graph(model.graph))

        with open(f, 'rb') as model_file:
            model_proto = onnx_pb.ModelProto()
            model_proto.ParseFromString(model_file.read())
        yolov3_model = convert(model_proto,
                               image_input_names=['0'],
                               preprocessing_args={'image_scale': 1. / 255})

        # 2. Reduce model to FP16, change outputs to DOUBLE and save
        spec = yolov3_model.get_spec()
        for i in range(2):
            spec.description.output[i].type.multiArrayType.dataType = \
                double_type

        spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(
            spec)
        yolov3_model = coremltools.models.MLModel(spec)

        name_out0 = spec.description.output[0].name
        name_out1 = spec.description.output[1].name

        # NOTE(review): the spec is edited after the MLModel was built from
        # it, exactly as before; this relies on the model observing these
        # edits - confirm against the coremltools version in use.
        num_classes = 80
        num_anchors = 507  # 507 for yolov3-tiny,
        spec.description.output[0].type.multiArrayType.shape.append(
            num_anchors)
        spec.description.output[0].type.multiArrayType.shape.append(
            num_classes)

        spec.description.output[1].type.multiArrayType.shape.append(
            num_anchors)
        spec.description.output[1].type.multiArrayType.shape.append(4)

        spec.neuralNetwork.preprocessing[0].featureName = '0'

        yolov3_model.save(name + '.mlmodel')
        print(spec.description)

        # 2.5. Try to Predict:
        img = Image.open('../yolov3/data/samples/zidane_416.jpg')
        out = yolov3_model.predict({'0': img}, useCPUOnly=True)
        print(out[name_out0].shape, out[name_out1].shape)

        # 3. Create NMS protobuf
        nms_spec = coremltools.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 3

        for i in range(2):
            decoder_output = yolov3_model._spec.description.output[
                i].SerializeToString()

            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)

            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = 'confidence'
        nms_spec.description.output[1].name = 'coordinates'

        # Flexible first dimension, fixed second one.
        output_sizes = [num_classes, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        nms.confidenceInputFeatureName = name_out0
        nms.coordinatesInputFeatureName = name_out1
        nms.confidenceOutputFeatureName = 'confidence'
        nms.coordinatesOutputFeatureName = 'coordinates'
        nms.iouThresholdInputFeatureName = 'iouThreshold'
        nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'

        nms.iouThreshold = 0.4
        nms.confidenceThreshold = 0.5
        nms.pickTop.perClass = True
        nms.stringClassLabels.vector.extend(labels)

        nms_model = coremltools.models.MLModel(nms_spec)
        nms_model.save(name + '_nms.mlmodel')

        # 4. Pipeline models together
        input_features = [('0', datatypes.Array(3, 416, 416)),
                          ('iouThreshold', datatypes.Double()),
                          ('confidenceThreshold', datatypes.Double())]

        output_features = ['confidence', 'coordinates']

        pipeline = Pipeline(input_features, output_features)

        # Give both network outputs a trailing dimension of size 1 and
        # mirror the resulting shapes onto the NMS inputs so that the two
        # pipeline stages declare the same shapes.
        yolov3_output = yolov3_model._spec.description.output
        yolov3_output[0].type.multiArrayType.shape[:] = [
            num_anchors, num_classes, 1
        ]
        yolov3_output[1].type.multiArrayType.shape[:] = [num_anchors, 4, 1]

        nms_input = nms_model._spec.description.input
        for i in range(2):
            nms_input[i].type.multiArrayType.shape[:] = yolov3_output[
                i].type.multiArrayType.shape[:]

        # Add the two models, in order: network first, NMS second.
        pipeline.add_model(yolov3_model)
        pipeline.add_model(nms_model)

        # Correct datatypes: copy the real declarations of the network
        # input and the NMS outputs over the ones Pipeline created.
        pipeline.spec.description.input[0].ParseFromString(
            yolov3_model._spec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(
            nms_model._spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(
            nms_model._spec.description.output[1].SerializeToString())

        # Update metadata
        pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch'
        pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3'
        pipeline.spec.description.metadata.author = '*****@*****.**'
        pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3'

        user_defined_metadata = {
            'classes': ','.join(labels),
            'iou_threshold': str(nms.iouThreshold),
            'confidence_threshold': str(nms.confidenceThreshold)
        }
        pipeline.spec.description.metadata.userDefined.update(
            user_defined_metadata)

        # Save the model
        pipeline.spec.specificationVersion = 3
        final_model = coremltools.models.MLModel(pipeline.spec)
        final_model.save((name + '_pipelined.mlmodel'))
def convert_ssd(exported_graph_path, model_structure, output_path):
    """Convert an exported SSD MobileNet graph into a Core ML pipeline.

    The frozen TensorFlow graph is converted with ``tfcoreml``, its input
    and outputs are renamed, and it is chained with the box decoder and the
    non-maximum-suppression model into a single pipeline that is saved as
    ``Model.mlmodel`` inside ``output_path``.

    Args:
        exported_graph_path: Directory containing ``saved_model`` and a
            ``labels.json`` file with the class labels.
        model_structure: Not used by this function.
        output_path: Directory that receives ``Model.mlmodel``; it is
            created when missing.
    """
    # Make sure the destination exists before anything is written into it.
    os.makedirs(output_path, exist_ok=True)

    num_anchors = 1917

    saved_model_path = os.path.join(exported_graph_path, 'saved_model')
    coreml_model_path = os.path.join(output_path, 'Model.mlmodel')

    json_labels = os.path.join(exported_graph_path, 'labels.json')
    with open(json_labels) as f:
        labels = json.load(f)

    # Strip the model down to something usable by Core ML.
    # Instead of `concat_1`, use `Postprocessor/convert_scores`, because it
    # applies the sigmoid to the class scores.
    frozen_model_path = '.tmp/tmp_frozen_graph.pb'
    input_node = 'Preprocessor/sub'
    bbox_output_node = 'concat'
    class_output_node = 'Postprocessor/convert_scores'
    graph = optimize_graph(saved_model_path, frozen_model_path, [input_node],
                           [bbox_output_node, class_output_node])

    # conversion tensors have a `:0` at the end of the name
    input_tensor = input_node + ':0'
    bbox_output_tensor = bbox_output_node + ':0'
    class_output_tensor = class_output_node + ':0'

    # Convert to Core ML model.
    ssd_model = tfcoreml.convert(
        tf_model_path=frozen_model_path,
        mlmodel_path=coreml_model_path,
        input_name_shape_dict={input_tensor: [1, 300, 300, 3]},
        image_input_names=input_tensor,
        output_feature_names=[bbox_output_tensor, class_output_tensor],
        is_bgr=False,
        red_bias=-1.0,
        green_bias=-1.0,
        blue_bias=-1.0,
        image_scale=2. / 255)

    spec = ssd_model.get_spec()

    # Rename the inputs and outputs to something more readable.
    spec.description.input[0].name = 'image'
    spec.description.input[0].shortDescription = 'Input image'
    spec.description.output[0].name = 'scores'
    spec.description.output[
        0].shortDescription = 'Predicted class scores for each bounding box'
    spec.description.output[1].name = 'boxes'
    spec.description.output[
        1].shortDescription = 'Predicted coordinates for each bounding box'

    input_mlmodel = input_tensor.replace(':', '__').replace('/', '__')
    class_output_mlmodel = class_output_tensor.replace(':', '__').replace(
        '/', '__')
    bbox_output_mlmodel = bbox_output_tensor.replace(':',
                                                     '__').replace('/', '__')

    # Apply the same renames inside the network. Every input/output slot of
    # every layer is checked, so layers without inputs are handled and a
    # match in a slot other than the first one is not missed.
    output_renames = {
        class_output_mlmodel: 'scores',
        bbox_output_mlmodel: 'boxes',
    }
    for layer in spec.neuralNetwork.layers:
        # Iterate over copies because the fields are edited in the loop.
        for index, blob in enumerate(list(layer.input)):
            if blob == input_mlmodel:
                layer.input[index] = 'image'
        for index, blob in enumerate(list(layer.output)):
            if blob in output_renames:
                layer.output[index] = output_renames[blob]

    spec.neuralNetwork.preprocessing[0].featureName = 'image'

    # For some reason the output shape of the `scores` output is not filled in.
    spec.description.output[0].type.multiArrayType.shape.append(
        len(labels) + 1)
    spec.description.output[0].type.multiArrayType.shape.append(num_anchors)

    # And the `boxes` output shape is (4, 1917, 1) so get rid of that last one.
    del spec.description.output[1].type.multiArrayType.shape[-1]

    # Convert weights to 16-bit floats to make the model smaller.
    spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)

    # Create a new MLModel from the modified spec; it is only saved as part
    # of the pipeline below.
    ssd_model = coremltools.models.MLModel(spec)

    decoder_model = build_decoder(graph, len(labels), num_anchors)
    nms_model = build_nms(decoder_model, labels)

    input_features = [('image', datatypes.Array(3, 300, 300)),
                      ('iouThreshold', datatypes.Double()),
                      ('confidenceThreshold', datatypes.Double())]

    output_features = ['confidence', 'coordinates']

    pipeline = Pipeline(input_features, output_features)

    # We added a dimension of size 1 to the back of the inputs of the decoder
    # model, so we should also add this to the output of the SSD model or else
    # the inputs and outputs do not match and the pipeline is not valid.
    ssd_output = ssd_model._spec.description.output
    ssd_output[0].type.multiArrayType.shape[:] = [
        len(labels) + 1, num_anchors, 1
    ]
    ssd_output[1].type.multiArrayType.shape[:] = [4, num_anchors, 1]

    pipeline.add_model(ssd_model)
    pipeline.add_model(decoder_model)
    pipeline.add_model(nms_model)

    # The `image` input should really be an image, not a multi-array.
    pipeline.spec.description.input[0].ParseFromString(
        ssd_model._spec.description.input[0].SerializeToString())

    # Copy the declarations of the `confidence` and `coordinates` outputs.
    # The Pipeline makes these strings by default.
    pipeline.spec.description.output[0].ParseFromString(
        nms_model._spec.description.output[0].SerializeToString())
    pipeline.spec.description.output[1].ParseFromString(
        nms_model._spec.description.output[1].SerializeToString())

    # Add descriptions to the inputs and outputs.
    pipeline.spec.description.input[
        1].shortDescription = '(optional) IOU Threshold override'
    pipeline.spec.description.input[
        2].shortDescription = '(optional) Confidence Threshold override'
    pipeline.spec.description.output[
        0].shortDescription = u'Boxes \xd7 Class confidence'
    pipeline.spec.description.output[
        1].shortDescription = u'Boxes \xd7 [x, y, width, height] (relative to image size)'

    # Add metadata to the model.
    pipeline.spec.description.metadata.versionString = 'ssd_mobilenet'
    pipeline.spec.description.metadata.shortDescription = 'MobileNet + SSD'
    pipeline.spec.description.metadata.author = 'Converted to Core ML by Cloud Annotations'
    pipeline.spec.description.metadata.license = 'https://github.com/tensorflow/models/blob/master/research/object_detection'

    # Add the list of class labels and the default threshold values too.
    user_defined_metadata = {
        'iou_threshold': str(0.5),
        'confidence_threshold': str(0.5),
        'classes': ','.join(labels)
    }
    pipeline.spec.description.metadata.userDefined.update(
        user_defined_metadata)

    pipeline.spec.specificationVersion = 3

    final_model = coremltools.models.MLModel(pipeline.spec)
    final_model.save(coreml_model_path)
def convert_ssd(exported_graph_path, model_structure, output_path):
    """Build a Core ML detection pipeline from an exported SSD MobileNet graph.

    Steps: freeze and trim the TensorFlow graph, convert it with
    ``tfcoreml``, rename its input and outputs, then chain it with the box
    decoder and the non-maximum-suppression model. The resulting pipeline is
    saved as ``Model.mlmodel`` inside ``output_path``.

    Args:
        exported_graph_path: Directory containing ``saved_model`` and a
            ``labels.json`` file with the class labels.
        model_structure: Not used by this function.
        output_path: Directory that receives ``Model.mlmodel``; it is
            created when missing.
    """
    # Make sure the destination exists before anything is written into it.
    os.makedirs(output_path, exist_ok=True)

    num_anchors = 1917

    saved_model_path = os.path.join(exported_graph_path, 'saved_model')
    coreml_model_path = os.path.join(output_path, 'Model.mlmodel')

    json_labels = os.path.join(exported_graph_path, 'labels.json')
    with open(json_labels) as f:
        labels = json.load(f)

    # Strip the model down to something usable by Core ML.
    # Instead of `concat_1`, use `Postprocessor/convert_scores`, because it
    # applies the sigmoid to the class scores.
    frozen_model_path = '.tmp/tmp_frozen_graph.pb'
    input_node = 'Preprocessor/sub'
    bbox_output_node = 'concat'
    class_output_node = 'Postprocessor/convert_scores'
    graph = optimize_graph(saved_model_path, frozen_model_path, [input_node], [bbox_output_node, class_output_node])

    # conversion tensors have a `:0` at the end of the name
    input_tensor = input_node + ':0'
    bbox_output_tensor = bbox_output_node + ':0'
    class_output_tensor = class_output_node + ':0'

    # Convert to Core ML model.
    ssd_model = tfcoreml.convert(
        tf_model_path=frozen_model_path,
        mlmodel_path=coreml_model_path,
        input_name_shape_dict={ input_tensor: [1, 300, 300, 3] },
        image_input_names=input_tensor,
        output_feature_names=[bbox_output_tensor, class_output_tensor],
        is_bgr=False,
        red_bias=-1.0,
        green_bias=-1.0,
        blue_bias=-1.0,
        image_scale=2./255)

    spec = ssd_model.get_spec()

    # Rename the inputs and outputs to something more readable.
    spec.description.input[0].name = 'image'
    spec.description.input[0].shortDescription = 'Input image'
    spec.description.output[0].name = 'scores'
    spec.description.output[0].shortDescription = 'Predicted class scores for each bounding box'
    spec.description.output[1].name = 'boxes'
    spec.description.output[1].shortDescription = 'Predicted coordinates for each bounding box'

    input_mlmodel = input_tensor.replace(':', '__').replace('/', '__')
    class_output_mlmodel = class_output_tensor.replace(':', '__').replace('/', '__')
    bbox_output_mlmodel = bbox_output_tensor.replace(':', '__').replace('/', '__')

    # Apply the same renames inside the network. Every input/output slot of
    # every layer is checked, so layers without inputs are handled and a
    # match in a slot other than the first one is not missed.
    output_renames = {
        class_output_mlmodel: 'scores',
        bbox_output_mlmodel: 'boxes',
    }
    for layer in spec.neuralNetwork.layers:
        # Iterate over copies because the fields are edited in the loop.
        for index, blob in enumerate(list(layer.input)):
            if blob == input_mlmodel:
                layer.input[index] = 'image'
        for index, blob in enumerate(list(layer.output)):
            if blob in output_renames:
                layer.output[index] = output_renames[blob]

    spec.neuralNetwork.preprocessing[0].featureName = 'image'

    # For some reason the output shape of the `scores` output is not filled in.
    spec.description.output[0].type.multiArrayType.shape.append(len(labels) + 1)
    spec.description.output[0].type.multiArrayType.shape.append(num_anchors)

    # And the `boxes` output shape is (4, 1917, 1) so get rid of that last one.
    del spec.description.output[1].type.multiArrayType.shape[-1]

    # Convert weights to 16-bit floats to make the model smaller.
    spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)

    # Create a new MLModel from the modified spec; it is only saved as part
    # of the pipeline below.
    ssd_model = coremltools.models.MLModel(spec)

    decoder_model = build_decoder(graph, len(labels), num_anchors)
    nms_model = build_nms(decoder_model, labels)

    input_features = [
        ('image', datatypes.Array(3, 300, 300)),
        ('iouThreshold', datatypes.Double()),
        ('confidenceThreshold', datatypes.Double())
    ]

    output_features = ['confidence', 'coordinates']

    pipeline = Pipeline(input_features, output_features)

    # We added a dimension of size 1 to the back of the inputs of the decoder
    # model, so we should also add this to the output of the SSD model or else
    # the inputs and outputs do not match and the pipeline is not valid.
    ssd_output = ssd_model._spec.description.output
    ssd_output[0].type.multiArrayType.shape[:] = [len(labels) + 1, num_anchors, 1]
    ssd_output[1].type.multiArrayType.shape[:] = [4, num_anchors, 1]

    pipeline.add_model(ssd_model)
    pipeline.add_model(decoder_model)
    pipeline.add_model(nms_model)

    # The `image` input should really be an image, not a multi-array.
    pipeline.spec.description.input[0].ParseFromString(ssd_model._spec.description.input[0].SerializeToString())

    # Copy the declarations of the `confidence` and `coordinates` outputs.
    # The Pipeline makes these strings by default.
    pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString())
    pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString())

    # Add descriptions to the inputs and outputs.
    pipeline.spec.description.input[1].shortDescription = '(optional) IOU Threshold override'
    pipeline.spec.description.input[2].shortDescription = '(optional) Confidence Threshold override'
    pipeline.spec.description.output[0].shortDescription = u'Boxes \xd7 Class confidence'
    pipeline.spec.description.output[1].shortDescription = u'Boxes \xd7 [x, y, width, height] (relative to image size)'

    # Add metadata to the model.
    pipeline.spec.description.metadata.versionString = 'ssd_mobilenet'
    pipeline.spec.description.metadata.shortDescription = 'MobileNet + SSD'
    pipeline.spec.description.metadata.author = 'Converted to Core ML by Cloud Annotations'
    pipeline.spec.description.metadata.license = 'https://github.com/tensorflow/models/blob/master/research/object_detection'

    # Add the list of class labels and the default threshold values too.
    user_defined_metadata = {
        'iou_threshold': str(0.5),
        'confidence_threshold': str(0.5),
        'classes': ','.join(labels)
    }
    pipeline.spec.description.metadata.userDefined.update(user_defined_metadata)

    pipeline.spec.specificationVersion = 3

    final_model = coremltools.models.MLModel(pipeline.spec)
    final_model.save(coreml_model_path)
def convert_localization(frozen_model, labels_path, output_path, anchors):
    """Convert a frozen TensorFlow SSD graph into a Core ML detection pipeline.

    The frozen graph is converted with ``tfcoreml``, its input/output blobs
    are renamed to ``image`` / ``boxes`` / ``scores``, its weights are
    converted to 16-bit floats, and the result is chained with a decoder
    model and a non-maximum-suppression model into a single Core ML
    ``Pipeline``. The pipeline is saved as ``Model.mlmodel`` inside
    ``output_path``.

    Args:
        frozen_model: Path of the frozen TensorFlow graph; passed to
            ``tfcoreml.convert`` as ``tf_model_path``.
        labels_path: Path of a JSON file holding the class labels. The
            loaded value must support ``len()`` and ``",".join(...)``
            (presumably a list of strings -- confirm against the caller).
        output_path: Directory that receives ``Model.mlmodel``; created if
            it does not exist.
        anchors: Anchor boxes, forwarded unchanged to ``build_decoder``.

    Returns:
        None. The converted model is written to disk.
    """
    os.makedirs(output_path, exist_ok=True)

    # NOTE(review): hard-coded; presumably the anchor count of the
    # 300x300 SSD MobileNet graph -- confirm it agrees with `anchors`.
    num_anchors = 1917

    with open(labels_path) as f:
        labels = json.load(f)

    # Strip the model down to something usable by Core ML.
    # Instead of `concat_1`, use `Postprocessor/convert_scores`, because it
    # applies the sigmoid to the class scores.
    input_node = "Preprocessor/sub"
    bbox_output_node = "Squeeze"
    class_output_node = "Postprocessor/convert_scores"

    # Convert to Core ML model.
    # image_scale = 2/255 with a bias of -1 per channel maps pixel values
    # from [0, 255] to [-1, 1].
    ssd_model = tfcoreml.convert(
        tf_model_path=frozen_model,
        input_name_shape_dict={input_node: [1, 300, 300, 3]},
        image_input_names=[input_node],
        output_feature_names=[bbox_output_node, class_output_node],
        is_bgr=False,
        red_bias=-1.0,
        green_bias=-1.0,
        blue_bias=-1.0,
        image_scale=2.0 / 255,
        minimum_ios_deployment_target="13",
    )

    spec = ssd_model.get_spec()

    # Rename the inputs and outputs to something more readable.
    spec.description.input[0].name = "image"
    spec.description.input[0].shortDescription = "Input image"
    spec.neuralNetwork.preprocessing[0].featureName = "image"

    for output in spec.description.output:
        if output.name == bbox_output_node:
            output.name = "boxes"
            output.shortDescription = (
                "Predicted coordinates for each bounding box")
            output.type.multiArrayType.shape[:] = [4, num_anchors, 1]
        elif output.name == class_output_node:
            output.name = "scores"
            output.shortDescription = (
                "Predicted class scores for each bounding box")
            # NOTE(review): the extra row is presumably the background
            # class emitted by the SSD head -- confirm.
            output.type.multiArrayType.shape[:] = [
                len(labels) + 1,
                num_anchors,
                1,
            ]

    # Propagate the renames to the blobs referenced by the layers. Every
    # input/output slot is checked: a layer may have no inputs at all (so
    # indexing slot 0 would raise IndexError) or may carry the renamed blob
    # in a slot other than the first.
    input_renames = {input_node: "image"}
    output_renames = {
        class_output_node: "scores",
        bbox_output_node: "boxes",
    }
    for layer in spec.neuralNetwork.layers:
        for idx, blob_name in enumerate(layer.input):
            if blob_name in input_renames:
                layer.input[idx] = input_renames[blob_name]
        for idx, blob_name in enumerate(layer.output):
            if blob_name in output_renames:
                layer.output[idx] = output_renames[blob_name]

    for input_ in spec.description.input:
        _convert_multiarray_to_float32(input_)
    for output_ in spec.description.output:
        _convert_multiarray_to_float32(output_)

    # Convert weights to 16-bit floats to make the model smaller.
    spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)

    input_features = [
        ("image", datatypes.Array(3, 300, 300)),
        ("iouThreshold", datatypes.Double()),
        ("confidenceThreshold", datatypes.Double()),
    ]

    output_features = ["confidence", "coordinates"]

    pipeline = Pipeline(input_features, output_features)

    # Build the three pipeline stages: the SSD network (from the modified
    # spec), the box decoder and the non-maximum-suppression model.
    ssd_model = coremltools.models.MLModel(spec)
    decoder_model = build_decoder(anchors, len(labels), num_anchors)
    nms_model = build_nms(decoder_model, labels)

    pipeline.add_model(ssd_model)
    pipeline.add_model(decoder_model)
    pipeline.add_model(nms_model)

    # The `image` input should really be an image, not a multi-array.
    pipeline.spec.description.input[0].ParseFromString(
        ssd_model._spec.description.input[0].SerializeToString())

    # Copy the declarations of the `confidence` and `coordinates` outputs.
    # The Pipeline makes these strings by default.
    pipeline.spec.description.output[0].ParseFromString(
        nms_model._spec.description.output[0].SerializeToString())
    pipeline.spec.description.output[1].ParseFromString(
        nms_model._spec.description.output[1].SerializeToString())

    # Add descriptions to the inputs and outputs.
    pipeline.spec.description.input[
        1].shortDescription = "(optional) IOU Threshold override"
    pipeline.spec.description.input[
        2].shortDescription = "(optional) Confidence Threshold override"
    pipeline.spec.description.output[
        0].shortDescription = u"Boxes \xd7 Class confidence"
    pipeline.spec.description.output[
        1].shortDescription = u"Boxes \xd7 [x, y, width, height] (relative to image size)"

    # Add metadata to the model.
    pipeline.spec.description.metadata.versionString = "ssd_mobilenet"
    pipeline.spec.description.metadata.shortDescription = "MobileNet + SSD"
    pipeline.spec.description.metadata.author = (
        "Converted to Core ML by Cloud Annotations")
    pipeline.spec.description.metadata.license = (
        "https://github.com/tensorflow/models/blob/master/research/object_detection"
    )

    # Add the list of class labels and the default threshold values too.
    user_defined_metadata = {
        "iou_threshold": str(0.5),
        "confidence_threshold": str(0.5),
        "classes": ",".join(labels),
    }
    pipeline.spec.description.metadata.userDefined.update(
        user_defined_metadata)

    # NOTE(review): presumably chosen to match the iOS 13 deployment
    # target requested from tfcoreml above -- confirm.
    pipeline.spec.specificationVersion = 4

    final_model = coremltools.models.MLModel(pipeline.spec)
    final_model.save(os.path.join(output_path, "Model.mlmodel"))