Example #1
def update_onnx_initializer(onnx_file, ckpt_file, output_file):
    "Update onnx initializer."
    with open(onnx_file, 'rb') as f:
        data = f.read()
    model = onnx_pb.ModelProto()
    model.ParseFromString(data)
    initializer = model.graph.initializer
    param_dict = load_checkpoint(ckpt_file)

    for item in initializer:
        if item.name not in param_dict:
            print(
                f"Warning: Cannot find '{item.name}' in checkpoint parameters dictionary"
            )
            continue
        weight = param_dict[item.name].data.asnumpy()
        bin_data = weight.tobytes()
        if len(item.raw_data) != len(bin_data):
            print(
                f"Warning: Size of weight '{item.name}' from checkpoint differs from the original size, ignoring it"
            )
            continue
        item.raw_data = bin_data

    pb_msg = model.SerializeToString()
    with open(output_file, 'wb') as f:
        f.write(pb_msg)

    print(f'Graph name: {model.graph.name}')
    print(f'Initializer length: {len(initializer)}')
    print(f'Checkpoint dict length: {len(param_dict)}')
    print(
        f'The new weights have been written to file {output_file} successfully'
    )
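A minimal usage sketch (file names are hypothetical; load_checkpoint and .asnumpy() suggest the checkpoint comes from MindSpore):

update_onnx_initializer('model.onnx', 'model.ckpt', 'model_updated.onnx')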
Example #2
def clone_model_with_shape_infer(model):
    if model_has_infer_metadata(model):
        cloned_model = onnx_proto.ModelProto()
        cloned_model.CopyFrom(model)
    else:
        cloned_model = save_and_reload_model(model)
    return cloned_model
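save_and_reload_model is not shown above; the following is a hedged sketch of what such a helper could look like (an assumption, not the real onnxruntime implementation): write the model to disk, run file-based shape inference, and load the result back.

import os
import tempfile

import onnx

def save_and_reload_model(model):
    # Hypothetical helper: file-based shape inference (infer_shapes_path)
    # avoids the in-memory 2 GB protobuf limit of onnx.shape_inference.infer_shapes.
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_path = os.path.join(tmp_dir, 'model.onnx')
        inferred_path = os.path.join(tmp_dir, 'model_inferred.onnx')
        onnx.save(model, model_path)
        onnx.shape_inference.infer_shapes_path(model_path, inferred_path)
        return onnx.load(inferred_path)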
Example #3
def main():
    if len(sys.argv) < 3:
        print(
            'Usage: python onnx_to_nnir.py <onnxModel> <nnirOutputFolder> [--input_dims n,c,h,w (optional)] [--node_type_append 0/1 (optional: appends node type to output tensor name)]'
        )
        sys.exit(1)
    onnxFileName = sys.argv[1]
    outputFolder = sys.argv[2]
    #appends node type to output tensor name.
    node_type_append = 0
    pos = 3
    while pos < len(sys.argv) and sys.argv[pos][:2] == '--':
        if sys.argv[pos] == '--node_type_append':
            node_type_append = int(sys.argv[pos + 1])
            pos = pos + 2
        elif sys.argv[pos] == '--input_dims':
            #input_dims = sys.argv[pos+1]
            pos = pos + 2
        else:
            print('ERROR: unknown option: ' + sys.argv[pos])
            sys.exit(1)
    print('loading ONNX model from %s ...' % (onnxFileName))
    onnx_model_proto = onnx_pb.ModelProto()
    if not os.path.isfile(onnxFileName):
        print('ERROR: unable to open: ' + onnxFileName)
        sys.exit(1)
    with open(onnxFileName, 'rb') as f:
        onnx_model_proto.ParseFromString(f.read())
    print('converting to IR model in %s ...' % (outputFolder))
    onnx2ir(onnx_model_proto, outputFolder, node_type_append)
Example #4
def main(args):
    device_name = 'cpu' if args.gpu is None else 'cuda:' + str(args.gpu)
    device = torch.device(device_name)

    n_joints = 14
    model = LPM(3, 32, n_joints + 1, device, T=args.t)

    if args.checkpoint_name is not None:
        print('Loading checkpoint...')
        path = os.path.join(args.model_dir, args.checkpoint_name)
        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint['state_dict'])
    model = model.to(device)

    print('Exporting ONNX...')
    dummy_images = torch.zeros(
        (3, args.resolution, args.resolution)).to(device)
    dummy_centers = torch.zeros(
        (1, args.resolution, args.resolution)).to(device)
    torch.onnx.export(model, (dummy_images, dummy_images, dummy_images,
                              dummy_images, dummy_images, dummy_centers),
                      args.onnx_name)

    print('Exporting CoreML...')
    with open(args.onnx_name, 'rb') as model_file:
        model_proto = onnx_pb.ModelProto()
        model_proto.ParseFromString(model_file.read())
    mlmodel = onnx_coreml.convert(model_proto,
                                  mode='regressor',
                                  image_input_names=['0'])
    mlmodel.save(args.core_ml_name)
    print('Done!')
Example #5
    def augment_graph(self):
        '''
        Adds ReduceMin and ReduceMax nodes for all tensors selected for calibration
        and ensures their outputs are stored as part of the graph output.
        The augmented model is stored in self.augment_model and saved to
        self.augmented_model_path.
        '''
        model = onnx_proto.ModelProto()
        model.CopyFrom(self.model)
        model = onnx.shape_inference.infer_shapes(model)

        added_nodes = []
        added_outputs = []
        tensors, value_infos = self.select_tensors_to_calibrate(model)

        for tensor in tensors:

            # When doing ReduceMax/ReduceMin, ORT can't reduce on a dim with value of 0
            # if 'keepdims' is false. To keep the code simple, we always set keepdims to 1.
            keepdims = 1

            # dim could be:
            #   [dim_param: "batch_size", dim_value: 256, dim_value: 36, dim_value: 64],
            #   [dim_value: 0],
            #   ...
            # Please see the definition of TensorShapeProto: https://github.com/onnx/onnx/blob/master/onnx/onnx.proto#L651
            dim = value_infos[tensor].type.tensor_type.shape.dim
            shape = tuple(1 for _ in range(len(dim)))

            # Adding ReduceMin nodes
            reduce_min_name = tensor + '_ReduceMin'
            reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor],
                                                    [tensor + '_ReduceMin'],
                                                    reduce_min_name,
                                                    keepdims=keepdims)

            added_nodes.append(reduce_min_node)
            added_outputs.append(
                helper.make_tensor_value_info(reduce_min_node.output[0],
                                              TensorProto.FLOAT, shape))

            # Adding ReduceMax nodes
            reduce_max_name = tensor + '_ReduceMax'
            reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor],
                                                    [tensor + '_ReduceMax'],
                                                    reduce_max_name,
                                                    keepdims=keepdims)

            added_nodes.append(reduce_max_node)
            added_outputs.append(
                helper.make_tensor_value_info(reduce_max_node.output[0],
                                              TensorProto.FLOAT, shape))

        model.graph.node.extend(added_nodes)
        model.graph.output.extend(added_outputs)
        onnx.save(model,
                  self.augmented_model_path,
                  save_as_external_data=self.use_external_data_format)
        self.augment_model = model
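To show how these extra outputs would be consumed, here is a hedged sketch (assuming onnxruntime is installed; the model path and the 'input' feed name are hypothetical): run the augmented model once and collect each tensor's min/max from the appended graph outputs.

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('augmented_model.onnx',  # hypothetical path
                            providers=['CPUExecutionProvider'])
output_names = [o.name for o in sess.get_outputs()]
results = sess.run(None, {'input': np.zeros((1, 3, 224, 224), np.float32)})
ranges = {name: value
          for name, value in zip(output_names, results)
          if name.endswith('_ReduceMin') or name.endswith('_ReduceMax')}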
Example #6
    def augment_graph(self):
        '''
        Adds ReduceMin and ReduceMax nodes for all quantization-candidate op type nodes
        in the model and ensures their outputs are stored as part of the graph output.
        :return: augmented ONNX model
        '''
        model = onnx_proto.ModelProto()
        model.CopyFrom(self.model)
        model = onnx.shape_inference.infer_shapes(model)
        value_infos = {vi.name: vi for vi in model.graph.value_info}
        value_infos.update({ot.name: ot for ot in model.graph.output})
        value_infos.update({it.name: it for it in model.graph.input})
        initializer = set(init.name for init in model.graph.initializer)

        added_nodes = []
        added_outputs = []
        tensors_to_calibrate = set()
        tensor_type_to_calibrate = set([TensorProto.FLOAT, TensorProto.FLOAT16])

        for node in model.graph.node:
            should_be_calibrated = (
                ((node.op_type in self.calibrate_op_types) and (node.name not in self.black_nodes))
                or (node.name in self.white_nodes)
                or ((not self.calibrate_op_types) and (node.name not in self.black_nodes)))
            if should_be_calibrated:
                for tensor_name in itertools.chain(node.input, node.output):
                    if tensor_name in value_infos:
                        vi = value_infos[tensor_name]
                        if (vi.type.HasField('tensor_type')
                                and vi.type.tensor_type.elem_type in tensor_type_to_calibrate
                                and tensor_name not in initializer):
                            tensors_to_calibrate.add(tensor_name)

        # If augmenting all ops, some tensors may have a dim with value of 0.
        # ORT can't reduce on a dim with value of 0 when 'keepdims' is false,
        # so keep dimensions in that case.
        if self.calibrate_op_types:
            keepdims_value = 0
        else:
            keepdims_value = 1

        for tensor in tensors_to_calibrate:
            # Adding ReduceMin nodes
            reduce_min_name = tensor + '_ReduceMin'
            reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor], [tensor + '_ReduceMin'],
                                                    reduce_min_name,
                                                    keepdims=keepdims_value)

            added_nodes.append(reduce_min_node)
            added_outputs.append(helper.make_tensor_value_info(reduce_min_node.output[0], TensorProto.FLOAT, ()))

            # Adding ReduceMax nodes
            reduce_max_name = tensor + '_ReduceMax'
            reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor], [tensor + '_ReduceMax'],
                                                    reduce_max_name,
                                                    keepdims=keepdims_value)

            added_nodes.append(reduce_max_node)
            added_outputs.append(helper.make_tensor_value_info(reduce_max_node.output[0], TensorProto.FLOAT, ()))

        model.graph.node.extend(added_nodes)
        model.graph.output.extend(added_outputs)

        return model
Example #7
    def augment_graph(self):
        '''
        Adds ReduceMin and ReduceMax nodes for all quantization-candidate op type nodes
        in the model and ensures their outputs are stored as part of the graph output.
        The augmented model is stored in self.augment_model and saved to
        self.augmented_model_path.
        '''
        model = onnx_proto.ModelProto()
        model.CopyFrom(self.model)
        model = onnx.shape_inference.infer_shapes(model)
        value_infos = {vi.name: vi for vi in model.graph.value_info}
        value_infos.update({ot.name: ot for ot in model.graph.output})
        value_infos.update({it.name: it for it in model.graph.input})
        initializer = set(init.name for init in model.graph.initializer)

        added_nodes = []
        added_outputs = []
        tensors_to_calibrate = set()
        tensor_type_to_calibrate = set([TensorProto.FLOAT, TensorProto.FLOAT16])

        for node in model.graph.node:
            if len(self.op_types_to_calibrate) == 0 or node.op_type in self.op_types_to_calibrate:
                for tensor_name in itertools.chain(node.input, node.output):
                    if tensor_name in value_infos:
                        vi = value_infos[tensor_name]
                        if vi.type.HasField('tensor_type') and (
                                vi.type.tensor_type.elem_type in tensor_type_to_calibrate) and (
                                    tensor_name not in initializer):
                            tensors_to_calibrate.add(tensor_name)

        for tensor in tensors_to_calibrate:
            # Adding ReduceMin nodes
            reduce_min_name = tensor + '_ReduceMin'
            reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor], [tensor + '_ReduceMin'],
                                                    reduce_min_name,
                                                    keepdims=0)

            added_nodes.append(reduce_min_node)
            added_outputs.append(helper.make_tensor_value_info(reduce_min_node.output[0], TensorProto.FLOAT, ()))

            # Adding ReduceMax nodes
            reduce_max_name = tensor + '_ReduceMax'
            reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor], [tensor + '_ReduceMax'],
                                                    reduce_max_name,
                                                    keepdims=0)

            added_nodes.append(reduce_max_node)
            added_outputs.append(helper.make_tensor_value_info(reduce_max_node.output[0], TensorProto.FLOAT, ()))

        model.graph.node.extend(added_nodes)
        model.graph.output.extend(added_outputs)
        onnx.save(model, self.augmented_model_path)
        self.augment_model = model
Example #8
def main():
    if len(sys.argv) < 3:
        print('Usage: python onnx_to_nnir.py <onnxModel> <nnirOutputFolder> [--input_dims n,c,h,w (optional)]')
        sys.exit(1)
    onnxFileName = sys.argv[1]
    outputFolder = sys.argv[2]
    print('loading ONNX model from %s ...' % (onnxFileName))
    onnx_model_proto = onnx_pb.ModelProto()
    if not os.path.isfile(onnxFileName):
        print('ERROR: unable to open: ' + onnxFileName)
        sys.exit(1)
    with open(onnxFileName, 'rb') as f:
        onnx_model_proto.ParseFromString(f.read())
    print('converting to IR model in %s ...' % (outputFolder))
    onnx2ir(onnx_model_proto, outputFolder)
Example #9
    def augment_graph(self):
        '''
        Adds ReduceMin and ReduceMax nodes for all tensors selected for calibration
        and ensures their outputs are stored as part of the graph output.
        The augmented model is stored in self.augment_model and saved to
        self.augmented_model_path.
        '''
        model = onnx_proto.ModelProto()
        model.CopyFrom(self.model)
        model = onnx.shape_inference.infer_shapes(model)

        added_nodes = []
        added_outputs = []
        tensors, value_infos = self.select_tensors_to_calibrate(model)

        for tensor in tensors:

            # Get the tensor's rank and build an all-ones shape of the same rank
            dim = len(value_infos[tensor].type.tensor_type.shape.dim)
            shape = tuple(1 for _ in range(dim))

            # Adding ReduceMin nodes
            reduce_min_name = tensor + '_ReduceMin'
            reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor],
                                                    [tensor + '_ReduceMin'],
                                                    reduce_min_name)

            added_nodes.append(reduce_min_node)
            added_outputs.append(
                helper.make_tensor_value_info(reduce_min_node.output[0],
                                              TensorProto.FLOAT, shape))

            # Adding ReduceMax nodes
            reduce_max_name = tensor + '_ReduceMax'
            reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor],
                                                    [tensor + '_ReduceMax'],
                                                    reduce_max_name)

            added_nodes.append(reduce_max_node)
            added_outputs.append(
                helper.make_tensor_value_info(reduce_max_node.output[0],
                                              TensorProto.FLOAT, shape))

        model.graph.node.extend(added_nodes)
        model.graph.output.extend(added_outputs)
        onnx.save(model, self.augmented_model_path)
        self.augment_model = model
Example #10
    def augment_graph(self):
        '''
        Makes all tensors selected for calibration part of the graph output.
        The augmented model is stored in self.augment_model and saved to
        self.augmented_model_path.
        '''
        model = onnx_proto.ModelProto()
        model.CopyFrom(self.model)
        model = onnx.shape_inference.infer_shapes(model)

        added_nodes = []
        added_outputs = []
        tensors, value_infos = self.select_tensors_to_calibrate(model) 

        for tensor in tensors:
            added_outputs.append(value_infos[tensor])

        model.graph.node.extend(added_nodes)
        model.graph.output.extend(added_outputs)
        onnx.save(model, self.augmented_model_path)
        self.augment_model = model
Example #11
def main():
    os.system('rm -rf saved_models && mkdir saved_models')
    files = glob.glob('saved_models/*.onnx') + glob.glob(
        '../yolov3/weights/*.onnx')

    for f in files:
        # 1. ONNX to CoreML
        name = 'saved_models/' + f.split('/')[-1].replace('.onnx', '')

        with open(f, 'rb') as model_file:
            model_proto = onnx_pb.ModelProto()
            model_proto.ParseFromString(model_file.read())
        coreml_model = convert(model_proto, image_input_names=['0'])
        # coreml_model.save(model_out)

        # 2. Reduce model to FP16, change outputs to DOUBLE and save
        import coremltools

        spec = coreml_model.get_spec()
        for i in range(2):
            spec.description.output[i].type.multiArrayType.dataType = \
                coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value('DOUBLE')

        spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(
            spec)
        coreml_model = coremltools.models.MLModel(spec)

        num_classes = 80
        num_anchors = 507
        spec.description.output[0].type.multiArrayType.shape.append(
            num_classes)
        spec.description.output[0].type.multiArrayType.shape.append(
            num_anchors)

        spec.description.output[1].type.multiArrayType.shape.append(4)
        spec.description.output[1].type.multiArrayType.shape.append(
            num_anchors)
        coreml_model.save(name + '.mlmodel')
        print(spec.description)

        # 3. Create NMS protobuf
        import numpy as np

        nms_spec = coremltools.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 3

        for i in range(2):
            decoder_output = coreml_model._spec.description.output[
                i].SerializeToString()

            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)

            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = 'confidence'
        nms_spec.description.output[1].name = 'coordinates'

        output_sizes = [num_classes, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        nms.confidenceInputFeatureName = '133'  # 1x507x80
        nms.coordinatesInputFeatureName = '134'  # 1x507x4
        nms.confidenceOutputFeatureName = 'confidence'
        nms.coordinatesOutputFeatureName = 'coordinates'
        nms.iouThresholdInputFeatureName = 'iouThreshold'
        nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'

        nms.iouThreshold = 0.6
        nms.confidenceThreshold = 0.4
        nms.pickTop.perClass = True

        labels = np.loadtxt('../yolov3/data/coco.names',
                            dtype=str,
                            delimiter='\n')
        nms.stringClassLabels.vector.extend(labels)

        nms_model = coremltools.models.MLModel(nms_spec)
        nms_model.save(name + '_nms.mlmodel')

        # 4. Pipeline models together
        from coremltools.models import datatypes
        # from coremltools.models import neural_network
        from coremltools.models.pipeline import Pipeline

        input_features = [('image', datatypes.Array(3, 416, 416)),
                          ('iouThreshold', datatypes.Double()),
                          ('confidenceThreshold', datatypes.Double())]

        output_features = ['confidence', 'coordinates']

        pipeline = Pipeline(input_features, output_features)

        # Add 3rd dimension of size 1 (apparently not needed, produces error on compile)
        # ssd_output = coreml_model._spec.description.output
        # ssd_output[0].type.multiArrayType.shape[:] = [num_classes, num_anchors, 1]
        # ssd_output[1].type.multiArrayType.shape[:] = [4, num_anchors, 1]

        # And now we can add the three models, in order:
        pipeline.add_model(coreml_model)
        pipeline.add_model(nms_model)

        # Correct datatypes
        pipeline.spec.description.input[0].ParseFromString(
            coreml_model._spec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(
            nms_model._spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(
            nms_model._spec.description.output[1].SerializeToString())

        # Update metadata
        pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch'
        pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3'
        pipeline.spec.description.metadata.author = '*****@*****.**'
        pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3'

        user_defined_metadata = {
            'classes': ','.join(labels),
            'iou_threshold': str(nms.iouThreshold),
            'confidence_threshold': str(nms.confidenceThreshold)
        }
        pipeline.spec.description.metadata.userDefined.update(
            user_defined_metadata)

        # Save the model
        pipeline.spec.specificationVersion = 3
        final_model = coremltools.models.MLModel(pipeline.spec)
        final_model.save((name + '_pipelined.mlmodel'))
Example #12
def quantize(model, per_channel=True, nbits=8, quantization_mode=QuantizationMode.IntegerOps,
             static=False, asymmetric_input_types=False, input_quantization_params=None,
             output_quantization_params=None):
    '''
        Given an ONNX model, create and return a quantized ONNX model.

    :param model: ModelProto to quantize
    :param per_channel: quantize weights per channel
    :param nbits: number of bits to represent quantized data. Currently only supporting 8-bit types
    :param quantization_mode: Can be one of the QuantizationMode types.
        IntegerOps:
            the function will use integer ops. Only ConvInteger and MatMulInteger ops are supported now.
        QLinearOps:
            the function will use QLinear ops. Only QLinearConv and QLinearMatMul ops are supported now.
    :param static:
        True: The inputs/activations are quantized using static scale and zero point values
              specified through input_quantization_params.
        False: The inputs/activations are quantized using dynamic scale and zero point values
               computed while running the model.
    :param asymmetric_input_types:
        True: Weights are quantized into signed integers and inputs/activations into unsigned integers.
        False: Weights and inputs/activations are quantized into unsigned integers.
    :param input_quantization_params:
        Dictionary to specify the zero point and scale values for inputs to conv and matmul nodes.
        Should be specified when static is set to True.
        The input_quantization_params should be specified in the following format:
            {
                "input_name": [zero_point, scale]
            }.
        zero_point should be of type np.uint8 and scale should be of type np.float32.
        example:
            {
                'resnet_model/Relu_1:0': [np.uint8(0), np.float32(0.019539741799235344)],
                'resnet_model/Relu_2:0': [np.uint8(0), np.float32(0.011359662748873234)]
            }
    :param output_quantization_params:
        Dictionary to specify the zero point and scale values for outputs of conv and matmul nodes.
        Should be specified in QuantizationMode.QLinearOps mode.
        The output_quantization_params should be specified in the following format:
            {
                "output_name": [zero_point, scale]
            }
        zero_point can be of type np.uint8/np.int8 and scale should be of type np.float32.
        example:
            {
                'resnet_model/Relu_3:0': [np.int8(0), np.float32(0.011359662748873234)],
                'resnet_model/Relu_4:0': [np.uint8(0), np.float32(0.011359662748873234)]
            }
    :return: ModelProto with quantization
    '''
    if nbits == 8:
        input_qType = onnx_proto.TensorProto.UINT8
        weight_qType = onnx_proto.TensorProto.INT8 if asymmetric_input_types else onnx_proto.TensorProto.UINT8
        mode = quantization_mode
        copy_model = onnx_proto.ModelProto()
        copy_model.CopyFrom(model)
        quantizer = ONNXQuantizer(copy_model, per_channel, mode, static, weight_qType, input_qType,
                        input_quantization_params, output_quantization_params)
        quantizer.quantize_model()
        quantizer.model.producer_name = __producer__
        quantizer.model.producer_version = __version__
        return quantizer.model
    else:
        raise ValueError('Unknown value for nbits. Only 8-bit quantization is currently supported.')
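A minimal usage sketch for the API above (the model path is hypothetical). With static=False (the default), scales and zero points are computed dynamically at run time, so no quantization params are needed:

import onnx

model = onnx.load('model.onnx')  # hypothetical path
quantized = quantize(model, per_channel=True, nbits=8,
                     quantization_mode=QuantizationMode.IntegerOps)
onnx.save(quantized, 'model_quant.onnx')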
Example #13
def onnx_to_coreml(onnx_model, output):  # type: (IO[str], str) -> None
    onnx_model_proto = onnx_pb.ModelProto()
    onnx_model_proto.ParseFromString(onnx_model.read())
    coreml_model = convert(onnx_model_proto)
    coreml_model.save(output)
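A usage sketch for onnx_to_coreml (file names are hypothetical); note the function expects an already opened binary file object, not a path:

with open('model.onnx', 'rb') as f:
    onnx_to_coreml(f, 'model.mlmodel')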
Example #14
def quantize(model,
             per_channel=False,
             nbits=8,
             quantization_mode=QuantizationMode.IntegerOps,
             static=False,
             force_fusions=False,
             symmetric_activation=False,
             symmetric_weight=False,
             quantization_params=None,
             nodes_to_quantize=None,
             nodes_to_exclude=None,
             op_types_to_quantize=[]):
    '''
        Given an ONNX model, create and return a quantized ONNX model.
    :param model: ModelProto to quantize
    :param per_channel: quantize weights per channel
    :param nbits: number of bits to represent quantized data. Currently only supporting 8-bit types
    :param quantization_mode: Can be one of the QuantizationMode types.
        IntegerOps:
            the function will use integer ops. Only ConvInteger and MatMulInteger ops are supported now.
        QLinearOps:
            the function will use QLinear ops. Only QLinearConv and QLinearMatMul ops are supported now.
    :param static:
        True: The inputs/activations are quantized using static scale and zero point values
              specified through quantization_params.
        False: The inputs/activations are quantized using dynamic scale and zero point values
               computed while running the model.
    :param symmetric_activation:
        True: activations are quantized into signed integers.
        False: activations are quantized into unsigned integers.
    :param symmetric_weight:
        True: weights are quantized into signed integers.
        False: weights are quantized into unsigned integers.
    :param quantization_params:
        Dictionary to specify the zero point and scale values for inputs to conv and matmul nodes.
        Should be specified when static is set to True.
        The quantization_params should be specified in the following format:
            {
                "input_name": [zero_point, scale]
            }.
        zero_point should be of type np.uint8 and scale should be of type np.float32.
        example:
            {
                'resnet_model/Relu_1:0': [np.uint8(0), np.float32(0.019539741799235344)],
                'resnet_model/Relu_2:0': [np.uint8(0), np.float32(0.011359662748873234)]
            }
    :param nodes_to_quantize:
        List of nodes names to quantize. When this list is not None only the nodes in this list
        are quantized.
        example:
        [
            'Conv__224',
            'Conv__252'
        ]
    :param nodes_to_exclude:
        List of nodes names to exclude. The nodes in this list will be excluded from quantization
        when it is not None.
    :param op_types_to_quantize: specify the types of operators to quantize, like ['Conv'] to quantize Conv only. It quantizes all supported operators by default.
    :return: ModelProto with quantization
    '''
    logging.warning("onnxruntime.quantization.quantize is deprecated.\n\
         Please use quantize_static for static quantization, quantize_dynamic for dynamic quantization."
                    )
    if nbits == 8 or nbits == 7:
        mode = quantization_mode
        copy_model = onnx_proto.ModelProto()
        copy_model.CopyFrom(model)

        if not op_types_to_quantize:
            op_types_to_quantize = list(
                QLinearOpsRegistry.keys()) if static else list(
                    IntegerOpsRegistry.keys())

        quantizer = ONNXQuantizer(copy_model, per_channel, nbits == 7, mode,
                                  static, symmetric_weight,
                                  symmetric_activation, quantization_params,
                                  nodes_to_quantize, nodes_to_exclude,
                                  op_types_to_quantize)

        quantizer.quantize_model()
        return quantizer.model.model
    else:
        raise ValueError(
            'Only 8 and 7 bit quantization is currently supported')
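A usage sketch for the deprecated API above, following the quantization_params format from its docstring (the model path and tensor name are hypothetical):

import numpy as np
import onnx

model = onnx.load('model.onnx')  # hypothetical path
quantized = quantize(model,
                     quantization_mode=QuantizationMode.QLinearOps,
                     static=True,
                     quantization_params={
                         # [zero_point, scale] per tensor, as documented above
                         'resnet_model/Relu_1:0': [np.uint8(0), np.float32(0.0195)],
                     })
onnx.save(quantized, 'model_quant.onnx')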
Example #15
def main():
    os.system('rm -rf saved_models && mkdir saved_models')
    files = glob.glob('saved_models/*.onnx') + \
        glob.glob('../yolov3/weights/*.onnx')

    for f in files:
        # 1. ONNX to CoreML
        name = 'saved_models/' + f.split('/')[-1].replace('.onnx', '')

        # Load the ONNX model
        model = onnx.load(f)

        # Check that the IR is well formed (check_model raises on failure)
        onnx.checker.check_model(model)

        # Print a human-readable representation of the graph
        print(onnx.helper.printable_graph(model.graph))

        with open(f, 'rb') as model_file:
            model_proto = onnx_pb.ModelProto()
            model_proto.ParseFromString(model_file.read())
        yolov3_model = convert(model_proto,
                               image_input_names=['0'],
                               preprocessing_args={'image_scale': 1. / 255})

        # 2. Reduce model to FP16, change outputs to DOUBLE and save
        import coremltools

        spec = yolov3_model.get_spec()
        for i in range(2):
            spec.description.output[i].type.multiArrayType.dataType = \
                coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value(
                    'DOUBLE')

        spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(
            spec)
        yolov3_model = coremltools.models.MLModel(spec)

        name_out0 = spec.description.output[0].name
        name_out1 = spec.description.output[1].name

        num_classes = 80
        num_anchors = 507  # 507 for yolov3-tiny,
        spec.description.output[0].type.multiArrayType.shape.append(
            num_anchors)
        spec.description.output[0].type.multiArrayType.shape.append(
            num_classes)
        # spec.description.output[0].type.multiArrayType.shape.append(1)

        spec.description.output[1].type.multiArrayType.shape.append(
            num_anchors)
        spec.description.output[1].type.multiArrayType.shape.append(4)
        # spec.description.output[1].type.multiArrayType.shape.append(1)

        # rename
        # input_mlmodel = input_tensor.replace(":", "__").replace("/", "__")
        # class_output_mlmodel = class_output_tensor.replace(":", "__").replace("/", "__")
        # bbox_output_mlmodel = bbox_output_tensor.replace(":", "__").replace("/", "__")
        #
        # for i in range(len(spec.neuralNetwork.layers)):
        #     if spec.neuralNetwork.layers[i].input[0] == input_mlmodel:
        #         spec.neuralNetwork.layers[i].input[0] = 'image'
        #     if spec.neuralNetwork.layers[i].output[0] == class_output_mlmodel:
        #         spec.neuralNetwork.layers[i].output[0] = 'scores'
        #     if spec.neuralNetwork.layers[i].output[0] == bbox_output_mlmodel:
        #         spec.neuralNetwork.layers[i].output[0] = 'boxes'

        spec.neuralNetwork.preprocessing[0].featureName = '0'

        yolov3_model.save(name + '.mlmodel')
        # yolov3_model.visualize_spec()
        print(spec.description)

        # 2.5. Try to Predict:
        from PIL import Image
        img = Image.open('../yolov3/data/samples/zidane_416.jpg')
        out = yolov3_model.predict({'0': img}, useCPUOnly=True)
        print(out[name_out0].shape, out[name_out1].shape)

        # 3. Create NMS protobuf
        import numpy as np

        nms_spec = coremltools.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 3

        for i in range(2):
            decoder_output = yolov3_model._spec.description.output[
                i].SerializeToString()

            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)

            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = 'confidence'
        nms_spec.description.output[1].name = 'coordinates'

        output_sizes = [num_classes, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        nms.confidenceInputFeatureName = name_out0  # 1x507x80
        nms.coordinatesInputFeatureName = name_out1  # 1x507x4
        nms.confidenceOutputFeatureName = 'confidence'
        nms.coordinatesOutputFeatureName = 'coordinates'
        nms.iouThresholdInputFeatureName = 'iouThreshold'
        nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'

        nms.iouThreshold = 0.4
        nms.confidenceThreshold = 0.5
        nms.pickTop.perClass = True

        labels = np.loadtxt('../yolov3/data/coco.names',
                            dtype=str,
                            delimiter='\n')
        nms.stringClassLabels.vector.extend(labels)

        nms_model = coremltools.models.MLModel(nms_spec)
        nms_model.save(name + '_nms.mlmodel')

        # out_nms = nms_model.predict({
        #     '143': out['143'].squeeze().reshape((80, 507)),
        #     '144': out['144'].squeeze().reshape((4, 507))
        # })
        # print(out_nms['confidence'].shape, out_nms['coordinates'].shape)

        # # # 3.5 Add Softmax model
        # from coremltools.models import datatypes
        # from coremltools.models import neural_network
        #
        # input_features = [
        #     ("141", datatypes.Array(num_anchors, num_classes, 1)),
        #     ("143", datatypes.Array(num_anchors, 4, 1))
        # ]
        #
        # output_features = [
        #     ("141", datatypes.Array(num_anchors, num_classes, 1)),
        #     ("143", datatypes.Array(num_anchors, 4, 1))
        # ]
        #
        # builder = neural_network.NeuralNetworkBuilder(input_features, output_features)
        # builder.add_softmax(name="softmax_pcls",
        #                     dim=(0, 3, 2, 1),
        #                     input_name="scores",
        #                     output_name="permute_scores_output")
        # softmax_model = coremltools.models.MLModel(builder.spec)
        # softmax_model.save("softmax.mlmodel")

        # 4. Pipeline models together
        from coremltools.models import datatypes
        # from coremltools.models import neural_network
        from coremltools.models.pipeline import Pipeline

        input_features = [('0', datatypes.Array(3, 416, 416)),
                          ('iouThreshold', datatypes.Double()),
                          ('confidenceThreshold', datatypes.Double())]

        output_features = ['confidence', 'coordinates']

        pipeline = Pipeline(input_features, output_features)

        # Add 3rd dimension of size 1 (apparently not needed, produces error on compile)
        yolov3_output = yolov3_model._spec.description.output
        yolov3_output[0].type.multiArrayType.shape[:] = [
            num_anchors, num_classes, 1
        ]
        yolov3_output[1].type.multiArrayType.shape[:] = [num_anchors, 4, 1]

        nms_input = nms_model._spec.description.input
        for i in range(2):
            nms_input[i].type.multiArrayType.shape[:] = yolov3_output[
                i].type.multiArrayType.shape[:]

        # And now we can add the three models, in order:
        pipeline.add_model(yolov3_model)

        pipeline.add_model(nms_model)

        # Correct datatypes
        pipeline.spec.description.input[0].ParseFromString(
            yolov3_model._spec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(
            nms_model._spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(
            nms_model._spec.description.output[1].SerializeToString())

        # Update metadata
        pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch'
        pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3'
        pipeline.spec.description.metadata.author = '*****@*****.**'
        pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3'

        user_defined_metadata = {
            'classes': ','.join(labels),
            'iou_threshold': str(nms.iouThreshold),
            'confidence_threshold': str(nms.confidenceThreshold)
        }
        pipeline.spec.description.metadata.userDefined.update(
            user_defined_metadata)

        # Save the model
        pipeline.spec.specificationVersion = 3
        final_model = coremltools.models.MLModel(pipeline.spec)
        final_model.save((name + '_pipelined.mlmodel'))
Example #16
def main(input_file, output_file):
    num_classes = 8
    model_ft = models.resnet18(pretrained=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Sequential(nn.Dropout(p=.5),
                                nn.Linear(num_ftrs, num_classes), nn.Sigmoid())

    checkpoint = torch.load(input_file,
                            map_location=lambda storage, loc: storage)
    model_ft.load_state_dict(checkpoint['model'])

    imsize = 224
    dummy_input = torch.randn(1, 3, imsize, imsize, requires_grad=True)

    protofile = output_file + '.proto'

    model_ft.train(False)
    model_ft.eval()
    model_ft.cpu()
    torch.onnx.export(model_ft,
                      dummy_input,
                      protofile,
                      input_names=['0'],
                      output_names=['classification'],
                      verbose=True)
    model = onnx.load(protofile)

    scale = 1.0 / (0.226 * 255.0)
    red_scale = 1.0 / (0.229 * 255.0)
    green_scale = 1.0 / (0.224 * 255.0)
    blue_scale = 1.0 / (0.225 * 255.0)

    args = dict(is_bgr=False,
                red_bias=-(0.485 * 255.0),
                green_bias=-(0.456 * 255.0),
                blue_bias=-(0.406 * 255.0))

    with open(protofile, 'rb') as model_file:
        model_proto = onnx_pb.ModelProto()
        model_proto.ParseFromString(model_file.read())
    coreml_model = convert(model_proto,
                           image_input_names=['0'],
                           preprocessing_args=args)

    spec = coreml_model.get_spec()

    # get NN portion of the spec
    nn_spec = spec.neuralNetwork
    layers = nn_spec.layers  # this is a list of all the layers
    layers_copy = copy.deepcopy(
        layers)  # make a copy of the layers, these will be added back later
    del nn_spec.layers[:]  # delete all the layers

    # add a scale layer now
    # since mlmodel is in protobuf format, we can add proto messages directly
    # To look at more examples on how to add other layers: see "builder.py" file in coremltools repo
    scale_layer = nn_spec.layers.add()
    scale_layer.name = 'scale_layer'
    scale_layer.input.append('0')
    scale_layer.output.append('input1_scaled')

    params = scale_layer.scale
    params.scale.floatValue.extend([red_scale, green_scale,
                                    blue_scale])  # scale values for RGB
    params.shapeScale.extend([3, 1, 1])  # shape of the scale vector

    # now add back the rest of the layers (which happens to be just one in this case: the crop layer)
    nn_spec.layers.extend(layers_copy)

    # need to also change the input of the crop layer to match the output of the scale layer
    nn_spec.layers[1].input[0] = 'input1_scaled'

    print(spec.description)

    coreml_model = coremltools.models.MLModel(spec)

    coreml_model.save(output_file)
Example #17
    def augment_graph(self):
        '''
        Adds ReduceMin and ReduceMax nodes for all tensors selected for calibration
        and ensures their outputs are stored as part of the graph output.
        The augmented model is stored in self.augment_model and saved to
        self.augmented_model_path.
        '''
        model = onnx_proto.ModelProto()
        model.CopyFrom(self.model)
        model = onnx.shape_inference.infer_shapes(model)

        added_nodes = []
        added_outputs = []
        tensors, value_infos = self.select_tensors_to_calibrate(model)

        for tensor in tensors:

            # When doing ReduceMax/ReduceMin, keep dimensions if the tensor contains
            # a dim with value of 0, for example:
            #     dim = [ dim_value: 0 ]
            # Otherwise, don't keep dimensions.
            dim = value_infos[tensor].type.tensor_type.shape.dim
            keepdims = 0
            shape = ()
            for d in dim:
                # A dimension is either an integer value or a symbolic variable.
                # A dimension with an integer value of 0 is what forces us to keep dimensions.
                # Please see the definition of TensorShapeProto: https://github.com/onnx/onnx/blob/master/onnx/onnx.proto#L630
                if d.WhichOneof('value') == 'dim_value' and d.dim_value == 0:
                    keepdims = 1
                    shape = tuple(1 for _ in range(len(dim)))
                    break

            # Adding ReduceMin nodes
            reduce_min_name = tensor + '_ReduceMin'
            reduce_min_node = onnx.helper.make_node('ReduceMin', [tensor],
                                                    [tensor + '_ReduceMin'],
                                                    reduce_min_name,
                                                    keepdims=keepdims)

            added_nodes.append(reduce_min_node)
            added_outputs.append(
                helper.make_tensor_value_info(reduce_min_node.output[0],
                                              TensorProto.FLOAT, shape))

            # Adding ReduceMax nodes
            reduce_max_name = tensor + '_ReduceMax'
            reduce_max_node = onnx.helper.make_node('ReduceMax', [tensor],
                                                    [tensor + '_ReduceMax'],
                                                    reduce_max_name,
                                                    keepdims=keepdims)

            added_nodes.append(reduce_max_node)
            added_outputs.append(
                helper.make_tensor_value_info(reduce_max_node.output[0],
                                              TensorProto.FLOAT, shape))

        model.graph.node.extend(added_nodes)
        model.graph.output.extend(added_outputs)
        onnx.save(model, self.augmented_model_path)
        self.augment_model = model
Example #18
import sys
from onnx import onnx_pb
from onnx_coreml import convert

model_in = sys.argv[1]
model_out = sys.argv[2]

with open(model_in, 'rb') as model_file:
    model_proto = onnx_pb.ModelProto()
    model_proto.ParseFromString(model_file.read())
coreml_model = convert(model_proto,
                       image_input_names=['inputImage'],
                       image_output_names=['outputImage'])
coreml_model.save(model_out)
Example #19
#!/usr/bin/env python3

import sys
from google.protobuf import text_format
from onnx import onnx_pb

with open(sys.argv[1]) as f:
    c = f.read()
m = onnx_pb.ModelProto()
text_format.Merge(c, m)
with open(sys.argv[2], 'wb') as f:
    f.write(m.SerializeToString())