Example #1
    def run_node(cls,
                 node,
                 inputs,
                 device='CPU',
                 opset_version=_known_opset_version):
        super(Caffe2Backend, cls).run_node(node, inputs, device)

        device_option = get_device_option(Device(device))
        with Workspace(), core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                for key, value in inputs.items():
                    workspace.FeedBlob(key, value)
            else:
                assert len(node.input) == len(
                    inputs), "{}: expected {} but got {}".format(
                        node.op_type, len(node.input), len(inputs))
                for key, value in zip(node.input, inputs):
                    workspace.FeedBlob(key, value)

            cls._inplace_rewrite([node])
            ops = cls._onnx_node_to_caffe2_op(
                node, opset_version or cls._known_opset_version)
            for op in ops:
                op.device_option.CopyFrom(device_option)
            workspace.RunOperatorsOnce(ops)
            output_values = [workspace.FetchBlob(name) for name in node.output]
            return namedtupledict('Outputs', node.output)(*output_values)
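The run_node overrides in this listing all follow the standard ONNX backend interface: a single NodeProto plus raw NumPy inputs, with the outputs returned as a name-addressable namedtuple. A minimal usage sketch, assuming the class above is exposed as caffe2.python.onnx.backend (the module path is an assumption):

import numpy as np
import onnx.helper
import caffe2.python.onnx.backend as backend  # assumed module path

# Build a single Add node and run it through the backend on CPU.
node = onnx.helper.make_node("Add", inputs=["x", "y"], outputs=["z"])
x = np.random.randn(3, 4).astype(np.float32)
y = np.random.randn(3, 4).astype(np.float32)
outputs = backend.run_node(node, [x, y], device="CPU")
print(outputs.z)  # the namedtuple fields match the node's output names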
Example #2
 def supports_device(cls, device_str):
     device = Device(device_str)
     if device.type == DeviceType.CPU:
         return True
     elif device.type == DeviceType.CUDA:
         return workspace.has_gpu_support
     return False
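supports_device is the hook the ONNX test harness (and callers) use to skip devices a backend cannot serve. A short sketch of how it is typically queried, again assuming the caffe2.python.onnx.backend module path:

import caffe2.python.onnx.backend as backend  # assumed module path

# Prefer CUDA when the build supports it, otherwise fall back to CPU.
device_str = "CUDA:0" if backend.supports_device("CUDA:0") else "CPU"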
Example #3
    def run_node(cls, node, inputs, device='CPU', opset_version=_known_opset_version, outputs_info=None):
        super(Caffe2Backend, cls).run_node(node, inputs, device=device, outputs_info=outputs_info)

        device_option = get_device_option(Device(device))
        ws = Workspace()
        with core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                for key, value in inputs.items():
                    ws.FeedBlob(key, value)
            else:
                assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                    node.op_type, len(node.input), len(inputs))
                for key, value in zip(node.input, inputs):
                    ws.FeedBlob(key, value)

            ops = []
            cbackend = C.Caffe2Backend(cls._dummy_name)
            ops_str = cbackend.convert_node(node.SerializeToString(), opset_version)
            for s in ops_str[0] + ops_str[1]:
                op = caffe2_pb2.OperatorDef()
                op.ParseFromString(s)
                op.device_option.CopyFrom(device_option)
                ops.append(op)
            # For testing
            if "ONNX_CAFFE2_DEBUG" in os.environ:
                init_ops, ops2, _ = cls._onnx_node_to_caffe2_op(
                    None, None, node, opset_version or cls._known_opset_version)
                ops2 = init_ops + ops2
                for op in ops2:
                    op.device_option.CopyFrom(device_option)
                print("\nC++:\n{}\nPython:\n{}".format(ops, ops2))
            ws.RunOperatorsOnce(ops)
            output_values = [ws.FetchBlob(name) for name in node.output]
            return namedtupledict('Outputs', node.output)(*output_values)
Example #4
    def run_node(cls, node, inputs, device='CPU'):
        super(TensorflowBackendBase, cls).run_node(node, inputs, device)
        node_graph = tf.Graph()
        with node_graph.as_default():
            node = OnnxNode(node)
            device_option = get_device_option(Device(device))
            input_tensors = []
            for i in inputs:
                input_tensors.append(tf.constant(i))

            if isinstance(inputs, dict):
                feed_dict_raw = inputs
            else:
                assert len(node.inputs) == len(inputs)
                feed_dict_raw = dict(zip(node.inputs, inputs))

            # TODO: is constant the best way for feeding inputs?
            input_dict = dict([(x[0], tf.constant(x[1]))
                               for x in feed_dict_raw.items()])
            ops = cls._onnx_node_to_tensorflow_op(node, input_dict)
            output_vals = []

            with tf.Session() as sess:
                with tf.device(device_option):
                    sess.run(tf.global_variables_initializer())
                    output_vals = sess.run(ops)

        return namedtupledict('Outputs', node.outputs)(*output_vals)
Example #5
    def __init__(self, model, device, **kwargs):
        """Create a ``BackendRep``.

        Parameters
        ----------
        model : str
            The path of onnx model file.
        device : onnx.Device
            The executing device.

        """
        if not isinstance(device, Device):
            device = Device(device)
        execute_ws = workspace.get_workspace()
        if device.type == DeviceType.CPU:
            device_type, device_index = 'cpu', 0
        elif device.type == DeviceType.CUDA:
            device_type, device_index = 'cuda', device.device_id
        else:
            raise ValueError('Unsupported device type: ' + str(device.type))
        with context.device(device_type, device_index):
            self._context = GraphLib.from_onnx(model)
        self._input_dict = collections.OrderedDict()
        self._output_dict = collections.OrderedDict()
        for input in self._context._def.input:
            impl = execute_ws.get_tensor(input)
            self._input_dict[input] = Tensor(impl=impl)
        for output in self._context._def.output:
            impl = execute_ws.get_tensor(output)
            self._output_dict[output] = Tensor(impl=impl)
        self._output_tuple = namedtupledict('Outputs', self._context._def.output)
Example #6
    def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, include_initializers):
        device_option = get_device_option(Device(device))

        init_model = ModelProto()
        init_model.ParseFromString(cls.optimize_onnx(onnx_model.SerializeToString(), init=True))
        cls._inplace_rewrite(init_model.graph)

        predict_model = ModelProto()
        predict_model.ParseFromString(cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))
        cls._inplace_rewrite(predict_model.graph)

        init_net = caffe2_pb2.NetDef()
        predict_net = caffe2_pb2.NetDef()

        init_net.name = onnx_model.graph.name + '_init'
        predict_net.name = onnx_model.graph.name + '_predict'

        if include_initializers:
            init_net.op.extend(cls._create_tensor_filling_op(tp) for tp in onnx_model.graph.initializer)

        dummy_name(cls._all_names_in_graph(init_model.graph) | cls._all_names_in_graph(predict_model.graph))

        for net, model in ( (init_net, init_model), (predict_net, predict_model) ):
            net.device_option.CopyFrom(device_option)
            for node in model.graph.node:
                net.op.extend(cls._onnx_node_to_caffe2_op(node, opset_version))
            net.external_output.extend(
                value_info.name for value_info in model.graph.output)
            net.external_input.extend(
                value_info.name for value_info in model.graph.input)

        return init_net, predict_net
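_onnx_model_to_caffe2_net returns a plain (init_net, predict_net) pair of caffe2_pb2.NetDef protos. A hedged sketch of how such a pair is typically executed with the Caffe2 workspace API; init_net and predict_net are assumed to be the pair returned above, and the blob name and shape are placeholders:

import numpy as np
from caffe2.python import workspace

# init_net fills the weights once; predict_net consumes the model inputs.
workspace.RunNetOnce(init_net)
workspace.FeedBlob("data", np.zeros((1, 3, 224, 224), dtype=np.float32))  # placeholder input blob
workspace.CreateNet(predict_net)
workspace.RunNet(predict_net.name)
result = workspace.FetchBlob(predict_net.external_output[0])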
Example #7
 def supports_device(cls, device_str):
     device = Device(device_str)
     if device.type == DeviceType.CPU:
         return True
     elif core.IsGPUDeviceType(device.type):
         return workspace.has_gpu_support or workspace.has_hip_support
     return False
Example #8
    def prepare(cls, predict_model, device='CPU',
                init_model=None, **kwargs):
        '''
        For the ONNX Caffe2Backend, we require that the init_graph does not initialize the actual inputs of the predict_graph.

        For example, if "img" is the input blob of the predict_net, we require that "img" is not initialized in the init_graph or in the
        initializers of the predict_graph. We do not check for this, since
        there is no way for us to know which blob is the input of the predict_graph.
        '''
        super(Caffe2Backend, cls).prepare(predict_model, device, **kwargs)

        if init_model:
            checker.check_model(init_model)

        init_net, predict_net = cls.onnx_graph_to_caffe2_net(predict_model.graph)
        predict_net.device_option.CopyFrom(get_device_option(Device(device)))

        ws = Workspace()
        with ws, core.DeviceScope(predict_net.device_option):
            if init_model:
                _, init_net_from_model = cls.onnx_graph_to_caffe2_net(init_model.graph)
                init_net.op.extend(init_net_from_model.op)
            workspace.RunNetOnce(init_net)
            uninitialized = [x
                             for x in predict_net.external_input
                             if not workspace.HasBlob(x)]

        return Caffe2Rep(predict_net, ws, uninitialized)
Example #9
class TensorRTBackendRep(BackendRep):
    def __init__(self, model, device, max_batch_size=32,
                 max_workspace_size=None, calib=None, quantization_mode="fp32", serialize_engine=False, **kwargs):
        if not isinstance(device, Device):
            device = Device(device)
        self._set_device(device)
        self._logger = TRT_LOGGER
        self.builder = trt.Builder(self._logger)
        self.network = self.builder.create_network(flags=1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        self.parser = trt.OnnxParser(self.network, self._logger)
        
        if quantization_mode == 'fp16':
            self.builder.fp16_mode = True
        if quantization_mode == 'int8':
            self.builder.int8_mode = True
            assert calib is not None, "int8 mode requires a calibrator"
            self.builder.int8_calibrator = calib

        if not isinstance(model, six.string_types):
            model_str = model.SerializeToString()
        else:
            model_str = model

        if not trt.init_libnvinfer_plugins(TRT_LOGGER, ""):
            msg = "Failed to initialize TensorRT's plugin library."
            raise RuntimeError(msg)
        
        if not self.parser.parse(model_str):
            error = self.parser.get_error(0)
            msg = "While parsing node number %i:\n" % error.node()
            msg += ("%s:%i In function %s:\n[%i] %s" %
                    (error.file(), error.line(), error.func(),
                     error.code(), error.desc()))
            raise RuntimeError(msg)
        if max_workspace_size is None:
            max_workspace_size = 1 << 28

        self.builder.max_batch_size = max_batch_size
        self.builder.max_workspace_size = max_workspace_size

        for layer in self.network:
            print(layer.name)

        print(self.network[-1].get_output(0).shape)

        trt_engine = self.builder.build_cuda_engine(self.network)
        if trt_engine is None:
            raise RuntimeError("Failed to build TensorRT engine from network")
        if serialize_engine:
            trt_engine = self._serialize_deserialize(trt_engine)
        self.engine = Engine(trt_engine)
        self._output_shapes = {}
        self._output_dtype = {}
        for output in model.graph.output:
            dims = output.type.tensor_type.shape.dim
            output_shape = tuple([dim.dim_value for dim in dims])
            self._output_shapes[output.name] = output_shape
            self._output_dtype[output.name] = output.type.tensor_type.elem_type
Example #10
    def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version,
                                  include_initializers):
        device_option = get_device_option(Device(device))

        # Prior to the onnx version update to onnx-1.8.0, errors caused by failures
        # in the onnx shape inference call were being suppressed. Hence a try/except block
        # is added around the infer_shapes call to avoid these failures and preserve the previous behavior.
        try:
            onnx_model = onnx.utils.polish_model(onnx_model)
        except RuntimeError:
            warnings.warn(
                "ShapeInferenceWarning: Inferred shape and existing shape differ in rank"
            )
        init_model = cls.optimize_onnx(onnx_model, init=True)
        pred_model = cls.optimize_onnx(onnx_model, predict=True)

        init_net = caffe2_pb2.NetDef()
        pred_net = caffe2_pb2.NetDef()

        init_net.name = onnx_model.graph.name + '_init'
        pred_net.name = onnx_model.graph.name + '_predict'

        if include_initializers:
            init_net.op.extend(
                cls._create_tensor_filling_op(tp)
                for tp in onnx_model.graph.initializer)

        cls._dummy_name.reset(
            cls._all_names_in_graph(init_model.graph)
            | cls._all_names_in_graph(pred_model.graph))

        errors = []
        for net, model in ((init_net, init_model), (pred_net, pred_model)):
            net.device_option.CopyFrom(device_option)
            for node in model.graph.node:
                try:
                    c2ops = cls._onnx_node_to_caffe2_op(
                        init_model, pred_model, node, opset_version)
                except Exception as e:
                    msg = 'Error while processing node: {}. Exception: {}'.format(
                        node, e)
                    errors.append(msg)
                    print('ONNX FATAL:', msg, file=sys.stderr)
                    continue
                init_net.op.extend(c2ops.init_ops)
                net.op.extend(c2ops.ops)
                net.external_input.extend(c2ops.interface_blobs)
            net.external_output.extend(value_info.name
                                       for value_info in model.graph.output)
            net.external_input.extend(value_info.name
                                      for value_info in model.graph.input)

        if len(errors) > 0:
            raise RuntimeError(
                "ONNX conversion failed, encountered {} errors:\n\n{}".format(
                    len(errors), "\n\n".join(errors)))

        return init_net, pred_net
Example #11
    def prepare(cls, model, device='CPU', **kwargs):
        '''
        For the ONNX Caffe2Backend, we require that the init_graph does not initialize the actual inputs of the predict_graph.

        For example, if "img" is the input blob of the predict_net, we require that "img" is not initialized in the init_graph or in the
        initializers of the predict_graph. We do not check for this, since
        there is no way for us to know which blob is the input of the predict_graph.
        '''
        super(Caffe2Backend, cls).prepare(model, device, **kwargs)

        opset_version = None
        for imp in model.opset_import:
            if not imp.HasField("domain") or imp.domain == "":
                opset_version = imp.version
                if imp.version > cls._known_opset_version:
                    warnings.warn(
                        "This version of onnx-caffe2 targets ONNX operator set version {}, but the model we are trying to import uses version {}.  We will try to import it anyway, but if the model uses operators which had BC-breaking changes in the intervening versions, import will fail."
                        .format(cls._known_opset_version, imp.version))
            else:
                warnings.warn("Unrecognized operator set {}".format(
                    imp.domain))
        if opset_version is None:
            if model.ir_version >= 0x00000003:
                raise RuntimeError(
                    "Model with IR version >= 3 did not specify ONNX operator set version (onnx-caffe2 requires it)"
                )
            else:
                opset_version = 1

        ws = Workspace()
        device_option = get_device_option(Device(device))

        # Directly load initializer data into blobs in workspace
        cls._direct_initialize_parameters(
            model.graph.initializer,
            ws,
            device_option,
        )

        initialized = {init.name for init in model.graph.initializer}

        cls._direct_initialize_inputs(
            model.graph.input,
            initialized,
            ws,
            device_option,
        )

        uninitialized = [
            value_info.name for value_info in model.graph.input
            if value_info.name not in initialized
        ]

        init_net, predict_net = cls._onnx_model_to_caffe2_net(
            model, device, opset_version, False)

        retval = Caffe2Rep(init_net, predict_net, ws, uninitialized)
        return retval
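prepare wires the converted nets into a fresh Workspace and hands back a Caffe2Rep. A minimal end-to-end sketch, assuming the backend module path and the model file name (both are assumptions):

import numpy as np
import onnx
import caffe2.python.onnx.backend as backend  # assumed module path

model = onnx.load("model.onnx")  # placeholder file name
rep = backend.prepare(model, device="CPU")
# Inputs are passed in graph-input order; the shape is a placeholder.
outputs = rep.run([np.random.randn(1, 3, 224, 224).astype(np.float32)])
print(outputs[0].shape)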
Example #12
    def onnx_graph_to_caffe2_net(cls,
                                 graph_def,
                                 device="CPU",
                                 opset_version=_known_opset_version):
        device_option = get_device_option(Device(device))
        cls._inplace_rewrite(graph_def)
        if graph_def.initializer:
            init_net = cls.onnx_initializer_to_caffe2_init_net(
                graph_def.initializer)
            initialized = {init.name for init in graph_def.initializer}
        else:
            init_net = caffe2_pb2.NetDef()
            initialized = set()

        dummy_name(cls._all_names_in_graph(graph_def) | initialized)

        predict_net = caffe2_pb2.NetDef()
        predict_net.name = graph_def.name
        for node in graph_def.node:
            predict_net.op.extend(
                cls._onnx_node_to_caffe2_op(node, opset_version))

        predict_net.external_input.extend(value_info.name
                                          for value_info in graph_def.input)
        predict_net.external_output.extend(value_info.name
                                           for value_info in graph_def.output)

        # Caffe2 predictor requires all input blobs (including the
        # real model inputs) are initialized in init_net
        for value_info in graph_def.input:
            if value_info.name in initialized:
                continue
            op_def = caffe2_pb2.OperatorDef()
            op_def.output.extend([value_info.name])
            op_def.type = 'GivenTensorFill'

            shape = list(d.dim_value
                         for d in value_info.type.tensor_type.shape.dim)
            # TODO: Putting this in the init net will make it run faster, but it
            # causes some tests to fail...
            # shape = (1,)

            shape_arg = op_def.arg.add()
            shape_arg.name = 'shape'
            shape_arg.ints.extend(shape)

            values_arg = op_def.arg.add()
            values_arg.name = 'values'
            values_arg.floats.extend(np.ones(shape).flatten().tolist())

            init_net.op.extend([op_def])

        # Set the device option for the init_net and predict_net.
        init_net.device_option.CopyFrom(device_option)
        predict_net.device_option.CopyFrom(device_option)

        return init_net, predict_net
Example #13
    def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version,
                                  include_initializers):
        device_option = get_device_option(Device(device))

        init_model = ModelProto()
        init_model.ParseFromString(
            cls.optimize_onnx(onnx_model.SerializeToString(), init=True))

        pred_model = ModelProto()
        pred_model.ParseFromString(
            cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))

        init_net = caffe2_pb2.NetDef()
        pred_net = caffe2_pb2.NetDef()

        init_net.name = onnx_model.graph.name + '_init'
        pred_net.name = onnx_model.graph.name + '_predict'

        if include_initializers:
            init_net.op.extend(
                cls._create_tensor_filling_op(tp)
                for tp in onnx_model.graph.initializer)

        dummy_name(
            cls._all_names_in_graph(init_model.graph)
            | cls._all_names_in_graph(pred_model.graph))

        success = True
        for net, model in ((init_net, init_model), (pred_net, pred_model)):
            net.device_option.CopyFrom(device_option)
            for node in model.graph.node:
                try:
                    c2ops = cls._onnx_node_to_caffe2_op(
                        init_model, pred_model, node, opset_version)
                except Exception as e:
                    success = False
                    print('ONNX FATAL:', e)
                    continue
                (init_net if include_initializers else net).op.extend(
                    c2ops.init_ops)
                net.op.extend(c2ops.ops)
                net.external_input.extend(c2ops.interface_blobs)
            net.external_output.extend(value_info.name
                                       for value_info in model.graph.output)
            net.external_input.extend(value_info.name
                                      for value_info in model.graph.input)

        if not success:
            raise RuntimeError('ONNX conversion failed')

        return init_net, pred_net
Example #14
    def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version,
                                  include_initializers):
        device_option = get_device_option(Device(device))

        onnx_model = onnx.utils.polish_model(onnx_model)
        init_model = cls.optimize_onnx(onnx_model, init=True)
        pred_model = cls.optimize_onnx(onnx_model, predict=True)

        init_net = caffe2_pb2.NetDef()
        pred_net = caffe2_pb2.NetDef()

        init_net.name = onnx_model.graph.name + '_init'
        pred_net.name = onnx_model.graph.name + '_predict'

        if include_initializers:
            init_net.op.extend(
                cls._create_tensor_filling_op(tp)
                for tp in onnx_model.graph.initializer)

        cls._dummy_name.reset(
            cls._all_names_in_graph(init_model.graph)
            | cls._all_names_in_graph(pred_model.graph))

        errors = []
        for net, model in ((init_net, init_model), (pred_net, pred_model)):
            net.device_option.CopyFrom(device_option)
            for node in model.graph.node:
                try:
                    c2ops = cls._onnx_node_to_caffe2_op(
                        init_model, pred_model, node, opset_version)
                except Exception as e:
                    msg = 'Error while processing node: {}. Exception: {}'.format(
                        node, e)
                    errors.append(msg)
                    print('ONNX FATAL:', msg, file=sys.stderr)
                    continue
                init_net.op.extend(c2ops.init_ops)
                net.op.extend(c2ops.ops)
                net.external_input.extend(c2ops.interface_blobs)
            net.external_output.extend(value_info.name
                                       for value_info in model.graph.output)
            net.external_input.extend(value_info.name
                                      for value_info in model.graph.input)

        if len(errors) > 0:
            raise RuntimeError(
                "ONNX conversion failed, encountered {} errors:\n\n{}".format(
                    len(errors), "\n\n".join(errors)))

        return init_net, pred_net
Example #15
    def run_node(cls,
                 node,
                 inputs,
                 device='CPU',
                 opset_version=_known_opset_version,
                 outputs_info=None):
        super(Caffe2Backend, cls).run_node(node,
                                           inputs,
                                           device=device,
                                           outputs_info=outputs_info,
                                           opset_version=opset_version)

        value_infos = []
        device_option = get_device_option(Device(device))
        ws = Workspace()
        with core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                for key, value in inputs.items():
                    ws.FeedBlob(key, value)
                    value_infos.append(
                        onnx.helper.make_tensor_value_info(
                            name=key,
                            elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[
                                value.dtype],
                            shape=value.shape).SerializeToString())
            else:
                assert len(node.input) == len(
                    inputs), "{}: expected {} but got {}".format(
                        node.op_type, len(node.input), len(inputs))
                for key, value in zip(node.input, inputs):
                    ws.FeedBlob(key, value)
                    value_infos.append(
                        onnx.helper.make_tensor_value_info(
                            name=key,
                            elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[
                                value.dtype],
                            shape=value.shape).SerializeToString())

            ops = []
            cbackend = C.Caffe2Backend(cls._dummy_name)
            ops_str = cbackend.convert_node(node.SerializeToString(),
                                            value_infos, opset_version)
            for s in ops_str[0] + ops_str[1]:
                op = caffe2_pb2.OperatorDef()
                op.ParseFromString(s)
                op.device_option.CopyFrom(device_option)
                ops.append(op)
            ws.RunOperatorsOnce(ops)
            output_values = [ws.FetchBlob(name) for name in node.output]
            return namedtupledict('Outputs', node.output)(*output_values)
Example #16
    def supports_device(cls, device_str):
        """Query if the given device is supported.

        Parameters
        ----------
        device_str : str
            The device descriptor.

        Returns
        -------
        bool
            **True** if device is supported otherwise **False**.

        """
        device = Device(device_str)
        return device.type == DeviceType.CUDA
Example #17
    def supports_device(cls, device_str):
        """Query if the given device is supported.

        Parameters
        ----------
        device_str : str
            The device descriptor.

        Returns
        -------
        bool
            ``True`` if device is supported otherwise ``False``.

        """
        device = Device(device_str)
        if device.type in (DeviceType.CPU, DeviceType.CUDA):
            return True
        return False
Example #18
    def run_node(cls, node, inputs, device='CPU'):
        super(Caffe2Backend, cls).run_node(node, inputs, device)

        device_option = get_device_option(Device(device))
        with Workspace(), core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                for key, value in inputs.items():
                    workspace.FeedBlob(key, value)
            else:
                assert(len(node.input) == len(inputs))
                for key, value in zip(node.input, inputs):
                    workspace.FeedBlob(key, value)

            cls._inplace_rewrite([node])
            ops = cls._onnx_node_to_caffe2_op(node)
            for op in ops:
                workspace.RunOperatorOnce(op)
            output_values = [workspace.FetchBlob(name) for name in node.output]
            return namedtupledict('Outputs', node.output)(*output_values)
Example #19
 def __init__(self,
              model,
              device,
              max_batch_size=32,
              max_workspace_size=None,
              serialize_engine=True,
              **kwargs):
     if not isinstance(device, Device):
         device = Device(device)
     self._set_device(device)
     self._logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.WARNING)
     self.builder = trt.infer.create_infer_builder(self._logger)
     self.network = self.builder.create_network()
     self.parser = parser.create_parser(self.network, self._logger)
     if not isinstance(model, six.string_types):
         model_str = model.SerializeToString()
     else:
         model_str = model
     if not self.parser.parse(model_str):
         error = self.parser.get_error(0)
         msg = "While parsing node number %i:\n" % error.node()
         msg += ("%s:%i In function %s:\n[%i] %s" %
                 (error.file(), error.line(), error.func(), error.code(),
                  error.desc()))
         raise RuntimeError(msg)
     if max_workspace_size is None:
         max_workspace_size = 1 << 28
     self.builder.set_max_batch_size(max_batch_size)
     self.builder.set_max_workspace_size(max_workspace_size)
     trt_engine = self.builder.build_cuda_engine(self.network)
     if trt_engine is None:
         raise RuntimeError("Failed to build TensorRT engine from network")
     if serialize_engine:
         trt_engine = self._serialize_deserialize(trt_engine)
     self.engine = Engine(trt_engine)
     self._output_shapes = {}
     for output in model.graph.output:
         dims = output.type.tensor_type.shape.dim
         output_shape = tuple([dim.dim_value for dim in dims])
         self._output_shapes[output.name] = output_shape
Example #20
    def __init__(self, model, device, **kwargs):
        """Create a ``BackendRep``.

        Parameters
        ----------
        model : str
            The path of onnx model file.
        device : onnx.Device
            The executing device.

        """
        if not isinstance(device, Device):
            device = Device(device)
        graph_str = workspace.get_workspace().PrepareONNXModel(model)
        graph_def = dragon_pb2.GraphDef()
        graph_def.ParseFromString(graph_str)
        if device.type == DeviceType.CPU:
            device_type, device_index = 'cpu', 0
        elif device.type == DeviceType.CUDA:
            device_type, device_index = 'cuda', device.device_id
        else:
            raise ValueError('Unsupported device type: ' + str(device.type))
        with context.device(device_type, device_index):
            self._function = function_lib.Function(name='ONNXGraph') \
                                         .import_from(graph_def)
        self._input_dict = collections.OrderedDict([
            (impl.name,
             EagerTensor(impl=impl,
                         device=device_spec.DeviceSpec(device_type,
                                                       device_index)))
            for impl in self._function.inputs
        ])
        self._output_dict = collections.OrderedDict([
            (impl.name,
             EagerTensor(impl=impl,
                         device=device_spec.DeviceSpec(device_type,
                                                       device_index)))
            for impl in self._function.outputs
        ])
Example #21
    def prepare(cls, model, device='CPU', **kwargs):
        '''
        For the ONNX Caffe2Backend, we require that the init_graph does not initialize the actual inputs of the predict_graph.

        For example, if "img" is the input blob of the predict_net, we require that "img" is not initialized in the init_graph or in the
        initializers of the predict_graph. We do not check for this, since
        there is no way for us to know which blob is the input of the predict_graph.
        '''
        super(Caffe2Backend, cls).prepare(model, device, **kwargs)

        init_net, predict_net = cls.onnx_graph_to_caffe2_net(model.graph)
        predict_net.device_option.CopyFrom(get_device_option(Device(device)))

        initialized = {init.name for init in model.graph.initializer}
        uninitialized = [x for x in predict_net.external_input
                         if x not in initialized]

        ws = Workspace()
        with ws, core.DeviceScope(predict_net.device_option):
            workspace.RunNetOnce(init_net)

        return Caffe2Rep(predict_net, ws, uninitialized)
Example #22
  def run_node(cls, node, inputs, device='CPU', outputs_info=None, **kwargs):
    """ Run ONNX node.

    :param node: ONNX NodeProto object.
    :param inputs: Inputs.
    :param device: Device run on.
    :param outputs_info: None.
    :param kwargs: Other args.
    :return: Outputs.
    """
    super(TensorflowBackend, cls).run_node(node, inputs, device)
    node_graph = tf.Graph()
    with node_graph.as_default():
      node = OnnxNode(node)
      device_option = get_device_option(Device(device))
      input_tensors = []
      for i in inputs:
        input_tensors.append(tf.constant(i))

      if isinstance(inputs, dict):
        feed_dict_raw = inputs
      else:
        assert len(node.inputs) == len(inputs)
        feed_dict_raw = dict(zip(node.inputs, inputs))

      # TODO: is constant the best way for feeding inputs?
      input_dict = dict([
          (x[0], tf.constant(x[1])) for x in feed_dict_raw.items()
      ])
      ops = cls._onnx_node_to_tensorflow_op(node, input_dict)

      with tf.compat.v1.Session() as sess:
        with tf.device(device_option):
          sess.run(tf.compat.v1.global_variables_initializer())
          output_vals = sess.run(ops)

    return namedtupledict('Outputs', node.outputs)(*output_vals)
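The TensorFlow variants build a throwaway tf.Graph per node and evaluate it in a session. A usage sketch mirroring the Caffe2 one, assuming the class above is exposed as onnx_tf.backend (the module path is an assumption):

import numpy as np
import onnx.helper
import onnx_tf.backend as backend  # assumed module path

# A single Relu node evaluated directly through the TensorFlow backend.
node = onnx.helper.make_node("Relu", inputs=["x"], outputs=["y"])
x = np.random.randn(2, 3).astype(np.float32)
outputs = backend.run_node(node, [x], device="CPU")
print(outputs.y)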
Example #23
 def supports_device(cls, device):  # type: (Text) -> bool
     d = Device(device)
     if d.type == DeviceType.CPU:
         return True
     return False
Example #24
 def supports_device(cls, device):
     d = Device(device)
     if d.type == DeviceType.CPU:
         return True
     return False
Example #25
    def prepare(cls, model, device='CPU', raw_values_dict=None, **kwargs):
        '''
        For the ONNX Caffe2Backend, we require that the init_graph does not initialize the actual inputs of the predict_graph.

        For example, if "img" is the input blob of the predict_net, we require that "img" is not initialized in the init_graph or in the
        initializers of the predict_graph. We do not check for this, since
        there is no way for us to know which blob is the input of the predict_graph.
        '''
        if not kwargs.pop('no_check_UNSAFE', False):
            super(Caffe2Backend, cls).prepare(model, device, **kwargs)
        opset_version = None
        for imp in model.opset_import:
            if not imp.HasField("domain") or imp.domain == "":
                opset_version = imp.version
                if imp.version > cls._known_opset_version:
                    warnings.warn(
                        "This version of onnx-caffe2 targets ONNX operator set version {}, but the model we are trying to import uses version {}.  We will try to import it anyway, but if the model uses operators which had BC-breaking changes in the intervening versions, import will fail."
                        .format(cls._known_opset_version, imp.version))
            else:
                warnings.warn("Unrecognized operator set {}".format(
                    imp.domain))
        if opset_version is None:
            if model.ir_version >= 0x00000003:
                raise RuntimeError(
                    "Model with IR version >= 3 did not specify ONNX operator set version (onnx-caffe2 requires it)"
                )
            else:
                opset_version = 1

        model = onnx.shape_inference.infer_shapes(model)

        # Check whether we have RNN related ops
        pred_model = cls.optimize_onnx(model, predict=True)
        rnn_nodes = []
        for node in pred_model.graph.node:
            if node.op_type in {'LSTM', 'GRU', 'RNN'}:
                rnn_nodes.append(node)

        # Build the C++ backend
        # TODO: build a predictor that supports GPU
        #       And for RNN nets, we need to avoid adding init_net
        use_cpp_backend = device == 'CPU' and not rnn_nodes
        # use python backend for now
        use_cpp_backend = False
        if use_cpp_backend:
            c2_rnn_ops = []
            if rnn_nodes:
                init_model = cls.optimize_onnx(model, init=True)
                for node in rnn_nodes:
                    c2ops = cls._onnx_node_to_caffe2_op(
                        init_model, pred_model, node, opset_version)
                    init_ops = [x.SerializeToString() for x in c2ops.init_ops]
                    ops = [x.SerializeToString() for x in c2ops.ops]
                    external_inputs = c2ops.interface_blobs
                    c2_rnn_ops.append(
                        C.Caffe2Ops(init_ops, ops, external_inputs))
                del init_model

            cbackend = C.Caffe2Backend(cls._dummy_name)
            if raw_values_dict:
                cls._external_value_resolution_pass(model, raw_values_dict)
            rep = cbackend.prepare(model.SerializeToString(), device,
                                   c2_rnn_ops)
            # For testing
            # Dump the net descriptions to file for comparison with the Python ones
            if "ONNX_CAFFE2_DEBUG" in os.environ:
                pred_net_str = rep.pred_net()
                pn = caffe2_pb2.NetDef()
                pn.ParseFromString(pred_net_str)
                init_net_str = rep.init_net()
                inn = caffe2_pb2.NetDef()
                inn.ParseFromString(init_net_str)
                with open("cpp.txt", "w") as f:
                    f.write("pred_net: \n{}".format(pn))

            rep_wrapper = Caffe2CppRep(rep)
            return rep_wrapper
        else:
            ws = Workspace()
            device_option = get_device_option(Device(device))

            init_net, predict_net = cls._onnx_model_to_caffe2_net(
                model, device, opset_version, False)

            if raw_values_dict:
                cls._external_value_resolution_pass(model, raw_values_dict)

            # Directly load initializer data into blobs in workspace
            cls._direct_initialize_parameters(
                model.graph.initializer,
                ws,
                device_option,
            )

            initialized = {init.name for init in model.graph.initializer}

            cls._direct_initialize_inputs(
                model.graph.input,
                initialized,
                ws,
                device_option,
            )

            uninitialized = [
                value_info.name for value_info in model.graph.input
                if value_info.name not in initialized
            ]

            if "ONNX_CAFFE2_DEBUG" in os.environ:
                with open("python.txt", "w") as f:
                    f.write("pred_net: \n{}".format(predict_net))
            retval = Caffe2Rep(init_net, predict_net, ws, uninitialized)
            return retval
Example #26
    def prepare(cls, model, device='CPU', raw_values_dict=None, **kwargs):
        '''
        For the ONNX Caffe2Backend, we require that the init_graph does not initialize the actual inputs of the predict_graph.

        For example, if "img" is the input blob of the predict_net, we require that "img" is not initialized in the init_graph or in the
        initializers of the predict_graph. We do not check for this, since
        there is no way for us to know which blob is the input of the predict_graph.
        '''
        if not kwargs.pop('no_check_UNSAFE', False):
            super(Caffe2Backend, cls).prepare(model, device, **kwargs)
        opset_version = None
        for imp in model.opset_import:
            if not imp.HasField("domain") or imp.domain == "":
                opset_version = imp.version
                if imp.version > cls._known_opset_version:
                    warnings.warn(
                        "This version of onnx-caffe2 targets ONNX operator set version {}, but the model we are trying to import uses version {}.  We will try to import it anyway, but if the model uses operators which had BC-breaking changes in the intervening versions, import will fail."
                        .format(cls._known_opset_version, imp.version))
            else:
                warnings.warn("Unrecognized operator set {}".format(
                    imp.domain))
        if opset_version is None:
            if model.ir_version >= 0x00000003:
                raise RuntimeError(
                    "Model with IR version >= 3 did not specify ONNX operator set version (onnx-caffe2 requires it)"
                )
            else:
                opset_version = 1

        # Prior to the onnx version update to onnx-1.8.0, errors caused by failures
        # in the onnx shape inference call were being suppressed. Hence a try/except block
        # is added around the infer_shapes call to avoid these failures and preserve the previous behavior.
        try:
            model = onnx.shape_inference.infer_shapes(model)
        except RuntimeError:
            warnings.warn(
                "ShapeInferenceWarning: Inferred shape and existing shape differ in rank"
            )

        ws = Workspace()
        device_option = get_device_option(Device(device))

        init_net, predict_net = cls._onnx_model_to_caffe2_net(
            model, device, opset_version, False)

        if raw_values_dict:
            cls._external_value_resolution_pass(model, raw_values_dict)

        # Directly load initializer data into blobs in workspace
        cls._direct_initialize_parameters(
            model.graph.initializer,
            ws,
            device_option,
        )

        initialized = {init.name for init in model.graph.initializer}

        cls._direct_initialize_inputs(
            model.graph.input,
            initialized,
            ws,
            device_option,
        )

        uninitialized = [
            value_info.name for value_info in model.graph.input
            if value_info.name not in initialized
        ]

        retval = Caffe2Rep(init_net, predict_net, ws, uninitialized)
        return retval
Example #27
    def __init__(self, model, device, max_batch_size=32,
                 max_workspace_size=None, serialize_engine=False, verbose=False, **kwargs):
        if not isinstance(device, Device):
            device = Device(device)
        self._set_device(device)
        self._logger = TRT_LOGGER
        self.builder = trt.Builder(self._logger)
        self.network = self.builder.create_network(flags=1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        self.parser = trt.OnnxParser(self.network, self._logger)
        self.shape_tensor_inputs = []
        self.serialize_engine = serialize_engine
        self.verbose = verbose

        if self.verbose:
            print(f'\nRunning {model.graph.name}...')

        if not isinstance(model, six.string_types):
            model_str = model.SerializeToString()
        else:
            model_str = model

        if not trt.init_libnvinfer_plugins(TRT_LOGGER, ""):
            msg = "Failed to initialize TensorRT's plugin library."
            raise RuntimeError(msg)
        
        if not self.parser.parse(model_str):
            error = self.parser.get_error(0)
            msg = "While parsing node number %i:\n" % error.node()
            msg += ("%s:%i In function %s:\n[%i] %s" %
                    (error.file(), error.line(), error.func(),
                     error.code(), error.desc()))
            raise RuntimeError(msg)
        if max_workspace_size is None:
            max_workspace_size = 1 << 28

        self.builder.max_batch_size = max_batch_size
        self.builder.max_workspace_size = max_workspace_size

        num_inputs = self.network.num_inputs
        for idx in range(num_inputs):
            inp_tensor = self.network.get_input(idx)
            if inp_tensor.is_shape_tensor:
                self.shape_tensor_inputs.append((inp_tensor.name, idx))
                if self.verbose:
                    print(f'\nInput \'{inp_tensor.name}\' at index {idx} is a shape tensor')
        
        if self.verbose:
            for layer in self.network:
                print(layer)

            print(f'Output shape: {self.network[-1].get_output(0).shape}')
        
        if len(self.shape_tensor_inputs) == 0:
            self._build_engine()
        else:
            if self.verbose:
                print("Deferring engine build to run stage")
        
        self._output_shapes = {}
        self._output_dtype = {}
        for output in model.graph.output:
            dims = output.type.tensor_type.shape.dim
            output_shape = tuple([dim.dim_value for dim in dims])
            self._output_shapes[output.name] = output_shape
            self._output_dtype[output.name] = output.type.tensor_type.elem_type
Example #28
 def supports_device(cls, device_str):
     device = Device(device_str)
     return device.type == DeviceType.CUDA
Example #29
    def __init__(self,
                 model,
                 device,
                 path_to_trt='./pretrained/firenet_float32_batch1.engine',
                 max_workspace_size=None,
                 serialize_engine=False,
                 verbose=False,
                 **kwargs):
        if not isinstance(device, Device):
            device = Device(device)
        self._set_device(device)
        self._logger = TRT_LOGGER
        self.builder = trt.Builder(self._logger)
        self.config = self.builder.create_builder_config()
        self.network = self.builder.create_network(
            flags=1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        self.parser = trt.OnnxParser(self.network, self._logger)
        self.shape_tensor_inputs = []
        self.serialize_engine = serialize_engine
        self.verbose = verbose
        self.dynamic = False
        self.path_to_trt = path_to_trt

        if self.verbose:
            print(f'\nRunning {model.graph.name}...')
            TRT_LOGGER.min_severity = trt.Logger.VERBOSE

        if not isinstance(model, six.string_types):
            model_str = model.SerializeToString()
            # print('True_isinstance')
        else:
            model_str = model

        if not trt.init_libnvinfer_plugins(TRT_LOGGER, ""):
            msg = "Failed to initialize TensorRT's plugin library."
            raise RuntimeError(msg)

        if not self.parser.parse(model_str):
            # print('True_parse')
            error = self.parser.get_error(0)
            msg = "While parsing node number %i:\n" % error.node()
            msg += ("%s:%i In function %s:\n[%i] %s" %
                    (error.file(), error.line(), error.func(), error.code(),
                     error.desc()))
            raise RuntimeError(msg)
        if max_workspace_size is None:
            max_workspace_size = 1 << 28

        self.config.max_workspace_size = max_workspace_size

        num_inputs = self.network.num_inputs
        for idx in range(num_inputs):
            inp_tensor = self.network.get_input(idx)
            if inp_tensor.is_shape_tensor or -1 in inp_tensor.shape:
                self.dynamic = True
                break

        if self.verbose:
            for layer in self.network:
                print(layer)

            print(f'Output shape: {self.network[-1].get_output(0).shape}')

        if self.dynamic:
            if self.verbose:
                print(
                    "Found dynamic inputs! Deferring engine build to run stage"
                )
        else:
            # self._build_engine()
            self._load_engine()
        self._output_shapes = {}
        self._output_dtype = {}
        for output in model.graph.output:
            dims = output.type.tensor_type.shape.dim
            output_shape = tuple([dim.dim_value for dim in dims])
            self._output_shapes[output.name] = output_shape
            self._output_dtype[output.name] = output.type.tensor_type.elem_type
Example #30
    def __init__(
        self,
        model,
        device,
        max_batch_size=32,
        max_workspace_size=None,
        optimization_profiles=None,
        serialize_engine=False,
    ):
        """Create a ``BackendRep``.

        Parameters
        ----------
        model : onnx.ModelProto
            The onnx model.
        device : onnx.Device
            The executing device.
        max_batch_size : int, optional, default=32
            The max batch size.
        max_workspace_size : int, optional
            The max workspace size in bytes.
        optimization_profiles : List[Dict], optional
            The optimization profiles.
        serialize_engine : bool, optional, default=False
            Whether to serialize engine into a file.

        """
        if not isinstance(device, Device):
            device = Device(device)
        self._set_device(device)
        self._logger = TRT_LOGGER
        self._builder = trt.Builder(self._logger)
        self._builder_config = self._builder.create_builder_config()
        self._network = self._builder.create_network(
            flags=1 << (int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)))
        self._parser = trt.OnnxParser(self._network, self._logger)

        if not isinstance(model, six.string_types):
            model_str = model.SerializeToString()
        else:
            model_str = model

        if not trt.init_libnvinfer_plugins(TRT_LOGGER, ''):
            msg = "Failed to initialize TensorRT's plugin library."
            raise RuntimeError(msg)

        if not self._parser.parse(model_str):
            error = self._parser.get_error(0)
            msg = "While parsing node #%i:\n" % error.node()
            msg += ("%s:%i In function %s:\n[%i] %s" %
                    (error.file(), error.line(), error.func(), error.code(),
                     error.desc()))
            raise RuntimeError(msg)

        if max_workspace_size is None:
            max_workspace_size = 1 << 28

        # Setup the builder.
        self._builder.max_batch_size = max_batch_size
        self._builder.max_workspace_size = max_workspace_size
        self._add_optimization_profiles(optimization_profiles)

        # Build and wrap for the cuda engine.
        if optimization_profiles is None:
            cuda_engine = self._builder.build_cuda_engine(self._network)
        else:
            cuda_engine = self._builder.build_engine(self._network,
                                                     self._builder_config)
        if cuda_engine is None:
            raise RuntimeError("Failed to build TensorRT engine from network.")
        if serialize_engine:
            cuda_engine = self._serialize_deserialize(cuda_engine)
        self._engine = engine.Engine(cuda_engine, device.device_id)

        self._output_shapes = {}
        self._output_dtypes = {}

        for output in model.graph.output:
            dims = output.type.tensor_type.shape.dim
            output_shape = tuple([dim.dim_value for dim in dims])
            self._output_shapes[output.name] = output_shape
            self._output_dtypes[
                output.name] = output.type.tensor_type.elem_type
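Whatever engine sits underneath, a BackendRep is consumed the same way: prepare() builds it once, run() feeds inputs and returns the outputs. A hedged sketch, assuming the TensorRT backend is exposed as onnx_tensorrt.backend (an assumption), that a CUDA device is available, and with placeholder file name and input shape:

import numpy as np
import onnx
import onnx_tensorrt.backend as backend  # assumed module path

model = onnx.load("model.onnx")  # placeholder file name
rep = backend.prepare(model, device="CUDA:0")
outputs = rep.run([np.random.randn(1, 3, 224, 224).astype(np.float32)])  # placeholder shape
print(outputs[0].shape)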