Ejemplo n.º 1
0
def c2_native_run_net(init_net, predict_net, inputs):
    """Run ``predict_net`` in a fresh workspace and collect its outputs.

    :param init_net: Optional net run once before anything is fed; skipped
        when falsy.
    :param predict_net: Net to execute. Its ``device_option`` is passed to
        every ``FeedBlob`` call.
    :param inputs: Either a dict mapping blob names to values, or a sequence
        of values matched positionally (see the comments below).
    :return: ``(ws, outputs)`` where ``outputs`` is an ``Outputs`` named
        tuple keyed by ``predict_net.external_output``.
    """
    ws = Workspace()
    if init_net:
        ws.RunNetOnce(init_net)

    def _feed(blob_name, blob_value):
        ws.FeedBlob(blob_name, blob_value, predict_net.device_option)

    if isinstance(inputs, dict):
        for blob_name, blob_value in inputs.items():
            _feed(blob_name, blob_value)
    else:
        # External inputs that are not yet present in the workspace.
        missing = [
            blob_name for blob_name in predict_net.external_input
            if not ws.HasBlob(blob_name)
        ]
        if len(missing) == len(inputs):
            target_names = missing
        else:
            # The counts do not line up, so fall back to feeding the first
            # len(inputs) external inputs (zip stops at the shorter side).
            assert(len(inputs) <= len(predict_net.external_input))
            target_names = predict_net.external_input
        for blob_name, blob_value in zip(target_names, inputs):
            _feed(blob_name, blob_value)

    ws.RunNetOnce(predict_net)

    output_names = predict_net.external_output
    fetched = [ws.FetchBlob(blob_name) for blob_name in output_names]
    outputs = namedtupledict('Outputs', output_names)(*fetched)
    return ws, outputs
Ejemplo n.º 2
0
 def run(self, inputs, **kwargs):
     """Feed ``inputs`` into the workspace, run the nets, return the outputs.

     ``inputs`` may be a dict mapping blob names to values, a list/tuple of
     values matched positionally against ``self.uninitialized``, or any
     other single value, which is fed to ``self.uninitialized[0]``.

     :raises RuntimeError: if a list/tuple is given whose length differs
         from ``len(self.uninitialized)``.
     :return: ``Outputs`` named tuple keyed by
         ``self.predict_net.external_output``.
     """
     super(Caffe2Rep, self).run(inputs, **kwargs)
     with core.DeviceScope(self.predict_net.device_option):
         if isinstance(inputs, dict):
             # Dict entries are fed inside the rep's name scope.
             with core.NameScope(self._name_scope):
                 for blob_name, blob_value in inputs.items():
                     self.workspace.FeedBlob(blob_name, blob_value)
         elif isinstance(inputs, (list, tuple)):
             expected = len(self.uninitialized)
             received = len(inputs)
             if expected != received:
                 message = ('Expected {} values for uninitialized '
                            'graph inputs ({}), but got {}.')
                 raise RuntimeError(message.format(
                     expected, ', '.join(self.uninitialized), received))
             # namescope already baked into protobuf
             for blob_name, blob_value in zip(self.uninitialized, inputs):
                 self.workspace.FeedBlob(blob_name, blob_value)
         else:
             # single input
             self.workspace.FeedBlob(self.uninitialized[0], inputs)

         # Nets are created, and the init net is run, only on first use.
         if not self.nets_created:
             for net in (self.init_net, self.predict_net):
                 self.workspace.CreateNet(net)
             self.nets_created = True
         if not self.ran_init_net:
             self.workspace.RunNet(self.init_net.name)
             self.ran_init_net = True
         self.workspace.RunNet(self.predict_net.name)

     output_names = self.predict_net.external_output
     fetched = [self.workspace.FetchBlob(blob_name)
                for blob_name in output_names]
     return namedtupledict('Outputs', output_names)(*fetched)
Ejemplo n.º 3
0
    def run_node(cls, node, inputs, device='CPU', opset_version=_known_opset_version, outputs_info=None):
        """Convert a single ONNX node to Caffe2 operators and run them.

        :param node: ONNX node; its ``input``/``output`` names are used for
            feeding and fetching blobs.
        :param inputs: Dict of blob name -> value, or a sequence matched
            positionally against ``node.input``.
        :param device: Device string passed to ``Device``.
        :param opset_version: Opset version forwarded to the converter.
        :param outputs_info: Forwarded to the base-class ``run_node`` only.
        :return: ``Outputs`` named tuple keyed by ``node.output``.
        """
        super(Caffe2Backend, cls).run_node(node, inputs, device=device,
                                           outputs_info=outputs_info, opset_version=opset_version)

        device_option = get_device_option(Device(device))
        ws = Workspace()

        def _parse_op(serialized):
            # Deserialize one operator and pin it to the requested device.
            parsed = caffe2_pb2.OperatorDef()
            parsed.ParseFromString(serialized)
            parsed.device_option.CopyFrom(device_option)
            return parsed

        with core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                named_inputs = inputs.items()
            else:
                assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                    node.op_type, len(node.input), len(inputs))
                named_inputs = zip(node.input, inputs)
            for blob_name, blob_value in named_inputs:
                ws.FeedBlob(blob_name, blob_value)

            cbackend = C.Caffe2Backend(cls._dummy_name)
            ops_str = cbackend.convert_node(node.SerializeToString(), opset_version)
            ops = [_parse_op(serialized) for serialized in ops_str[0] + ops_str[1]]

            # For testing: also convert through the Python path and print
            # both operator lists side by side.
            if "ONNX_CAFFE2_DEBUG" in os.environ:
                init_ops, ops2, _ = cls._onnx_node_to_caffe2_op(
                    None, None, node, opset_version or cls._known_opset_version)
                ops2 = init_ops + ops2
                for py_op in ops2:
                    py_op.device_option.CopyFrom(device_option)
                print("\nC++:\n{}\nPython:\n{}".format(ops, ops2))

            ws.RunOperatorsOnce(ops)
            fetched = [ws.FetchBlob(blob_name) for blob_name in node.output]
            return namedtupledict('Outputs', node.output)(*fetched)
Ejemplo n.º 4
0
def c2_native_run_op(op_def, inputs):
    """Run a single operator in a fresh workspace and collect its outputs.

    :param op_def: Operator definition; its ``device_option`` is passed to
        every ``FeedBlob`` call.
    :param inputs: Dict of blob name -> value, or a sequence matched
        positionally against ``op_def.input`` (lengths must agree).
    :return: ``(ws, outputs)`` where ``outputs`` is an ``Outputs`` named
        tuple keyed by ``op_def.output``.
    """
    ws = Workspace()

    if isinstance(inputs, dict):
        named_inputs = inputs.items()
    else:
        assert(len(op_def.input) == len(inputs))
        named_inputs = zip(op_def.input, inputs)
    for blob_name, blob_value in named_inputs:
        ws.FeedBlob(blob_name, blob_value, op_def.device_option)

    ws.RunOperatorOnce(op_def)

    output_names = op_def.output
    fetched = [ws.FetchBlob(blob_name) for blob_name in output_names]
    outputs = namedtupledict('Outputs', output_names)(*fetched)
    return ws, outputs
Ejemplo n.º 5
0
 def run(self, inputs):
     """Run the wrapped core object and return its outputs as a named tuple.

     ``inputs`` may be a dict (passed through unchanged), a list/tuple
     (zipped with the uninitialized input names into a dict), or any other
     single value (wrapped in a one-element list).

     :raises RuntimeError: if a list/tuple is given whose length differs
         from the number of uninitialized inputs.
     :return: ``Outputs`` named tuple keyed by the external output names.
     """
     if isinstance(inputs, dict):
         output_values = self.__core.run(inputs)
     elif isinstance(inputs, (list, tuple)):
         if len(inputs) != len(self.__uninitialized_inputs):
             message = ('Expected {} values for uninitialized '
                        'graph inputs ({}), but got {}.')
             raise RuntimeError(message.format(
                 len(self.__uninitialized_inputs),
                 ', '.join(self.__uninitialized_inputs),
                 len(inputs)))
         input_map = dict(zip(self.__uninitialized_inputs, inputs))
         output_values = self.__core.run(input_map)
     else:
         # single input
         output_values = self.__core.run([inputs])
     outputs_type = namedtupledict('Outputs', self.__external_outputs)
     return outputs_type(*output_values)
Ejemplo n.º 6
0
 def _test_onnx_importer(self, model_name, data_input_index=0):
     """Check that the TRT op built from an ONNX model matches Caffe2.

     Loads ``model.onnx`` from the directory returned by
     ``_download_onnx_model``, runs it on random float32 data through
     ``c2.run_model`` and through the converted TRT operator on CUDA
     device 0, and asserts the outputs agree within ``rtol=1e-3``.

     :param model_name: Name handed to ``_download_onnx_model``.
     :param data_input_index: Index of the graph input that receives data.
     """
     model_path = os.path.join(_download_onnx_model(model_name), 'model.onnx')
     model_def = onnx.load(model_path)
     graph = model_def.graph

     data_input = graph.input[data_input_index]
     input_blob_dims = [
         int(dim.dim_value)
         for dim in data_input.type.tensor_type.shape.dim
     ]
     op_inputs = [value_info.name for value_info in graph.input]
     op_outputs = [value_info.name for value_info in graph.output]
     print("{}".format(op_inputs))

     data = np.random.randn(*input_blob_dims).astype(np.float32)
     data_name = op_inputs[data_input_index]

     # Reference result from the regular Caffe2 ONNX backend.
     Y_c2 = c2.run_model(model_def, {data_name: data})

     # Same model as a single TRT operator.
     op = convert_onnx_model_to_trt_op(model_def, verbosity=3)
     device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
     op.device_option.CopyFrom(device_option)
     ws = Workspace()
     with core.DeviceScope(device_option):
         ws.FeedBlob(data_name, data)
         ws.RunOperatorsOnce([op])
         trt_values = [ws.FetchBlob(name) for name in op_outputs]
     Y_trt = namedtupledict('Outputs', op_outputs)(*trt_values)

     np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
Ejemplo n.º 7
0
 def _test_relu_graph(self, X, batch_size, trt_max_batch_size):
     """Check a one-node Relu graph converted to a TRT op against Caffe2.

     :param X: Input array fed as blob ``"X"``.
     :param batch_size: Leading dimension declared for the graph's input
         and output value infos (the remaining dims are ``1, 3, 2``).
     :param trt_max_batch_size: ``max_batch_size`` passed to the converter.
     """
     relu_node = make_node("Relu", ["X"], ["Y"])

     # Reference result from running the node directly.
     Y_c2 = c2.run_node(relu_node, {"X": X})

     tensor_shape = [batch_size, 1, 3, 2]
     graph_inputs = [make_tensor_value_info("X", onnx.TensorProto.FLOAT, tensor_shape)]
     graph_outputs = [make_tensor_value_info("Y", onnx.TensorProto.FLOAT, tensor_shape)]
     graph_def = make_graph([relu_node],
                            name="test",
                            inputs=graph_inputs,
                            outputs=graph_outputs)
     model_def = make_model(graph_def, producer_name='relu-test')
     op_outputs = [value_info.name for value_info in model_def.graph.output]

     op = convert_onnx_model_to_trt_op(model_def, max_batch_size=trt_max_batch_size)
     device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
     op.device_option.CopyFrom(device_option)
     ws = Workspace()
     with core.DeviceScope(device_option):
         ws.FeedBlob("X", X)
         ws.RunOperatorsOnce([op])
         trt_values = [ws.FetchBlob(name) for name in op_outputs]
     Y_trt = namedtupledict('Outputs', op_outputs)(*trt_values)

     np.testing.assert_almost_equal(Y_c2, Y_trt)
Ejemplo n.º 8
0
    def run_node(cls, node, inputs, device='CPU', outputs_info=None, **kwargs):
        """Run a single ONNX node through a ``TFModule``.

        :param node: ONNX NodeProto object.
        :param inputs: Dict of input name -> value, or a sequence matched
            positionally against the node's inputs (lengths must agree).
        :param device: Device to run on; also stored in
            ``common.sys_config.device``.
        :param outputs_info: Accepted for interface compatibility; not used
            by this method.
        :param kwargs: Other args; not used by this method.
        :return: ``Outputs`` named tuple keyed by the node's output names.
        """
        super(TensorflowBackend, cls).run_node(node, inputs, device)
        common.sys_config.device = device

        node = OnnxNode(node)

        if isinstance(inputs, dict):
            feed_dict_raw = inputs
        else:
            assert len(node.inputs) == len(inputs)
            feed_dict_raw = dict(zip(node.inputs, inputs))

        # TODO: is constant the best way for feeding inputs?
        # Each input is wrapped exactly once here. The former separate
        # ``input_tensors`` list was never read (and for dict inputs it
        # wrapped the key names rather than the values), so it is gone.
        input_dict = {
            name: tf.constant(value) for name, value in feed_dict_raw.items()
        }

        module = TFModule(node, cls)

        output_vals = module(**input_dict)
        # Convert tensors to numpy arrays; pass anything else through as is.
        output_vals = [
            val.numpy() if isinstance(val, tf.Tensor) else val
            for val in output_vals
        ]

        return namedtupledict('Outputs', node.outputs)(*output_vals)
Ejemplo n.º 9
0
def native_run_graph(graph_def, inputs, initializer, init_func=None):
    """Run ``graph_def`` in an anonymous workspace and fetch the results.

    :param graph_def: Graph definition. Its ``optimization_level`` argument,
        if present, is set to 0 in place before running.
    :param inputs: Dict of tensor name -> value fed before the run.
    :param initializer: Iterable of initializer tensor names.
    :param init_func: Optional callable invoked (with no arguments) after the
        initializers are registered and before the inputs are fed.
    :return: ``(ws, outputs, initializer)`` where ``outputs`` is an
        ``Outputs`` named tuple keyed by ``graph_def.output`` and
        ``initializer`` is a list built with ``numpy_helper.from_array``
        from the fetched initializer tensors.
    """
    # De-Optimization
    for arg in graph_def.arg:
        if arg.name == 'optimization_level':
            arg.i = 0

    # Create an anonymous workspace
    ws = _workspace.Workspace()

    with ws.as_default():
        # Register all the initializer before feeding them
        for tensor_name in initializer:
            _Tensor(name=tensor_name).Variable()

        # Feed the given values if necessary
        if init_func:
            init_func()

        # Feed the external inputs
        for tensor_name, blob in inputs.items():
            _workspace.FeedTensor(tensor_name, blob)

        # Create and Run the graph
        graph_name = _workspace.CreateGraph(graph_def)
        _workspace.RunGraph(graph_name, return_outputs=False)

        # Fetch the outputs
        output_names = graph_def.output
        output_values = [
            _workspace.FetchTensor(tensor_name) for tensor_name in output_names
        ]

        # Fetch the initializer
        fetched_initializer = [
            numpy_helper.from_array(
                _workspace.FetchTensor(tensor_name), name=tensor_name)
            for tensor_name in initializer
        ]

    # Return the outputs
    outputs = namedtupledict('Outputs', output_names)(*output_values)
    return ws, outputs, fetched_initializer
Ejemplo n.º 10
0
    def run_node(cls, node, inputs, device='CPU', opset_version=_known_opset_version):
        """Convert a single ONNX node to Caffe2 operators and run them.

        :param node: ONNX node; its ``input``/``output`` names are used for
            feeding and fetching blobs.
        :param inputs: Dict of blob name -> value, or a sequence matched
            positionally against ``node.input``.
        :param device: Device string passed to ``Device``.
        :param opset_version: Opset version for the conversion; falls back to
            ``cls._known_opset_version`` when falsy.
        :return: ``Outputs`` named tuple keyed by ``node.output``.
        """
        super(Caffe2Backend, cls).run_node(node, inputs, device)

        device_option = get_device_option(Device(device))
        with Workspace():
            with core.DeviceScope(device_option):  # temporary!
                if isinstance(inputs, dict):
                    named_inputs = inputs.items()
                else:
                    assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                        node.op_type, len(node.input), len(inputs))
                    named_inputs = zip(node.input, inputs)
                for blob_name, blob_value in named_inputs:
                    workspace.FeedBlob(blob_name, blob_value)

                cls._inplace_rewrite([node])
                effective_opset = opset_version or cls._known_opset_version
                init_ops, node_ops, _ = cls._onnx_node_to_caffe2_op(
                    None, None, node, effective_opset)
                ops = init_ops + node_ops
                # Pin every generated operator to the requested device.
                for caffe2_op in ops:
                    caffe2_op.device_option.CopyFrom(device_option)
                workspace.RunOperatorsOnce(ops)

                fetched = [workspace.FetchBlob(blob_name) for blob_name in node.output]
                return namedtupledict('Outputs', node.output)(*fetched)
Ejemplo n.º 11
0
 def _test_onnx_importer(self, model_name, data_input_index, opset_version=onnx.defs.onnx_opset_version()):
     """Check that the TRT op built from an ONNX model matches Caffe2.

     Loads ``model.onnx`` from the directory returned by
     ``_download_onnx_model``, runs it on random float32 data through
     ``c2.run_model`` and through the converted TRT operator on CUDA
     device 0, and asserts the outputs agree within ``rtol=1e-3``.

     :param model_name: Name handed to ``_download_onnx_model``.
     :param data_input_index: Index of the graph input that receives data.
     :param opset_version: Opset version handed to ``_download_onnx_model``;
         for versions >= 5 the data is additionally fed as ``"data_0"``.
     """
     model_dir = _download_onnx_model(model_name, opset_version)
     model_def = onnx.load(os.path.join(model_dir, 'model.onnx'))
     graph = model_def.graph

     data_input = graph.input[data_input_index]
     input_blob_dims = [
         int(dim.dim_value)
         for dim in data_input.type.tensor_type.shape.dim
     ]
     op_inputs = [value_info.name for value_info in graph.input]
     op_outputs = [value_info.name for value_info in graph.output]
     print("{}".format(op_inputs))

     data = np.random.randn(*input_blob_dims).astype(np.float32)
     data_name = op_inputs[data_input_index]

     # Reference result from the regular Caffe2 ONNX backend.
     Y_c2 = c2.run_model(model_def, {data_name: data})

     # Same model as a single TRT operator.
     op = convert_onnx_model_to_trt_op(model_def, verbosity=3)
     device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
     op.device_option.CopyFrom(device_option)
     ws = Workspace()
     with core.DeviceScope(device_option):
         ws.FeedBlob(data_name, data)
         if opset_version >= 5:
             # Some newer models from ONNX Zoo come with pre-set "data_0" input
             ws.FeedBlob("data_0", data)
         ws.RunOperatorsOnce([op])
         trt_values = [ws.FetchBlob(name) for name in op_outputs]
     Y_trt = namedtupledict('Outputs', op_outputs)(*trt_values)

     np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
Ejemplo n.º 12
0
def c2_native_run_net(init_net, predict_net, inputs, debug_arg=None):
    """Run ``predict_net`` in a fresh workspace and collect its outputs.

    :param init_net: Optional net run once before anything is fed; skipped
        when falsy.
    :param predict_net: Net to execute. Its ``device_option`` is passed to
        every ``FeedBlob`` call.
    :param inputs: Either a dict mapping blob names to values, or a sequence
        of values matched positionally (see the comments below).
    :param debug_arg: Extra value printed only when more inputs are supplied
        than the net has external inputs, just before the assertion fails.
    :return: ``(ws, outputs)`` where ``outputs`` is an ``Outputs`` named
        tuple keyed by ``predict_net.external_output``.
    """
    ws = Workspace()
    if init_net:
        ws.RunNetOnce(init_net)

    def _feed(blob_name, blob_value):
        ws.FeedBlob(blob_name, blob_value, predict_net.device_option)

    if isinstance(inputs, dict):
        for blob_name, blob_value in inputs.items():
            _feed(blob_name, blob_value)
    else:
        # External inputs that are not yet present in the workspace.
        missing = [
            blob_name for blob_name in predict_net.external_input
            if not ws.HasBlob(blob_name)
        ]
        if len(missing) == len(inputs):
            target_names = missing
        else:
            # The counts do not line up, so fall back to feeding the first
            # len(inputs) external inputs (zip stops at the shorter side).
            # Extra logging to help debug sporadic sandcastle failures.
            too_many_inputs = len(inputs) > len(predict_net.external_input)
            if too_many_inputs:
                print("c2_native_run_net assert. len(inputs)=", len(inputs),
                      "len(predict_net.external_input)=",
                      len(predict_net.external_input))
                print("debug_arg: ", debug_arg)
                print("predict_net ", type(predict_net), ":", predict_net)
                print("inputs ", type(inputs), ":", inputs)
            assert (len(inputs) <= len(predict_net.external_input))
            target_names = predict_net.external_input
        for blob_name, blob_value in zip(target_names, inputs):
            _feed(blob_name, blob_value)

    ws.RunNetOnce(predict_net)

    output_names = predict_net.external_output
    fetched = [ws.FetchBlob(blob_name) for blob_name in output_names]
    outputs = namedtupledict('Outputs', output_names)(*fetched)
    return ws, outputs
Ejemplo n.º 13
0
  def run_node(cls, node, inputs, device='CPU', outputs_info=None, **kwargs):
    """ Run ONNX node in a dedicated graph and session.

    :param node: ONNX NodeProto object.
    :param inputs: Dict of input name -> value, or a sequence matched
      positionally against the node's inputs (lengths must agree).
    :param device: Device run on.
    :param outputs_info: Accepted for interface compatibility; not used by
      this method.
    :param kwargs: Other args; not used by this method.
    :return: ``Outputs`` named tuple keyed by the node's output names.
    """
    super(TensorflowBackend, cls).run_node(node, inputs, device)
    node_graph = tf.Graph()
    with node_graph.as_default():
      node = OnnxNode(node)
      device_option = get_device_option(Device(device))

      if isinstance(inputs, dict):
        feed_dict_raw = inputs
      else:
        assert len(node.inputs) == len(inputs)
        feed_dict_raw = dict(zip(node.inputs, inputs))

      # TODO: is constant the best way for feeding inputs?
      # Each input is wrapped exactly once here. The former separate
      # ``input_tensors`` list was never read and only added unused
      # constants to the graph, so it is gone.
      input_dict = {
          name: tf.constant(value) for name, value in feed_dict_raw.items()
      }
      ops = cls._onnx_node_to_tensorflow_op(node, input_dict)

      with tf.compat.v1.Session() as sess:
        with tf.device(device_option):
          sess.run(tf.compat.v1.global_variables_initializer())
          output_vals = sess.run(ops)

    return namedtupledict('Outputs', node.outputs)(*output_vals)
Ejemplo n.º 14
0
    def run_node(cls, node, inputs, uninit=[0]):
        """
        Build a Keras model from a single ONNX node and run it on ``inputs``.
        Used for unit tests.

        :param node: onnx node
        :param inputs: positional inputs, indexed in step with the node's
            input names
        :param uninit: indices of the inputs that become Keras ``Input``
            placeholders and are fed to ``model.predict``; the default list
            is only iterated, never modified
        :return: ``Outputs`` named tuple holding the prediction result
        """
        super(KerasBackend, cls).run_node(node, inputs)
        node = OnnxNode(node)

        # Reset the class-level lists before the node is converted.
        cls.extra_input = []
        cls.extra_input_array = []

        # Start with every input bound to its raw value ...
        input_dict = {
            node.inputs[idx]: inputs[idx] for idx in range(len(inputs))
        }

        # ... then replace the selected ones with Keras placeholders.
        placeholders = []
        feed_arrays = []
        for idx in uninit:
            array = inputs[idx]
            feed_arrays.append(array)
            placeholder = Input(batch_shape=list(array.shape),
                                name=node.inputs[idx],
                                dtype=str(array.dtype))
            placeholders.append(placeholder)
            input_dict[node.inputs[idx]] = placeholder

        out = cls._onnx_node_to_keras_op(node, input_dict)[0]

        model = Model(inputs=placeholders + cls.extra_input, outputs=out)
        feed_arrays.extend(cls.extra_input_array)
        # A single array is passed bare rather than as a one-element list.
        model_input = feed_arrays[0] if len(feed_arrays) == 1 else feed_arrays
        res = model.predict(model_input)
        return namedtupledict('Outputs', node.outputs)(res)
Ejemplo n.º 15
0
    def run(self, inputs, **kwargs):
        """Run the stored graph in a new session and return its outputs.

        :param inputs: Dict of input name -> value, a list/tuple matched
            positionally against ``self.inputs``, or any other single value,
            which is bound to ``self.inputs[0]``.
        :param kwargs: Other args, forwarded to the base-class ``run``.
        :raises RuntimeError: if a list/tuple is given whose length differs
            from ``len(self.inputs)``.
        :return: ``Outputs`` named tuple keyed by ``self.outputs``.
        """
        super(TensorflowRep, self).run(inputs, **kwargs)

        # TODO: handle name scope if necessary
        with self.graph.as_default(), tf.Session() as sess:
            if isinstance(inputs, dict):
                named_values = inputs
            elif isinstance(inputs, (list, tuple)):
                if len(self.inputs) != len(inputs):
                    message = ('Expected {} values for uninitialized '
                               'graph inputs ({}), but got {}.')
                    raise RuntimeError(message.format(
                        len(self.inputs), ', '.join(self.inputs),
                        len(inputs)))
                named_values = dict(zip(self.inputs, inputs))
            else:
                # single input
                named_values = {self.inputs[0]: inputs}

            # Re-key by the graph tensors instead of by input name.
            feed_dict = {
                self.tensor_dict[name]: named_values[name]
                for name in self.inputs
            }

            sess.run(tf.global_variables_initializer())
            output_tensors = [self.tensor_dict[name] for name in self.outputs]

            output_values = sess.run(output_tensors, feed_dict=feed_dict)
            return namedtupledict('Outputs', self.outputs)(*output_values)
Ejemplo n.º 16
0
    def run_node(cls, node, inputs, device='CPU'):
        """Run a single ONNX node in a TensorFlow session.

        :param node: ONNX NodeProto object.
        :param inputs: Dict of input name -> value, or a sequence matched
            positionally against the node's inputs (lengths must agree).
        :param device: Device string passed to ``Device``.
        :return: ``Outputs`` named tuple keyed by the node's output names.
        """
        super(TensorflowBackend, cls).run_node(node, inputs, device)
        node = OnnxNode(node)
        device_option = get_device_option(Device(device))

        if isinstance(inputs, dict):
            feed_dict_raw = inputs
        else:
            assert len(node.inputs) == len(inputs)
            feed_dict_raw = dict(zip(node.inputs, inputs))

        # TODO: is constant the best way for feeding inputs?
        # Each input is wrapped exactly once here. The former separate
        # ``input_tensors`` list was never read and only added unused
        # constants to the graph, so it is gone.
        input_dict = {
            name: tf.constant(value) for name, value in feed_dict_raw.items()
        }
        ops = cls._onnx_node_to_tensorflow_op(node, input_dict)

        with tf.Session() as sess:
            with tf.device(device_option):
                sess.run(tf.global_variables_initializer())
                output_vals = sess.run(ops)

        return namedtupledict('Outputs', node.outputs)(*output_vals)
Ejemplo n.º 17
0
    def run_node(cls, node, inputs, device='CPU', opset_version=_known_opset_version, outputs_info=None):
        """Convert a single ONNX node to Caffe2 operators and run them.

        Besides feeding the inputs, a serialized tensor value info (name,
        element type and shape) is built for every input and handed to the
        converter.

        :param node: ONNX node; its ``input``/``output`` names are used for
            feeding and fetching blobs.
        :param inputs: Dict of blob name -> value, or a sequence matched
            positionally against ``node.input``. Values must expose
            ``dtype`` and ``shape``.
        :param device: Device string passed to ``Device``.
        :param opset_version: Opset version forwarded to the converter.
        :param outputs_info: Forwarded to the base-class ``run_node`` only.
        :return: ``Outputs`` named tuple keyed by ``node.output``.
        """
        super(Caffe2Backend, cls).run_node(node, inputs, device=device,
                                           outputs_info=outputs_info, opset_version=opset_version)

        device_option = get_device_option(Device(device))
        ws = Workspace()
        value_infos = []
        with core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                named_inputs = inputs.items()
            else:
                assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                    node.op_type, len(node.input), len(inputs))
                named_inputs = zip(node.input, inputs)

            # Feed each input and record its value info in the same pass.
            for blob_name, blob_value in named_inputs:
                ws.FeedBlob(blob_name, blob_value)
                value_info = onnx.helper.make_tensor_value_info(
                    name=blob_name,
                    elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[blob_value.dtype],
                    shape=blob_value.shape)
                value_infos.append(value_info.SerializeToString())

            cbackend = C.Caffe2Backend(cls._dummy_name)
            ops_str = cbackend.convert_node(node.SerializeToString(), value_infos, opset_version)

            ops = []
            for serialized in ops_str[0] + ops_str[1]:
                caffe2_op = caffe2_pb2.OperatorDef()
                caffe2_op.ParseFromString(serialized)
                caffe2_op.device_option.CopyFrom(device_option)
                ops.append(caffe2_op)

            ws.RunOperatorsOnce(ops)
            fetched = [ws.FetchBlob(blob_name) for blob_name in node.output]
            return namedtupledict('Outputs', node.output)(*fetched)
Ejemplo n.º 18
0
 def run(self, inputs, **kwargs):
     """Feed ``inputs`` into the workspace, run the nets, return the outputs.

     ``inputs`` may be a dict mapping blob names to values, a list/tuple of
     values matched positionally against ``self.uninitialized``, or any
     other single value, which is fed to ``self.uninitialized[0]``.

     Each output is read with ``FetchBlob``; if that raises, the output is
     read with ``FetchInt8Blob`` instead.

     :raises RuntimeError: if a list/tuple is given whose length differs
         from ``len(self.uninitialized)``.
     :return: ``Outputs`` named tuple keyed by
         ``self.predict_net.external_output``.
     """
     super(Caffe2Rep, self).run(inputs, **kwargs)
     with core.DeviceScope(self.predict_net.device_option):
         if isinstance(inputs, dict):
             # Dict entries are fed inside the rep's name scope.
             with core.NameScope(self._name_scope):
                 for blob_name, blob_value in inputs.items():
                     self.workspace.FeedBlob(blob_name, blob_value)
         elif isinstance(inputs, (list, tuple)):
             expected = len(self.uninitialized)
             received = len(inputs)
             if expected != received:
                 message = ('Expected {} values for uninitialized '
                            'graph inputs ({}), but got {}.')
                 raise RuntimeError(message.format(
                     expected, ', '.join(self.uninitialized), received))
             # namescope already baked into protobuf
             for blob_name, blob_value in zip(self.uninitialized, inputs):
                 self.workspace.FeedBlob(blob_name, blob_value)
         else:
             # single input
             self.workspace.FeedBlob(self.uninitialized[0], inputs)

         # Nets are created, and the init net is run, only on first use.
         if not self.nets_created:
             for net in (self.init_net, self.predict_net):
                 self.workspace.CreateNet(net)
             self.nets_created = True
         if not self.ran_init_net:
             self.workspace.RunNet(self.init_net.name)
             self.ran_init_net = True
         self.workspace.RunNet(self.predict_net.name)

     def _fetch(blob_name):
         # Try the regular fetch first, then the Int8 variant.
         try:
             return self.workspace.FetchBlob(blob_name)
         except Exception:
             return self.workspace.FetchInt8Blob(blob_name)

     output_names = self.predict_net.external_output
     fetched = [_fetch(blob_name) for blob_name in output_names]
     return namedtupledict('Outputs', output_names)(*fetched)
Ejemplo n.º 19
0
    def test_resnet50_core(self):
        """Compare ResNet-50 outputs before and after ``onnxifi_caffe2_net``.

        Runs the original predict net on random data in the "onnxifi_test"
        workspace, resets the workspace, reloads the weights, transforms the
        net with ``onnxifi_caffe2_net``, runs the transformed net on the same
        data and asserts the outputs agree within ``rtol=1e-3``. Timings for
        both runs and for the conversion are printed along the way.
        """
        N = 1
        repeat = 1
        print("Batch size: {}, repeat inference {} times".format(N, repeat))
        init_net, pred_net, _ = self._get_c2_model('resnet50')
        self._add_head_tail(pred_net, 'real_data', 'real_softmax')
        input_blob_dims = (N, 3, 224, 224)
        input_name = "real_data"

        # Pin both nets and every operator of the predict net to CPU.
        device_option = core.DeviceOption(caffe2_pb2.CPU, 0)
        init_net.device_option.CopyFrom(device_option)
        pred_net.device_option.CopyFrom(device_option)
        for op in pred_net.op:
            op.device_option.CopyFrom(device_option)
        net_outputs = pred_net.external_output
        Y_c2 = None
        data = np.random.randn(*input_blob_dims).astype(np.float32)
        c2_time = 1
        # Reference run: only the RunNet loop is inside the timed window.
        workspace.SwitchWorkspace("onnxifi_test", True)
        with core.DeviceScope(device_option):
            workspace.FeedBlob(input_name, data)
            workspace.RunNetOnce(init_net)
            workspace.CreateNet(pred_net)
            start = time.time()
            for _ in range(repeat):
                workspace.RunNet(pred_net.name)
            end = time.time()
            c2_time = end - start
            output_values = [workspace.FetchBlob(name) for name in net_outputs]
            Y_c2 = namedtupledict('Outputs', net_outputs)(*output_values)
        workspace.ResetWorkspace()

        # Fill the workspace with the weights
        with core.DeviceScope(device_option):
            workspace.RunNetOnce(init_net)

        # Cut the graph
        # The "conversion" timer starts here and stops after CreateNet below.
        start = time.time()
        pred_net_cut = onnxifi_caffe2_net(pred_net,
                                          {input_name: input_blob_dims},
                                          infer_shapes=True)
        del init_net, pred_net
        #_print_net(pred_net_cut)

        Y_trt = None
        # From here on the data is fed under the transformed net's first
        # external input name.
        input_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))
        with core.DeviceScope(device_option):
            workspace.FeedBlob(input_name, data)
            workspace.CreateNet(pred_net_cut)
            end = time.time()
            print("Conversion time: {:.2f}s".format(end - start))

            # Timed run of the transformed net.
            start = time.time()
            for _ in range(repeat):
                workspace.RunNet(pred_net_cut.name)
            end = time.time()
            trt_time = end - start
            print("Onnxifi runtime: {}s, improvement: {}%".format(
                trt_time, (c2_time - trt_time) / c2_time * 100))
            # Outputs are fetched by the original net's output names.
            output_values = [workspace.FetchBlob(name) for name in net_outputs]
            Y_trt = namedtupledict('Outputs', net_outputs)(*output_values)
        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
Ejemplo n.º 20
0
    def test_resnet50_core(self):
        """Compare ResNet-50 outputs before and after ``transform_caffe2_net``.

        Runs the original predict net (CUDA device 0, ``CUDNN`` engine) on
        random data in the "gpu_test" workspace, resets the workspace,
        reloads the weights, transforms the net with
        ``transform_caffe2_net``, runs the transformed net on the same data
        and asserts the outputs agree within ``rtol=1e-3``. Each timed loop
        is preceded by untimed warm-up runs; timings are printed.
        """
        N = 2
        warmup = 20
        repeat = 100
        print("Batch size: {}, repeat inference {} times, warmup {} times".format(N, repeat, warmup))
        init_net, pred_net, _  = self._get_c2_model('resnet50')
        self._add_head_tail(pred_net, 'real_data', 'real_softmax')
        input_blob_dims = (N, 3, 224, 224)
        input_name = "real_data"

        # Pin both nets and every operator to CUDA device 0, and select the
        # CUDNN engine on each operator.
        device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        init_net.device_option.CopyFrom(device_option)
        pred_net.device_option.CopyFrom(device_option)
        for op in pred_net.op:
            op.device_option.CopyFrom(device_option)
            op.engine = 'CUDNN'
        net_outputs = pred_net.external_output
        Y_c2 = None
        data =  np.random.randn(*input_blob_dims).astype(np.float32)
        c2_time = 1
        # Reference run: warm-up first, then only the RunNet loop is timed.
        workspace.SwitchWorkspace("gpu_test", True)
        with core.DeviceScope(device_option):
            workspace.FeedBlob(input_name, data)
            workspace.RunNetOnce(init_net)
            workspace.CreateNet(pred_net)
            for _ in range(warmup):
                workspace.RunNet(pred_net.name)
            start = time.time()
            for _ in range(repeat):
                workspace.RunNet(pred_net.name)
            end = time.time()
            c2_time = end - start
            output_values = [workspace.FetchBlob(name) for name in net_outputs]
            Y_c2 = namedtupledict('Outputs', net_outputs)(*output_values)
        workspace.ResetWorkspace()

        # Fill the workspace with the weights
        with core.DeviceScope(device_option):
            workspace.RunNetOnce(init_net)

        # Cut the graph
        # The "conversion" timer starts here and stops after CreateNet below.
        start = time.time()
        pred_net_cut = transform_caffe2_net(pred_net,
                                            {input_name: input_blob_dims},
                                            build_serializable_op=True)
        del init_net, pred_net
        #_print_net(pred_net_cut)

        Y_trt = None
        # From here on the data is fed under the transformed net's first
        # external input name.
        input_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))
        with core.DeviceScope(device_option):
            workspace.FeedBlob(input_name, data)
            workspace.CreateNet(pred_net_cut)
            end = time.time()
            print("Conversion time: {:.2f}s".format(end -start))

            # Warm-up, then the timed run of the transformed net.
            for _ in range(warmup):
                workspace.RunNet(pred_net_cut.name)
            start = time.time()
            for _ in range(repeat):
                workspace.RunNet(pred_net_cut.name)
            end = time.time()
            trt_time = end - start
            print("TRT runtime: {}s, improvement: {}%".format(trt_time, (c2_time-trt_time)/c2_time*100))
            # Outputs are fetched by the original net's output names.
            output_values = [workspace.FetchBlob(name) for name in net_outputs]
            Y_trt = namedtupledict('Outputs', net_outputs)(*output_values)
        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
Ejemplo n.º 21
0
    def instantiate(cls, node, **kwargs):
        """Create the operator for ``node`` after resolving pooling attributes.

        The first input tensor's axes from index 2 onward are treated as the
        spatial axes (1, 2 or 3 of them); the first two axes are copied to the
        output shape unchanged.  Missing attributes are filled into
        ``node.attrs`` *in place*:

        * ``ceil_mode``      -> 0
        * ``strides``        -> all ones
        * ``dilations``      -> all ones
        * ``pads``           -> derived from ``auto_pad`` (see below)
        * ``storage_order``  -> 0

        ``attrs['kernel_shape']`` is read without a default and must be
        present whenever it is needed.

        Args:
            node: object exposing ``input_tensor``, ``attrs``, ``outputs`` and
                ``valid_var_name`` as used below.
            **kwargs: only ``device`` is consulted.  When it is a
                ``QumicoDevice`` whose ``options`` contain
                ``QumicoDeviceType.OpenMP``, ``cls.OpenMP`` is set to True.

        Returns:
            ``cls(node, input_tensor=..., output_tensor=..., attrs=attrs)``
            where ``output_tensor`` holds a placeholder array of ones with the
            inferred output shape and the input's dtype.

        Raises:
            ValueError: if the input rank is not 3, 4 or 5.
        """
        input_data1 = node.input_tensor[0]
        attrs = node.attrs

        # ceil_mode was introduced with MaxPool-10; default is 0.
        if attrs.get('ceil_mode') is None:
            attrs['ceil_mode'] = 0

        rank = input_data1.ndim
        if rank not in (3, 4, 5):
            raise ValueError(
                'unsupported input rank {}: expected 3, 4 or 5'.format(rank))
        n_spatial = rank - 2
        in_shape = input_data1.shape

        if attrs.get('strides') is None:
            attrs['strides'] = (1, ) * n_spatial
        # dilations was introduced with MaxPool-10.
        if attrs.get('dilations') is None:
            attrs['dilations'] = (1, ) * n_spatial

        auto_pad = attrs.get('auto_pad')
        same_padding = auto_pad in ('SAME_UPPER', 'SAME_LOWER')

        if attrs.get('pads') is None:
            if same_padding:
                # pad_shape[i] = (output_spatial_shape[i] - 1) * strides[i]
                #                + effective_kernel[i] - input_spatial_shape[i]
                # where effective_kernel accounts for dilation.
                half_pads = []
                for axis in range(n_spatial):
                    size = in_shape[2 + axis]
                    stride = attrs['strides'][axis]
                    eff_kernel = ((attrs['kernel_shape'][axis] - 1) *
                                  attrs['dilations'][axis] + 1)
                    total = ((math.ceil(size / stride) - 1) * stride +
                             eff_kernel - size)
                    half_pads.append(total / 2)
                smaller = tuple(math.floor(h) for h in half_pads)
                larger = tuple(math.ceil(h) for h in half_pads)
                # Layout is (begin_0..begin_n, end_0..end_n).  SAME_UPPER puts
                # the extra cell of an odd total at the end, SAME_LOWER at the
                # beginning.
                if auto_pad == 'SAME_UPPER':
                    attrs['pads'] = smaller + larger
                else:
                    attrs['pads'] = larger + smaller
            elif auto_pad in ('VALID', 'NOTSET', None):
                attrs['pads'] = (0, ) * (2 * n_spatial)
            # Any other auto_pad value leaves 'pads' unset, as before.
        if attrs.get('storage_order') is None:
            attrs['storage_order'] = 0

        # SAME:   out[i] = ceil(in[i] / strides[i])
        # VALID:  out[i] = ceil((in[i] - effective_kernel[i] + 1) / strides[i])
        # NOTSET: out[i] = floor_or_ceil((in[i] + pad_begin[i] + pad_end[i]
        #                                 - effective_kernel[i]) / strides[i] + 1)
        #         (ceil when ceil_mode is non-zero, floor otherwise)
        tmp_shape = list(in_shape[:2])
        for axis in range(n_spatial):
            # Negative index of this spatial axis, counted from the end; it
            # also selects the matching "end" entry in attrs['pads'].
            tail = axis - n_spatial
            size = in_shape[tail]
            stride = attrs['strides'][tail]
            if same_padding:
                extent = math.ceil(size / stride)
            else:
                eff_kernel = ((attrs['kernel_shape'][tail] - 1) *
                              attrs['dilations'][tail] + 1)
                if auto_pad == 'VALID':
                    extent = math.ceil((size - eff_kernel + 1) / stride)
                else:
                    padded = (size + attrs['pads'][axis] +
                              attrs['pads'][tail] - eff_kernel)
                    if attrs['ceil_mode'] == 0:
                        extent = math.floor(padded / stride + 1)
                    else:
                        extent = math.ceil(padded / stride + 1)
            tmp_shape.append(extent)

        outputs_shape = tuple(tmp_shape)
        outputs_dtype = input_data1.dtype
        # Placeholder output: only its shape and dtype are set here.
        outputs_dict = {
            node.valid_var_name(node.outputs[0]):
            np.ones(shape=outputs_shape, dtype=outputs_dtype)
        }
        output_tensor = namedtupledict('output_tensor',
                                       outputs_dict.keys())(**outputs_dict)

        device = kwargs.get('device')
        if (isinstance(device, QumicoDevice)
                and QumicoDeviceType.OpenMP in device.options):
            cls.OpenMP = True

        return cls(node,
                   input_tensor=node.input_tensor,
                   output_tensor=output_tensor,
                   attrs=attrs)
Ejemplo n.º 22
0
    def test_resnet50_core(self):
        """Benchmark ResNet-50 before and after ``transform_caffe2_net``.

        The net is run on CUDA device 0 with the CUDNN engine, timed over a
        fixed number of iterations after warm-up, then transformed and timed
        again on the same random input.  The outputs of both runs must agree
        within a relative tolerance of 1e-3.
        """
        batch_size = 2
        warmup_iters = 20
        timed_iters = 100
        print("Batch size: {}, repeat inference {} times, warmup {} times".
              format(batch_size, timed_iters, warmup_iters))

        init_net, pred_net, _ = self._get_c2_model('resnet50')
        self._add_head_tail(pred_net, 'real_data', 'real_softmax')
        blob_dims = (batch_size, 3, 224, 224)
        feed_name = "real_data"

        # Pin both nets and every operator to GPU 0 using cuDNN kernels.
        gpu_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        init_net.device_option.CopyFrom(gpu_option)
        pred_net.device_option.CopyFrom(gpu_option)
        for op in pred_net.op:
            op.device_option.CopyFrom(gpu_option)
            op.engine = 'CUDNN'

        output_names = pred_net.external_output
        data = np.random.randn(*blob_dims).astype(np.float32)

        def run_timed(workspace, net_name):
            # Warm-up passes are excluded from the measured interval.
            for _ in range(warmup_iters):
                workspace.RunNet(net_name)
            tic = time.time()
            for _ in range(timed_iters):
                workspace.RunNet(net_name)
            toc = time.time()
            return toc - tic

        def fetch_outputs(workspace):
            # Collect every external output blob into an 'Outputs' tuple.
            values = [workspace.FetchBlob(name) for name in output_names]
            return namedtupledict('Outputs', output_names)(*values)

        # Baseline: the untransformed Caffe2 net.
        ws = Workspace()
        with core.DeviceScope(gpu_option):
            ws.FeedBlob(feed_name, data)
            ws.RunNetOnce(init_net)
            ws.CreateNet(pred_net)
            c2_time = run_timed(ws, pred_net.name)
            Y_c2 = fetch_outputs(ws)
        ws.ResetWorkspace()

        # Cut the graph
        init_net_cut, pred_net_cut = transform_caffe2_net(
            init_net, pred_net, {feed_name: blob_dims})
        del init_net, pred_net
        # Debug aid: print_net(pred_net_cut)

        # The transformed net's first external input receives the same data.
        feed_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))
        ws = Workspace()
        with core.DeviceScope(gpu_option):
            ws.FeedBlob(feed_name, data)
            ws.RunNetOnce(init_net_cut)
            ws.CreateNet(pred_net_cut)
            trt_time = run_timed(ws, pred_net_cut.name)
            print("TRT runtime: {}s, improvement: {}%".format(
                trt_time, (c2_time - trt_time) / c2_time * 100))
            Y_trt = fetch_outputs(ws)

        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
Ejemplo n.º 23
0
    def instantiate(cls, node, **kwargs):
        input_data1 = node.input_tensor[0]
        attrs = node.attrs

        if (input_data1.ndim == 3):
            attrs['kernel_shape'] = (input_data1.shape[-1], )
            if (attrs.get('strides') == None):
                attrs['strides'] = (1, )

    # pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + kernel_spatial_shape[i] - input_spatial_shape[i]
            auto_pad = attrs.get('auto_pad')
            if (attrs.get('pads') == None):
                if (auto_pad == 'SAME_UPPER'):
                    attrs['pads'] = (
                        math.floor(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.ceil(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                    )
                elif (auto_pad == 'SAME_LOWER'):
                    attrs['pads'] = (
                        math.ceil(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.floor(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                    )
                elif (auto_pad == 'VALID'):
                    attrs['pads'] = (0, 0)
                elif (auto_pad == 'NOTSET' or auto_pad == None):
                    attrs['pads'] = (0, 0)
            if (attrs.get('storage_order') == None):
                attrs['storage_order'] = 0
            if (attrs.get('count_include_pad') == None):
                attrs['count_include_pad'] = 0

    # SAME: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
    # VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - kernel_spatial_shape[i] + 1) / strides_spatial_shape[i])
    # NOTSET: output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)
            tmp_shape = []
            for d in range(0, input_data1.ndim - 1):
                tmp_shape.append(input_data1.shape[d])
            if (auto_pad == 'SAME_UPPER') or (auto_pad == 'SAME_LOWER'):
                tmp_shape.append(
                    math.ceil(input_data1.shape[-1] / attrs['strides'][-1]))
            elif (auto_pad == 'VALID'):
                tmp_shape.append(
                    math.ceil(
                        (input_data1.shape[-1] - attrs['kernel_shape'][-1] + 1)
                        / attrs['strides'][-1]))
            else:
                tmp_shape.append(
                    math.floor(
                        (input_data1.shape[-1] + attrs['pads'][0] +
                         attrs['pads'][-1] - attrs['kernel_shape'][-1]) /
                        attrs['strides'][-1] + 1))
        elif (input_data1.ndim == 4):
            attrs['kernel_shape'] = (input_data1.shape[-2],
                                     input_data1.shape[-1])
            if (attrs.get('strides') == None):
                attrs['strides'] = (1, 1)

    # pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + kernel_spatial_shape[i] - input_spatial_shape[i]
            auto_pad = attrs.get('auto_pad')
            if (attrs.get('pads') == None):
                if (auto_pad == 'SAME_UPPER'):
                    attrs['pads'] = (
                        math.floor(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.floor(
                            ((math.ceil(input_data1.shape[3] /
                                        attrs['strides'][1]) - 1) *
                             attrs['strides'][1] + attrs['kernel_shape'][1] -
                             input_data1.shape[3]) / 2),
                        math.ceil(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.ceil(
                            ((math.ceil(input_data1.shape[3] /
                                        attrs['strides'][1]) - 1) *
                             attrs['strides'][1] + attrs['kernel_shape'][1] -
                             input_data1.shape[3]) / 2))
                elif (auto_pad == 'SAME_LOWER'):
                    attrs['pads'] = (
                        math.ceil(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.ceil(
                            ((math.ceil(input_data1.shape[3] /
                                        attrs['strides'][1]) - 1) *
                             attrs['strides'][1] + attrs['kernel_shape'][1] -
                             input_data1.shape[3]) / 2),
                        math.floor(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.floor(
                            ((math.ceil(input_data1.shape[3] /
                                        attrs['strides'][1]) - 1) *
                             attrs['strides'][1] + attrs['kernel_shape'][1] -
                             input_data1.shape[3]) / 2))
                elif (auto_pad == 'VALID'):
                    attrs['pads'] = (0, 0, 0, 0)
                elif (auto_pad == 'NOTSET' or auto_pad == None):
                    attrs['pads'] = (0, 0, 0, 0)
            if (attrs.get('storage_order') == None):
                attrs['storage_order'] = 0
            if (attrs.get('count_include_pad') == None):
                attrs['count_include_pad'] = 0

    # SAME: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
    # VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - kernel_spatial_shape[i] + 1) / strides_spatial_shape[i])
    # NOTSET: output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)
            tmp_shape = []
            for d in range(0, input_data1.ndim - 2):
                tmp_shape.append(input_data1.shape[d])
            if (auto_pad == 'SAME_UPPER') or (auto_pad == 'SAME_LOWER'):
                tmp_shape.append(
                    math.ceil(input_data1.shape[-2] / attrs['strides'][-2]))
                tmp_shape.append(
                    math.ceil(input_data1.shape[-1] / attrs['strides'][-1]))
            elif (auto_pad == 'VALID'):
                tmp_shape.append(
                    math.ceil(
                        (input_data1.shape[-2] - attrs['kernel_shape'][-2] + 1)
                        / attrs['strides'][-2]))
                tmp_shape.append(
                    math.ceil(
                        (input_data1.shape[-1] - attrs['kernel_shape'][-1] + 1)
                        / attrs['strides'][-1]))
            else:
                tmp_shape.append(
                    math.floor(
                        (input_data1.shape[-2] + attrs['pads'][0] +
                         attrs['pads'][-2] - attrs['kernel_shape'][-2]) /
                        attrs['strides'][-2] + 1))
                tmp_shape.append(
                    math.floor(
                        (input_data1.shape[-1] + attrs['pads'][1] +
                         attrs['pads'][-1] - attrs['kernel_shape'][-1]) /
                        attrs['strides'][-1] + 1))
        elif (input_data1.ndim == 5):
            attrs['kernel_shape'] = (input_data1.shape[-3],
                                     input_data1.shape[-2],
                                     input_data1.shape[-1])
            if (attrs.get('strides') == None):
                attrs['strides'] = (1, 1, 1)

    # pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + kernel_spatial_shape[i] - input_spatial_shape[i]
            auto_pad = attrs.get('auto_pad')
            if (attrs.get('pads') == None):
                if (auto_pad == 'SAME_UPPER'):
                    attrs['pads'] = (
                        math.floor(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.floor(
                            ((math.ceil(input_data1.shape[3] /
                                        attrs['strides'][1]) - 1) *
                             attrs['strides'][1] + attrs['kernel_shape'][1] -
                             input_data1.shape[3]) / 2),
                        math.floor(
                            ((math.ceil(input_data1.shape[4] /
                                        attrs['strides'][2]) - 1) *
                             attrs['strides'][2] + attrs['kernel_shape'][2] -
                             input_data1.shape[4]) / 2),
                        math.ceil(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.ceil(
                            ((math.ceil(input_data1.shape[3] /
                                        attrs['strides'][1]) - 1) *
                             attrs['strides'][1] + attrs['kernel_shape'][1] -
                             input_data1.shape[3]) / 2),
                        math.ceil(
                            ((math.ceil(input_data1.shape[4] /
                                        attrs['strides'][2]) - 1) *
                             attrs['strides'][2] + attrs['kernel_shape'][2] -
                             input_data1.shape[4]) / 2))
                elif (auto_pad == 'SAME_LOWER'):
                    attrs['pads'] = (
                        math.ceil(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.ceil(
                            ((math.ceil(input_data1.shape[3] /
                                        attrs['strides'][1]) - 1) *
                             attrs['strides'][1] + attrs['kernel_shape'][1] -
                             input_data1.shape[3]) / 2),
                        math.ceil(
                            ((math.ceil(input_data1.shape[4] /
                                        attrs['strides'][2]) - 1) *
                             attrs['strides'][2] + attrs['kernel_shape'][2] -
                             input_data1.shape[4]) / 2),
                        math.floor(
                            ((math.ceil(input_data1.shape[2] /
                                        attrs['strides'][0]) - 1) *
                             attrs['strides'][0] + attrs['kernel_shape'][0] -
                             input_data1.shape[2]) / 2),
                        math.floor(
                            ((math.ceil(input_data1.shape[3] /
                                        attrs['strides'][1]) - 1) *
                             attrs['strides'][1] + attrs['kernel_shape'][1] -
                             input_data1.shape[3]) / 2),
                        math.floor(
                            ((math.ceil(input_data1.shape[4] /
                                        attrs['strides'][2]) - 1) *
                             attrs['strides'][2] + attrs['kernel_shape'][2] -
                             input_data1.shape[4]) / 2))
                elif (auto_pad == 'VALID'):
                    attrs['pads'] = (0, 0, 0, 0, 0, 0)
                elif (auto_pad == 'NOTSET' or auto_pad == None):
                    attrs['pads'] = (0, 0, 0, 0, 0, 0)
            if (attrs.get('storage_order') == None):
                attrs['storage_order'] = 0
            if (attrs.get('count_include_pad') == None):
                attrs['count_include_pad'] = 0

    # SAME: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
    # VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - kernel_spatial_shape[i] + 1) / strides_spatial_shape[i])
    # NOTSET: output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)
            tmp_shape = []
            for d in range(0, input_data1.ndim - 3):
                tmp_shape.append(input_data1.shape[d])
            if (auto_pad == 'SAME_UPPER') or (auto_pad == 'SAME_LOWER'):
                tmp_shape.append(
                    math.ceil(input_data1.shape[-3] / attrs['strides'][-3]))
                tmp_shape.append(
                    math.ceil(input_data1.shape[-2] / attrs['strides'][-2]))
                tmp_shape.append(
                    math.ceil(input_data1.shape[-1] / attrs['strides'][-1]))
            elif (auto_pad == 'VALID'):
                tmp_shape.append(
                    math.ceil(
                        (input_data1.shape[-3] - attrs['kernel_shape'][-3] + 1)
                        / attrs['strides'][-3]))
                tmp_shape.append(
                    math.ceil(
                        (input_data1.shape[-2] - attrs['kernel_shape'][-2] + 1)
                        / attrs['strides'][-2]))
                tmp_shape.append(
                    math.ceil(
                        (input_data1.shape[-1] - attrs['kernel_shape'][-1] + 1)
                        / attrs['strides'][-1]))
            else:
                tmp_shape.append(
                    math.floor(
                        (input_data1.shape[-3] + attrs['pads'][0] +
                         attrs['pads'][-3] - attrs['kernel_shape'][-3]) /
                        attrs['strides'][-3] + 1))
                tmp_shape.append(
                    math.floor(
                        (input_data1.shape[-2] + attrs['pads'][1] +
                         attrs['pads'][-2] - attrs['kernel_shape'][-2]) /
                        attrs['strides'][-2] + 1))
                tmp_shape.append(
                    math.floor(
                        (input_data1.shape[-1] + attrs['pads'][2] +
                         attrs['pads'][-1] - attrs['kernel_shape'][-1]) /
                        attrs['strides'][-1] + 1))
        else:
            raise (ValueError)

        outputs_shape = tuple(tmp_shape)
        outputs_dtype = input_data1.dtype
        outputs_dict = {
            node.valid_var_name(node.outputs[0]):
            np.ones(shape=outputs_shape, dtype=outputs_dtype)
        }
        output_tensor = namedtupledict("output_tensor",
                                       outputs_dict.keys())(**outputs_dict)

        return cls(node,
                   input_tensor=node.input_tensor,
                   output_tensor=output_tensor,
                   attrs=attrs)
Ejemplo n.º 24
0
    def run(
            self,
            inputs,  # type: Any
            **kwargs  # type: Any
    ):
        # type: (...) -> Tuple[Any, ...]
        """Run the CoreML model on ``inputs`` and return its named outputs.

        Args:
            inputs: Sequence of arrays, paired positionally with
                ``self.input_names``. The sequence itself is not modified.
            **kwargs: Forwarded unchanged to the parent class ``run``.

        Returns:
            An ``Outputs`` named tuple whose fields are ``self.output_names``,
            holding the predictions cast to the dtype that
            ``TENSOR_TYPE_TO_NP_TYPE`` maps each output's ONNX type to.

        Unless ``self.disable_rank5_mapping`` is set, inputs are reshaped
        before prediction (rank 2 and rank 4 gain a leading axis, rank 3 is
        matched against the model spec) and outputs are reshaped back to the
        shape recorded in ``self.onnx_outputs_info``.
        """
        super(CoreMLRep, self).run(inputs, **kwargs)
        # Work on a shallow copy: the rank mapping below reassigns elements,
        # which must not leak into the caller's list and must also work when
        # the caller passes an immutable tuple.
        inputs_ = list(inputs)
        if not self.disable_rank5_mapping:
            for i, input_ in enumerate(inputs_):
                shape = input_.shape
                if len(shape) == 4 or len(shape) == 2:
                    # Prepend a single leading axis.
                    inputs_[i] = input_[np.newaxis, :]
                elif len(shape) == 3:
                    spec = self.model.get_spec()
                    spec_shape = [
                        int(k) for k in
                        spec.description.input[i].type.multiArrayType.shape
                    ]
                    # NOTE(review): assumes the spec's multi-array shape has
                    # at least three entries -- confirm against the converter.
                    prod = spec_shape[0] * spec_shape[1] * spec_shape[2]
                    onnx_shape = list(shape)
                    if onnx_shape != spec_shape:
                        if onnx_shape[2] == prod:
                            # Last ONNX axis holds the flattened spec shape.
                            inputs_[i] = np.reshape(
                                input_,
                                [onnx_shape[0], onnx_shape[1]] + spec_shape)
                        elif onnx_shape[1] * onnx_shape[2] == prod:
                            # Last two ONNX axes together hold the spec shape.
                            inputs_[i] = np.reshape(
                                input_, [1, onnx_shape[0]] + spec_shape)
        input_dict = dict(zip(self.input_names, map(np.array, inputs_)))
        _set_dtypes(input_dict, self.model)  #type: ignore

        prediction = self.model.predict(input_dict, self.useCPUOnly)
        output_values = [prediction[name] for name in self.output_names]

        if not self.disable_rank5_mapping:
            for i, output_ in enumerate(output_values):
                # Reshape the CoreML output to match ONNX's output shape.
                try:
                    output_values[i] = np.reshape(
                        output_, self.onnx_outputs_info[
                            self.output_names[i]][2])  # type: ignore
                except (ValueError, RuntimeError):
                    # np.reshape reports an incompatible target shape with
                    # ValueError; RuntimeError is kept for compatibility with
                    # the previous handler. The output is left un-reshaped.
                    print(
                        "Output '%s' shape incompatible between CoreML (%s) and onnx (%s)"
                        % (self.output_names[i], output_.shape,
                           self.onnx_outputs_info[self.output_names[i]]))

        # Cast each output to the numpy dtype expected for its ONNX type.
        for i, output_ in enumerate(output_values):
            output_type = self.onnx_outputs_info[self.output_names[i]][1]
            expected_dtype = TENSOR_TYPE_TO_NP_TYPE[output_type]
            if expected_dtype != output_.dtype:
                output_values[i] = output_.astype(expected_dtype)

        result = namedtupledict('Outputs', self.output_names)(
            *output_values)  # type: Tuple[Any, ...]
        return result