Example n. 1
0
def c2_native_run_net(init_net, predict_net, inputs):
    """Run a Caffe2 net natively and return (workspace, named outputs).

    Feeds ``inputs`` (a dict of name -> array, or a positional sequence
    matched against the net's external inputs), runs ``init_net`` and then
    ``predict_net`` once, and fetches every external output.
    """
    ws = Workspace()
    if init_net:
        ws.RunNetOnce(init_net)

    device = predict_net.device_option
    if isinstance(inputs, dict):
        # Named inputs: feed each blob directly.
        for name, blob in inputs.items():
            ws.FeedBlob(name, blob, device)
    else:
        # Positional inputs: prefer filling only the not-yet-initialized
        # external inputs; otherwise assume the sequence lines up with the
        # first len(inputs) entries of external_input.
        missing = [
            n for n in predict_net.external_input if not ws.HasBlob(n)
        ]
        if len(missing) == len(inputs):
            for name, blob in zip(missing, inputs):
                ws.FeedBlob(name, blob, device)
        else:
            assert (len(inputs) <= len(predict_net.external_input))
            for name, blob in zip(predict_net.external_input, inputs):
                ws.FeedBlob(name, blob, device)

    ws.RunNetOnce(predict_net)

    out_names = predict_net.external_output
    out_values = [ws.FetchBlob(n) for n in out_names]
    return ws, namedtupledict('Outputs', out_names)(*out_values)
Example n. 2
0
 def _test_onnx_importer(self,
                         model_name,
                         data_input_index,
                         opset_version=onnx.defs.onnx_opset_version()):
     """Download an ONNX Zoo model, run it through Caffe2 and through the
     TensorRT op, and check the two outputs agree within rtol=1e-3."""
     model_dir = _download_onnx_model(model_name, opset_version)
     model_def = onnx.load(os.path.join(model_dir, 'model.onnx'))
     # Derive the input shape from the model's own value_info.
     shape_proto = model_def.graph.input[data_input_index].type.tensor_type.shape
     input_blob_dims = [int(d.dim_value) for d in shape_proto.dim]
     op_inputs = [vi.name for vi in model_def.graph.input]
     op_outputs = [vi.name for vi in model_def.graph.output]
     print("{}".format(op_inputs))
     data = np.random.randn(*input_blob_dims).astype(np.float32)
     Y_c2 = c2.run_model(model_def, {op_inputs[data_input_index]: data})
     op = convert_onnx_model_to_trt_op(model_def, verbosity=3)
     dev = core.DeviceOption(caffe2_pb2.CUDA, 0)
     op.device_option.CopyFrom(dev)
     Y_trt = None
     ws = Workspace()
     with core.DeviceScope(dev):
         ws.FeedBlob(op_inputs[data_input_index], data)
         if opset_version >= 5:
             # Some newer models from ONNX Zoo come with pre-set "data_0" input
             ws.FeedBlob("data_0", data)
         ws.RunOperatorsOnce([op])
         fetched = [ws.FetchBlob(n) for n in op_outputs]
         Y_trt = namedtupledict('Outputs', op_outputs)(*fetched)
     np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
Example n. 3
0
    def run_node(cls, node, inputs, device='CPU', opset_version=_known_opset_version, outputs_info=None):
        """Convert a single ONNX node to Caffe2 operator(s) and execute it.

        ``inputs`` is either a dict of blob name -> value, or a sequence
        matched positionally against ``node.input``.  Returns a namedtuple
        with one field per ``node.output`` name.
        """
        super(Caffe2Backend, cls).run_node(node, inputs, device=device, outputs_info=outputs_info)

        device_option = get_device_option(Device(device))
        workspace = Workspace()
        with core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                for name, tensor in inputs.items():
                    workspace.FeedBlob(name, tensor)
            else:
                assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                    node.op_type, len(node.input), len(inputs))
                for name, tensor in zip(node.input, inputs):
                    workspace.FeedBlob(name, tensor)

            # Convert through the C++ backend; both returned groups of
            # serialized ops (init + predict) are run together here.
            cbackend = C.Caffe2Backend(cls._dummy_name)
            converted = cbackend.convert_node(node.SerializeToString(), opset_version)
            ops = []
            for serialized in converted[0] + converted[1]:
                parsed = caffe2_pb2.OperatorDef()
                parsed.ParseFromString(serialized)
                parsed.device_option.CopyFrom(device_option)
                ops.append(parsed)
            # For testing
            if "ONNX_CAFFE2_DEBUG" in os.environ:
                init_ops, ops2, _ = cls._onnx_node_to_caffe2_op(
                    None, None, node, opset_version or cls._known_opset_version)
                ops2 = init_ops + ops2
                for op in ops2:
                    op.device_option.CopyFrom(device_option)
                print("\nC++:\n{}\nPython:\n{}".format(ops, ops2))
            workspace.RunOperatorsOnce(ops)
            results = [workspace.FetchBlob(name) for name in node.output]
            return namedtupledict('Outputs', node.output)(*results)
Example n. 4
0
    def run_node(cls,
                 node,
                 inputs,
                 device='CPU',
                 opset_version=_known_opset_version,
                 outputs_info=None):
        """Convert a single ONNX node to Caffe2 operator(s) and run it.

        Args:
            node: onnx NodeProto to execute.
            inputs: dict mapping input names to numpy arrays, or a sequence
                of arrays matched positionally against ``node.input``.
            device: device string understood by ``Device`` (e.g. 'CPU').
            opset_version: ONNX opset version to convert against.
            outputs_info: forwarded to the base-class bookkeeping hook.

        Returns:
            A namedtuple of output blobs, one field per ``node.output`` name.
        """
        super(Caffe2Backend, cls).run_node(node,
                                           inputs,
                                           device=device,
                                           outputs_info=outputs_info,
                                           opset_version=opset_version)

        def _feed(ws_, name, value, infos):
            # Feed one blob and record its serialized ValueInfoProto so the
            # C++ converter knows the input dtype/shape.  (This was duplicated
            # verbatim in both branches below; factored out for consistency.)
            ws_.FeedBlob(name, value)
            infos.append(
                onnx.helper.make_tensor_value_info(
                    name=name,
                    elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[value.dtype],
                    shape=value.shape).SerializeToString())

        value_infos = []
        device_option = get_device_option(Device(device))
        ws = Workspace()
        with core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                for key, value in inputs.items():
                    _feed(ws, key, value, value_infos)
            else:
                assert len(node.input) == len(
                    inputs), "{}: expected {} but got {}".format(
                        node.op_type, len(node.input), len(inputs))
                for key, value in zip(node.input, inputs):
                    _feed(ws, key, value, value_infos)

            # Convert via the C++ backend; run both op groups (init + predict).
            ops = []
            cbackend = C.Caffe2Backend(cls._dummy_name)
            ops_str = cbackend.convert_node(node.SerializeToString(),
                                            value_infos, opset_version)
            for s in ops_str[0] + ops_str[1]:
                op = caffe2_pb2.OperatorDef()
                op.ParseFromString(s)
                op.device_option.CopyFrom(device_option)
                ops.append(op)
            ws.RunOperatorsOnce(ops)
            output_values = [ws.FetchBlob(name) for name in node.output]
            return namedtupledict('Outputs', node.output)(*output_values)
Example n. 5
0
def c2_native_run_op(op_def, inputs):
    """Execute a single Caffe2 OperatorDef and return (workspace, outputs).

    ``inputs`` is a dict of blob name -> value, or a sequence matched
    positionally against ``op_def.input``.
    """
    ws = Workspace()
    device = op_def.device_option
    if isinstance(inputs, dict):
        for name, blob in inputs.items():
            ws.FeedBlob(name, blob, device)
    else:
        assert (len(op_def.input) == len(inputs))
        for name, blob in zip(op_def.input, inputs):
            ws.FeedBlob(name, blob, device)

    ws.RunOperatorOnce(op_def)

    names = op_def.output
    values = [ws.FetchBlob(n) for n in names]
    return ws, namedtupledict('Outputs', names)(*values)
Example n. 6
0
 def _test_relu_graph(self, X, batch_size, trt_max_batch_size):
     """Build a one-op Relu model, run it via Caffe2 and via the TRT op,
     and check the outputs match."""
     node_def = make_node("Relu", ["X"], ["Y"])
     Y_c2 = c2.run_node(node_def, {"X": X})
     # Input and output share the same static shape.
     shape = [batch_size, 1, 3, 2]
     graph_def = make_graph(
         [node_def],
         name="test",
         inputs=[make_tensor_value_info("X", onnx.TensorProto.FLOAT, shape)],
         outputs=[make_tensor_value_info("Y", onnx.TensorProto.FLOAT, shape)])
     model_def = make_model(graph_def, producer_name='relu-test')
     op_outputs = [o.name for o in model_def.graph.output]
     op = convert_onnx_model_to_trt_op(model_def,
                                       max_batch_size=trt_max_batch_size)
     dev = core.DeviceOption(caffe2_pb2.CUDA, 0)
     op.device_option.CopyFrom(dev)
     Y_trt = None
     ws = Workspace()
     with core.DeviceScope(dev):
         ws.FeedBlob("X", X)
         ws.RunOperatorsOnce([op])
         fetched = [ws.FetchBlob(n) for n in op_outputs]
         Y_trt = namedtupledict('Outputs', op_outputs)(*fetched)
     np.testing.assert_almost_equal(Y_c2, Y_trt)
Example n. 7
0
def c2_native_run_net(init_net, predict_net, inputs, debug_arg=None):
    """Run init_net then predict_net in a fresh workspace.

    ``inputs`` is a dict of name -> value, or a positional sequence matched
    against the net's external inputs.  ``debug_arg`` is only echoed in the
    diagnostic output printed when the positional inputs overflow
    ``external_input``.  Returns (workspace, named outputs).
    """
    ws = Workspace()
    if init_net:
        ws.RunNetOnce(init_net)

    device = predict_net.device_option
    if isinstance(inputs, dict):
        for name, blob in inputs.items():
            ws.FeedBlob(name, blob, device)
    else:
        uninitialized = [
            n for n in predict_net.external_input if not ws.HasBlob(n)
        ]
        if len(uninitialized) == len(inputs):
            for name, blob in zip(uninitialized, inputs):
                ws.FeedBlob(name, blob, device)
        else:
            # Everything is already initialized, so feed only the first
            # len(inputs) external inputs positionally.
            # Extra logging to help debug sporadic sandcastle failures.
            if len(inputs) > len(predict_net.external_input):
                print("c2_native_run_net assert. len(inputs)=", len(inputs),
                      "len(predict_net.external_input)=",
                      len(predict_net.external_input))
                print("debug_arg: ", debug_arg)
                print("predict_net ", type(predict_net), ":", predict_net)
                print("inputs ", type(inputs), ":", inputs)
            assert (len(inputs) <= len(predict_net.external_input))
            for name, blob in zip(predict_net.external_input, inputs):
                ws.FeedBlob(name, blob, device)

    ws.RunNetOnce(predict_net)

    names = predict_net.external_output
    values = [ws.FetchBlob(n) for n in names]
    return ws, namedtupledict('Outputs', names)(*values)
Example n. 8
0
 def test_resnet50(self):
     """Run the ONNX resnet50 model via Caffe2 and via the TRT op and
     check the two outputs agree within rtol=1e-3."""
     input_blob_dims = (1, 3, 224, 224)
     model_dir = _download_onnx_model('resnet50')
     model_def = onnx.load(os.path.join(model_dir, 'model.onnx'))
     op_inputs = [vi.name for vi in model_def.graph.input]
     op_outputs = [vi.name for vi in model_def.graph.output]
     data = np.random.randn(*input_blob_dims).astype(np.float32)
     Y_c2 = c2.run_model(model_def, {op_inputs[0]: data})
     op = convert_onnx_model_to_trt_op(model_def)
     dev = core.DeviceOption(caffe2_pb2.CUDA, 0)
     op.device_option.CopyFrom(dev)
     Y_trt = None
     ws = Workspace()
     with core.DeviceScope(dev):
         ws.FeedBlob(op_inputs[0], data)
         ws.RunOperatorsOnce([op])
         fetched = [ws.FetchBlob(n) for n in op_outputs]
         Y_trt = namedtupledict('Outputs', op_outputs)(*fetched)
     np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
Example n. 9
0
        def op_func(*inputs, **args):
            """Run a single Caffe2 operator of type `op_type` functionally.

            Feeds `inputs` positionally as blobs named 'input_<i>',
            validates input/output counts against the operator's OpSchema,
            runs the operator once, and returns a namedtuple of the fetched
            outputs.  Pass `num_output` in `args` to set the output count
            explicitly; any remaining kwargs go to CreateOperator.
            """
            ws = Workspace()
            schema = OpSchema.get(op_type)
            input_prefix = 'input_'
            output_prefix = 'output_'

            def get_name_list(prefix, num, max_num):
                # Generate blob names like 'input_0', capped at the schema max.
                return [prefix + str(x) for x in range(min(num, max_num))]

            input_names, output_names = [], []
            input_names = get_name_list(
                input_prefix, len(inputs), schema.max_input
            )
            # verify the length of input name is in range
            # of schema
            num_input = len(input_names)
            if num_input > schema.max_input or num_input < \
               schema.min_input or not schema.num_inputs_allowed(num_input):
                raise ValueError(
                    "Functional C2: Number of inputs not in \
                range: {} - {} or not allowed."
                    .format(schema.min_input, schema.max_input)
                )

            if 'num_output' in args:
                # Caller chose the output count explicitly; validate it
                # against the schema before consuming the kwarg so it is
                # not forwarded to CreateOperator.
                num_output = args['num_output']
                if num_output > schema.max_output or \
                   num_output < schema.min_output or \
                   not schema.num_outputs_allowed(num_output) or \
                   not schema.num_inputs_outputs_allowed(num_input,
                                                         num_output):
                    raise ValueError(
                        "Functional C2: Number of output \
                    not in range: {} - {} or not allowed"
                        .format(schema.min_output, schema.max_output)
                    )
                output_names = get_name_list(
                    output_prefix, num_output, schema.max_output
                )
                args.pop('num_output')
            # Fall back to the schema deriving the output count from the
            # input count (-1 means it cannot).
            calculated = schema.CalculateOutput(num_input)
            if not output_names and calculated != -1:
                output_names = get_name_list(
                    output_prefix, calculated, schema.max_output
                )

            if not output_names:
                max_output = schema.max_output
                # For an op with max_output == inf
                # and no Output defined in schema
                # user should pass output_size explicitly
                if schema.inf == max_output:
                    raise ValueError(
                        "For operators with max_output == inf,\
                        user should pass num_output explicity."
                    )
                output_names = get_name_list(
                    output_prefix, max_output, max_output
                )

            op = core.CreateOperator(
                op_type, input_names, output_names, **args
            )
            # Default to CPU unless the caller supplied device_option.
            device_option = args.get('device_option', core.DeviceOption(caffe2_pb2.CPU))
            with core.DeviceScope(device_option):
                for i, input_blob in enumerate(inputs):
                    ws.FeedBlob(input_names[i], input_blob)
                # RunOperator
                ws.RunOperatorOnce(op)
                output_values = [ws.FetchBlob(x) for x in output_names]
                return namedtupledict('output', output_names)(*output_values)
Example n. 10
0
    def test_resnet50_core(self):
        """Benchmark resnet50 run natively by Caffe2 vs. the TRT-cut graph,
        print the timing improvement, and check the outputs still agree."""
        N = 2          # batch size
        warmup = 20    # untimed warmup iterations
        repeat = 100   # timed iterations
        print("Batch size: {}, repeat inference {} times, warmup {} times".
              format(N, repeat, warmup))
        init_net, pred_net, _ = self._get_c2_model('resnet50')
        self._add_head_tail(pred_net, 'real_data', 'real_softmax')
        input_blob_dims = (N, 3, 224, 224)
        input_name = "real_data"

        # Force the whole net (and every op) onto GPU 0 with CUDNN engines.
        device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        init_net.device_option.CopyFrom(device_option)
        pred_net.device_option.CopyFrom(device_option)
        for op in pred_net.op:
            op.device_option.CopyFrom(device_option)
            op.engine = 'CUDNN'
        net_outputs = pred_net.external_output
        Y_c2 = None
        data = np.random.randn(*input_blob_dims).astype(np.float32)
        c2_time = 1
        ws = Workspace()
        with core.DeviceScope(device_option):
            ws.FeedBlob(input_name, data)
            ws.RunNetOnce(init_net)
            ws.CreateNet(pred_net)
            # Warm up before timing so lazy initialization is excluded.
            for _ in range(warmup):
                ws.RunNet(pred_net.name)
            start = time.time()
            for _ in range(repeat):
                ws.RunNet(pred_net.name)
            end = time.time()
            c2_time = end - start
            output_values = [ws.FetchBlob(name) for name in net_outputs]
            Y_c2 = namedtupledict('Outputs', net_outputs)(*output_values)
        ws.ResetWorkspace()

        # Cut the graph
        init_net_cut, pred_net_cut = transform_caffe2_net(
            init_net, pred_net, {input_name: input_blob_dims})
        del init_net, pred_net
        #print_net(pred_net_cut)

        Y_trt = None
        # The cut graph may rename the input; use its first external input.
        input_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))
        ws = Workspace()
        with core.DeviceScope(device_option):
            ws.FeedBlob(input_name, data)
            ws.RunNetOnce(init_net_cut)
            ws.CreateNet(pred_net_cut)
            for _ in range(warmup):
                ws.RunNet(pred_net_cut.name)
            start = time.time()
            for _ in range(repeat):
                ws.RunNet(pred_net_cut.name)
            end = time.time()
            trt_time = end - start
            print("TRT runtime: {}s, improvement: {}%".format(
                trt_time, (c2_time - trt_time) / c2_time * 100))
            output_values = [ws.FetchBlob(name) for name in net_outputs]
            Y_trt = namedtupledict('Outputs', net_outputs)(*output_values)
        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)