Example #1
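Caffe2Backend.run_node: converts a single ONNX node into Caffe2 operators and runs it once in a scratch Workspace, returning the outputs as a named tuple.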
    def run_node(cls, node, inputs, device='CPU', opset_version=_known_opset_version, outputs_info=None):
        super(Caffe2Backend, cls).run_node(node, inputs, device=device,
                                           outputs_info=outputs_info, opset_version=opset_version)

        value_infos = []
        device_option = get_device_option(Device(device))
        ws = Workspace()
        with core.DeviceScope(device_option):  # temporary!
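            # Feed each input into the scratch workspace and record its dtype/shape
            # as a serialized ValueInfoProto for the node converter.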
            if isinstance(inputs, dict):
                for key, value in inputs.items():
                    ws.FeedBlob(key, value)
                    value_infos.append(onnx.helper.make_tensor_value_info(
                        name=key,
                        elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[value.dtype],
                        shape=value.shape).SerializeToString())
            else:
                assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                    node.op_type, len(node.input), len(inputs))
                for key, value in zip(node.input, inputs):
                    ws.FeedBlob(key, value)
                    value_infos.append(onnx.helper.make_tensor_value_info(
                        name=key,
                        elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[value.dtype],
                        shape=value.shape).SerializeToString())

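            # Convert the single ONNX node into Caffe2 operators via the C++ backend
            # and pin each resulting op to the requested device.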
            ops = []
            cbackend = C.Caffe2Backend(cls._dummy_name)
            ops_str = cbackend.convert_node(node.SerializeToString(), value_infos, opset_version)
            for s in ops_str[0] + ops_str[1]:
                op = caffe2_pb2.OperatorDef()
                op.ParseFromString(s)
                op.device_option.CopyFrom(device_option)
                ops.append(op)
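            # Run the converted operators once and fetch the outputs by name.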
            ws.RunOperatorsOnce(ops)
            output_values = [ws.FetchBlob(name) for name in node.output]
            return namedtupledict('Outputs', node.output)(*output_values)
Example #2
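op_func: a functional-style wrapper that derives input/output names from the operator schema, then builds and runs a single Caffe2 operator in a scratch Workspace.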
        def op_func(*inputs, **args):
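            # Execute one operator in a scratch workspace, using its registered
            # schema to derive and validate the input/output names.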
            ws = Workspace()
            schema = OpSchema.get(op_type)
            input_prefix = 'input_'
            output_prefix = 'output_'

            def get_name_list(prefix, num, max_num):
                return [prefix + str(x) for x in range(min(num, max_num))]

            input_names, output_names = [], []
            input_names = get_name_list(
                input_prefix, len(inputs), schema.max_input
            )
            # verify the length of input name is in range
            # of schema
            num_input = len(input_names)
            if num_input > schema.max_input or num_input < \
               schema.min_input or not schema.num_inputs_allowed(num_input):
                raise ValueError(
                    "Functional C2: Number of inputs not in range: "
                    "{} - {} or not allowed."
                    .format(schema.min_input, schema.max_input)
                )

            if 'num_output' in args:
                num_output = args['num_output']
                if num_output > schema.max_output or \
                   num_output < schema.min_output or \
                   not schema.num_outputs_allowed(num_output) or \
                   not schema.num_inputs_outputs_allowed(num_input,
                                                         num_output):
                    raise ValueError(
                        "Functional C2: Number of outputs not in range: "
                        "{} - {} or not allowed."
                        .format(schema.min_output, schema.max_output)
                    )
                output_names = get_name_list(
                    output_prefix, num_output, schema.max_output
                )
                args.pop('num_output')
            calculated = schema.CalculateOutput(num_input)
            if not output_names and calculated != -1:
                output_names = get_name_list(
                    output_prefix, calculated, schema.max_output
                )

            if not output_names:
                max_output = schema.max_output
                # For an op with max_output == inf and no outputs defined in the
                # schema, the user must pass num_output explicitly.
                if max_output == schema.inf:
                    raise ValueError(
                        "For operators with max_output == inf, "
                        "the user should pass num_output explicitly."
                    )
                output_names = get_name_list(
                    output_prefix, max_output, max_output
                )

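            # Build the OperatorDef from the derived names and any remaining kwargs.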
            op = core.CreateOperator(
                op_type, input_names, output_names, **args
            )
            device_option = args.get('device_option', core.DeviceOption(caffe2_pb2.CPU))
            with core.DeviceScope(device_option):
                for i, input_blob in enumerate(inputs):
                    ws.FeedBlob(input_names[i], input_blob)
                # Run the operator once and fetch its outputs
                ws.RunOperatorOnce(op)
                output_values = [ws.FetchBlob(x) for x in output_names]
                return namedtupledict('output', output_names)(*output_values)
Example #3
File: test_trt.py  Project: zjf8888/pytorch
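test_resnet50_core: benchmarks ResNet-50 with plain Caffe2 on CUDA, then with the TensorRT-transformed net, and checks that the outputs agree.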
    def test_resnet50_core(self):
        N = 2
        warmup = 20
        repeat = 100
        print("Batch size: {}, repeat inference {} times, warmup {} times".
              format(N, repeat, warmup))
        init_net, pred_net, _ = self._get_c2_model('resnet50')
        self._add_head_tail(pred_net, 'real_data', 'real_softmax')
        input_blob_dims = (N, 3, 224, 224)
        input_name = "real_data"

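        # Pin the nets to GPU 0 and use cuDNN engines for the Caffe2 baseline.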
        device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        init_net.device_option.CopyFrom(device_option)
        pred_net.device_option.CopyFrom(device_option)
        for op in pred_net.op:
            op.device_option.CopyFrom(device_option)
            op.engine = 'CUDNN'
        net_outputs = pred_net.external_output
        Y_c2 = None
        data = np.random.randn(*input_blob_dims).astype(np.float32)
        c2_time = 1
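        # Baseline: time the original Caffe2 net in a scratch workspace.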
        ws = Workspace()
        with core.DeviceScope(device_option):
            ws.FeedBlob(input_name, data)
            ws.RunNetOnce(init_net)
            ws.CreateNet(pred_net)
            for _ in range(warmup):
                ws.RunNet(pred_net.name)
            start = time.time()
            for _ in range(repeat):
                ws.RunNet(pred_net.name)
            end = time.time()
            c2_time = end - start
            output_values = [ws.FetchBlob(name) for name in net_outputs]
            Y_c2 = namedtupledict('Outputs', net_outputs)(*output_values)
        ws.ResetWorkspace()

        # Cut the graph: offload the TensorRT-supported subgraph to TRT ops
        init_net_cut, pred_net_cut = transform_caffe2_net(
            init_net, pred_net, {input_name: input_blob_dims})
        del init_net, pred_net
        #print_net(pred_net_cut)

        Y_trt = None
        input_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))
        ws = Workspace()
        with core.DeviceScope(device_option):
            ws.FeedBlob(input_name, data)
            ws.RunNetOnce(init_net_cut)
            ws.CreateNet(pred_net_cut)
            for _ in range(warmup):
                ws.RunNet(pred_net_cut.name)
            start = time.time()
            for _ in range(repeat):
                ws.RunNet(pred_net_cut.name)
            end = time.time()
            trt_time = end - start
            print("TRT runtime: {}s, improvement: {}%".format(
                trt_time, (c2_time - trt_time) / c2_time * 100))
            output_values = [ws.FetchBlob(name) for name in net_outputs]
            Y_trt = namedtupledict('Outputs', net_outputs)(*output_values)
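        # The TensorRT outputs must match the Caffe2 baseline within tolerance.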
        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)