예제 #1
0
def c2_native_run_net(init_net, predict_net, inputs):
    ws = Workspace()
    if init_net:
        ws.RunNetOnce(init_net)

    if isinstance(inputs, dict):
        for key, value in inputs.items():
            ws.FeedBlob(key, value, predict_net.device_option)
    else:
        uninitialized = [
            input_name for input_name in predict_net.external_input
            if not ws.HasBlob(input_name)
        ]
        if len(uninitialized) == len(inputs):
            for key, value in zip(uninitialized, inputs):
                ws.FeedBlob(key, value, predict_net.device_option)
        else:
            # If everything is initialized,
            # we just initialized the first len(inputs) external_input.
            assert (len(inputs) <= len(predict_net.external_input))
            for i in range(len(inputs)):
                ws.FeedBlob(predict_net.external_input[i], inputs[i],
                            predict_net.device_option)

    ws.RunNetOnce(predict_net)

    output_names = predict_net.external_output
    output_values = [ws.FetchBlob(name) for name in output_names]
    return ws, namedtupledict('Outputs', output_names)(*output_values)
예제 #2
0
def benchmark_caffe2_model(init_net, predict_net, warmup_iters=3, main_iters=10, layer_details=True):
    '''
        Run the benchmark net on the target model.
        Return the execution time per iteration (millisecond).
    '''
    ws = Workspace()
    if init_net:
        ws.RunNetOnce(init_net)
    ws.CreateNet(predict_net)
    results = ws.BenchmarkNet(predict_net.name, warmup_iters, main_iters, layer_details)
    del ws
    return results[0]
예제 #3
0
def c2_native_run_net(init_net, predict_net, inputs, debug_arg=None):
    ws = Workspace()
    if init_net:
        ws.RunNetOnce(init_net)

    if isinstance(inputs, dict):
        for key, value in inputs.items():
            ws.FeedBlob(key, value, predict_net.device_option)
    else:
        uninitialized = [
            input_name for input_name in predict_net.external_input
            if not ws.HasBlob(input_name)
        ]
        if len(uninitialized) == len(inputs):
            for key, value in zip(uninitialized, inputs):
                ws.FeedBlob(key, value, predict_net.device_option)
        else:
            # If everything is initialized,
            # we just initialized the first len(inputs) external_input.
            # Added some extra logging to help debug sporadic sandcastle fails
            if len(inputs) > len(predict_net.external_input):
                print("c2_native_run_net assert. len(inputs)=", len(inputs),
                      "len(predict_net.external_input)=",
                      len(predict_net.external_input))
                print("debug_arg: ", debug_arg)
                print("predict_net ", type(predict_net), ":", predict_net)
                print("inputs ", type(inputs), ":", inputs)
            assert (len(inputs) <= len(predict_net.external_input))
            for i in range(len(inputs)):
                ws.FeedBlob(predict_net.external_input[i], inputs[i],
                            predict_net.device_option)

    ws.RunNetOnce(predict_net)

    output_names = predict_net.external_output
    output_values = [ws.FetchBlob(name) for name in output_names]
    return ws, namedtupledict('Outputs', output_names)(*output_values)
예제 #4
0
파일: test_trt.py 프로젝트: zjf8888/pytorch
    def test_resnet50_core(self):
        N = 2
        warmup = 20
        repeat = 100
        print("Batch size: {}, repeat inference {} times, warmup {} times".
              format(N, repeat, warmup))
        init_net, pred_net, _ = self._get_c2_model('resnet50')
        self._add_head_tail(pred_net, 'real_data', 'real_softmax')
        input_blob_dims = (N, 3, 224, 224)
        input_name = "real_data"

        device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        init_net.device_option.CopyFrom(device_option)
        pred_net.device_option.CopyFrom(device_option)
        for op in pred_net.op:
            op.device_option.CopyFrom(device_option)
            op.engine = 'CUDNN'
        net_outputs = pred_net.external_output
        Y_c2 = None
        data = np.random.randn(*input_blob_dims).astype(np.float32)
        c2_time = 1
        ws = Workspace()
        with core.DeviceScope(device_option):
            ws.FeedBlob(input_name, data)
            ws.RunNetOnce(init_net)
            ws.CreateNet(pred_net)
            for _ in range(warmup):
                ws.RunNet(pred_net.name)
            start = time.time()
            for _ in range(repeat):
                ws.RunNet(pred_net.name)
            end = time.time()
            c2_time = end - start
            output_values = [ws.FetchBlob(name) for name in net_outputs]
            Y_c2 = namedtupledict('Outputs', net_outputs)(*output_values)
        ws.ResetWorkspace()

        # Cut the graph
        init_net_cut, pred_net_cut = transform_caffe2_net(
            init_net, pred_net, {input_name: input_blob_dims})
        del init_net, pred_net
        #print_net(pred_net_cut)

        Y_trt = None
        input_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))
        ws = Workspace()
        with core.DeviceScope(device_option):
            ws.FeedBlob(input_name, data)
            ws.RunNetOnce(init_net_cut)
            ws.CreateNet(pred_net_cut)
            for _ in range(warmup):
                ws.RunNet(pred_net_cut.name)
            start = time.time()
            for _ in range(repeat):
                ws.RunNet(pred_net_cut.name)
            end = time.time()
            trt_time = end - start
            print("TRT runtime: {}s, improvement: {}%".format(
                trt_time, (c2_time - trt_time) / c2_time * 100))
            output_values = [ws.FetchBlob(name) for name in net_outputs]
            Y_trt = namedtupledict('Outputs', net_outputs)(*output_values)
        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)