コード例 #1
0
def benchmark_caffe2_model(init_net, predict_net, warmup_iters=3, main_iters=10, layer_details=True):
    """Benchmark ``predict_net`` in a fresh workspace.

    Args:
        init_net: Optional initialisation net; when truthy it is run once
            before the predict net is created.
        predict_net: Net to create and benchmark; looked up by its ``name``.
        warmup_iters: Forwarded to ``BenchmarkNet`` as the warm-up count.
        main_iters: Forwarded to ``BenchmarkNet`` as the measured count.
        layer_details: Forwarded to ``BenchmarkNet`` unchanged.

    Returns:
        The first entry of the ``BenchmarkNet`` result, i.e. the execution
        time per iteration in milliseconds.
    """
    bench_ws = Workspace()
    if init_net:
        bench_ws.RunNetOnce(init_net)
    bench_ws.CreateNet(predict_net)

    net_name = predict_net.name
    timings = bench_ws.BenchmarkNet(net_name, warmup_iters, main_iters, layer_details)

    # Drop the workspace reference before handing the result back.
    del bench_ws
    return timings[0]
コード例 #2
0
ファイル: test_trt.py プロジェクト: zjf8888/pytorch
    def test_resnet50_core(self):
        """Check the transformed ResNet-50 net against the plain Caffe2 run.

        The model is first executed as-is on CUDA device 0 with every op's
        engine set to 'CUDNN', then executed again after being passed through
        ``transform_caffe2_net`` (the variant labelled "TRT" in the printed
        timings). Both runs are fed the same random input, their wall-clock
        times are printed, and the fetched outputs must agree within
        ``rtol=1e-3``.
        """
        batch_size = 2
        warmup_iters = 20
        timed_iters = 100
        banner = "Batch size: {}, repeat inference {} times, warmup {} times"
        print(banner.format(batch_size, timed_iters, warmup_iters))

        input_name = "real_data"
        init_net, pred_net, _ = self._get_c2_model('resnet50')
        self._add_head_tail(pred_net, input_name, 'real_softmax')
        input_shape = (batch_size, 3, 224, 224)

        # Pin both nets, and each op of the predict net, to the same CUDA
        # device option; ops additionally get the CUDNN engine.
        gpu_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        init_net.device_option.CopyFrom(gpu_option)
        pred_net.device_option.CopyFrom(gpu_option)
        for operator in pred_net.op:
            operator.device_option.CopyFrom(gpu_option)
            operator.engine = 'CUDNN'

        output_names = pred_net.external_output
        data = np.random.randn(*input_shape).astype(np.float32)

        # --- Reference run: the untransformed net -------------------------
        wspace = Workspace()
        with core.DeviceScope(gpu_option):
            wspace.FeedBlob(input_name, data)
            wspace.RunNetOnce(init_net)
            wspace.CreateNet(pred_net)
            for _ in range(warmup_iters):
                wspace.RunNet(pred_net.name)
            tic = time.time()
            for _ in range(timed_iters):
                wspace.RunNet(pred_net.name)
            c2_time = time.time() - tic
            c2_blobs = [wspace.FetchBlob(blob) for blob in output_names]
            Y_c2 = namedtupledict('Outputs', output_names)(*c2_blobs)
        wspace.ResetWorkspace()

        # --- Cut the graph; the original nets are not used afterwards -----
        init_net_cut, pred_net_cut = transform_caffe2_net(
            init_net, pred_net, {input_name: input_shape})
        del init_net, pred_net

        # The transformed net's first external input is the blob to feed.
        input_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))

        # --- Second run: the transformed net ------------------------------
        # Rebinding the same name lets go of the first workspace object here.
        wspace = Workspace()
        with core.DeviceScope(gpu_option):
            wspace.FeedBlob(input_name, data)
            wspace.RunNetOnce(init_net_cut)
            wspace.CreateNet(pred_net_cut)
            for _ in range(warmup_iters):
                wspace.RunNet(pred_net_cut.name)
            tic = time.time()
            for _ in range(timed_iters):
                wspace.RunNet(pred_net_cut.name)
            trt_time = time.time() - tic
            improvement_pct = (c2_time - trt_time) / c2_time * 100
            print("TRT runtime: {}s, improvement: {}%".format(
                trt_time, improvement_pct))
            trt_blobs = [wspace.FetchBlob(blob) for blob in output_names]
            Y_trt = namedtupledict('Outputs', output_names)(*trt_blobs)

        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)