예제 #1
0
def benchmark(args):
    """Benchmark a downloaded Caffe2 model on the requested device and print its FPS.

    Args:
        args: Options object. The attributes read here are ``batch_size``,
            ``model``, ``device`` (``'CPU'``, ``'MKL'`` or ``'IDEEP'``),
            ``warmup_iterations``, ``iterations`` and ``layer_wise_benchmark``.

    Raises:
        Exception: If ``args.device`` is not one of the supported device names.
            The check runs first, before the model is fetched.
    """
    # Fail fast: reject an unsupported device before fetching the model and
    # generating inputs, instead of after that work has already been done.
    if args.device not in ('CPU', 'MKL', 'IDEEP'):
        raise Exception("Unknown device: {}".format(args.device))

    print('Batch size: {}'.format(args.batch_size))
    mf = ModelDownloader()
    init_net, pred_net, value_info = mf.get_c2_model(args.model)
    # The leading dimension of each recorded input shape is replaced by the
    # requested batch size.
    input_shapes = {
        k: [args.batch_size] + v[-1][1:]
        for (k, v) in value_info.items()
    }
    print("input info: {}".format(input_shapes))
    # Random float32 tensors stand in for real inputs.
    external_inputs = {
        k: np.random.randn(*shape).astype(np.float32)
        for k, shape in input_shapes.items()
    }

    if args.device == 'CPU':
        device_option = core.DeviceOption(caffe2_pb2.CPU)
    elif args.device == 'MKL':
        device_option = core.DeviceOption(caffe2_pb2.MKLDNN)
    else:  # 'IDEEP' -- the only remaining name accepted by the check above
        device_option = core.DeviceOption(caffe2_pb2.IDEEP)
    print("Device option: {}, {}".format(args.device, device_option))
    pred_net.device_option.CopyFrom(device_option)
    for op in pred_net.op:
        op.device_option.CopyFrom(device_option)

    # Hack to initialize weights into the MKL/IDEEP context: run init_net once,
    # copy every resulting blob out of the workspace, reset the workspace, and
    # feed the blobs back in under the target device option.
    workspace.RunNetOnce(init_net)
    weights = {name: workspace.FetchBlob(name) for name in workspace.Blobs()}
    # External inputs are added last so they override same-named blobs.
    weights.update(external_inputs)
    workspace.ResetWorkspace()

    with core.DeviceScope(device_option):
        for name, blob in weights.items():
            workspace.FeedBlob(name, blob, device_option)
        workspace.CreateNet(pred_net)
        res = workspace.BenchmarkNet(pred_net.name, args.warmup_iterations,
                                     args.iterations,
                                     args.layer_wise_benchmark)
        # NOTE(review): res[0] is presumably milliseconds per iteration for the
        # whole net -- confirm against the BenchmarkNet documentation.
        print("FPS: {:.2f}".format(1 / res[0] * 1000 * args.batch_size))
예제 #2
0
def benchmark(args):
    """Benchmark a downloaded Caffe2 model on the requested device and print its FPS.

    Args:
        args: Options object. The attributes read here are ``batch_size``,
            ``model``, ``device`` (``'CPU'``, ``'MKL'`` or ``'IDEEP'``),
            ``warmup_iterations``, ``iterations`` and ``layer_wise_benchmark``.

    Raises:
        Exception: If ``args.device`` is not one of the supported device names.
            The check runs first, before the model is fetched.
    """
    # Fail fast: reject an unsupported device before fetching the model and
    # generating inputs, instead of after that work has already been done.
    if args.device not in ('CPU', 'MKL', 'IDEEP'):
        raise Exception("Unknown device: {}".format(args.device))

    print('Batch size: {}'.format(args.batch_size))
    mf = ModelDownloader()
    init_net, pred_net, value_info = mf.get_c2_model(args.model)
    # The leading dimension of each recorded input shape is replaced by the
    # requested batch size.
    input_shapes = {
        k: [args.batch_size] + v[-1][1:]
        for (k, v) in value_info.items()
    }
    print("input info: {}".format(input_shapes))
    # Random float32 tensors stand in for real inputs.
    external_inputs = {
        k: np.random.randn(*shape).astype(np.float32)
        for k, shape in input_shapes.items()
    }

    if args.device == 'CPU':
        device_option = core.DeviceOption(caffe2_pb2.CPU)
    elif args.device == 'MKL':
        device_option = core.DeviceOption(caffe2_pb2.MKLDNN)
    else:  # 'IDEEP' -- the only remaining name accepted by the check above
        device_option = core.DeviceOption(caffe2_pb2.IDEEP)
    print("Device option: {}, {}".format(args.device, device_option))
    pred_net.device_option.CopyFrom(device_option)
    for op in pred_net.op:
        op.device_option.CopyFrom(device_option)

    # Hack to initialize weights into the MKL/IDEEP context: run init_net once,
    # copy every resulting blob out of the workspace, reset the workspace, and
    # feed the blobs back in under the target device option.
    workspace.RunNetOnce(init_net)
    weights = {name: workspace.FetchBlob(name) for name in workspace.Blobs()}
    # External inputs are added last so they override same-named blobs.
    weights.update(external_inputs)
    workspace.ResetWorkspace()

    with core.DeviceScope(device_option):
        for name, blob in weights.items():
            workspace.FeedBlob(name, blob, device_option)
        workspace.CreateNet(pred_net)
        res = workspace.BenchmarkNet(pred_net.name,
                                     args.warmup_iterations,
                                     args.iterations,
                                     args.layer_wise_benchmark)
        # NOTE(review): res[0] is presumably milliseconds per iteration for the
        # whole net -- confirm against the BenchmarkNet documentation.
        print("FPS: {:.2f}".format(1 / res[0] * 1000 * args.batch_size))
예제 #3
0
    # for cpu
    conda install pytorch-nightly-cpu -c pytorch
    # for gpu with CUDA 8
    conda install pytorch-nightly cuda80 -c pytorch

or please refer to the official site
https://caffe2.ai/docs/getting-started.html
"""

######################################################################
# Load pretrained Caffe2 model
# ----------------------------
# We load a pretrained resnet50 classification model provided by Caffe2.
from caffe2.python.models.download import ModelDownloader

# Module-level downloader; Model.__init__ below fetches its nets through it.
mf = ModelDownloader()


class Model:
    """Holds the three objects returned by ``mf.get_c2_model`` for one model name."""

    def __init__(self, model_name):
        # get_c2_model's result is unpacked, in order, into init_net,
        # predict_net and value_info.
        fetched = mf.get_c2_model(model_name)
        self.init_net, self.predict_net, self.value_info = fetched


resnet50 = Model("resnet50")

######################################################################
# Load a test image
# ------------------
# A single cat dominates the examples!
from tvm.contrib.download import download_testdata
예제 #4
0
 def setUp(self):
     """Attach a fresh ``ModelDownloader`` to the test instance as ``model_downloader``."""
     downloader = ModelDownloader()
     self.model_downloader = downloader
예제 #5
0
class TensorRTTransformTest(TestCase):
    """Compares a resnet50 run on CUDA with the net returned by ``transform_caffe2_net``.

    NOTE(review): presumably this exercises Caffe2's TensorRT integration (the
    test is skipped unless ``workspace.C.use_trt`` is truthy) -- confirm.
    """

    def setUp(self):
        self.model_downloader = ModelDownloader()

    def _add_head_tail(self, pred_net, new_head, new_tail):
        """Wrap ``pred_net`` in place with a leading and a trailing "Copy" operator.

        The leading op copies ``new_head`` into the net's first external input
        and the trailing op copies the net's first external output into
        ``new_tail``. Afterwards ``new_head`` / ``new_tail`` replace the first
        external input / output of the net.
        """
        def make_copy_op(src, dst):
            # Build a single "Copy" operator reading ``src`` and writing ``dst``.
            copy_op = caffe2_pb2.OperatorDef()
            copy_op.type = "Copy"
            copy_op.input.append(src)
            copy_op.output.append(dst)
            return copy_op

        old_input = pred_net.external_input[0]
        old_output = pred_net.external_output[0]

        # Park the existing operators in a scratch NetDef so the op list can be
        # emptied and rebuilt as: head copy, original ops, tail copy.
        parked = caffe2_pb2.NetDef()
        parked.op.extend(pred_net.op)
        del pred_net.op[:]

        pred_net.op.extend([make_copy_op(new_head, old_input)])
        pred_net.op.extend(parked.op)
        pred_net.op.extend([make_copy_op(old_output, new_tail)])

        pred_net.external_input[0] = new_head
        pred_net.external_output[0] = new_tail

    def _timed_runs(self, net_name, warmup, repeat):
        """Run ``net_name`` ``warmup`` times untimed, then return the seconds taken by ``repeat`` runs."""
        for _ in range(warmup):
            workspace.RunNet(net_name)
        began = time.time()
        for _ in range(repeat):
            workspace.RunNet(net_name)
        return time.time() - began

    def _collect_outputs(self, output_names):
        """Fetch every blob in ``output_names`` and pack the values into an 'Outputs' namedtupledict."""
        fetched = [workspace.FetchBlob(name) for name in output_names]
        return namedtupledict('Outputs', output_names)(*fetched)

    @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
    def test_resnet50_core(self):
        # Documented with comments rather than a docstring so unittest's
        # verbose test description stays unchanged.
        #
        # Flow: run resnet50 with CUDNN-engine ops on CUDA device 0, then run
        # the net returned by transform_caffe2_net on the same input, and
        # require the outputs to agree within rtol=1e-3.
        batch = 2
        warmup_runs = 20
        timed_runs = 100
        print("Batch size: {}, repeat inference {} times, warmup {} times".format(
            batch, timed_runs, warmup_runs))
        init_net, pred_net, _ = self.model_downloader.get_c2_model('resnet50')
        self._add_head_tail(pred_net, 'real_data', 'real_softmax')
        input_name = "real_data"
        input_blob_dims = (batch, 3, 224, 224)

        gpu_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        for net in (init_net, pred_net):
            net.device_option.CopyFrom(gpu_option)
        for op in pred_net.op:
            op.device_option.CopyFrom(gpu_option)
            op.engine = 'CUDNN'
        net_outputs = pred_net.external_output
        data = np.random.randn(*input_blob_dims).astype(np.float32)

        # Reference run of the untransformed net.
        workspace.SwitchWorkspace("gpu_test", True)
        with core.DeviceScope(gpu_option):
            workspace.FeedBlob(input_name, data)
            workspace.RunNetOnce(init_net)
            workspace.CreateNet(pred_net)
            c2_time = self._timed_runs(pred_net.name, warmup_runs, timed_runs)
            Y_c2 = self._collect_outputs(net_outputs)
        workspace.ResetWorkspace()

        # Fill the workspace with the weights again after the reset.
        with core.DeviceScope(gpu_option):
            workspace.RunNetOnce(init_net)

        # Cut the graph; the conversion timer stops once the new net is created.
        conversion_began = time.time()
        pred_net_cut = transform_caffe2_net(
            pred_net,
            {input_name: input_blob_dims},
            build_serializable_op=False)
        del init_net, pred_net
        pred_net_cut.device_option.CopyFrom(gpu_option)
        for op in pred_net_cut.op:
            op.device_option.CopyFrom(gpu_option)

        input_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))
        with core.DeviceScope(gpu_option):
            workspace.FeedBlob(input_name, data)
            workspace.CreateNet(pred_net_cut)
            print("Conversion time: {:.2f}s".format(time.time() - conversion_began))

            trt_time = self._timed_runs(pred_net_cut.name, warmup_runs, timed_runs)
            print("TRT runtime: {}s, improvement: {}%".format(
                trt_time, (c2_time - trt_time) / c2_time * 100))
            Y_trt = self._collect_outputs(net_outputs)
        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
예제 #6
0
 def setUp(self):
     """Attach a ``ModelDownloader('ONNX_MODELS')`` to the test instance as ``model_downloader``."""
     downloader = ModelDownloader('ONNX_MODELS')
     self.model_downloader = downloader
예제 #7
0
class TestCaffe2End2End(TestCase):
    """Round-trip checks: native Caffe2 outputs versus the same net converted to ONNX and re-run."""

    def setUp(self):
        self.model_downloader = ModelDownloader('ONNX_MODELS')

    def _test_net(self, net_name, input_blob_dims=(1, 3, 224, 224), decimal=7):
        """Run ``net_name`` natively and through ONNX conversion, then compare outputs.

        Args:
            net_name: Model name handed to ``get_c2_model_dbg``.
            input_blob_dims: Four-element shape of the random input; it is
                unpacked into exactly four dimensions.
            decimal: Precision forwarded to ``assertSameOutputs``.

        The test is skipped when fetching the model raises OSError/IOError.
        """
        np.random.seed(seed=0)
        try:
            fetched = self.model_downloader.get_c2_model_dbg(net_name)
            c2_init_net, c2_predict_net, value_info, debug_str = fetched
        except (OSError, IOError) as e:
            # catch IOError/OSError that is caused by FileNotFoundError and PermissionError
            # This is helpful because sometimes we get errors due to gfs not available
            print("\n_test_net exception: ", e)
            self.skipTest(str(e))

        # Start to run the model and compare outputs.
        batch, channels, height, width = input_blob_dims
        random_input = np.random.randn(batch, channels, height, width)
        inputs = [random_input.astype(np.float32)]

        # Only the outputs of the native run are needed; the first element of
        # the returned pair is released right away.
        native_first, c2_outputs = c2_native_run_net(
            c2_init_net, c2_predict_net, inputs, debug_str)
        del native_first

        onnx_model = c2_onnx.caffe2_net_to_onnx_model(
            predict_net=c2_predict_net,
            init_net=c2_init_net,
            value_info=value_info,
        )
        prepared = c2.prepare(onnx_model)
        onnx_outputs = prepared.run(inputs)
        self.assertSameOutputs(c2_outputs, onnx_outputs, decimal=decimal)

    def test_alexnet(self):
        self._test_net('bvlc_alexnet', decimal=4)

    def test_resnet50(self):
        self._test_net('resnet50')

    # The two tests below are skipped whenever JENKINS_URL is set in the environment.
    @unittest.skipIf(os.environ.get('JENKINS_URL'),
                     'Taking too long to download!')
    def test_vgg16(self):
        self._test_net('vgg16')

    @unittest.skipIf(os.environ.get('JENKINS_URL'),
                     'Taking too long to download!')
    def test_zfnet(self):
        self._test_net('zfnet')

    def test_inception_v1(self):
        self._test_net('inception_v1', decimal=2)

    def test_inception_v2(self):
        self._test_net('inception_v2')

    def test_squeezenet(self):
        self._test_net('squeezenet')

    def test_densenet121(self):
        self._test_net('densenet121')

    def test_bvlc_googlenet(self):
        self._test_net('bvlc_googlenet')

    def test_bvlc_reference_caffenet(self):
        self._test_net('bvlc_reference_caffenet')

    def test_bvlc_reference_rcnn_ilsvrc13(self):
        self._test_net('bvlc_reference_rcnn_ilsvrc13')