def benchmark(args):
    print('Batch size: {}'.format(args.batch_size))
    mf = ModelDownloader()
    init_net, pred_net, value_info = mf.get_c2_model(args.model)
    input_shapes = {k: [args.batch_size] + v[-1][1:]
                    for (k, v) in value_info.items()}
    print("input info: {}".format(input_shapes))
    external_inputs = {}
    for k, v in input_shapes.items():
        external_inputs[k] = np.random.randn(*v).astype(np.float32)

    if args.device == 'CPU':
        device_option = core.DeviceOption(caffe2_pb2.CPU)
    elif args.device == 'MKL':
        device_option = core.DeviceOption(caffe2_pb2.MKLDNN)
    elif args.device == 'IDEEP':
        device_option = core.DeviceOption(caffe2_pb2.IDEEP)
    else:
        raise Exception("Unknown device: {}".format(args.device))
    print("Device option: {}, {}".format(args.device, device_option))
    pred_net.device_option.CopyFrom(device_option)
    for op in pred_net.op:
        op.device_option.CopyFrom(device_option)

    # Hack to initialize the weights in the MKL/IDEEP context
    workspace.RunNetOnce(init_net)
    bb = workspace.Blobs()
    weights = {}
    for b in bb:
        weights[b] = workspace.FetchBlob(b)
    for k, v in external_inputs.items():
        weights[k] = v
    workspace.ResetWorkspace()

    with core.DeviceScope(device_option):
        for name, blob in weights.items():
            workspace.FeedBlob(name, blob, device_option)
        workspace.CreateNet(pred_net)
        start = time.time()
        res = workspace.BenchmarkNet(pred_net.name,
                                     args.warmup_iterations,
                                     args.iterations,
                                     args.layer_wise_benchmark)
        # res[0] is the average time per iteration in ms, so
        # FPS = batch_size * 1000 / ms_per_iteration.
        print("FPS: {:.2f}".format(1 / res[0] * 1000 * args.batch_size))
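The function above only consumes an args object; a minimal driver sketch is shown below. The flag names mirror the attributes the function reads (batch_size, model, device, warmup_iterations, iterations, layer_wise_benchmark), while the defaults and help text are assumptions for illustration, not the original script's values.

# Hypothetical command-line wrapper for benchmark(); flag names follow the
# attributes read inside the function, defaults are illustrative only.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Caffe2 CPU/MKL/IDEEP inference benchmark")
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--model", type=str, default="resnet50")
    parser.add_argument("--device", type=str, default="CPU", choices=["CPU", "MKL", "IDEEP"])
    parser.add_argument("--warmup_iterations", type=int, default=10)
    parser.add_argument("--iterations", type=int, default=50)
    parser.add_argument("--layer_wise_benchmark", action="store_true")
    benchmark(parser.parse_args())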
# for cpu
conda install pytorch-nightly-cpu -c pytorch

# for gpu with CUDA 8
conda install pytorch-nightly cuda80 -c pytorch

or please refer to the official site
https://caffe2.ai/docs/getting-started.html
"""

######################################################################
# Load pretrained Caffe2 model
# ----------------------------
# We load a pretrained resnet50 classification model provided by Caffe2.
from caffe2.python.models.download import ModelDownloader

mf = ModelDownloader()


class Model:
    def __init__(self, model_name):
        self.init_net, self.predict_net, self.value_info = mf.get_c2_model(model_name)


resnet50 = Model("resnet50")

######################################################################
# Load a test image
# ------------------
# A single cat dominates the examples!
from tvm.contrib.download import download_testdata
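get_c2_model also returns value_info, which describes each external input of the model. A small sketch follows, assuming each entry is a (type, shape) pair whose last element is the NCHW shape, which is the same layout the benchmark snippet above relies on via v[-1][1:].

# Assumes value_info entries look like {"data": (dtype, [1, 3, 224, 224])};
# the last element of each entry is taken to be the input shape.
shape_dict = {name: info[-1] for name, info in resnet50.value_info.items()}
print(shape_dict)  # e.g. the expected NCHW input shape of resnet50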
class TensorRTTransformTest(TestCase):
    def setUp(self):
        self.model_downloader = ModelDownloader()

    def _add_head_tail(self, pred_net, new_head, new_tail):
        orig_head = pred_net.external_input[0]
        orig_tail = pred_net.external_output[0]

        # Add head
        head = caffe2_pb2.OperatorDef()
        head.type = "Copy"
        head.input.append(new_head)
        head.output.append(orig_head)
        dummy = caffe2_pb2.NetDef()
        dummy.op.extend(pred_net.op)
        del pred_net.op[:]
        pred_net.op.extend([head])
        pred_net.op.extend(dummy.op)
        pred_net.external_input[0] = new_head

        # Add tail
        tail = caffe2_pb2.OperatorDef()
        tail.type = "Copy"
        tail.input.append(orig_tail)
        tail.output.append(new_tail)
        pred_net.op.extend([tail])
        pred_net.external_output[0] = new_tail

    @unittest.skipIf(not workspace.C.use_trt, "No TensorRT support")
    def test_resnet50_core(self):
        N = 2
        warmup = 20
        repeat = 100
        print("Batch size: {}, repeat inference {} times, warmup {} times".format(N, repeat, warmup))
        init_net, pred_net, _ = self.model_downloader.get_c2_model('resnet50')
        self._add_head_tail(pred_net, 'real_data', 'real_softmax')
        input_blob_dims = (N, 3, 224, 224)
        input_name = "real_data"

        device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        init_net.device_option.CopyFrom(device_option)
        pred_net.device_option.CopyFrom(device_option)
        for op in pred_net.op:
            op.device_option.CopyFrom(device_option)
            op.engine = 'CUDNN'
        net_outputs = pred_net.external_output
        Y_c2 = None
        data = np.random.randn(*input_blob_dims).astype(np.float32)
        c2_time = 1
        workspace.SwitchWorkspace("gpu_test", True)
        with core.DeviceScope(device_option):
            workspace.FeedBlob(input_name, data)
            workspace.RunNetOnce(init_net)
            workspace.CreateNet(pred_net)
            for _ in range(warmup):
                workspace.RunNet(pred_net.name)
            start = time.time()
            for _ in range(repeat):
                workspace.RunNet(pred_net.name)
            end = time.time()
            c2_time = end - start
            output_values = [workspace.FetchBlob(name) for name in net_outputs]
            Y_c2 = namedtupledict('Outputs', net_outputs)(*output_values)
        workspace.ResetWorkspace()

        # Fill the workspace with the weights
        with core.DeviceScope(device_option):
            workspace.RunNetOnce(init_net)

        # Cut the graph
        start = time.time()
        pred_net_cut = transform_caffe2_net(pred_net,
                                            {input_name: input_blob_dims},
                                            build_serializable_op=False)
        del init_net, pred_net
        pred_net_cut.device_option.CopyFrom(device_option)
        for op in pred_net_cut.op:
            op.device_option.CopyFrom(device_option)
        # _print_net(pred_net_cut)

        Y_trt = None
        input_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))
        with core.DeviceScope(device_option):
            workspace.FeedBlob(input_name, data)
            workspace.CreateNet(pred_net_cut)
            end = time.time()
            print("Conversion time: {:.2f}s".format(end - start))

            for _ in range(warmup):
                workspace.RunNet(pred_net_cut.name)
            start = time.time()
            for _ in range(repeat):
                workspace.RunNet(pred_net_cut.name)
            end = time.time()
            trt_time = end - start
            print("TRT runtime: {}s, improvement: {}%".format(trt_time, (c2_time - trt_time) / c2_time * 100))
            output_values = [workspace.FetchBlob(name) for name in net_outputs]
            Y_trt = namedtupledict('Outputs', net_outputs)(*output_values)
        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
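The test above refers to a _print_net debug helper only through a commented-out call, and the helper itself is not part of this excerpt. A minimal sketch of what such a NetDef dump could look like is given here; the name matches the commented call, but the exact output format is an assumption.

# Hypothetical NetDef dump helper matching the commented-out
# _print_net(pred_net_cut) call above; it only reads proto fields.
def _print_net(net):
    for i in net.external_input:
        print("Input: {}".format(i))
    for o in net.external_output:
        print("Output: {}".format(o))
    for op in net.op:
        print("Op {}".format(op.type))
        for x in op.input:
            print("  input: {}".format(x))
        for y in op.output:
            print("  output: {}".format(y))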
class TestCaffe2End2End(TestCase):
    def setUp(self):
        self.model_downloader = ModelDownloader('ONNX_MODELS')

    def _test_net(self,
                  net_name,
                  input_blob_dims=(1, 3, 224, 224),
                  decimal=7):
        np.random.seed(seed=0)
        try:
            c2_init_net, c2_predict_net, value_info, debug_str = self.model_downloader.get_c2_model_dbg(net_name)
        except (OSError, IOError) as e:
            # Catch the IOError/OSError caused by FileNotFoundError and PermissionError.
            # This is helpful because sometimes we get errors due to gfs not being available.
            print("\n_test_net exception: ", e)
            self.skipTest(str(e))

        # Start to run the model and compare outputs
        n, c, h, w = input_blob_dims
        data = np.random.randn(n, c, h, w).astype(np.float32)
        inputs = [data]
        _, c2_outputs = c2_native_run_net(c2_init_net, c2_predict_net, inputs, debug_str)
        del _

        model = c2_onnx.caffe2_net_to_onnx_model(
            predict_net=c2_predict_net,
            init_net=c2_init_net,
            value_info=value_info,
        )
        c2_ir = c2.prepare(model)
        onnx_outputs = c2_ir.run(inputs)
        self.assertSameOutputs(c2_outputs, onnx_outputs, decimal=decimal)

    def test_alexnet(self):
        self._test_net('bvlc_alexnet', decimal=4)

    def test_resnet50(self):
        self._test_net('resnet50')

    @unittest.skipIf(os.environ.get('JENKINS_URL'), 'Taking too long to download!')
    def test_vgg16(self):
        self._test_net('vgg16')

    @unittest.skipIf(os.environ.get('JENKINS_URL'), 'Taking too long to download!')
    def test_zfnet(self):
        self._test_net('zfnet')

    def test_inception_v1(self):
        self._test_net('inception_v1', decimal=2)

    def test_inception_v2(self):
        self._test_net('inception_v2')

    def test_squeezenet(self):
        self._test_net('squeezenet')

    def test_densenet121(self):
        self._test_net('densenet121')

    def test_bvlc_googlenet(self):
        self._test_net('bvlc_googlenet')

    def test_bvlc_reference_caffenet(self):
        self._test_net('bvlc_reference_caffenet')

    def test_bvlc_reference_rcnn_ilsvrc13(self):
        self._test_net('bvlc_reference_rcnn_ilsvrc13')
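assertSameOutputs is inherited from a shared test base class and is not part of this excerpt. A rough sketch of the comparison it is assumed to perform, given that the callers pass a decimal precision and numpy-array outputs; the real implementation may differ.

# Rough sketch of the inherited assertSameOutputs helper (assumed behavior):
# compare two sequences of numpy arrays element-wise to `decimal` places.
def assertSameOutputs(self, outputs1, outputs2, decimal=7):
    self.assertEqual(len(outputs1), len(outputs2))
    for o1, o2 in zip(outputs1, outputs2):
        np.testing.assert_almost_equal(o1, o2, decimal=decimal)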