def c2_native_run_net(init_net, predict_net, inputs):
    """Run ``predict_net`` once in a fresh Workspace and collect its outputs.

    ``init_net``, when truthy, is run once first. ``inputs`` is then fed
    into the workspace using ``predict_net.device_option``:

    * a dict is fed as blob-name -> value pairs;
    * any other sized sequence is matched positionally against the
      external inputs the workspace does not hold yet, provided their
      count equals ``len(inputs)``; otherwise the values are assigned to
      the leading entries of ``predict_net.external_input``.

    Returns:
        A ``(workspace, outputs)`` pair, where ``outputs`` is an
        ``Outputs`` record created with ``namedtupledict`` from
        ``predict_net.external_output`` and the fetched blob values.

    Raises:
        AssertionError: on the fallback path, if ``inputs`` holds more
            values than ``predict_net.external_input`` has names.
    """
    workspace = Workspace()
    if init_net:
        workspace.RunNetOnce(init_net)

    # Work out the (blob name, value) pairs first, then feed them in one place.
    if isinstance(inputs, dict):
        feeds = inputs.items()
    else:
        missing = [
            blob
            for blob in predict_net.external_input
            if not workspace.HasBlob(blob)
        ]
        if len(missing) == len(inputs):
            feeds = zip(missing, inputs)
        else:
            # The not-yet-present inputs do not line up with the supplied
            # values (e.g. everything is already initialized), so the
            # values go to the first len(inputs) external inputs instead.
            assert (len(inputs) <= len(predict_net.external_input))
            feeds = zip(predict_net.external_input, inputs)

    for blob_name, blob_value in feeds:
        workspace.FeedBlob(blob_name, blob_value, predict_net.device_option)

    workspace.RunNetOnce(predict_net)
    names = predict_net.external_output
    fetched = [workspace.FetchBlob(blob) for blob in names]
    return workspace, namedtupledict('Outputs', names)(*fetched)
def benchmark_caffe2_model(init_net, predict_net, warmup_iters=3, main_iters=10, layer_details=True):
    """Benchmark ``predict_net`` in a fresh Workspace.

    ``init_net``, when truthy, is run once before ``predict_net`` is
    created in the workspace. The net is then benchmarked by name via
    ``Workspace.BenchmarkNet`` with the given ``warmup_iters``,
    ``main_iters`` and ``layer_details`` arguments.

    Returns:
        The first element of the ``BenchmarkNet`` result: the execution
        time per iteration (millisecond).
    """
    workspace = Workspace()
    if init_net:
        workspace.RunNetOnce(init_net)
    workspace.CreateNet(predict_net)

    timings = workspace.BenchmarkNet(
        predict_net.name,
        warmup_iters,
        main_iters,
        layer_details,
    )
    # Drop the workspace reference before handing the timing back.
    del workspace
    return timings[0]
def c2_native_run_net(init_net, predict_net, inputs, debug_arg=None):
    """Run ``predict_net`` once in a fresh Workspace and collect its outputs.

    ``init_net``, when truthy, is run once first. ``inputs`` is then fed
    into the workspace using ``predict_net.device_option``:

    * a dict is fed as blob-name -> value pairs;
    * any other sized sequence is matched positionally against the
      external inputs the workspace does not hold yet, provided their
      count equals ``len(inputs)``; otherwise the values are assigned to
      the leading entries of ``predict_net.external_input``.

    Args:
        debug_arg: value that is only printed as part of the diagnostic
            dump emitted right before the size assertion fails.

    Returns:
        A ``(workspace, outputs)`` pair, where ``outputs`` is an
        ``Outputs`` record created with ``namedtupledict`` from
        ``predict_net.external_output`` and the fetched blob values.

    Raises:
        AssertionError: on the fallback path, if ``inputs`` holds more
            values than ``predict_net.external_input`` has names.
    """
    workspace = Workspace()
    if init_net:
        workspace.RunNetOnce(init_net)

    # Work out the (blob name, value) pairs first, then feed them in one place.
    if isinstance(inputs, dict):
        feeds = inputs.items()
    else:
        missing = [
            blob
            for blob in predict_net.external_input
            if not workspace.HasBlob(blob)
        ]
        if len(missing) == len(inputs):
            feeds = zip(missing, inputs)
        else:
            # The not-yet-present inputs do not line up with the supplied
            # values (e.g. everything is already initialized), so the
            # values go to the first len(inputs) external inputs instead.
            if len(inputs) > len(predict_net.external_input):
                # Extra logging to help debug sporadic sandcastle failures;
                # the assertion below fires immediately afterwards.
                print("c2_native_run_net assert. len(inputs)=", len(inputs),
                      "len(predict_net.external_input)=",
                      len(predict_net.external_input))
                print("debug_arg: ", debug_arg)
                print("predict_net ", type(predict_net), ":", predict_net)
                print("inputs ", type(inputs), ":", inputs)
            assert (len(inputs) <= len(predict_net.external_input))
            feeds = zip(predict_net.external_input, inputs)

    for blob_name, blob_value in feeds:
        workspace.FeedBlob(blob_name, blob_value, predict_net.device_option)

    workspace.RunNetOnce(predict_net)
    names = predict_net.external_output
    fetched = [workspace.FetchBlob(blob) for blob in names]
    return workspace, namedtupledict('Outputs', names)(*fetched)
def test_resnet50_core(self):
    """Time resnet50 before and after ``transform_caffe2_net`` and compare outputs.

    The model from ``self._get_c2_model('resnet50')`` is pinned to CUDA
    device 0 with the CUDNN engine, run on one random float32 batch, then
    passed through ``transform_caffe2_net`` and run again on the same
    batch. Both wall-clock timings are printed and the two sets of
    outputs must agree within ``rtol=1e-3``.
    """
    batch_size = 2
    warmup_iters = 20
    timed_iters = 100
    print("Batch size: {}, repeat inference {} times, warmup {} times".
          format(batch_size, timed_iters, warmup_iters))

    init_net, pred_net, _ = self._get_c2_model('resnet50')
    self._add_head_tail(pred_net, 'real_data', 'real_softmax')
    input_blob_dims = (batch_size, 3, 224, 224)
    input_name = "real_data"

    # Pin both nets, and every operator of the predict net, to GPU 0.
    device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
    init_net.device_option.CopyFrom(device_option)
    pred_net.device_option.CopyFrom(device_option)
    for operator in pred_net.op:
        operator.device_option.CopyFrom(device_option)
        operator.engine = 'CUDNN'

    # Output names are taken from the original net and reused for both runs.
    net_outputs = pred_net.external_output
    data = np.random.randn(*input_blob_dims).astype(np.float32)

    # --- Reference run on the untransformed net ---
    ws = Workspace()
    with core.DeviceScope(device_option):
        ws.FeedBlob(input_name, data)
        ws.RunNetOnce(init_net)
        ws.CreateNet(pred_net)
        for _ in range(warmup_iters):
            ws.RunNet(pred_net.name)
        started = time.time()
        for _ in range(timed_iters):
            ws.RunNet(pred_net.name)
        c2_time = time.time() - started
        reference_values = list(map(ws.FetchBlob, net_outputs))
        Y_c2 = namedtupledict('Outputs', net_outputs)(*reference_values)
    ws.ResetWorkspace()

    # Cut the graph
    init_net_cut, pred_net_cut = transform_caffe2_net(
        init_net,
        pred_net,
        {input_name: input_blob_dims})
    del init_net, pred_net

    # The transformed net is fed through its own first external input.
    input_name = pred_net_cut.external_input[0]
    print("C2 runtime: {}s".format(c2_time))

    # --- Run on the transformed net ---
    # Rebinding ``ws`` (rather than using a second name) lets go of the
    # first workspace here, as before.
    ws = Workspace()
    with core.DeviceScope(device_option):
        ws.FeedBlob(input_name, data)
        ws.RunNetOnce(init_net_cut)
        ws.CreateNet(pred_net_cut)
        for _ in range(warmup_iters):
            ws.RunNet(pred_net_cut.name)
        started = time.time()
        for _ in range(timed_iters):
            ws.RunNet(pred_net_cut.name)
        trt_time = time.time() - started
        print("TRT runtime: {}s, improvement: {}%".format(
            trt_time, (c2_time - trt_time) / c2_time * 100))
        transformed_values = list(map(ws.FetchBlob, net_outputs))
        Y_trt = namedtupledict('Outputs', net_outputs)(*transformed_values)

    np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)