def test_nvprof(self):
    """Run a small conv2d program repeatedly inside the CUDA profiler.

    Returns immediately, without doing anything, when
    ``fluid.core.is_compile_gpu()`` is falsy. Otherwise the method builds
    a ``data -> conv2d`` graph in the default programs, runs the startup
    program on ``GPUPlace(0)``, and then executes the main program eight
    times on random float32 input inside ``profiler.cuda_profiler``.
    """
    if not fluid.core.is_compile_gpu():
        return

    num_iters = 8
    # One leading batch dimension of 4 on top of the per-sample [3, 28, 28]
    # shape declared for the data layer below.
    batch_shape = [4, 3, 28, 28]

    image = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
    # The conv2d result is not fetched; the call is made only so the op is
    # present in the default main program that gets executed below.
    layers.conv2d(image, 20, 3, stride=[1, 1], padding=[1, 1])

    exe = fluid.Executor(fluid.GPUPlace(0))
    exe.run(fluid.default_startup_program())

    # NOTE(review): the two arguments are presumably the profiler's output
    # file and output format -- confirm against profiler.cuda_profiler.
    with profiler.cuda_profiler("cuda_profiler.txt", 'csv'):
        for _ in range(num_iters):
            batch = np.random.random(batch_shape).astype('float32')
            exe.run(fluid.default_main_program(), feed={'data': batch})
map(lambda x: x[0].reshape([1, 28, 28]), data)).astype(DTYPE) y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape([len(y_data), 1]) start = time.clock() outs = exe.run(fluid.default_main_program(), feed={"pixel": img_data, "label": y_data}, fetch_list=[avg_cost] + accuracy.metrics) end = time.clock() loss = np.array(outs[0]) acc = np.array(outs[1]) print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" % (pass_id, batch_id, loss, 1 - acc, (end - start) / 1000)) pass_end = time.clock() test_avg_acc = eval_test(exe, accuracy, avg_cost) pass_acc = accuracy.eval(exe) print("pass=%d, test_avg_acc=%f, test_avg_acc=%f, elapse=%f" % (pass_id, pass_acc, test_avg_acc, (pass_end - pass_start) / 1000)) if __name__ == '__main__': args = parse_args() print_arguments(args) if args.use_nvprof and args.device == 'GPU': with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: run_benchmark(cnn_model, args) else: run_benchmark(cnn_model, args)