def inference_2d(base_model_name, base_npz_path, rgb_files, dep_files):
    """Run 2D pose inference over paired RGB/depth files and plot the results.

    Parameters
    ----------
    base_model_name : str
        Model key understood by ``get_base_model``.
    base_npz_path : str
        Path to the pretrained weights (``.npz``).
    rgb_files, dep_files : sequence of str
        File paths, consumed pairwise via ``zip``.
    """
    height, width, channel = (368, 432, 4)
    base_model_func = get_base_model(base_model_name)
    e_2d = measure(
        lambda: TfPoseEstimator(base_npz_path, base_model_func,
                                (height, width, channel)),
        'create TfPoseEstimator')

    time0 = time.time()
    for idx, (rgb_name, dep_name) in enumerate(zip(rgb_files, dep_files)):
        input_2d, init_h, init_w = measure(
            lambda: read_2dfiles(rgb_name, dep_name, height, width),
            'read_2dfiles')
        humans, heatMap, pafMap = measure(lambda: e_2d.inference(input_2d),
                                          'e_2d.inference')
        print('got %d humans from %s' % (len(humans), rgb_name[:-4]))
        # plot_humans(input_2d, heatMap, pafMap, humans, '%02d' % (idx + 1))
        if humans:
            # Only the first detected human is visualised; 0.1 is the
            # keypoint-confidence threshold used throughout this file.
            coords2d, _, coords2d_vis = tranform_keypoints2d(
                humans[0].body_parts, init_w, init_h, 0.1)
            plot_human2d(rgb_name, dep_name, coords2d, idx, coords2d_vis)
            do_plot()

    # Fix: compute the elapsed time once so the 'all took' and 'mean'
    # figures agree, and skip the summary (instead of raising
    # ZeroDivisionError) when no files were processed.
    total = time.time() - time0
    if rgb_files:
        mean = total / len(rgb_files)
        print('inference all took: %f, mean: %f, FPS: %f' %
              (total, mean, 1.0 / mean))
def inference(base_model_name, path_to_npz, data_format, input_files, plot):
    """Run 2D pose inference on a list of images and log timing statistics.

    Parameters
    ----------
    base_model_name : str
        Model key understood by ``get_base_model``.
    path_to_npz : str
        Path to the pretrained weights (``.npz``).
    data_format : str
        ``'channels_first'`` or ``'channels_last'``.
    input_files : sequence of str
        Image file paths.
    plot : bool
        When true, render each result with ``plot_humans``.
    """
    model_func = get_base_model(base_model_name)
    height, width = (368, 432)
    e = measure(
        lambda: TfPoseEstimator(path_to_npz, model_func,
                                target_size=(width, height),
                                data_format=data_format),
        'create TfPoseEstimator')

    t0 = time.time()
    for idx, img_name in enumerate(input_files):
        image = measure(
            lambda: read_imgfile(img_name, width, height,
                                 data_format=data_format),
            'read_imgfile')
        humans, heatMap, pafMap = measure(lambda: e.inference(image),
                                          'e.inference')
        tl.logging.info('got %d humans from %s' % (len(humans), img_name))
        if humans:
            for h in humans:
                tl.logging.debug(h)
        if plot:
            # Plotting expects HWC; transpose back from CHW if needed.
            if data_format == 'channels_first':
                image = image.transpose([1, 2, 0])
            plot_humans(image, heatMap, pafMap, humans, '%02d' % (idx + 1))

    tot = time.time() - t0
    # Fix: skip the summary (instead of raising ZeroDivisionError) when
    # the input list is empty.
    if input_files:
        mean = tot / len(input_files)
        tl.logging.info('inference all took: %f, mean: %f, FPS: %f' %
                        (tot, mean, 1.0 / mean))
def inference(path_to_freezed_model, input_files):
    """Run pose inference using a frozen (exported) graph and plot each image.

    Parameters
    ----------
    path_to_freezed_model : str
        Path to the frozen TensorFlow graph.
    input_files : sequence of str
        Image file paths.
    """
    h, w = 368, 432
    e = measure(
        lambda: TfPoseestimatorLoader(path_to_freezed_model,
                                      target_size=(w, h)),
        'create TfPoseestimatorLoader')
    for idx, img_name in enumerate(input_files):
        image = read_imgfile(img_name, w, h)
        humans, heatMap, pafMap = measure(lambda: e.inference(image),
                                          'inference')
        print('got %d humans from %s' % (len(humans), img_name))
        if humans:
            # Fix: loop variable renamed so it no longer shadows the
            # image height ``h`` defined above.
            for human in humans:
                print(human)
        plot_humans(image, heatMap, pafMap, humans, '%02d' % (idx + 1))
def main():
    """Convert an npz-trained pose model to a TensorRT engine and run it
    on the images given on the command line."""
    args = parse_args()
    height, width, channel = 368, 432, 3
    images = []
    for name in args.images.split(','):
        x = read_imgfile(
            name, width, height,
            'channels_first')  # channels_first is required for tensorRT
        images.append(x)

    model_func = _get_model_func(args.base_model)
    model_inputs, model_outputs = model_func()
    # Strip the trailing ':0' from the tensor names.
    input_names = [p.name[:-2] for p in model_inputs]
    output_names = [p.name[:-2] for p in model_outputs]

    print('input names: %s' % ','.join(input_names))
    print('output names: %s' % ','.join(output_names))  # outputs/conf,outputs/paf

    # NOTE(review): the session is never closed; prefer
    # `with tf.Session() as sess:` so it is released on exit.
    sess = tf.InteractiveSession()
    measure(lambda: tl.files.load_and_assign_npz_dict(args.path_to_npz, sess),
            'load npz')

    frozen_graph = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, output_names)
    tf_model = tf.graph_util.remove_training_nodes(frozen_graph)
    uff_model = measure(lambda: uff.from_tensorflow(tf_model, output_names),
                        'uff.from_tensorflow')
    print('uff model created')

    parser = uffparser.create_uff_parser()
    inputOrder = 0  # NCHW, https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/_nv_uff_parser_8h_source.html
    parser.register_input(input_names[0], (channel, height, width), inputOrder)
    for name in output_names:
        parser.register_output(name)

    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)
    max_batch_size = 1
    max_workspace_size = 1 << 30
    engine = measure(
        lambda: trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                            max_batch_size,
                                            max_workspace_size),
        'trt.utils.uff_to_trt_engine')
    print('engine created')

    # Fix: use floor division — the feature-map size must be an integer;
    # `height / 8` yields a float under Python 3.
    f_height, f_width = (height // 8,
                         width // 8)  # TODO: derive from model_outputs
    post_process = PostProcessor((height, width), (f_height, f_width),
                                 'channels_first')

    for idx, x in enumerate(images):
        conf, paf = measure(lambda: infer(engine, x, 1), 'infer')
        humans, heat_up, paf_up = measure(lambda: post_process(conf, paf),
                                          'post_process')
        print('got %d humans' % (len(humans)))
        plot_humans(x.transpose([1, 2, 0]), heat_up, paf_up, humans,
                    '%02d' % (idx + 1))
def inference_data(base_model_name, base_npz_path, head_model_name,
                   head_npz_path, rgb_files, dep_files, cam_info):
    """Run the 2D+3D pose pipeline over RGB/depth pairs and write gait files.

    Writes per-frame 2D keypoints to ``gait2d.txt`` and 3D keypoints (in
    millimetres) to ``gait3d.txt``; frames with no detected human record
    ``None``.

    Parameters
    ----------
    base_model_name, head_model_name : str
        Model keys for ``get_base_model`` / ``get_head_model``.
    base_npz_path, head_npz_path : str
        Paths to the pretrained weights.
    rgb_files, dep_files : sequence of str
        File paths, consumed pairwise.
    cam_info : dict
        Camera calibration with keys ``'K'`` and ``'distCoef'``.
    """
    height, width, channel = (368, 432, 4)
    base_model_func = get_base_model(base_model_name)
    e_2d = measure(
        lambda: TfPoseEstimator(base_npz_path, base_model_func,
                                (height, width, channel)),
        'create TfPoseEstimator')

    x_size, y_size, z_size = (64, 64, 64)
    head_model_func = get_head_model(head_model_name)
    e_3d = measure(
        lambda: Pose3DEstimator(head_npz_path, head_model_func,
                                (x_size, y_size, z_size), False),
        'create Pose3DEstimator')

    time0 = time.time()
    coords_uv_list, coords_xyz_list = list(), list()
    for _, (rgb_name, dep_name) in enumerate(zip(rgb_files, dep_files)):
        input_2d, init_h, init_w = measure(
            lambda: read_2dfiles(rgb_name, dep_name, height, width),
            'read_2dfiles')
        humans, _, _ = measure(lambda: e_2d.inference(input_2d),
                               'e_2d.inference')
        print('got %d humans from %s' % (len(humans), rgb_name[:-4]))
        # Fix: `len(humans) is 0` compared identity with an int literal,
        # which only works by CPython's small-int caching; use truthiness.
        if not humans:
            coords_uv_list.append(None)
            coords_xyz_list.append(None)
        else:
            coords2d, coords2d_conf, coords2d_vis = tranform_keypoints2d(
                humans[0].body_parts, init_w, init_h, 0.1)
            input_3d, trafo_params = measure(
                lambda: read_3dfiles(dep_name, cam_info, coords2d,
                                     coords2d_vis, x_size, y_size, z_size),
                'read_3dfiles')
            coords3d, coords3d_conf = measure(
                lambda: e_3d.inference(input_3d), 'e_3d.inference')
            # Undo the voxel-space normalisation applied by read_3dfiles.
            coords3d_pred = coords3d * trafo_params['scale'] + trafo_params['root']
            coords3d_pred_proj = Camera(cam_info['K'],
                                        cam_info['distCoef']).unproject(
                                            coords2d, coords3d_pred[:, -1])
            cond = coords2d_conf > coords3d_conf  # use backproj only when 2d was visible and 2d/3d roughly matches
            coords3d_pred[cond, :] = coords3d_pred_proj[cond, :]
            coords3d_conf[cond] = coords2d_conf[cond]
            coords_uv_list.append(coords2d)
            coords_xyz_list.append(coords3d_pred * 1000.0)

    write_gait(coords_uv_list, 'gait2d.txt')
    write_gait(coords_xyz_list, 'gait3d.txt')

    # Fix: compute the elapsed time once so the two figures agree, and
    # avoid ZeroDivisionError on an empty file list.
    total = time.time() - time0
    if rgb_files:
        mean = total / len(rgb_files)
        print('inference all took: %f, mean: %f, FPS: %f' %
              (total, mean, 1.0 / mean))
def inference_3d(base_model_name, base_npz_path, head_model_name,
                 head_npz_path, rgb_files, dep_files, cam_list, joint3d_list):
    """Run 2D+3D pose inference and plot predictions against ground truth.

    Parameters
    ----------
    base_model_name, head_model_name : str
        Model keys for ``get_base_model`` / ``get_head_model``.
    base_npz_path, head_npz_path : str
        Paths to the pretrained weights.
    rgb_files, dep_files : sequence of str
        File paths, consumed pairwise.
    cam_list : sequence of dict
        Per-frame camera calibrations (``'K'``, ``'distCoef'``).
    joint3d_list : sequence
        Per-frame ground-truth 3D joints, zipped against detected humans.
    """
    height, width, channel = (368, 432, 4)
    base_model_func = get_base_model(base_model_name)
    e_2d = measure(
        lambda: TfPoseEstimator(base_npz_path, base_model_func,
                                (height, width, channel)),
        'create TfPoseEstimator')

    x_size, y_size, z_size = (64, 64, 64)
    head_model_func = get_head_model(head_model_name)
    e_3d = measure(
        lambda: Pose3DEstimator(head_npz_path, head_model_func,
                                (x_size, y_size, z_size), False),
        'create Pose3DEstimator')

    time0 = time.time()
    for idx, (rgb_name, dep_name, cam_info, joints3d) in enumerate(
            zip(rgb_files, dep_files, cam_list, joint3d_list)):
        input_2d, init_h, init_w = measure(
            lambda: read_2dfiles(rgb_name, dep_name, height, width),
            'read_2dfiles')
        humans, _, _ = measure(lambda: e_2d.inference(input_2d),
                               'e_2d.inference')
        print('got %d humans from %s' % (len(humans), rgb_name[:-4]))
        cam_calib = Camera(cam_info['K'], cam_info['distCoef'])
        for h, (pred_2d, gt_3d) in enumerate(zip(humans, joints3d)):
            coords2d, coords2d_conf, coords2d_vis = tranform_keypoints2d(
                pred_2d.body_parts, init_w, init_h)
            input_3d, trafo_params = measure(
                lambda: read_3dfiles(dep_name, cam_info, coords2d,
                                     coords2d_vis, x_size, y_size, z_size),
                'read_3dfiles')
            coords3d, coords3d_conf = measure(
                lambda: e_3d.inference(input_3d), 'e_3d.inference')
            # Undo the voxel-space normalisation applied by read_3dfiles.
            coords3d_pred = coords3d * trafo_params['scale'] + trafo_params['root']
            coords3d_pred_proj = cam_calib.unproject(coords2d,
                                                     coords3d_pred[:, -1])
            cond = coords2d_conf > coords3d_conf  # use backproj only when 2d was visible and 2d/3d roughly matches
            coords3d_pred[cond, :] = coords3d_pred_proj[cond, :]
            coords3d_conf[cond] = coords2d_conf[cond]
            plot_human3d(rgb_name, dep_name, coords3d_pred, cam_calib,
                         'pre%d' % h, coords2d_vis)
            plot_human3d(rgb_name, dep_name, gt_3d[:18],
                         Camera(cam_info['K'], cam_info['distCoef']),
                         'gt%d' % h)
        do_plot()

    # Fix: compute the elapsed time once so the two figures agree, and
    # avoid ZeroDivisionError on an empty file list.
    total = time.time() - time0
    if rgb_files:
        mean = total / len(rgb_files)
        print('inference all took: %f, mean: %f, FPS: %f' %
              (total, mean, 1.0 / mean))
def export_model(model_func, checkpoint_dir, path_to_npz, graph_filename,
                 uff_filename):
    """Load npz weights into a fresh session and export the model.

    Saves a TensorFlow checkpoint into ``checkpoint_dir``; additionally
    writes a graph file when ``graph_filename`` is given and a UFF file
    when ``uff_filename`` is given, then prints every exported parameter.
    """
    mkdir_p(checkpoint_dir)
    model_parameters = model_func()

    # Tensor names without the trailing ':0' suffix.
    names = []
    for param in model_parameters:
        names.append(param.name[:-2])
    print('name: %s' % ','.join(names))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        measure(lambda: tl.files.load_and_assign_npz_dict(path_to_npz, sess),
                'load npz')
        if graph_filename:
            measure(lambda: save_graph(sess, checkpoint_dir, graph_filename),
                    'save_graph')
        measure(lambda: save_model(sess, checkpoint_dir), 'save_model')
        if uff_filename:
            measure(lambda: save_uff(sess, names, uff_filename), 'save_uff')

    print('exported model_parameters:')
    for param in model_parameters:
        print('%s :: %s' % (param.name, param.shape))
help='channels_last | channels_first.') parser.add_argument('--plot', type=bool, default=False, help='draw the results') parser.add_argument('--repeat', type=int, default=1, help='repeat the images for n times for profiling.') parser.add_argument('--limit', type=int, default=100, help='max number of images.') return parser.parse_args() def main(): args = parse_args() image_files = ([f for f in args.images.split(',') if f] * args.repeat)[:args.limit] inference(args.base_model, args.path_to_npz, args.data_format, image_files, args.plot) if __name__ == '__main__': measure(main) # Testing # python examples/example-inference-1.py --path-to-npz=models/hao28-pose600000.npz --base-model=hao28_experimental --images=data/mscoco2017/val2017/000000000785.jpg --plot=True --limit=1 # python examples/example-inference-1.py --path-to-npz=models/pose100000.npz --base-model=mobilenet --images=data/mscoco2017/val2017/000000000785.jpg --plot=True --limit=1