def process_video(model, config, im_paths, kps, pred_dir,
                  min_frame=0, max_frame=None):
    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh=0.1)
    min_f = max(s, min_frame)
    if max_frame:
        max_f = min(e, max_frame)
    else:
        max_f = min(e, len(kps))

    images = []
    images_orig = []
    for i in tqdm(range(min_f, max_f)):
        proc_params = process_image(
            im_path=im_paths[i],
            bbox_param=bbox_params_smooth[i],
        )
        images.append(proc_params.pop('image'))
        images_orig.append(proc_params)

    preds = model.predict_all_images(images)

    render_preds(
        output_path=pred_dir,
        config=config,
        preds=preds,
        images=images,
        images_orig=images_orig,
    )
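# get_smooth_bbox_params is the shared helper in this file but is not shown.
# Below is a minimal, hypothetical sketch of what it could look like: a
# per-frame center/scale is computed from the keypoints above vis_thresh, then
# smoothed over time with a Gaussian filter. The function name suffix, the
# (scale, center_x, center_y) parameterization, and the 150 px target height
# are assumptions, not the verbatim upstream code.
import numpy as np
from scipy.ndimage import gaussian_filter1d

def get_smooth_bbox_params_sketch(kps, vis_thresh=0.1, sigma=3):
    """kps: (N, K, 3) keypoints in [x, y, vis] format."""
    params, start, end = [], None, 0
    for t, kp in enumerate(kps):
        vis = kp[:, 2] > vis_thresh
        if not vis.any():
            # No visible person in this frame; reuse the previous bbox.
            params.append(params[-1] if params else np.zeros(3))
            continue
        if start is None:
            start = t
        end = t + 1
        pts = kp[vis, :2]
        center = 0.5 * (pts.min(axis=0) + pts.max(axis=0))
        height = (pts.max(axis=0) - pts.min(axis=0)).max()
        scale = 150. / max(height, 1.)
        params.append(np.array([scale, center[0], center[1]]))
    smoothed = gaussian_filter1d(np.stack(params), sigma, axis=0)
    return smoothed, (start or 0), end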
def predict_on_tracks(model, img_dir, poseflow_path, output_path, track_id,
                      trim_length):
    # Get all the images.
    im_paths = sorted(glob(osp.join(img_dir, '*.png')))
    all_kps = get_labels_poseflow(poseflow_path, len(im_paths))

    # Here we set which track to use.
    track_id = min(track_id, len(all_kps) - 1)
    print('Total number of PoseFlow tracks:', len(all_kps))
    print('Processing track_id:', track_id)

    kps = all_kps[track_id]
    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh=0.1)

    images = []
    images_orig = []
    min_f = max(s, 0)
    max_f = min(e, len(kps))
    print('----------')
    print('Preprocessing frames.')
    print('----------')
    for i in range(min_f, max_f):
        proc_params = process_image(
            im_path=im_paths[i],
            bbox_param=bbox_params_smooth[i],
        )
        images.append(proc_params.pop('image'))
        images_orig.append(proc_params)

    if track_id > 0:
        output_path += '_{}'.format(track_id)
    mkdir(output_path)
    pred_path = osp.join(output_path, 'hmmr_output.pkl')

    if osp.exists(pred_path):
        print('----------')
        print('Loading pre-computed prediction.')
        print('----------')
        with open(pred_path, 'rb') as f:
            preds = pickle.load(f)
    else:
        print('----------')
        print('Running prediction.')
        print('----------')
        preds = model.predict_all_images(images)
        with open(pred_path, 'wb') as f:
            print('Saving prediction results to', pred_path)
            pickle.dump(preds, f)
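# get_labels_poseflow is assumed to parse PoseFlow's JSON into one
# (num_frames, num_joints, 3) keypoint array per track. A hypothetical sketch,
# assuming the common PoseFlow layout of
# {frame_name: [{'keypoints': [x, y, score, ...], 'idx': track_id}, ...]};
# the joint count and key names may differ across PoseFlow versions.
import json
import numpy as np

def get_labels_poseflow_sketch(json_path, num_frames, num_joints=16):
    with open(json_path) as f:
        per_frame = json.load(f)
    tracks = {}
    for t, frame_name in enumerate(sorted(per_frame.keys())):
        for person in per_frame[frame_name]:
            track_id = int(person['idx'])
            kps = np.array(person['keypoints']).reshape(num_joints, 3)
            if track_id not in tracks:
                tracks[track_id] = np.zeros((num_frames, num_joints, 3))
            tracks[track_id][t] = kps
    # Frames where a track is missing stay all-zero (visibility 0), which
    # get_smooth_bbox_params can then skip via vis_thresh.
    return [tracks[i] for i in sorted(tracks)]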
def main(model):
    # Keypoints are only used to compute the bounding box around human tracks.
    # They are not fed into the model. Keypoint format is [x, y, vis]. Keypoint
    # order doesn't matter.
    if config.dataset == '':
        im_paths, kps = load_poseflow_video(config.vid_path, config.out_dir)
        vis_thresh = 0.1
    elif config.dataset == 'penn_action':
        im_paths, kps = load_penn_video(config.penn_dir, config.vid_id)
        vis_thresh = 0.5
    else:
        raise Exception('Dataset {} not recognized'.format(config.dataset))

    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh)
    images = []
    min_f = max(s, 0)
    max_f = min(e, len(kps))
    for i in range(min_f, max_f):
        images.append(
            process_image(im_path=im_paths[i],
                          bbox_param=bbox_params_smooth[i]))

    all_images, vid_paths = process_videos(
        config=config,
        images=images,
        T=(NUM_CONDITION + config.ar_length),
        suffix='AR{}'.format(config.ar_length),
    )
    if not osp.exists(config.out_dir):
        os.mkdir(config.out_dir)
    renderer = VisRenderer(img_size=224)
    for i in range(0, len(all_images), config.batch_size):
        run_predictions(
            config=config,
            renderer=renderer,
            model=model,
            images=all_images[i:i + config.batch_size],
            vid_paths=vid_paths[i:i + config.batch_size],
            num_condition=NUM_CONDITION,
        )
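# process_videos is assumed to cut the frame sequence into windows of length
# T = NUM_CONDITION + ar_length, where the first NUM_CONDITION frames condition
# the model and the remaining ar_length frames are rolled out autoregressively.
# A minimal sketch of that windowing, assuming non-overlapping windows:
def make_windows_sketch(frames, T):
    """Split frames into consecutive length-T windows, dropping any tail."""
    return [frames[i:i + T] for i in range(0, len(frames) - T + 1, T)]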
def add_to_tfrecord(image_paths, gt2ds, coder, writer, feature_extractor=None,
                    augmentor=None, DRAW=False, vis_thresh=0, sigma=3):
    """
    Adds all information from a subject-sequence-camera tuple to a tfrecord.
    """
    image_datas, image_shapes, labels, centers = [], [], [], []
    scale_factors, start_pts = [], []
    images = []  # Used to compute phis if needed.
    bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
        gt2ds, vis_thresh, sigma=sigma)

    for i, (im_path, gt2d, bbox_param) in enumerate(
            list(zip(image_paths, gt2ds, bbox_params))[time_pt1:time_pt2]):
        # This can be improved a lot.
        ret_dict = process_image(im_path, gt2d, coder, bbox_param, DRAW)
        image_datas.append(ret_dict['image_data'])
        image_shapes.append(ret_dict['image_shape'])
        labels.append(ret_dict['label'])
        centers.append(ret_dict['center'])
        scale_factors.append(ret_dict['scale_factors'])
        start_pts.append(ret_dict['start_pt'])
        if feature_extractor is not None:
            # AJ: Make sure the images you send to the augmentor are in [0, 1]!
            images.append(ret_dict['image'] / 255.)

    # Apply data augmentation & feature extraction.
    if feature_extractor:
        pose_dummy = np.zeros((len(labels), 72))
        gt3d_dummy = np.zeros((len(labels), 14, 3))
        ret_dict = augmentor(
            images=images,
            image_sizes=image_shapes,
            labels=labels,
            centers=centers,
            poses=pose_dummy,
            gt3ds=gt3d_dummy)
        augmented_imgs = ret_dict['images']
        labels = ret_dict['labels']
        centers = ret_dict['centers']
        image_shapes = [list(img.shape[:2]) for img in augmented_imgs]
        phis = feature_extractor.compute_all_phis(augmented_imgs)
        del images  # Clear the memory.
        image_datas = [
            coder.encode_jpeg(((img + 1) * 0.5) * 255.)
            for img in augmented_imgs
        ]
    else:
        phis = None

    if not FLAGS.save_img:
        image_datas = None

    example = convert_to_example_temporal(
        image_datas=image_datas,
        image_paths=image_paths,
        image_shapes=image_shapes,
        labels=labels,
        centers=centers,
        gt3ds=None,
        poses=None,
        shape=None,
        scale_factors=scale_factors,
        start_pts=start_pts,
        cams=None,
        phis=phis,
    )
    writer.write(example.SerializeToString())
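# A hypothetical driver for the function above using the TF1-style record
# writer; ImageCoder is assumed to be the JPEG encode/decode helper from the
# same data-utils module.
import tensorflow as tf

def write_sequence_sketch(image_paths, gt2ds, out_path):
    coder = ImageCoder()  # Assumed helper; not defined in this file.
    with tf.python_io.TFRecordWriter(out_path) as writer:
        add_to_tfrecord(image_paths, gt2ds, coder, writer)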
def add_to_tfrecord(image_paths, gt2ds, gt3ds, poses, shape, coder, writer,
                    visualize=False, vis_thresh=0.1, img_size=224, sigma=8):
    """
    Args:
        image_paths (N).
        gt2ds (Nx19x3).
        gt3ds (Nx14x3).
        poses (Nx72).
        shape (10).
        coder (ImageCoder).
        writer (TFRecordWriter).
        visualize (bool).
        vis_thresh (float).
        img_size (int).
    """
    results = {
        'image_data_scaled': [],
        'im_path': [],
        'im_shape': [],
        'kps': [],
        'center': [],
        'scale': [],
        'start_pt': [],
    }
    bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
        gt2ds,
        vis_thresh,
        sigma=sigma,
    )
    if time_pt1 != 0 or time_pt2 != len(image_paths):
        print('Start: {}, End: {}'.format(time_pt1, time_pt2))
    for im_path, kps, bbox in tqdm(
            list(zip(image_paths, gt2ds, bbox_params))[time_pt1:time_pt2]):
        ret_dict = process_image(im_path=im_path,
                                 gt2d=kps,
                                 coder=coder,
                                 bbox_param=bbox,
                                 visualize=visualize,
                                 vis_thresh=vis_thresh,
                                 img_size=img_size)
        for key in ret_dict:
            results[key].append(ret_dict[key])

    # Trim the 3D annotations to the start and end times if they exist.
    if gt3ds is not None:
        gt3ds = gt3ds[time_pt1:time_pt2]
    if poses is not None:
        poses = poses[time_pt1:time_pt2]

    example = convert_to_example_temporal(
        cams=[],
        centers=results['center'],                  # N x 2
        gt3ds=gt3ds,
        image_datas=results['image_data_scaled'],   # N
        image_paths=results['im_path'],             # N
        image_shapes=results['im_shape'],           # N x 2
        labels=results['kps'],                      # N x 3 x 19
        scale_factors=results['scale'],             # N
        start_pts=results['start_pt'],              # N x 2
        time_pts=(time_pt1, time_pt2),              # 2
        poses=poses,                                # N x 72
        shape=shape,                                # 10
    )
    writer.write(example.SerializeToString())
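# Hypothetical call for one sequence with 2D labels only; the shapes follow
# the docstring above, and im_paths/gt2d/coder/writer are placeholders. The
# 3D annotations may be None, which the function handles.
add_to_tfrecord(
    image_paths=im_paths,  # N frame paths.
    gt2ds=gt2d,            # (N, 19, 3) keypoints in [x, y, vis].
    gt3ds=None,
    poses=None,
    shape=None,
    coder=coder,
    writer=writer,
)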
def predict_on_tracks(model, img_dir, poseflow_path, output_path, track_id,
                      trim_length):
    # Get all the images.
    im_paths = sorted(glob(osp.join(img_dir, '*.png')))
    all_kps = get_labels_poseflow(poseflow_path, len(im_paths))

    # Here we set which track to use.
    track_id = min(track_id, len(all_kps) - 1)
    print('Total number of PoseFlow tracks:', len(all_kps))
    print('Processing track_id:', track_id)

    kps = all_kps[track_id]
    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh=0.1)

    images = []
    images_orig = []
    min_f = max(s, 0)
    max_f = min(e, len(kps))
    print('----------')
    print('Preprocessing frames.')
    print('----------')
    for i in range(min_f, max_f):
        proc_params = process_image(
            im_path=im_paths[i],
            bbox_param=bbox_params_smooth[i],
        )
        images.append(proc_params.pop('image'))
        images_orig.append(proc_params)

    if track_id > 0:
        output_path += '_{}'.format(track_id)
    mkdir(output_path)
    pred_path = osp.join(output_path, 'hmmr_output.pkl')

    if osp.exists(pred_path):
        print('----------')
        print('Loading pre-computed prediction.')
        print('----------')
        with open(pred_path, 'rb') as f:
            preds = pickle.load(f)
    else:
        print('----------')
        print('Running prediction.')
        print('----------')
        preds = model.predict_all_images(images)
        with open(pred_path, 'wb') as f:
            print('Saving prediction results to', pred_path)
            pickle.dump(preds, f)

    human_body_info = {}
    human_body_info['anim_len'] = len(preds['cams'])
    human_body_info['cam_array'] = preds['cams']
    # Placeholder array; each frame is filled with axis-angle poses below.
    human_body_info['smpl_array'] = np.zeros((len(preds['cams']), 24, 3),
                                             dtype='float32')

    cams = [0, 0, 0]
    for i in range(len(preds['cams'])):
        temp = rot_mat_to_axis_angle(preds['poses'][i])
        temp = np.reshape(temp, (24, 3))
        human_body_info['smpl_array'][i] = temp
        cams = cams + preds['cams'][i]
    # Average the camera over all frames and broadcast it back.
    cams = cams / len(preds['cams'])
    human_body_info['cam_array'][:] = cams

    with open('../demo/transfer_data/human_body_info.pkl', 'wb') as f:
        print('Saving prediction results to',
              '../demo/transfer_data/human_body_info.pkl')
        pickle.dump(human_body_info, f)
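# rot_mat_to_axis_angle is assumed to convert the per-joint rotation matrices
# in preds['poses'] (24 x 3 x 3 per frame) into 24 axis-angle vectors. A
# hypothetical sketch using scipy (>= 1.4 for from_matrix); cv2.Rodrigues
# would work equally well one joint at a time.
import numpy as np
from scipy.spatial.transform import Rotation

def rot_mat_to_axis_angle_sketch(rot_mats):
    """rot_mats: (24, 3, 3) rotation matrices -> (72,) axis-angle vector."""
    rot_mats = np.reshape(rot_mats, (-1, 3, 3))
    return Rotation.from_matrix(rot_mats).as_rotvec().ravel()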
def predict_on_tracks(model, img_dir, poseflow_path, output_path, track_id,
                      trim_length):
    # Get all the images.
    im_paths = sorted(glob(osp.join(img_dir, '*.png')))
    all_kps = get_labels_poseflow(poseflow_path, len(im_paths))

    # Here we set which track to use.
    track_id = min(track_id, len(all_kps) - 1)
    print('Total number of PoseFlow tracks:', len(all_kps))
    print('Processing track_id:', track_id)

    kps = all_kps[track_id]
    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh=0.1)

    images = []
    images_orig = []
    min_f = max(s, 0)
    max_f = min(e, len(kps))
    print('----------')
    print('Preprocessing frames.')
    print('----------')
    for i in range(min_f, max_f):
        proc_params = process_image(
            im_path=im_paths[i],
            bbox_param=bbox_params_smooth[i],
        )
        images.append(proc_params.pop('image'))
        images_orig.append(proc_params)

    if track_id > 0:
        output_path += '_{}'.format(track_id)
    mkdir(output_path)

    # George's revision: make a directory for saving the joint rotation
    # matrices and keypoints as JSON. Assumes output_path has exactly one
    # parent directory.
    without = output_path.split(os.sep)[:-1]  # Path without hmmr_output.
    if len(without) > 1:
        print('Expected output_path with a single parent directory, got:',
              output_path)
        sys.exit(1)
    without = without[0]
    myjson_dir = osp.join(without, 'rot_output')
    myrot_path = osp.join(myjson_dir, 'rot_output.json')
    mykps_path = osp.join(myjson_dir, 'kps_output.json')
    mkdir(myjson_dir)

    pred_path = osp.join(output_path, 'hmmr_output.pkl')
    if osp.exists(pred_path):
        print('----------')
        print('Loading pre-computed prediction.')
        print('----------')
        with open(pred_path, 'rb') as f:
            preds = pickle.load(f)
    else:
        print('----------')
        print('Running prediction.')
        print('----------')
        preds = model.predict_all_images(images)
        with open(pred_path, 'wb') as f:
            print('Saving prediction results to', pred_path)
            pickle.dump(preds, f)

    # Save the predicted keypoints, one dict per frame.
    mykps = preds['kps']
    totalkpsdict = {}
    totalkpsdict['frame_Count'] = mykps.shape[0]
    for i in range(mykps.shape[0]):
        frame_index = 'frame_' + '%04d' % i
        framedict = {}
        for j in range(mykps.shape[1]):
            kpslist = [float(v) for v in mykps[i][j]]
            framedict['kps_' + '%02d' % j] = kpslist
        totalkpsdict[frame_index] = framedict
    print('Saving kps results to', mykps_path)
    with open(mykps_path, 'w') as jf:
        json.dump(totalkpsdict, jf, sort_keys=True)

    # Save the per-joint rotation matrices, one dict per frame.
    myposes = preds['poses']
    totaldict = {}
    totaldict['frame_Count'] = myposes.shape[0]
    print('There are {} frames in total.'.format(myposes.shape[0]))
    print('----------')
    for i in range(myposes.shape[0]):
        frame_index = 'frame_' + '%04d' % i
        framedict = {}
        print('Processing frame: {}'.format(frame_index))
        for j in range(myposes.shape[1]):
            rotmat = myposes[i][j]
            rotlist = [float(v) for v in np.reshape(rotmat, (1, -1))[0]]
            framedict['rot_' + '%02d' % j] = rotlist
        totaldict[frame_index] = framedict
    print('----------')
    print('Saving rot results to', myrot_path)
    with open(myrot_path, 'w') as jf:
        json.dump(totaldict, jf, sort_keys=True)
    # End of George's revision.

    if trim_length > 0:
        output_path += '_trim'
    print('----------')
    print('Rendering results to {}.'.format(output_path))
    print('----------')
    # 'preds' is short for predictions; next, render the SMPL model over the
    # original frames.
    render_preds(
        output_path=output_path,
        config=config,
        preds=preds,
        images=images,
        images_orig=images_orig,
        trim_length=trim_length,
    )
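# The JSON files written above can be read back like this; the path is a
# placeholder and the key names mirror the writer ('frame_%04d', 'rot_%02d').
import json

with open('rot_output/rot_output.json') as jf:
    rots = json.load(jf)
root_rot = rots['frame_0000']['rot_00']  # 9 floats: a flattened 3x3 rotation matrix.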