def make_mpii():
    joint_names = (
        'rank,rkne,rhip,lhip,lkne,lank,pelv,thor,neck,head,rwri,relb,rsho,lsho,lelb,lwri')
    edges = 'lsho-lelb-lwri,rsho-relb-rwri,lhip-lkne-lank,rhip-rkne-rank,neck-head,pelv-thor'
    joint_info_full = JointInfo(joint_names, edges)

    joint_names_used = 'rank,rkne,rhip,lhip,lkne,lank,rwri,relb,rsho,lsho,lelb,lwri'
    joint_info_used = JointInfo(joint_names_used, edges)
    dataset = Pose2DDataset(joint_info_used)
    selected_joints = [joint_info_full.ids[name] for name in joint_info_used.names]

    mat_path = f'{paths.DATA_ROOT}/mpii/mpii_human_pose_v1_u12_1.mat'
    s = matlabfile.load(mat_path).RELEASE
    annolist = np.atleast_1d(s.annolist)
    pool = util.BoundedPool(None, 120)

    for anno, is_train, rect_ids in zip(
            annolist, util.progressbar(s.img_train), s.single_person):
        if not is_train:
            continue

        image_path = f'mpii/images/{anno.image.name}'
        annorect = np.atleast_1d(anno.annorect)
        rect_ids = np.atleast_1d(rect_ids) - 1

        for rect_id in rect_ids:
            rect = annorect[rect_id]
            if 'annopoints' not in rect or len(rect.annopoints) == 0:
                continue

            coords = np.full(
                shape=[joint_info_full.n_joints, 2], fill_value=np.nan, dtype=np.float32)
            for joint in np.atleast_1d(rect.annopoints.point):
                coords[joint.id] = [joint.x, joint.y]
            coords = coords[selected_joints]

            rough_person_center = np.float32([rect.objpos.x, rect.objpos.y])
            rough_person_size = rect.scale * 200

            # Shift person center down like [Sun et al. 2018], who say this is common on MPII
            rough_person_center[1] += 0.075 * rough_person_size
            topleft = np.array(rough_person_center) - np.array(rough_person_size) / 2
            bbox = np.array([topleft[0], topleft[1], rough_person_size, rough_person_size])

            ex = Pose2DExample(image_path, coords, bbox=bbox)
            new_im_path = image_path.replace('mpii', 'mpii_downscaled')
            without_ext, ext = os.path.splitext(new_im_path)
            new_im_path = f'{without_ext}_{rect_id:02d}{ext}'
            pool.apply_async(
                make_efficient_example, (ex, new_im_path),
                callback=dataset.examples[TRAIN].append)

    print('Waiting for tasks...')
    pool.close()
    pool.join()
    print('Done...')
    dataset.examples[TRAIN].sort(key=lambda x: x.image_path)
    return dataset
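
# Illustrative sketch (not part of the original pipeline): how the rough MPII person box
# above is derived from the annotated `objpos` and `scale`. MPII's `scale` is expressed in
# units of a 200 px person height, and the box center is shifted down by 7.5% of the box
# side, matching the convention used in make_mpii(). The default arguments are made-up
# example values.
def _example_mpii_box_from_objpos(objpos_x=500.0, objpos_y=300.0, scale=1.5):
    import numpy as np
    size = scale * 200  # MPII scale is relative to a 200 px person height
    center = np.float32([objpos_x, objpos_y + 0.075 * size])  # shift center downward
    topleft = center - size / 2
    return np.array([topleft[0], topleft[1], size, size])  # [x, y, width, height]
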
def make_mupots():
    joint_names = (
        'htop,neck,rsho,relb,rwri,lsho,lelb,lwri,rhip,rkne,rank,lhip,lkne,lank,spin,head,pelv')
    edges = (
        'htop-head-neck-spin-pelv-lhip-lkne-lank,'
        'lwri-lelb-lsho-neck-rsho-relb-rwri,pelv-rhip-rkne-rank')
    joint_info = p3ds.JointInfo(joint_names, edges)
    # import data.muco
    # joint_info = data.muco.make_joint_info()[0]

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')
    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []

    for i_seq in range(1, 21):
        annotations = matlabfile.load(f'{root}/TS{i_seq}/annot.mat')['annotations']
        intrinsic_matrix = intrinsic_matrices[f'TS{i_seq}']
        camera = cameralib.Camera(
            np.zeros(3), np.eye(3), intrinsic_matrix, distortion_coeffs=None,
            world_up=(0, -1, 0))

        n_frames = annotations.shape[0]
        for i_frame in range(n_frames):
            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'
            for detection in detections_frame:
                confidence = detection[4]
                if confidence > 0.1:
                    ex = p3ds.Pose3DExample(
                        os.path.relpath(image_path, paths.DATA_ROOT),
                        dummy_coords, detection[:4], camera, mask=None,
                        univ_coords=dummy_coords, scene_name=f'TS{i_seq}')
                    examples_test.append(ex)

    return p3ds.Pose3DDataset(
        joint_info, valid_examples=examples_val, test_examples=examples_test)
def get_all_gt_poses():
    n_seq = 20
    all_true2d = [[] for _ in range(n_seq)]
    all_true3d = [[] for _ in range(n_seq)]
    all_true3d_univ = [[] for _ in range(n_seq)]

    for i_seq in range(n_seq):
        anno_path = f'{paths.DATA_ROOT}/mupots/TS{i_seq + 1}/annot.mat'
        annotations = matlabfile.load(anno_path).annotations
        n_frames, n_people = annotations.shape
        for i_frame in range(n_frames):
            valid_annotations = [
                annotations[i_frame, i_person] for i_person in range(n_people)
                if annotations[i_frame, i_person].isValidFrame]
            all_true2d[i_seq].append(
                np.array([anno.annot2.T for anno in valid_annotations]))
            all_true3d[i_seq].append(
                np.array([anno.annot3.T for anno in valid_annotations]))
            all_true3d_univ[i_seq].append(
                np.array([anno.univ_annot3.T for anno in valid_annotations]))

    return all_true2d, all_true3d, all_true3d_univ
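
# Minimal usage sketch for get_all_gt_poses(), assuming the MuPoTS annotation files are
# available under paths.DATA_ROOT. It only illustrates the nesting of the returned lists:
# one list per sequence, one array per frame, stacked over the valid people in that frame.
def _example_inspect_gt_poses():
    all_true2d, all_true3d, all_true3d_univ = get_all_gt_poses()
    for i_seq, frames in enumerate(all_true3d):
        n_poses = sum(len(poses_of_frame) for poses_of_frame in frames)
        print(f'TS{i_seq + 1}: {len(frames)} frames, {n_poses} valid 3D poses')
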
def main():
    initialize()
    model = tf.saved_model.load(FLAGS.model_path)
    ji3d = get_joint_info(model, skeleton='mpi_inf_3dhp_17')

    predict_fn = functools.partial(
        model.detect_poses_batched, internal_batch_size=FLAGS.inner_batch_size,
        num_aug=FLAGS.num_aug, detector_threshold=0.2, detector_nms_iou_threshold=0.7,
        detector_flip_aug=True, antialias_factor=2, suppress_implausible_poses=False,
        skeleton='mpi_inf_3dhp_17')

    viz = poseviz.PoseViz(
        ji3d.names, ji3d.stick_figure_edges, write_video=bool(FLAGS.out_video_dir),
        world_up=(0, -1, 0), downscale=4,
        queue_size=2 * FLAGS.batch_size) if FLAGS.viz else None

    image_relpaths_all = []
    poses_all = []
    intrinsic_matrices = util.load_json(f'{paths.DATA_ROOT}/mupots/camera_intrinsics.json')

    for i_seq in range(1, 21):
        if FLAGS.viz:
            viz.new_sequence()
            if FLAGS.out_video_dir:
                viz.start_new_video(f'{FLAGS.out_video_dir}/TS{i_seq}.mp4', fps=25)

        annotations = matlabfile.load(
            f'{paths.DATA_ROOT}/mupots/TS{i_seq}/annot.mat')['annotations']
        camera = cameralib.Camera(
            intrinsic_matrix=intrinsic_matrices[f'TS{i_seq}'], world_up=(0, -1, 0))

        frame_relpaths = [
            f'mupots/TS{i_seq}/img_{i:06d}.jpg' for i in range(annotations.shape[0])]
        frame_paths = [f'{paths.DATA_ROOT}/{p}' for p in frame_relpaths]
        frames_gpu, frames_cpu = video_io.image_files_as_tf_dataset(
            frame_paths, batch_size=FLAGS.batch_size, prefetch_gpu=2, tee_cpu=True)

        poses_per_frame = predict_sequence(
            predict_fn, frames_gpu, frames_cpu, len(frame_paths), camera, viz)
        for poses_of_frame, frame_relpath in zip(poses_per_frame, frame_relpaths):
            image_relpaths_all.extend([frame_relpath] * len(poses_of_frame))
            poses_all.extend(poses_of_frame)

    np.savez(
        FLAGS.output_path, image_path=np.stack(image_relpaths_all, axis=0),
        coords3d_pred_world=np.stack(poses_all, axis=0))

    if viz is not None:
        viz.close()
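
# Minimal sketch of reading the prediction file written by main() above. The npz stores one
# row per detected pose: a MuPoTS-relative image path and the predicted world-space joint
# coordinates. The path 'predictions_mupots.npz' is only a placeholder for FLAGS.output_path.
def _example_load_predictions(path='predictions_mupots.npz'):
    import numpy as np
    results = np.load(path)
    image_paths = results['image_path']        # shape [n_poses]
    poses3d = results['coords3d_pred_world']   # shape [n_poses, n_joints, 3]
    print(f'{len(image_paths)} poses predicted over {len(set(image_paths))} frames')
    return image_paths, poses3d
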
def make_mpi_inf_3dhp(camera_ids=(0, 1, 2, 4, 5, 6, 7, 8)):
    all_short_names = (
        'spi3,spi4,spi2,spin,pelv,neck,head,htop,lcla,lsho,lelb,lwri,lhan,rcla,rsho,relb,rwri,'
        'rhan,lhip,lkne,lank,lfoo,ltoe,rhip,rkne,rank,rfoo,rtoe'.split(','))

    test_set_selected_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 14]
    selected_joints = [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 3, 6, 4]
    joint_names = [all_short_names[j] for j in selected_joints]
    edges = (
        'htop-head-neck-lsho-lelb-lwri,neck-rsho-relb-rwri,neck-spin-pelv-lhip-lkne-lank,'
        'pelv-rhip-rkne-rank')
    joint_info = p3ds.JointInfo(joint_names, edges)

    root_3dhp = f'{paths.DATA_ROOT}/3dhp'
    detections_all = util.load_pickle(f'{paths.DATA_ROOT}/3dhp/yolov3_person_detections.pkl')

    #################################
    # TRAINING AND VALIDATION SET
    #################################
    num_frames = np.asarray(
        [[6416, 12430], [6502, 6081], [12488, 12283], [6171, 6675], [12820, 12312],
         [6188, 6145], [6239, 6320], [6468, 6054]])

    train_subjects = [0, 1, 2, 3, 4, 5, 6]
    valid_subjects = [7]  # this is my own arbitrary split for validation (Istvan Sarandi)
    train_examples = []
    valid_examples = []
    pool = util.BoundedPool(None, 120)

    for i_subject, i_seq, i_cam in itertools.product(
            train_subjects + valid_subjects, range(2), camera_ids):
        seqpath = f'{root_3dhp}/S{i_subject + 1}/Seq{i_seq + 1}'
        print(f'Processing {seqpath} camera {i_cam}')

        cam3d_coords = [
            ann.reshape([ann.shape[0], -1, 3])[:, selected_joints]
            for ann in matlabfile.load(f'{seqpath}/annot.mat')['annot3']]
        univ_cam3d_coords = [
            ann.reshape([ann.shape[0], -1, 3])[:, selected_joints]
            for ann in matlabfile.load(f'{seqpath}/annot.mat')['univ_annot3']]
        cameras = load_cameras(f'{seqpath}/camera.calibration')

        examples_container = train_examples if i_subject in train_subjects else valid_examples
        frame_step = 5
        prev_coords = None
        camera = cameras[i_cam]
        n_frames = num_frames[i_subject, i_seq]

        if i_subject == 5 and i_seq == 1 and i_cam == 2:
            # This video is shorter for some reason
            n_frames = 3911

        for i_frame in util.progressbar(range(0, n_frames, frame_step)):
            image_relpath = (
                f'3dhp/S{i_subject + 1}/Seq{i_seq + 1}/'
                f'imageSequence/img_{i_cam}_{i_frame:06d}.jpg')
            cam_coords = cam3d_coords[i_cam][i_frame]
            world_coords = cameras[i_cam].camera_to_world(cam_coords)
            univ_camcoords = univ_cam3d_coords[i_cam][i_frame]
            univ_world_coords = cameras[i_cam].camera_to_world(univ_camcoords)

            # Check if the joints are within the image frame bounds
            if not np.all(camera.is_visible(world_coords, [2048, 2048])):
                continue

            im_coords = camera.camera_to_image(cam_coords)
            bbox = get_bbox(im_coords, image_relpath, detections_all)

            # Adaptive temporal sampling
            if (prev_coords is not None and
                    np.all(np.linalg.norm(world_coords - prev_coords, axis=1) < 100)):
                continue
            prev_coords = world_coords

            mask_path = image_relpath.replace('imageSequence', 'FGmasks')
            new_image_relpath = image_relpath.replace('3dhp', '3dhp_downscaled')
            ex = p3ds.Pose3DExample(
                image_relpath, world_coords, bbox, camera,
                mask=mask_path, univ_coords=univ_world_coords)
            pool.apply_async(
                make_efficient_example, (ex, new_image_relpath, 1, True),
                callback=examples_container.append)

    print('Waiting for tasks...')
    pool.close()
    pool.join()
    print('Done...')

    #################################
    # TEST SET
    #################################
    test_examples = []
    cam1_4 = make_3dhp_test_camera(
        sensor_size=np.array([10, 10]), im_size=np.array([2048, 2048]), focal_length=7.32506,
        pixel_aspect=1.00044, center_offset=np.array([-0.0322884, 0.0929296]), distortion=None,
        origin=np.array([3427.28, 1387.86, 309.42]), up=np.array([-0.208215, 0.976233, 0.06014]),
        right=np.array([0.000575281, 0.0616098, -0.9981]))

    cam5_6 = make_3dhp_test_camera(
        sensor_size=np.array([10, 5.625]), im_size=np.array([1920, 1080]),
        focal_length=8.770747185, pixel_aspect=0.993236423,
        center_offset=np.array([-0.104908645, 0.104899704]),
        distortion=np.array(
            [-0.276859611, 0.131125256, -0.000360494, -0.001149441, -0.049318332]),
        origin=np.array([-2104.3074, 1038.6707, -4596.6367]),
        up=np.array([0.025272345, 0.995038509, 0.096227370]),
        right=np.array([-0.939647257, -0.009210289, 0.342020929]))

    activity_names = [
        'Stand/Walk', 'Exercise', 'Sit on Chair', 'Reach/Crouch', 'On Floor', 'Sports', 'Misc.']

    for i_subject in range(1, 7):
        seqpath = f'{root_3dhp}/TS{i_subject}'
        annotation_path = f'{seqpath}/annot_data.mat'

        with h5py.File(annotation_path, 'r') as m:
            cam3d_coords = np.array(m['annot3'])[:, 0, test_set_selected_joints]
            univ_cam3d_coords = np.array(m['univ_annot3'])[:, 0, test_set_selected_joints]
            valid_frames = np.where(m['valid_frame'][:, 0])[0]
            activity_ids = m['activity_annotation'][:, 0].astype(int) - 1

        camera = cam1_4 if i_subject <= 4 else cam5_6
        scene = ['green-screen', 'no-green-screen', 'outdoor'][(i_subject - 1) // 2]

        for i_frame in valid_frames:
            image_relpath = f'3dhp/TS{i_subject}/imageSequence/img_{i_frame + 1:06d}.jpg'
            cam_coords = cam3d_coords[i_frame]
            univ_camcoords = univ_cam3d_coords[i_frame]
            activity = activity_names[activity_ids[i_frame]]
            world_coords = camera.camera_to_world(cam_coords)
            univ_world_coords = camera.camera_to_world(univ_camcoords)
            im_coords = camera.camera_to_image(cam_coords)
            bbox = get_bbox(im_coords, image_relpath, detections_all)
            ex = p3ds.Pose3DExample(
                image_relpath, world_coords, bbox, camera,
                activity_name=activity, scene_name=scene, univ_coords=univ_world_coords)
            test_examples.append(ex)

    train_examples.sort(key=lambda x: x.image_path)
    valid_examples.sort(key=lambda x: x.image_path)
    test_examples.sort(key=lambda x: x.image_path)
    return p3ds.Pose3DDataset(joint_info, train_examples, valid_examples, test_examples)
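
# Standalone sketch of the adaptive temporal sampling rule used in make_mpi_inf_3dhp():
# a frame is kept only if at least one joint has moved by 100 mm or more since the most
# recently kept frame. `pose_sequence` stands in for the per-frame world coordinates and
# is an assumption of this example, not a name from the original code.
def _example_adaptive_sampling(pose_sequence, min_movement_mm=100.0):
    import numpy as np
    kept_indices = []
    prev_coords = None
    for i_frame, coords in enumerate(pose_sequence):  # coords: [n_joints, 3] in millimeters
        if prev_coords is not None and np.all(
                np.linalg.norm(coords - prev_coords, axis=1) < min_movement_mm):
            continue  # no joint moved enough; skip this frame
        prev_coords = coords
        kept_indices.append(i_frame)
    return kept_indices
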
def make_mpii_yolo():
    joint_info_full = JointInfo(
        'rank,rkne,rhip,lhip,lkne,lank,pelv,thor,neck,head,rwri,relb,rsho,lsho,lelb,lwri',
        'lsho-lelb-lwri,rsho-relb-rwri,lhip-lkne-lank,rhip-rkne-rank,neck-head,pelv-thor')
    joint_info_used = JointInfo(
        'rank,rkne,rhip,lhip,lkne,lank,rwri,relb,lelb,lwri',
        'lelb-lwri,relb-rwri,lhip-lkne-lank,rhip-rkne-rank')
    selected_joints = [joint_info_full.ids[name] for name in joint_info_used.names]

    mat_path = f'{paths.DATA_ROOT}/mpii/mpii_human_pose_v1_u12_1.mat'
    s = matlabfile.load(mat_path).RELEASE
    annolist = np.atleast_1d(s.annolist)
    all_boxes = util.load_pickle(f'{paths.DATA_ROOT}/mpii/yolov3_detections.pkl')

    examples = []
    with util.BoundedPool(None, 120) as pool:
        for anno_id, (anno, is_train) in enumerate(
                zip(annolist, util.progressbar(s.img_train))):
            if not is_train:
                continue

            image_path = f'{paths.DATA_ROOT}/mpii/images/{anno.image.name}'
            annorect = np.atleast_1d(anno.annorect)
            gt_people = []

            for rect_id, rect in enumerate(annorect):
                if 'annopoints' not in rect or len(rect.annopoints) == 0:
                    continue

                coords = np.full(
                    shape=[joint_info_full.n_joints, 2], fill_value=np.nan, dtype=np.float32)
                for joint in np.atleast_1d(rect.annopoints.point):
                    coords[joint.id] = [joint.x, joint.y]

                bbox = boxlib.expand(boxlib.bb_of_points(coords), 1.25)
                coords = coords[selected_joints]
                ex = Pose2DExample(image_path, coords, bbox=bbox)
                gt_people.append(ex)

            if not gt_people:
                continue

            image_relpath = os.path.relpath(f'images/{anno.image.name}')
            boxes = [box for box in all_boxes[image_relpath] if box[-1] > 0.5]
            if not boxes:
                continue

            iou_matrix = np.array(
                [[boxlib.iou(gt_person.bbox, box[:4]) for box in boxes]
                 for gt_person in gt_people])
            gt_indices, box_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)

            for i_gt, i_det in zip(gt_indices, box_indices):
                if iou_matrix[i_gt, i_det] > 0.1:
                    ex = gt_people[i_gt]
                    ex.bbox = np.array(boxes[i_det][:4])
                    new_im_path = image_path.replace('mpii', 'mpii_downscaled_yolo')
                    without_ext, ext = os.path.splitext(new_im_path)
                    new_im_path = f'{without_ext}_{i_gt:02d}{ext}'
                    pool.apply_async(
                        make_efficient_example, (ex, new_im_path), callback=examples.append)

    examples.sort(key=lambda ex: ex.image_path)

    def n_valid_joints(example):
        return np.count_nonzero(np.all(~np.isnan(example.coords), axis=-1))

    examples = [ex for ex in examples if n_valid_joints(ex) > 6]
    return Pose2DDataset(joint_info_used, examples)
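
# Self-contained sketch of the detection-to-ground-truth matching used in make_mpii_yolo()
# above (and make_mupots_yolo() below): build an IoU matrix between ground-truth boxes and
# detector boxes, solve the assignment with the Hungarian algorithm, and keep matches with
# IoU above a threshold. A plain-NumPy IoU is used here instead of the repo's boxlib.iou;
# boxes are [x, y, w, h]. This is an illustration, not the functions' actual code path.
def _example_match_boxes(gt_boxes, det_boxes, iou_threshold=0.1):
    import numpy as np
    import scipy.optimize

    def iou(a, b):
        ax2, ay2 = a[0] + a[2], a[1] + a[3]
        bx2, by2 = b[0] + b[2], b[1] + b[3]
        iw = max(0.0, min(ax2, bx2) - max(a[0], b[0]))
        ih = max(0.0, min(ay2, by2) - max(a[1], b[1]))
        inter = iw * ih
        union = a[2] * a[3] + b[2] * b[3] - inter
        return inter / union if union > 0 else 0.0

    iou_matrix = np.array([[iou(g, d) for d in det_boxes] for g in gt_boxes])
    # Maximize total IoU by minimizing its negative
    gt_indices, det_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)
    return [(i_gt, i_det) for i_gt, i_det in zip(gt_indices, det_indices)
            if iou_matrix[i_gt, i_det] > iou_threshold]
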
def make_mupots_yolo():
    all_short_names = (
        'thor,spi4,spi2,spin,pelv,neck,head,htop,lcla,lsho,lelb,lwri,lhan,rcla,rsho,relb,rwri,'
        'rhan,lhip,lkne,lank,lfoo,ltoe,rhip,rkne,rank,rfoo,rtoe'.split(','))

    # originally: [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 4, 3, 6]
    selected_joints = [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 3, 6, 4]
    order_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 14]

    joint_names = [all_short_names[j] for j in selected_joints]
    j = p3ds.JointInfo.make_id_map(joint_names)
    edges = [
        (j.htop, j.head), (j.head, j.neck), (j.neck, j.lsho), (j.lsho, j.lelb),
        (j.lelb, j.lwri), (j.neck, j.rsho), (j.rsho, j.relb), (j.relb, j.rwri),
        (j.neck, j.spin), (j.spin, j.pelv), (j.pelv, j.lhip), (j.lhip, j.lkne),
        (j.lkne, j.lank), (j.pelv, j.rhip), (j.rhip, j.rkne), (j.rkne, j.rank)]
    joint_info = p3ds.JointInfo(j, edges)

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')
    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []

    for i_seq in range(1, 21):
        annotations = matlabfile.load(f'{root}/TS{i_seq}/annot.mat')['annotations']
        intrinsic_matrix = intrinsic_matrices[f'TS{i_seq}']
        camera = cameralib.Camera(
            np.zeros(3), np.eye(3), intrinsic_matrix, distortion_coeffs=None,
            world_up=(0, -1, 0))

        n_people = annotations.shape[1]
        n_frames = annotations.shape[0]
        for i_frame in range(n_frames):
            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'

            for detection in detections_frame:
                if detection[4] > 0.1:
                    ex = p3ds.Pose3DExample(
                        image_path, dummy_coords, detection[:4], camera, mask=None,
                        univ_coords=dummy_coords, scene_name=f'TS{i_seq}')
                    examples_test.append(ex)

            gt_people = []
            for i_person in range(n_people):
                world_coords = np.array(
                    annotations[i_frame, i_person].annot3.T[order_joints], dtype=np.float32)
                univ_world_coords = np.array(
                    annotations[i_frame, i_person].univ_annot3.T[order_joints],
                    dtype=np.float32)
                im_coords = camera.world_to_image(world_coords)
                gt_box = boxlib.expand(boxlib.bb_of_points(im_coords), 1.1)
                ex = p3ds.Pose3DExample(
                    image_path, world_coords, gt_box, camera, mask=None,
                    univ_coords=univ_world_coords, scene_name=f'TS{i_seq}')
                gt_people.append(ex)

            confident_detections = [det for det in detections_frame if det[-1] > 0.1]
            if confident_detections:
                iou_matrix = np.array(
                    [[boxlib.iou(gt_person.bbox, box[:4]) for box in confident_detections]
                     for gt_person in gt_people])
                gt_indices, detection_indices = scipy.optimize.linear_sum_assignment(
                    -iou_matrix)
                for i_gt, i_det in zip(gt_indices, detection_indices):
                    if iou_matrix[i_gt, i_det] > 0.1:
                        ex = gt_people[i_gt]
                        ex.bbox = np.array(confident_detections[i_det][:4])
                        examples_val.append(ex)

    return p3ds.Pose3DDataset(
        joint_info, valid_examples=examples_val, test_examples=examples_test)