def make_efficient_example(ex, root_muco, i_person):
    """Create a cropped, downscaled copy of an example's image and a matching example.

    A square region around ``ex.bbox`` is expanded, clipped to a 2048x2048
    frame, and reprojected into a new (scaled, undistorted) camera. The
    resulting JPEG is written under ``{root_muco}_downscaled`` unless
    ``util.is_file_newer`` reports that the file on disk is already recent
    enough. The bounding box and the mask are mapped into the new image.

    Args:
        ex: Source example; its ``image_path``, ``bbox``, ``camera``, ``mask``,
            ``world_coords`` and ``univ_coords`` attributes are read.
        root_muco: Root directory of the source images and the mask pickles.
        i_person: Index of the person, used as a suffix of the output file name.

    Returns:
        A new ``p3ds.Pose3DExample`` pointing at the downscaled image (path
        relative to ``paths.DATA_ROOT``) with float32 coordinates and box.
    """
    src_relpath = ex.image_path

    # Margins baked into the crop (presumably headroom for later augmentation).
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.2
    base_dst_side = 256

    # Size of the box after mixing width and height with sin/cos of the
    # maximum rotation; the larger of the two becomes the side of a square.
    sin_r = np.sin(max_rotate)
    cos_r = np.cos(max_rotate)
    mixing = np.array([[cos_r, sin_r], [sin_r, cos_r]])
    rotated_size = (mixing @ ex.bbox[2:, np.newaxis])[:, 0]
    square_side = np.max(rotated_size)
    square_box = boxlib.box_around(
        boxlib.center(ex.bbox), np.array([square_side, square_side]))

    # Never upscale: the factor is capped at 1.
    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor, 1)
    expansion_factor = padding_factor * shift_factor * scale_down_factor
    crop_box = boxlib.intersect(
        boxlib.expand(square_box, expansion_factor),
        boxlib.full_box([2048, 2048]))

    # Camera of the cropped and scaled image.
    new_camera = ex.camera.copy()
    new_camera.intrinsic_matrix[:2, 2] -= crop_box[:2]
    new_camera.scale_output(scale_factor)
    new_camera.undistort()

    # crop_box[[3, 2]] selects (height, width) of the crop.
    dst_shape = improc.rounded_int_tuple(scale_factor * crop_box[[3, 2]])
    new_im_path = f'{root_muco}_downscaled/{src_relpath[:-4]}_{i_person:01d}.jpg'

    if not (util.is_file_newer(new_im_path, "2020-02-15T23:28:26")):
        src_image = improc.imread_jpeg(f'{root_muco}/{src_relpath}')
        cropped_image = cameralib.reproject_image(
            src_image, ex.camera, new_camera, dst_shape, antialias_factor=4)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, cropped_image, quality=95)

    topleft = cameralib.reproject_image_points(ex.bbox[:2], ex.camera, new_camera)
    new_bbox = np.concatenate([topleft, ex.bbox[2:] * scale_factor])

    mask = ex.mask
    if mask is None:
        # No mask on the example: load it from the per-image pickle.
        mask_stem = os.path.splitext(src_relpath[:-4])[0]
        mask_stem = mask_stem.replace('unaugmented_set_001/', '')
        mask = improc.decode_mask(
            util.load_pickle(f'{root_muco}/masks/{mask_stem}.pkl'))

    if mask is False:
        new_mask_encoded = None
    else:
        reprojected_mask = cameralib.reproject_image(
            mask, ex.camera, new_camera, dst_shape)
        new_mask_encoded = improc.encode_mask(reprojected_mask)

    return p3ds.Pose3DExample(
        os.path.relpath(new_im_path, paths.DATA_ROOT),
        ex.world_coords.astype(np.float32),
        new_bbox.astype(np.float32),
        new_camera,
        mask=new_mask_encoded,
        univ_coords=ex.univ_coords.astype(np.float32))
def make_mupots():
    """Build the MuPoTS dataset object from person detections only.

    For each of the sequences TS1..TS20, every detection with confidence
    above 0.1 becomes a test example with placeholder (all-ones) coordinates.
    The annotation file is read only to determine the number of frames.
    The validation list is returned empty.

    Returns:
        A ``p3ds.Pose3DDataset`` with ``valid_examples`` (empty) and
        ``test_examples``.
    """
    joint_names = (
        'htop,neck,rsho,relb,rwri,lsho,lelb,lwri,rhip,rkne,rank,lhip,lkne,lank,spin,head,pelv'
    )
    edges = ('htop-head-neck-spin-pelv-lhip-lkne-lank,'
             'lwri-lelb-lsho-neck-rsho-relb-rwri,pelv-rhip-rkne-rank')
    joint_info = p3ds.JointInfo(joint_names, edges)

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')
    # Placeholder pose used for both the metric and the universal coordinates.
    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []

    for i_seq in range(1, 21):
        seq_name = f'TS{i_seq}'
        annotations = matlabfile.load(f'{root}/TS{i_seq}/annot.mat')['annotations']
        camera = cameralib.Camera(
            np.zeros(3), np.eye(3), intrinsic_matrices[seq_name],
            distortion_coeffs=None, world_up=(0, -1, 0))

        for i_frame in range(annotations.shape[0]):
            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'

            for detection in detections_frame:
                # Element 4 of a detection is its confidence.
                if not detection[4] > 0.1:
                    continue
                examples_test.append(p3ds.Pose3DExample(
                    os.path.relpath(image_path, paths.DATA_ROOT),
                    dummy_coords, detection[:4], camera, mask=None,
                    univ_coords=dummy_coords, scene_name=f'TS{i_seq}'))

    return p3ds.Pose3DDataset(
        joint_info, valid_examples=examples_val, test_examples=examples_test)
def generate_poseviz_gt(i_subject, activity_name, camera_id):
    """Export ground truth of one H36M recording for visualization.

    All frames (``frame_step=1``) of the given subject/activity/camera are
    fetched via ``get_examples`` and written in two forms under
    ``{paths.DATA_ROOT}/h36m/poseviz``: a pickled ``ps3d.Pose3DDataset`` and a
    JSON file with joint names, skeleton edges, the camera's up vector and
    per-frame information.

    Args:
        i_subject: Subject number, used in the output file name.
        activity_name: Name of the activity, used in the output file name.
        camera_id: Index (0..3) into the list of camera names.
    """
    camera_names = ['54138969', '55011271', '58860488', '60457274']
    camera_name = camera_names[camera_id]

    image_relpaths, world_coords_all, bboxes, camera = get_examples(
        i_subject, activity_name, camera_id, frame_step=1, correct_S9=True)

    frame_infos = []
    examples = []
    for relpath, coords, box in zip(image_relpaths, world_coords_all, bboxes):
        frame_info = {
            'gt_poses': [coords.tolist()],
            'camera_intrinsics': camera.intrinsic_matrix.tolist(),
            'camera_extrinsics': camera.get_extrinsic_matrix().tolist(),
            'image_path': relpath,
            'bboxes': [box.tolist()]
        }
        frame_infos.append(frame_info)
        examples.append(ps3d.Pose3DExample(
            relpath, coords, box, camera, activity_name=activity_name))

    joint_names = ('rhip,rkne,rank,lhip,lkne,lank,tors,neck,head,htop,'
                   'lsho,lelb,lwri,rsho,relb,rwri,pelv'.split(','))
    edges = ('htop-head-neck-lsho-lelb-lwri,neck-rsho-relb-rwri,'
             'neck-tors-pelv-lhip-lkne-lank,pelv-rhip-rkne-rank')
    joint_info = ps3d.JointInfo(joint_names, edges)

    # Both output files share the same path up to the extension.
    out_stem = f'{paths.DATA_ROOT}/h36m/poseviz/S{i_subject}_{activity_name}_{camera_name}'

    dataset = ps3d.Pose3DDataset(joint_info, test_examples=examples)
    util.dump_pickle(dataset, f'{out_stem}.pkl')

    output = {
        'joint_names': joint_info.names,
        'stick_figure_edges': joint_info.stick_figure_edges,
        'world_up': camera.world_up.tolist(),
        'frame_infos': frame_infos,
    }
    util.dump_json(output, f'{out_stem}.json')
def get_examples(phase, pool):
    """Schedule creation of downscaled examples for every valid person-frame of a phase.

    Each sequence pickle under ``{root}/sequenceFiles/{phase}`` is read, and
    for every person and frame with a valid camera pose and non-zero 2D
    annotations an example is built and handed to ``make_efficient_example``
    through ``pool.apply_async``.

    Args:
        phase: Name of the subdirectory of ``sequenceFiles`` to process.
        pool: Object with an ``apply_async(func, args, callback=...)`` method.

    Returns:
        The list that the async callbacks append to. NOTE(review): it is
        returned without waiting here, so it may still be filling up until
        the caller has waited for the pool's tasks — confirm with the caller.
    """
    result = []

    for filepath in glob.glob(f'{root}/sequenceFiles/{phase}/*.pkl'):
        with open(filepath, 'rb') as f:
            seq = pickle.load(f, encoding='latin1')

        seq_name = seq['sequence']
        intrinsics = seq['cam_intrinsics']
        extrinsics_per_frame = seq['cam_poses']

        per_person = zip(
            seq['jointPositions'], seq['poses2d'], seq['trans'], seq['campose_valid'])
        for i_person, (coord_seq, coords2d_seq, trans_seq, camvalid_seq) in enumerate(
                per_person):
            # trans_seq is not used below, but stays in the zip so that the
            # iteration still stops at the shortest of the sequences.
            per_frame = zip(
                coord_seq, coords2d_seq, trans_seq, extrinsics_per_frame, camvalid_seq)
            for i_frame, (coords, coords2d, _trans, extrinsics, campose_valid) in enumerate(
                    per_frame):
                if not campose_valid or np.all(coords2d == 0):
                    continue

                impath = f'{root}/imageFiles/{seq_name}/image_{i_frame:05d}.jpg'

                # Posed camera of this frame; translation and joints are
                # multiplied by 1000 (presumably meters to millimeters).
                posed_camera = cameralib.Camera(
                    extrinsic_matrix=extrinsics, intrinsic_matrix=intrinsics,
                    world_up=(0, 1, 0))
                posed_camera.t *= 1000
                world_coords = (coords.reshape(-1, 3))[selected_joints] * 1000

                # The stored example uses camera-space coordinates together
                # with a camera that has the same intrinsics but no extrinsics.
                plain_camera = cameralib.Camera(
                    intrinsic_matrix=intrinsics, world_up=(0, -1, 0))
                camcoords = posed_camera.world_to_camera(world_coords)
                imcoords = posed_camera.world_to_image(world_coords)
                bbox = boxlib.expand(boxlib.bb_of_points(imcoords), 1.15)
                ex = p3ds.Pose3DExample(
                    impath, camcoords, bbox=bbox, camera=plain_camera)

                noext = os.path.splitext(os.path.relpath(impath, root))[0]
                new_image_relpath = f'tdpw_downscaled/{noext}_{i_person:03d}.jpg'
                task_args = (ex, new_image_relpath, 1, False, "2021-07-09T12:28:07")
                pool.apply_async(
                    make_efficient_example, task_args, callback=result.append)

    return result
def make_muco():
    """Build the MuCo dataset from composited frames, detections and 3DHP annotations.

    For every valid composite frame, ground-truth people are reconstructed
    from the 3DHP annotations of their source frames, matched to the
    confident detections (score above 0.1) by maximizing total IoU, and each
    accepted match is sent to ``make_efficient_example`` in a worker pool
    with the detection box replacing the ground-truth box.

    Returns:
        A ``p3ds.Pose3DDataset`` whose examples are sorted by image path.
    """
    joint_info, selected_joints = make_joint_info()

    root_3dhp = f'{paths.DATA_ROOT}/3dhp'
    root_muco = f'{paths.DATA_ROOT}/muco'
    sample_info = np.load(f'{root_muco}/composite_frame_origins.npy')
    n_all_joints = 28
    valid_indices = list(np.load(f'{root_muco}/valid_composite_frame_indices.npy'))
    all_detections = util.load_pickle(f'{root_muco}/yolov3_detections.pkl')
    # Order the per-image detections by sorted key so they can be indexed by
    # the frame indices.
    all_detections = np.array([all_detections[k] for k in sorted(all_detections.keys())])
    all_visible_boxes = np.load(f'{root_muco}/visible_boxes.npy')
    matloader = functools.lru_cache(1024)(matlabfile.load)

    @functools.lru_cache(1024)
    def get_world_coords(i_subject, i_seq, i_cam, anno_name):
        # World-space joints for all frames of one 3DHP camera recording.
        seqpath = f'{root_3dhp}/S{i_subject}/Seq{i_seq}'
        anno_file = matloader(f'{seqpath}/annot.mat')
        camcoords = anno_file[anno_name][i_cam].reshape(
            [-1, n_all_joints, 3])[:, selected_joints]
        camera = load_cameras(f'{seqpath}/camera.calibration')[i_cam]
        world_coords = [camera.camera_to_world(c) for c in camcoords]
        return world_coords

    examples = []

    with util.BoundedPool(None, 120) as pool:
        frames = zip(
            util.progressbar(valid_indices), sample_info[valid_indices],
            all_detections[valid_indices], all_visible_boxes[valid_indices])

        for i_sample, people, frame_detections, visible_boxes in frames:
            detections = [box for box in frame_detections if box[-1] > 0.1]
            if not detections:
                continue

            filename = f'{i_sample + 1:06d}.jpg'
            image_relpath = f'unaugmented_set_001/{filename[:2]}/{filename[:4]}/{filename}'

            gt_people = []
            for i_person, ((i_subject, i_seq, i_cam, i_frame), visible_box) in enumerate(
                    zip(people, visible_boxes)):
                seqpath = f'{root_3dhp}/S{i_subject}/Seq{i_seq}'
                world_coords = get_world_coords(i_subject, i_seq, i_cam, 'annot3')[i_frame]
                univ_world_coords = get_world_coords(
                    i_subject, i_seq, i_cam, 'univ_annot3')[i_frame]
                camera = load_cameras(f'{seqpath}/camera.calibration')[i_cam]

                # Box of the projected joints, clipped to the 2048x2048
                # frame, slightly enlarged, then combined with the visible box.
                im_coords = camera.world_to_image(world_coords)
                joints_box = boxlib.intersect(
                    boxlib.bb_of_points(im_coords), boxlib.full_box([2048, 2048]))
                coord_bbox = boxlib.expand(joints_box, 1.05)
                bbox = boxlib.intersect_vertical(visible_box, coord_bbox)

                gt_people.append(p3ds.Pose3DExample(
                    image_relpath, world_coords, bbox, camera, mask=None,
                    univ_coords=univ_world_coords))

            if not gt_people:
                continue

            iou_matrix = np.array(
                [[boxlib.iou(gt_person.bbox, box[:4]) for box in detections]
                 for gt_person in gt_people])
            # Negated because linear_sum_assignment minimizes the total cost.
            gt_indices, det_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)

            for i_gt, i_det in zip(gt_indices, det_indices):
                gt_box = gt_people[i_gt].bbox
                det_box = detections[i_det]
                is_match = (
                    iou_matrix[i_gt, i_det] > 0.1 and
                    boxlib.area(det_box) < 2 * boxlib.area(gt_box))
                if is_match:
                    matched = gt_people[i_gt]
                    matched.bbox = np.array(detections[i_det][:4])
                    pool.apply_async(
                        make_efficient_example, (matched, root_muco, i_gt),
                        callback=examples.append)

    examples.sort(key=lambda ex: ex.image_path)
    return p3ds.Pose3DDataset(joint_info, examples)
def make_mpi_inf_3dhp(camera_ids=(0, 1, 2, 4, 5, 6, 7, 8)):
    """Build the MPI-INF-3DHP dataset (train, validation and test splits).

    Training/validation: for every subject, sequence and selected camera,
    every 5th frame is considered. A frame is dropped when not all joints are
    visible within a 2048x2048 image, or when every joint moved less than 100
    units since the last kept frame. Kept frames are handed to
    ``make_efficient_example`` in a worker pool.

    Test: the valid frames of TS1..TS6 are read from ``annot_data.mat`` and
    used directly, without the downscaling step.

    Args:
        camera_ids: Indices of the training cameras to use.

    Returns:
        A ``p3ds.Pose3DDataset`` with each split sorted by image path.
    """
    all_short_names = (
        'spi3,spi4,spi2,spin,pelv,neck,head,htop,lcla,lsho,lelb,lwri,lhan,rcla,rsho,relb,rwri,'
        'rhan,lhip,lkne,lank,lfoo,ltoe,rhip,rkne,rank,rfoo,rtoe'.split(','))

    # The test annotations are indexed with their own joint order.
    test_set_selected_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 14]
    selected_joints = [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 3, 6, 4]
    joint_names = [all_short_names[j] for j in selected_joints]

    edges = ('htop-head-neck-lsho-lelb-lwri,neck-rsho-relb-rwri,neck-spin-pelv-lhip-lkne-lank,'
             'pelv-rhip-rkne-rank')
    joint_info = p3ds.JointInfo(joint_names, edges)

    root_3dhp = f'{paths.DATA_ROOT}/3dhp'
    detections_all = util.load_pickle(f'{paths.DATA_ROOT}/3dhp/yolov3_person_detections.pkl')

    #################################
    # TRAINING AND VALIDATION SET
    #################################
    # Frame counts, indexed as [subject, sequence].
    num_frames = np.asarray(
        [[6416, 12430], [6502, 6081], [12488, 12283], [6171, 6675], [12820, 12312],
         [6188, 6145], [6239, 6320], [6468, 6054]])

    train_subjects = [0, 1, 2, 3, 4, 5, 6]
    valid_subjects = [7]  # this is my own arbitrary split for validation (Istvan Sarandi)
    train_examples = []
    valid_examples = []

    pool = util.BoundedPool(None, 120)
    for i_subject, i_seq, i_cam in itertools.product(
            train_subjects + valid_subjects, range(2), camera_ids):
        seqpath = f'{root_3dhp}/S{i_subject + 1}/Seq{i_seq + 1}'
        print(f'Processing {seqpath} camera {i_cam}')

        # Parse the annotation file once and take both coordinate sets from
        # it, instead of loading the same file separately for each of them.
        annotations = matlabfile.load(f'{seqpath}/annot.mat')
        cam3d_coords = [ann.reshape([ann.shape[0], -1, 3])[:, selected_joints]
                        for ann in annotations['annot3']]
        univ_cam3d_coords = [ann.reshape([ann.shape[0], -1, 3])[:, selected_joints]
                             for ann in annotations['univ_annot3']]
        cameras = load_cameras(f'{seqpath}/camera.calibration')

        examples_container = train_examples if i_subject in train_subjects else valid_examples
        frame_step = 5

        prev_coords = None
        camera = cameras[i_cam]
        n_frames = num_frames[i_subject, i_seq]

        if i_subject == 5 and i_seq == 1 and i_cam == 2:
            # This video is shorter for some reason
            n_frames = 3911

        for i_frame in util.progressbar(range(0, n_frames, frame_step)):
            image_relpath = (
                f'3dhp/S{i_subject + 1}/Seq{i_seq + 1}/'
                f'imageSequence/img_{i_cam}_{i_frame:06d}.jpg')

            cam_coords = cam3d_coords[i_cam][i_frame]
            world_coords = cameras[i_cam].camera_to_world(cam_coords)

            univ_camcoords = univ_cam3d_coords[i_cam][i_frame]
            univ_world_coords = cameras[i_cam].camera_to_world(univ_camcoords)

            # Check if the joints are within the image frame bounds
            if not np.all(camera.is_visible(world_coords, [2048, 2048])):
                continue

            im_coords = camera.camera_to_image(cam_coords)
            bbox = get_bbox(im_coords, image_relpath, detections_all)

            # Adaptive temporal sampling: skip the frame if every joint is
            # within 100 units of its position in the last kept frame.
            if (prev_coords is not None and
                    np.all(np.linalg.norm(world_coords - prev_coords, axis=1) < 100)):
                continue
            prev_coords = world_coords

            mask_path = image_relpath.replace('imageSequence', 'FGmasks')
            new_image_relpath = image_relpath.replace('3dhp', '3dhp_downscaled')
            ex = p3ds.Pose3DExample(
                image_relpath, world_coords, bbox, camera, mask=mask_path,
                univ_coords=univ_world_coords)

            pool.apply_async(make_efficient_example, (ex, new_image_relpath, 1, True),
                             callback=examples_container.append)

    print('Waiting for tasks...')
    pool.close()
    pool.join()
    print('Done...')

    #################################
    # TEST SET
    #################################
    test_examples = []

    # Camera shared by test subjects 1-4.
    cam1_4 = make_3dhp_test_camera(
        sensor_size=np.array([10, 10]), im_size=np.array([2048, 2048]), focal_length=7.32506,
        pixel_aspect=1.00044, center_offset=np.array([-0.0322884, 0.0929296]), distortion=None,
        origin=np.array([3427.28, 1387.86, 309.42]), up=np.array([-0.208215, 0.976233, 0.06014]),
        right=np.array([0.000575281, 0.0616098, -0.9981]))

    # Camera shared by test subjects 5-6.
    cam5_6 = make_3dhp_test_camera(
        sensor_size=np.array([10, 5.625]), im_size=np.array([1920, 1080]),
        focal_length=8.770747185,
        pixel_aspect=0.993236423, center_offset=np.array([-0.104908645, 0.104899704]),
        distortion=np.array([-0.276859611, 0.131125256, -0.000360494, -0.001149441,
                             -0.049318332]),
        origin=np.array([-2104.3074, 1038.6707, -4596.6367]),
        up=np.array([0.025272345, 0.995038509, 0.096227370]),
        right=np.array([-0.939647257, -0.009210289, 0.342020929]))

    activity_names = [
        'Stand/Walk', 'Exercise', 'Sit on Chair', 'Reach/Crouch', 'On Floor', 'Sports', 'Misc.']

    for i_subject in range(1, 7):
        seqpath = f'{root_3dhp}/TS{i_subject}'
        annotation_path = f'{seqpath}/annot_data.mat'

        with h5py.File(annotation_path, 'r') as m:
            cam3d_coords = np.array(m['annot3'])[:, 0, test_set_selected_joints]
            univ_cam3d_coords = np.array(m['univ_annot3'])[:, 0, test_set_selected_joints]
            valid_frames = np.where(m['valid_frame'][:, 0])[0]
            # The stored activity ids are shifted by one to index activity_names.
            activity_ids = m['activity_annotation'][:, 0].astype(int) - 1

        camera = cam1_4 if i_subject <= 4 else cam5_6
        # Consecutive pairs of test subjects share a scene type.
        scene = ['green-screen', 'no-green-screen', 'outdoor'][(i_subject - 1) // 2]

        for i_frame in valid_frames:
            image_relpath = f'3dhp/TS{i_subject}/imageSequence/img_{i_frame + 1:06d}.jpg'
            cam_coords = cam3d_coords[i_frame]
            univ_camcoords = univ_cam3d_coords[i_frame]
            activity = activity_names[activity_ids[i_frame]]
            world_coords = camera.camera_to_world(cam_coords)
            univ_world_coords = camera.camera_to_world(univ_camcoords)
            im_coords = camera.camera_to_image(cam_coords)
            bbox = get_bbox(im_coords, image_relpath, detections_all)

            ex = p3ds.Pose3DExample(
                image_relpath, world_coords, bbox, camera, activity_name=activity,
                scene_name=scene, univ_coords=univ_world_coords)
            test_examples.append(ex)

    train_examples.sort(key=lambda x: x.image_path)
    valid_examples.sort(key=lambda x: x.image_path)
    test_examples.sort(key=lambda x: x.image_path)
    return p3ds.Pose3DDataset(joint_info, train_examples, valid_examples, test_examples)
def make_h36m(
        train_subjects=(1, 5, 6, 7, 8), valid_subjects=(), test_subjects=(9, 11),
        correct_S9=True, partial_visibility=False):
    """Build the Human3.6M dataset with downscaled example images.

    Frames are fetched per subject, activity and camera via ``get_examples``
    (every 5th frame for training subjects, every 64th otherwise) and each
    kept frame is handed to ``make_efficient_example`` in a worker pool.

    Args:
        train_subjects: Subject numbers of the training split.
        valid_subjects: Subject numbers of the validation split.
        test_subjects: Subject numbers of the test split.
        correct_S9: Forwarded to ``get_examples``; when false (and
            ``partial_visibility`` is false) the output directory gets the
            suffix ``incorrect_S9``.
        partial_visibility: When true, the output directory gets the suffix
            ``_partial`` and an expansion factor of 1.8 instead of 1 is passed
            to ``make_efficient_example``.

    Returns:
        A ``ps3d.Pose3DDataset`` with each split sorted by image path.

    Raises:
        Exception: If the three subject sets are not pairwise disjoint.
    """
    joint_names = ('rhip,rkne,rank,lhip,lkne,lank,tors,neck,head,htop,'
                   'lsho,lelb,lwri,rsho,relb,rwri,pelv'.split(','))
    edges = ('htop-head-neck-lsho-lelb-lwri,neck-rsho-relb-rwri,'
             'neck-tors-pelv-lhip-lkne-lank,pelv-rhip-rkne-rank')
    joint_info = ps3d.JointInfo(joint_names, edges)

    if not util.all_disjoint(train_subjects, valid_subjects, test_subjects):
        raise Exception('Set of train, val and test subject must be disjoint.')

    train_examples = []
    test_examples = []
    valid_examples = []
    pool = util.BoundedPool(None, 120)

    if partial_visibility:
        dir_suffix, further_expansion_factor = '_partial', 1.8
    elif correct_S9:
        dir_suffix, further_expansion_factor = '', 1
    else:
        dir_suffix, further_expansion_factor = 'incorrect_S9', 1
    downscaled_dirname = f'h36m_downscaled{dir_suffix}'

    for i_subject in [*test_subjects, *train_subjects, *valid_subjects]:
        is_train = i_subject in train_subjects
        if is_train:
            examples_container = train_examples
        elif i_subject in valid_subjects:
            examples_container = valid_examples
        else:
            examples_container = test_examples

        frame_step = 5 if is_train else 64

        for activity_name, camera_id in itertools.product(
                get_activity_names(i_subject), range(4)):
            print(f'Processing S{i_subject} {activity_name} {camera_id}')
            image_relpaths, world_coords_all, bboxes, camera = get_examples(
                i_subject, activity_name, camera_id, frame_step=frame_step,
                correct_S9=correct_S9)

            # Only the part of the name before the first space is stored on
            # the examples.
            short_activity_name = activity_name.split(' ')[0]

            prev_coords = None
            for image_relpath, world_coords, bbox in zip(
                    util.progressbar(image_relpaths), world_coords_all, bboxes):
                # Near-duplicate frames add little when training, so a
                # training frame is skipped if every joint is within 100
                # units of the last kept frame. Validation and test frames
                # are never skipped, as that would change the results.
                if (is_train and prev_coords is not None and
                        np.all(np.linalg.norm(world_coords - prev_coords, axis=1) < 100)):
                    continue
                prev_coords = world_coords

                ex = ps3d.Pose3DExample(
                    image_relpath, world_coords, bbox, camera,
                    activity_name=short_activity_name)
                new_image_relpath = image_relpath.replace('h36m', downscaled_dirname)
                pool.apply_async(
                    make_efficient_example,
                    (ex, new_image_relpath, further_expansion_factor),
                    callback=examples_container.append)

    print('Waiting for tasks...')
    pool.close()
    pool.join()
    print('Done...')

    for split in (train_examples, valid_examples, test_examples):
        split.sort(key=lambda x: x.image_path)
    return ps3d.Pose3DDataset(joint_info, train_examples, valid_examples, test_examples)
def make_mupots_yolo():
    """Build the MuPoTS dataset with detection-based test and validation examples.

    Test examples: one per detection with confidence above 0.1, carrying
    placeholder (all-ones) coordinates.

    Validation examples: ground-truth people of each frame are matched to the
    confident detections by maximizing total IoU; every match with IoU above
    0.1 yields the ground-truth example with its box replaced by the
    detection box.

    Returns:
        A ``p3ds.Pose3DDataset`` with ``valid_examples`` and ``test_examples``.
    """
    all_short_names = (
        'thor,spi4,spi2,spin,pelv,neck,head,htop,lcla,lsho,lelb,lwri,lhan,rcla,rsho,relb,rwri,'
        'rhan,lhip,lkne,lank,lfoo,ltoe,rhip,rkne,rank,rfoo,rtoe'.split(','))

    # originally: [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 4, 3, 6]
    selected_joints = [
        7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 3, 6, 4
    ]
    # Row order applied to the per-person annotation arrays.
    order_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 14]
    joint_names = [all_short_names[j] for j in selected_joints]
    j = p3ds.JointInfo.make_id_map(joint_names)
    edges = [
        (j.htop, j.head), (j.head, j.neck),
        (j.neck, j.lsho), (j.lsho, j.lelb), (j.lelb, j.lwri),
        (j.neck, j.rsho), (j.rsho, j.relb), (j.relb, j.rwri),
        (j.neck, j.spin), (j.spin, j.pelv),
        (j.pelv, j.lhip), (j.lhip, j.lkne), (j.lkne, j.lank),
        (j.pelv, j.rhip), (j.rhip, j.rkne), (j.rkne, j.rank),
    ]
    joint_info = p3ds.JointInfo(j, edges)

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')
    # Placeholder pose for the detection-only test examples.
    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []

    for i_seq in range(1, 21):
        annotations = matlabfile.load(f'{root}/TS{i_seq}/annot.mat')['annotations']
        camera = cameralib.Camera(
            np.zeros(3), np.eye(3), intrinsic_matrices[f'TS{i_seq}'],
            distortion_coeffs=None, world_up=(0, -1, 0))

        n_people = annotations.shape[1]
        n_frames = annotations.shape[0]

        for i_frame in range(n_frames):
            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'

            # Test examples straight from the detections.
            for detection in detections_frame:
                if detection[4] > 0.1:
                    examples_test.append(p3ds.Pose3DExample(
                        image_path, dummy_coords, detection[:4], camera, mask=None,
                        univ_coords=dummy_coords, scene_name=f'TS{i_seq}'))

            # Ground-truth people of this frame.
            gt_people = []
            for i_person in range(n_people):
                person_annotation = annotations[i_frame, i_person]
                world_coords = np.array(
                    person_annotation.annot3.T[order_joints], dtype=np.float32)
                univ_world_coords = np.array(
                    person_annotation.univ_annot3.T[order_joints], dtype=np.float32)
                im_coords = camera.world_to_image(world_coords)
                gt_box = boxlib.expand(boxlib.bb_of_points(im_coords), 1.1)
                gt_people.append(p3ds.Pose3DExample(
                    image_path, world_coords, gt_box, camera, mask=None,
                    univ_coords=univ_world_coords, scene_name=f'TS{i_seq}'))

            confident_detections = [det for det in detections_frame if det[-1] > 0.1]
            if not confident_detections:
                continue

            iou_matrix = np.array(
                [[boxlib.iou(gt_person.bbox, box[:4]) for box in confident_detections]
                 for gt_person in gt_people])
            # Negated because linear_sum_assignment minimizes the total cost.
            gt_indices, detection_indices = scipy.optimize.linear_sum_assignment(
                -iou_matrix)

            for i_gt, i_det in zip(gt_indices, detection_indices):
                if iou_matrix[i_gt, i_det] > 0.1:
                    matched = gt_people[i_gt]
                    matched.bbox = np.array(confident_detections[i_det][:4])
                    examples_val.append(matched)

    return p3ds.Pose3DDataset(
        joint_info, valid_examples=examples_val, test_examples=examples_test)