def make_camera_from_match(match):
    """Build a `cameralib.Camera` from the calibration entries of `match`.

    `match['intrinsic']` and `match['extrinsic']` are each converted with
    `to_array` and reshaped to 4x4. The upper-left 3x3 block of the intrinsic
    array is used as the intrinsic matrix and the upper-left 3x3 block of the
    extrinsic array as the rotation.
    """
    intrinsics_4x4 = np.reshape(to_array(match['intrinsic']), [4, 4])
    extrinsics_4x4 = np.reshape(to_array(match['extrinsic']), [4, 4])

    rotation = extrinsics_4x4[:3, :3]
    translation = extrinsics_4x4[:3, 3]
    # NOTE(review): the translation is rotated back without a sign flip; confirm that this
    # matches the extrinsic convention of the data source.
    position = rotation.T @ translation

    return cameralib.Camera(
        position, rotation, intrinsics_4x4[:3, :3], None, world_up=(0, 1, 0))
def get_all_gt_poses():
    """Collect camera-space joint coordinates from all 3DPW sequence files.

    Every ``*.pkl`` under ``{paths.DATA_ROOT}/3dpw/sequenceFiles/*/`` is read. A frame is kept
    only if its camera pose is flagged valid and its 2D pose annotation is not all zeros.

    Returns:
        dict mapping ``(image_relpath, i_person)`` to the output of
        `camera.world_to_camera` for that person's joints in that frame.
    """
    poses_by_key = {}
    for seq_path in glob.glob(f'{paths.DATA_ROOT}/3dpw/sequenceFiles/*/*.pkl'):
        with open(seq_path, 'rb') as f:
            seq = pickle.load(f, encoding='latin1')

        seq_name = seq['sequence']
        intrinsics = seq['cam_intrinsics']
        cam_poses = seq['cam_poses']

        per_person = zip(
            seq['jointPositions'], seq['poses2d'], seq['trans'], seq['campose_valid'])
        for i_person, (joints_seq, poses2d_seq, trans_seq, valid_seq) in enumerate(per_person):
            per_frame = zip(joints_seq, poses2d_seq, trans_seq, cam_poses, valid_seq)
            for i_frame, (joints, pose2d, _trans, extrinsics, is_valid) in enumerate(per_frame):
                if is_valid and not np.all(pose2d == 0):
                    camera = cameralib.Camera(
                        extrinsic_matrix=extrinsics, intrinsic_matrix=intrinsics,
                        world_up=(0, 1, 0))
                    # Camera translation and joint coordinates are scaled by the same factor
                    # (presumably metres to millimetres -- confirm against the dataset docs).
                    camera.t *= 1000
                    joints_world = joints.reshape(-1, 3) * 1000

                    key = (f'imageFiles/{seq_name}/image_{i_frame:05d}.jpg', i_person)
                    poses_by_key[key] = camera.world_to_camera(joints_world)
    return poses_by_key
def get_cameras(json_path):
    """Return one camera per sequence from the intrinsics JSON at `json_path`.

    The intrinsic matrices are looked up under the keys 'TS1' ... 'TS20' and the
    cameras are returned as a list in that order.
    """
    intrinsics_by_seq = util.load_json(json_path)
    cameras = []
    for i_seq in range(1, 21):
        cameras.append(
            cameralib.Camera(
                intrinsic_matrix=intrinsics_by_seq[f'TS{i_seq}'], world_up=(0, -1, 0)))
    return cameras
def make_3dhp_test_camera(
        sensor_size, im_size, focal_length, pixel_aspect, center_offset, distortion, origin, up,
        right):
    """Build a `cameralib.Camera` from a sensor-based camera description.

    Args:
        sensor_size: 2-element array with the sensor extent (presumably in millimetres, given
            the `mm_to_px_factors` conversion below -- confirm).
        im_size: 2-element array with the image extent (presumably in pixels), in the same
            axis order as `sensor_size`.
        focal_length: scalar focal length in the same unit as `sensor_size`.
        pixel_aspect: extra scale factor applied to the second image axis.
        center_offset: 2-element offset of the principal point from the sensor center, in
            the same unit as `sensor_size`.
        distortion: passed through unchanged as the fourth positional `Camera` argument.
        origin: passed through unchanged as the first positional `Camera` argument.
        up: 3-element array; its negation becomes the second row of the rotation matrix.
        right: 3-element array; becomes the first row of the rotation matrix.

    Returns:
        The constructed `cameralib.Camera` with ``world_up=(0, 1, 0)``.
    """
    # np.vstack replaces the deprecated alias np.row_stack (same behavior).
    R = np.vstack([right, -up, np.cross(up, right)])

    # Force a float matrix: with an integer `focal_length`, np.diag would yield an integer
    # array and the fractional principal point / scale factors below would be truncated.
    intrinsic_matrix = np.diag(np.array([focal_length, focal_length, 1], dtype=np.float64))
    intrinsic_matrix[:2, 2] = sensor_size / 2 + center_offset

    # Convert the first two rows (focal lengths and principal point) from sensor units
    # to image units.
    mm_to_px_factors = im_size / sensor_size * np.array([1, pixel_aspect])
    intrinsic_matrix[:2] = np.diag(mm_to_px_factors) @ intrinsic_matrix[:2]

    return cameralib.Camera(origin, R, intrinsic_matrix, distortion, world_up=(0, 1, 0))
def make_h36m_camera(extrinsic_params, intrinsic_params):
    """Build a `cameralib.Camera` from flat extrinsic and intrinsic parameter vectors.

    Args:
        extrinsic_params: sequence whose first three entries are Euler angles (used with the
            'rxyz' axes convention of `transforms3d`) and whose entries 3..5 are passed to
            the camera as its first positional argument.
        intrinsic_params: array split as focal lengths ``[0:2]``, principal point ``[2:4]``,
            three `k` coefficients ``[4:7]`` and `p` coefficients ``[7:]``.

    Returns:
        The constructed `cameralib.Camera`.
    """
    angle_x, angle_y, angle_z = extrinsic_params[:3]
    rotation = transforms3d.euler.euler2mat(angle_x, angle_y, angle_z, 'rxyz')
    position = extrinsic_params[3:6]

    focal, center, k_coeffs, p_coeffs = np.split(intrinsic_params, (2, 4, 7))

    intrinsic_matrix = np.array(
        [[focal[0], 0, center[0]],
         [0, focal[1], center[1]],
         [0, 0, 1]], np.float32)
    # Coefficients are interleaved as (k0, k1, p0, p1, k2); presumably the OpenCV ordering
    # of radial and tangential terms -- confirm against cameralib.
    distortion_coeffs = np.array(
        [k_coeffs[0], k_coeffs[1], p_coeffs[0], p_coeffs[1], k_coeffs[2]], np.float32)

    return cameralib.Camera(position, rotation, intrinsic_matrix, distortion_coeffs)
def make_mupots():
    """Build the MuPoTS dataset object from precomputed person detections.

    For each of the sequences TS1..TS20, every detection with confidence above 0.1 becomes a
    test example whose pose coordinates are dummy values (all ones). No validation examples
    are produced.

    Returns:
        A `p3ds.Pose3DDataset` with an empty validation list and the test examples.
    """
    joint_info = p3ds.JointInfo(
        'htop,neck,rsho,relb,rwri,lsho,lelb,lwri,rhip,rkne,rank,lhip,lkne,lank,spin,head,pelv',
        'htop-head-neck-spin-pelv-lhip-lkne-lank,'
        'lwri-lelb-lsho-neck-rsho-relb-rwri,pelv-rhip-rkne-rank')

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')
    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []
    for i_seq in range(1, 21):
        # The annotation file is only consulted for the number of frames in the sequence.
        annotations = matlabfile.load(f'{root}/TS{i_seq}/annot.mat')['annotations']
        camera = cameralib.Camera(
            np.zeros(3), np.eye(3), intrinsic_matrices[f'TS{i_seq}'],
            distortion_coeffs=None, world_up=(0, -1, 0))

        for i_frame in range(annotations.shape[0]):
            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'
            path_in_data_root = os.path.relpath(image_path, paths.DATA_ROOT)

            # Each detection holds the box in its first four entries and the confidence
            # at index 4.
            examples_test.extend(
                p3ds.Pose3DExample(
                    path_in_data_root, dummy_coords, detection[:4], camera, mask=None,
                    univ_coords=dummy_coords, scene_name=f'TS{i_seq}')
                for detection in detections_frame
                if detection[4] > 0.1)

    return p3ds.Pose3DDataset(
        joint_info, valid_examples=examples_val, test_examples=examples_test)
def get_examples(phase, pool):
    """Submit one example-preparation job per valid 3DPW pose of the given phase.

    Reads every ``{root}/sequenceFiles/{phase}/*.pkl`` file. Frames whose camera pose is not
    flagged valid, or whose 2D pose annotation is all zeros, are skipped. For the remaining
    ones, a `p3ds.Pose3DExample` is created and handed to `make_efficient_example` through
    `pool.apply_async`.

    Args:
        phase: name of the subdirectory of ``sequenceFiles`` to read.
        pool: object providing `apply_async(func, args, callback=...)`.

    Returns:
        The list that the job callbacks append their results to. If `pool` runs the jobs
        asynchronously, the list may still be incomplete when this function returns.
    """
    result = []
    for seq_path in glob.glob(f'{root}/sequenceFiles/{phase}/*.pkl'):
        with open(seq_path, 'rb') as f:
            seq = pickle.load(f, encoding='latin1')

        seq_name = seq['sequence']
        intrinsics = seq['cam_intrinsics']
        cam_poses = seq['cam_poses']

        per_person = zip(
            seq['jointPositions'], seq['poses2d'], seq['trans'], seq['campose_valid'])
        for i_person, (joints_seq, poses2d_seq, trans_seq, valid_seq) in enumerate(per_person):
            per_frame = zip(joints_seq, poses2d_seq, trans_seq, cam_poses, valid_seq)
            for i_frame, (joints, pose2d, _trans, extrinsics, is_valid) in enumerate(per_frame):
                if not (is_valid and not np.all(pose2d == 0)):
                    continue

                impath = f'{root}/imageFiles/{seq_name}/image_{i_frame:05d}.jpg'

                # The camera with extrinsics is only used to transform and project the
                # joints; both its translation and the joints are scaled by 1000
                # (presumably metres to millimetres -- confirm).
                posed_camera = cameralib.Camera(
                    extrinsic_matrix=extrinsics, intrinsic_matrix=intrinsics,
                    world_up=(0, 1, 0))
                posed_camera.t *= 1000
                joints_world = joints.reshape(-1, 3)[selected_joints] * 1000

                joints_cam = posed_camera.world_to_camera(joints_world)
                joints_im = posed_camera.world_to_image(joints_world)
                box = boxlib.expand(boxlib.bb_of_points(joints_im), 1.15)

                # The example stores camera-space coordinates, so it gets a camera that
                # carries the intrinsics only.
                intrinsic_only_camera = cameralib.Camera(
                    intrinsic_matrix=intrinsics, world_up=(0, -1, 0))
                ex = p3ds.Pose3DExample(
                    impath, joints_cam, bbox=box, camera=intrinsic_only_camera)

                stem = os.path.splitext(os.path.relpath(impath, root))[0]
                new_image_relpath = f'tdpw_downscaled/{stem}_{i_person:03d}.jpg'
                pool.apply_async(
                    make_efficient_example,
                    (ex, new_image_relpath, 1, False, "2021-07-09T12:28:07"),
                    callback=result.append)
    return result
def get_all_pred_poses(path):
    """Load predicted poses from `path` and group them by MuPoTS sequence and frame.

    The file must provide the arrays 'image_path' and 'coords3d_pred_world'. Each image path
    has to match ``.../TS<seq>/img_<frame>.jpg``. When `FLAGS.root_last` is set, joints 14-16
    of every prediction are reordered as (16, 14, 15).

    Returns:
        ``(all_pred2d, all_pred3d)``: two nested lists indexed as ``[i_seq][i_frame]``
        (sequence index is zero-based), each holding an array of shape ``(n_poses, 17, 2)``
        and ``(n_poses, 17, 3)`` respectively.
    """
    results = np.load(path, allow_pickle=True)

    joint_remapping = [*range(14), 16, 14, 15] if FLAGS.root_last else list(range(17))

    n_seq = 20
    frame_counts = [
        len(glob.glob(f'{paths.DATA_ROOT}/mupots/TS{i_seq + 1}/img_*.jpg'))
        for i_seq in range(n_seq)]
    all_pred2d = [[[] for _ in range(count)] for count in frame_counts]
    all_pred3d = [[[] for _ in range(count)] for count in frame_counts]

    intrinsic_matrices = util.load_json(f'{paths.DATA_ROOT}/mupots/camera_intrinsics.json')
    path_pattern = re.compile(r'.+/TS(?P<i_seq>\d+)/img_(?P<i_frame>\d+)\.jpg')

    for image_path, pose3d in zip(results['image_path'], results['coords3d_pred_world']):
        if isinstance(image_path, bytes):
            image_path = image_path.decode('utf8')
        m = path_pattern.match(image_path)
        i_seq = int(m.group('i_seq')) - 1
        i_frame = int(m.group('i_frame'))

        pose3d = pose3d[joint_remapping]
        camera = cameralib.Camera(
            intrinsic_matrix=intrinsic_matrices[f'TS{i_seq + 1}'], world_up=(0, -1, 0))
        all_pred2d[i_seq][i_frame].append(camera.world_to_image(pose3d))
        all_pred3d[i_seq][i_frame].append(pose3d)

    def as_arrays(preds_per_seq, n_coords):
        # Reshaping keeps a well-defined (0, 17, n_coords) shape for frames with no poses.
        return [
            [np.array(frame_preds).reshape([-1, 17, n_coords]) for frame_preds in seq_preds]
            for seq_preds in preds_per_seq]

    all_pred2d = as_arrays(all_pred2d, 2)
    all_pred3d = as_arrays(all_pred3d, 3)
    return all_pred2d, all_pred3d
def make_many():
    """Build a placeholder dataset carrying the merged multi-dataset skeleton.

    The joint names and edges are hard-coded. A single dummy example (an all-black 256x256
    image written to a temporary ``.jpg`` file, all-zero joint coordinates, a full-image box
    and a default camera) is used for all three example lists of the dataset.

    Returns:
        A `Pose3DDataset` whose three example lists each contain the same dummy example.
    """
    joint_names = [
        'lhip', 'rhip', 'bell', 'lkne', 'rkne', 'spin', 'lank', 'rank', 'thor', 'ltoe', 'rtoe',
        'neck', 'lcla', 'rcla', 'head', 'lsho', 'rsho', 'lelb', 'relb', 'lwri', 'rwri', 'lhan',
        'rhan', 'pelv', 'head_h36m', 'head_muco', 'head_sailvos', 'htop_h36m', 'htop_muco',
        'htop_sailvos', 'lcla_muco', 'lear', 'leye', 'lfin_h36m', 'lfoo_h36m', 'lfoo_muco',
        'lhan_muco', 'lhip_cmu_panoptic', 'lhip_h36m', 'lhip_muco', 'lhip_sailvos',
        'lsho_cmu_panoptic', 'lsho_h36m', 'lsho_muco', 'lsho_sailvos', 'lthu_h36m',
        'neck_cmu_panoptic', 'neck_h36m', 'neck_muco', 'neck_sailvos', 'nose',
        'pelv_cmu_panoptic', 'pelv_h36m', 'pelv_muco', 'pelv_sailvos', 'rcla_muco', 'rear',
        'reye', 'rfin_h36m', 'rfoo_h36m', 'rfoo_muco', 'rhan_muco', 'rhip_cmu_panoptic',
        'rhip_h36m', 'rhip_muco', 'rhip_sailvos', 'rsho_cmu_panoptic', 'rsho_h36m',
        'rsho_muco', 'rsho_sailvos', 'rthu_h36m', 'spi2_muco', 'spi4_muco'
    ]
    # Pairs of indices into `joint_names`.
    edges = [
        (0, 3), (0, 23), (1, 4), (1, 23), (2, 5), (2, 23), (3, 6), (3, 37), (3, 38), (3, 39),
        (3, 40), (4, 7), (4, 62), (4, 63), (4, 64), (4, 65), (5, 8), (5, 47), (5, 49), (5, 52),
        (5, 53), (5, 54), (5, 71), (6, 9), (6, 34), (6, 35), (7, 10), (7, 59), (7, 60), (8, 11),
        (8, 71), (8, 72), (9, 34), (9, 35), (10, 59), (10, 60), (11, 12), (11, 13), (11, 14),
        (12, 15), (13, 16), (15, 17), (16, 18), (17, 19), (17, 41), (17, 42), (17, 43),
        (17, 44), (18, 20), (18, 66), (18, 67), (18, 68), (18, 69), (19, 21), (19, 33),
        (19, 36), (19, 45), (20, 22), (20, 58), (20, 61), (20, 70), (24, 27), (24, 47),
        (25, 28), (25, 48), (26, 29), (26, 49), (30, 43), (30, 48), (31, 32), (32, 50),
        (37, 51), (38, 52), (39, 53), (40, 54), (41, 46), (42, 47), (44, 49), (46, 50),
        (46, 51), (46, 66), (47, 67), (48, 55), (48, 72), (49, 69), (50, 57), (51, 62),
        (52, 63), (53, 64), (54, 65), (55, 68), (56, 57)
    ]
    joint_info = JointInfo(joint_names, edges)

    import os
    import imageio
    import tempfile
    import cameralib

    # mkstemp returns an already-open OS-level file descriptor together with the path.
    # Only the path is needed here, so close the descriptor right away instead of leaking it.
    fd, image_path = tempfile.mkstemp(suffix='.jpg')
    os.close(fd)
    # The file itself is left on disk because the dummy example refers to it by path.
    imageio.imwrite(image_path, np.zeros((256, 256), dtype=np.uint8))

    dummy_example = Pose3DExample(
        image_path, np.zeros((joint_info.n_joints, 3), np.float32), [0, 0, 256, 256],
        cameralib.Camera())
    return Pose3DDataset(joint_info, [dummy_example], [dummy_example], [dummy_example])
def get_all_pred_poses(path):
    """Load predicted poses from `path` and group them by MuPoTS sequence and frame.

    The file must provide the arrays 'image_path' and 'coords3d_pred'. Each image path has
    to match ``.../TS<seq>/img_<frame>.jpg`` and may be stored either as `bytes` or as `str`.
    Joints 14-16 of every prediction are reordered as (16, 14, 15).

    Returns:
        ``(all_pred2d, all_pred3d)``: two nested lists indexed as ``[i_seq][i_frame]``
        (sequence index is zero-based), each holding the `np.array` of the 2D projections
        and of the 3D poses predicted for that frame.
    """
    results = np.load(path, allow_pickle=True)
    joint_remapping = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 14, 15]
    n_seq = 20
    n_frames_per_seq = [
        len(glob.glob(f'{paths.DATA_ROOT}/mupots/TS{i_seq + 1}/img_*.jpg'))
        for i_seq in range(n_seq)]
    all_pred2d = [[[] for _ in range(n_frames)] for n_frames in n_frames_per_seq]
    all_pred3d = [[[] for _ in range(n_frames)] for n_frames in n_frames_per_seq]
    intrinsic_matrices = util.load_json(f'{paths.DATA_ROOT}/mupots/camera_intrinsics.json')

    for image_path, coords3d_pred in zip(results['image_path'], results['coords3d_pred']):
        # Paths saved from Python str objects load back as str, which has no `.decode`;
        # only decode when the stored value is actually bytes.
        if isinstance(image_path, bytes):
            image_path = image_path.decode('utf8')
        m = re.match(r'.+/TS(?P<i_seq>\d+)/img_(?P<i_frame>\d+)\.jpg', image_path)
        i_seq = int(m['i_seq']) - 1
        i_frame = int(m['i_frame'])

        coords3d_pred = coords3d_pred[joint_remapping]
        camera = cameralib.Camera(
            intrinsic_matrix=intrinsic_matrices[f'TS{i_seq + 1}'], world_up=(0, -1, 0))
        coords2d = camera.world_to_image(coords3d_pred)
        all_pred2d[i_seq][i_frame].append(coords2d)
        all_pred3d[i_seq][i_frame].append(coords3d_pred)

    all_pred2d = [
        [np.array(frame_preds) for frame_preds in seq_preds] for seq_preds in all_pred2d]
    all_pred3d = [
        [np.array(frame_preds) for frame_preds in seq_preds] for seq_preds in all_pred3d]
    return all_pred2d, all_pred3d
def get_main_camera(imshape):
    """Create a camera for an image of shape `imshape` with a 60 degree field of view.

    The focal length is chosen so that the larger of the first two entries of `imshape`
    spans 60 degrees, and the principal point is placed at the image center
    (``imshape[1] / 2``, ``imshape[0] / 2``).
    """
    half_fov = np.deg2rad(60) / 2
    focal = np.max(imshape[:2]) / (np.tan(half_fov) * 2)
    center_x = imshape[1] / 2
    center_y = imshape[0] / 2
    intrinsic_matrix = np.array([
        [focal, 0, center_x],
        [0, focal, center_y],
        [0, 0, 1]])
    return cameralib.Camera(intrinsic_matrix=intrinsic_matrix, world_up=(0, -1, 0))
def get_3dpw_camera(seq_filepath):
    """Return a camera built from the 'cam_intrinsics' entry of a pickled sequence file.

    The file at `seq_filepath` is unpickled with latin1 encoding, so it must come from a
    trusted source.
    """
    with open(seq_filepath, 'rb') as f:
        seq = pickle.load(f, encoding='latin1')
    return cameralib.Camera(intrinsic_matrix=seq['cam_intrinsics'], world_up=[0, -1, 0])
def main():
    """Run the saved model on all MuPoTS sequences and save the predicted poses.

    For each sequence TS1..TS20 the frames are fed through `predict_sequence` and the
    resulting poses are collected together with the relative path of their frame. At the
    end, everything is written to `FLAGS.output_path` with `np.savez` under the keys
    'image_path' and 'coords3d_pred_world'. Visualization is optional (`FLAGS.viz`), and
    a video per sequence is written when `FLAGS.out_video_dir` is set as well.
    """
    initialize()
    model = tf.saved_model.load(FLAGS.model_path)
    skeleton = 'mpi_inf_3dhp_17'
    ji3d = get_joint_info(model, skeleton=skeleton)
    predict_fn = functools.partial(
        model.detect_poses_batched,
        internal_batch_size=FLAGS.inner_batch_size,
        num_aug=FLAGS.num_aug,
        detector_threshold=0.2,
        detector_nms_iou_threshold=0.7,
        detector_flip_aug=True,
        antialias_factor=2,
        suppress_implausible_poses=False,
        skeleton=skeleton)

    if FLAGS.viz:
        viz = poseviz.PoseViz(
            ji3d.names, ji3d.stick_figure_edges, write_video=bool(FLAGS.out_video_dir),
            world_up=(0, -1, 0), downscale=4, queue_size=2 * FLAGS.batch_size)
    else:
        viz = None

    image_relpaths_all = []
    poses_all = []
    intrinsic_matrices = util.load_json(f'{paths.DATA_ROOT}/mupots/camera_intrinsics.json')

    for i_seq in range(1, 21):
        if FLAGS.viz:
            viz.new_sequence()
            if FLAGS.out_video_dir:
                viz.start_new_video(f'{FLAGS.out_video_dir}/TS{i_seq}.mp4', fps=25)

        # The annotation file is only consulted for the number of frames in the sequence.
        annotations = matlabfile.load(
            f'{paths.DATA_ROOT}/mupots/TS{i_seq}/annot.mat')['annotations']
        camera = cameralib.Camera(
            intrinsic_matrix=intrinsic_matrices[f'TS{i_seq}'], world_up=(0, -1, 0))

        n_frames = annotations.shape[0]
        frame_relpaths = [f'mupots/TS{i_seq}/img_{i:06d}.jpg' for i in range(n_frames)]
        frame_paths = [f'{paths.DATA_ROOT}/{p}' for p in frame_relpaths]
        frame_gpu, frames_cpu = video_io.image_files_as_tf_dataset(
            frame_paths, batch_size=FLAGS.batch_size, prefetch_gpu=2, tee_cpu=True)
        poses_per_frames = predict_sequence(
            predict_fn, frame_gpu, frames_cpu, len(frame_paths), camera, viz)

        # Flatten to one entry per pose, repeating the frame path for each pose in it.
        for frame_poses, relpath in zip(poses_per_frames, frame_relpaths):
            image_relpaths_all.extend([relpath] * len(frame_poses))
            poses_all.extend(frame_poses)

    np.savez(
        FLAGS.output_path,
        image_path=np.stack(image_relpaths_all, axis=0),
        coords3d_pred_world=np.stack(poses_all, axis=0))

    if viz is not None:
        viz.close()
def make_mupots_yolo():
    """Build the MuPoTS dataset with detector boxes matched to ground-truth people.

    Test examples: one per detection with confidence above 0.1, carrying dummy coordinates.
    Validation examples: for every frame, ground-truth people are matched to the confident
    detections by maximizing the total box IoU (`scipy.optimize.linear_sum_assignment`);
    each matched pair with IoU above 0.1 yields the ground-truth example with its box
    replaced by the detection box.

    Returns:
        A `p3ds.Pose3DDataset` with these validation and test examples.
    """
    all_short_names = (
        'thor,spi4,spi2,spin,pelv,neck,head,htop,lcla,lsho,lelb,lwri,lhan,rcla,rsho,relb,rwri,'
        'rhan,lhip,lkne,lank,lfoo,ltoe,rhip,rkne,rank,rfoo,rtoe'.split(','))

    # originally: [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 4, 3, 6]
    selected_joints = [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 3, 6, 4]
    order_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 14]
    joint_names = [all_short_names[i_joint] for i_joint in selected_joints]

    ids = p3ds.JointInfo.make_id_map(joint_names)
    bones = [
        ('htop', 'head'), ('head', 'neck'), ('neck', 'lsho'), ('lsho', 'lelb'),
        ('lelb', 'lwri'), ('neck', 'rsho'), ('rsho', 'relb'), ('relb', 'rwri'),
        ('neck', 'spin'), ('spin', 'pelv'), ('pelv', 'lhip'), ('lhip', 'lkne'),
        ('lkne', 'lank'), ('pelv', 'rhip'), ('rhip', 'rkne'), ('rkne', 'rank')]
    edges = [(getattr(ids, start), getattr(ids, end)) for start, end in bones]
    joint_info = p3ds.JointInfo(ids, edges)

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')
    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []
    for i_seq in range(1, 21):
        annotations = matlabfile.load(f'{root}/TS{i_seq}/annot.mat')['annotations']
        camera = cameralib.Camera(
            np.zeros(3), np.eye(3), intrinsic_matrices[f'TS{i_seq}'],
            distortion_coeffs=None, world_up=(0, -1, 0))
        n_frames = annotations.shape[0]
        n_people = annotations.shape[1]

        for i_frame in range(n_frames):
            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'

            # Test examples: every sufficiently confident detection, without ground truth.
            examples_test.extend(
                p3ds.Pose3DExample(
                    image_path, dummy_coords, detection[:4], camera, mask=None,
                    univ_coords=dummy_coords, scene_name=f'TS{i_seq}')
                for detection in detections_frame
                if detection[4] > 0.1)

            # Ground-truth examples of this frame, initially with a box derived from the
            # projected joints.
            gt_people = []
            for i_person in range(n_people):
                annotation = annotations[i_frame, i_person]
                world_coords = np.array(annotation.annot3.T[order_joints], dtype=np.float32)
                univ_world_coords = np.array(
                    annotation.univ_annot3.T[order_joints], dtype=np.float32)
                im_coords = camera.world_to_image(world_coords)
                gt_box = boxlib.expand(boxlib.bb_of_points(im_coords), 1.1)
                gt_people.append(
                    p3ds.Pose3DExample(
                        image_path, world_coords, gt_box, camera, mask=None,
                        univ_coords=univ_world_coords, scene_name=f'TS{i_seq}'))

            confident_detections = [det for det in detections_frame if det[-1] > 0.1]
            if not confident_detections:
                continue

            iou_matrix = np.array([
                [boxlib.iou(gt_person.bbox, box[:4]) for box in confident_detections]
                for gt_person in gt_people])
            # Negate the IoUs because linear_sum_assignment minimizes the total cost.
            gt_indices, detection_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)
            for i_gt, i_det in zip(gt_indices, detection_indices):
                if iou_matrix[i_gt, i_det] > 0.1:
                    matched = gt_people[i_gt]
                    matched.bbox = np.array(confident_detections[i_det][:4])
                    examples_val.append(matched)

    return p3ds.Pose3DDataset(
        joint_info, valid_examples=examples_val, test_examples=examples_test)