Esempio n. 1
0
def make_tdpw():
    root = '/globalwork/datasets/3DPW'
    body_joint_names = (
        'pelv,lhip,rhip,bell,lkne,rkne,spin,lank,rank,thor,ltoe,rtoe,neck,lcla,rcla,head,lsho,'
        'rsho,lelb,relb,lwri,rwri,lhan,rhan'.split(','))
    selected_joints = [*range(1, 24), 0]
    joint_names = [body_joint_names[j] for j in selected_joints]
    edges = ('head-neck-lcla-lsho-lelb-lwri-lhan,'
             'neck-rcla-rsho-relb-rwri-rhan,'
             'neck-thor-spin-bell-pelv-lhip-lkne-lank-ltoe,'
             'pelv-rhip-rkne-rank-rtoe')
    joint_info = p3ds.JointInfo(joint_names, edges)

    def get_examples(phase, pool):
        result = []
        seq_filepaths = glob.glob(f'{root}/sequenceFiles/{phase}/*.pkl')
        for filepath in seq_filepaths:
            with open(filepath, 'rb') as f:
                seq = pickle.load(f, encoding='latin1')
            seq_name = seq['sequence']
            intrinsics = seq['cam_intrinsics']
            extrinsics_per_frame = seq['cam_poses']

            for i_person, (coord_seq, coords2d_seq, trans_seq, camvalid_seq) in enumerate(zip(
                    seq['jointPositions'], seq['poses2d'], seq['trans'], seq['campose_valid'])):
                for i_frame, (coords, coords2d, trans, extrinsics, campose_valid) in enumerate(
                        zip(coord_seq, coords2d_seq, trans_seq, extrinsics_per_frame,
                            camvalid_seq)):
                    if not campose_valid or np.all(coords2d == 0):
                        continue

                    impath = f'{root}/imageFiles/{seq_name}/image_{i_frame:05d}.jpg'
                    camera = cameralib.Camera(
                        extrinsic_matrix=extrinsics, intrinsic_matrix=intrinsics,
                        world_up=(0, 1, 0))
                    camera.t *= 1000
                    world_coords = (coords.reshape(-1, 3))[selected_joints] * 1000
                    camera2 = cameralib.Camera(intrinsic_matrix=intrinsics, world_up=(0, -1, 0))
                    camcoords = camera.world_to_camera(world_coords)
                    imcoords = camera.world_to_image(world_coords)
                    bbox = boxlib.expand(boxlib.bb_of_points(imcoords), 1.15)
                    ex = p3ds.Pose3DExample(impath, camcoords, bbox=bbox, camera=camera2)
                    noext, ext = os.path.splitext(os.path.relpath(impath, root))
                    new_image_relpath = f'tdpw_downscaled/{noext}_{i_person:03d}.jpg'
                    pool.apply_async(
                        make_efficient_example,
                        (ex, new_image_relpath, 1, False, "2021-07-09T12:28:07"),
                        callback=result.append)
        return result

    with util.BoundedPool(None, 120) as pool:
        train_examples = get_examples('train', pool)
        val_examples = get_examples('validation', pool)
        test_examples = get_examples('test', pool)

    test_examples = [*train_examples, *val_examples, *test_examples]
    test_examples.sort(key=lambda ex: ex.image_path)
    return p3ds.Pose3DDataset(joint_info, None, None, test_examples)
Esempio n. 2
0
def make_mupots():
    joint_names = (
        'htop,neck,rsho,relb,rwri,lsho,lelb,lwri,rhip,rkne,rank,lhip,lkne,lank,spin,head,pelv'
    )
    edges = ('htop-head-neck-spin-pelv-lhip-lkne-lank,'
             'lwri-lelb-lsho-neck-rsho-relb-rwri,pelv-rhip-rkne-rank')
    joint_info = p3ds.JointInfo(joint_names, edges)

    #import data.muco
    #joint_info = data.muco.make_joint_info()[0]

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')

    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []
    for i_seq in range(1, 21):
        annotations = matlabfile.load(
            f'{root}/TS{i_seq}/annot.mat')['annotations']
        intrinsic_matrix = intrinsic_matrices[f'TS{i_seq}']
        camera = cameralib.Camera(np.zeros(3),
                                  np.eye(3),
                                  intrinsic_matrix,
                                  distortion_coeffs=None,
                                  world_up=(0, -1, 0))

        n_frames = annotations.shape[0]
        for i_frame in range(n_frames):
            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'
            for detection in detections_frame:
                confidence = detection[4]
                if confidence > 0.1:
                    ex = p3ds.Pose3DExample(os.path.relpath(
                        image_path, paths.DATA_ROOT),
                                            dummy_coords,
                                            detection[:4],
                                            camera,
                                            mask=None,
                                            univ_coords=dummy_coords,
                                            scene_name=f'TS{i_seq}')
                    examples_test.append(ex)

    return p3ds.Pose3DDataset(joint_info,
                              valid_examples=examples_val,
                              test_examples=examples_test)
Esempio n. 3
0
def generate_poseviz_gt(i_subject, activity_name, camera_id):
    camera_names = ['54138969', '55011271', '58860488', '60457274']
    camera_name = camera_names[camera_id]
    image_relpaths, world_coords_all, bboxes, camera = get_examples(
        i_subject, activity_name, camera_id, frame_step=1, correct_S9=True)

    results = []
    examples = []
    for image_relpath, world_coords, bbox in zip(image_relpaths,
                                                 world_coords_all, bboxes):
        results.append({
            'gt_poses': [world_coords.tolist()],
            'camera_intrinsics':
            camera.intrinsic_matrix.tolist(),
            'camera_extrinsics':
            camera.get_extrinsic_matrix().tolist(),
            'image_path':
            image_relpath,
            'bboxes': [bbox.tolist()]
        })
        ex = ps3d.Pose3DExample(image_relpath,
                                world_coords,
                                bbox,
                                camera,
                                activity_name=activity_name)
        examples.append(ex)

    joint_names = ('rhip,rkne,rank,lhip,lkne,lank,tors,neck,head,htop,'
                   'lsho,lelb,lwri,rsho,relb,rwri,pelv'.split(','))
    edges = ('htop-head-neck-lsho-lelb-lwri,neck-rsho-relb-rwri,'
             'neck-tors-pelv-lhip-lkne-lank,pelv-rhip-rkne-rank')
    joint_info = ps3d.JointInfo(joint_names, edges)
    ds = ps3d.Pose3DDataset(joint_info, test_examples=examples)
    util.dump_pickle(
        ds,
        f'{paths.DATA_ROOT}/h36m/poseviz/S{i_subject}_{activity_name}_{camera_name}.pkl'
    )

    output = {}
    output['joint_names'] = joint_info.names
    output['stick_figure_edges'] = joint_info.stick_figure_edges
    output['world_up'] = camera.world_up.tolist()
    output['frame_infos'] = results
    util.dump_json(
        output,
        f'{paths.DATA_ROOT}/h36m/poseviz/S{i_subject}_{activity_name}_{camera_name}.json'
    )
Esempio n. 4
0
def make_muco():
    joint_info, selected_joints = make_joint_info()

    root_3dhp = f'{paths.DATA_ROOT}/3dhp'
    root_muco = f'{paths.DATA_ROOT}/muco'
    sample_info = np.load(f'{root_muco}/composite_frame_origins.npy')
    n_all_joints = 28
    valid_indices = list(np.load(f'{root_muco}/valid_composite_frame_indices.npy'))
    all_detections = util.load_pickle(f'{root_muco}/yolov3_detections.pkl')
    all_detections = np.array([all_detections[k] for k in sorted(all_detections.keys())])
    all_visible_boxes = np.load(f'{root_muco}/visible_boxes.npy')
    matloader = functools.lru_cache(1024)(matlabfile.load)

    @functools.lru_cache(1024)
    def get_world_coords(i_subject, i_seq, i_cam, anno_name):
        seqpath = f'{root_3dhp}/S{i_subject}/Seq{i_seq}'
        anno_file = matloader(f'{seqpath}/annot.mat')
        camcoords = anno_file[anno_name][i_cam].reshape(
            [-1, n_all_joints, 3])[:, selected_joints]
        camera = load_cameras(f'{seqpath}/camera.calibration')[i_cam]
        world_coords = [camera.camera_to_world(c) for c in camcoords]
        return world_coords

    examples = []

    with util.BoundedPool(None, 120) as pool:
        for i_sample, people, detections, visible_boxes in zip(
                util.progressbar(valid_indices), sample_info[valid_indices],
                all_detections[valid_indices], all_visible_boxes[valid_indices]):

            detections = [box for box in detections if box[-1] > 0.1]
            if not detections:
                continue

            filename = f'{i_sample + 1:06d}.jpg'
            image_relpath = f'unaugmented_set_001/{filename[:2]}/{filename[:4]}/{filename}'

            gt_people = []
            for i_person, ((i_subject, i_seq, i_cam, i_frame), visible_box) in enumerate(
                    zip(people, visible_boxes)):
                seqpath = f'{root_3dhp}/S{i_subject}/Seq{i_seq}'
                world_coords = get_world_coords(i_subject, i_seq, i_cam, 'annot3')[i_frame]
                univ_world_coords = get_world_coords(
                    i_subject, i_seq, i_cam, 'univ_annot3')[i_frame]
                camera = load_cameras(f'{seqpath}/camera.calibration')[i_cam]

                im_coords = camera.world_to_image(world_coords)
                coord_bbox = boxlib.expand(boxlib.intersect(
                    boxlib.bb_of_points(im_coords),
                    boxlib.full_box([2048, 2048])), 1.05)
                bbox = boxlib.intersect_vertical(visible_box, coord_bbox)

                ex = p3ds.Pose3DExample(
                    image_relpath, world_coords, bbox, camera, mask=None,
                    univ_coords=univ_world_coords)
                gt_people.append(ex)

            if not gt_people:
                continue

            iou_matrix = np.array([[boxlib.iou(gt_person.bbox, box[:4])
                                    for box in detections]
                                   for gt_person in gt_people])
            gt_indices, det_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)

            for i_gt, i_det in zip(gt_indices, det_indices):
                gt_box = gt_people[i_gt].bbox
                det_box = detections[i_det]
                if (iou_matrix[i_gt, i_det] > 0.1 and
                        boxlib.area(det_box) < 2 * boxlib.area(gt_box)):
                    ex = gt_people[i_gt]
                    ex.bbox = np.array(detections[i_det][:4])
                    pool.apply_async(make_efficient_example, (ex, root_muco, i_gt),
                                     callback=examples.append)

    examples.sort(key=lambda ex: ex.image_path)
    return p3ds.Pose3DDataset(joint_info, examples)
Esempio n. 5
0
def make_mpi_inf_3dhp(camera_ids=(0, 1, 2, 4, 5, 6, 7, 8)):
    all_short_names = (
        'spi3,spi4,spi2,spin,pelv,neck,head,htop,lcla,lsho,lelb,lwri,lhan,rcla,rsho,relb,rwri,'
        'rhan,lhip,lkne,lank,lfoo,ltoe,rhip,rkne,rank,rfoo,rtoe'.split(','))

    test_set_selected_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 14]
    selected_joints = [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 3, 6, 4]
    joint_names = [all_short_names[j] for j in selected_joints]

    edges = ('htop-head-neck-lsho-lelb-lwri,neck-rsho-relb-rwri,neck-spin-pelv-lhip-lkne-lank,'
             'pelv-rhip-rkne-rank')
    joint_info = p3ds.JointInfo(joint_names, edges)

    root_3dhp = f'{paths.DATA_ROOT}/3dhp'
    detections_all = util.load_pickle(f'{paths.DATA_ROOT}/3dhp/yolov3_person_detections.pkl')

    #################################
    # TRAINING AND VALIDATION SET
    #################################
    num_frames = np.asarray(
        [[6416, 12430], [6502, 6081], [12488, 12283], [6171, 6675], [12820, 12312], [6188, 6145],
         [6239, 6320], [6468, 6054]])

    train_subjects = [0, 1, 2, 3, 4, 5, 6]
    valid_subjects = [7]  # this is my own arbitrary split for validation (Istvan Sarandi)
    train_examples = []
    valid_examples = []

    pool = util.BoundedPool(None, 120)
    for i_subject, i_seq, i_cam in itertools.product(
            train_subjects + valid_subjects, range(2), camera_ids):
        seqpath = f'{root_3dhp}/S{i_subject + 1}/Seq{i_seq + 1}'
        print(f'Processing {seqpath} camera {i_cam}')

        cam3d_coords = [ann.reshape([ann.shape[0], -1, 3])[:, selected_joints]
                        for ann in matlabfile.load(f'{seqpath}/annot.mat')['annot3']]
        univ_cam3d_coords = [ann.reshape([ann.shape[0], -1, 3])[:, selected_joints]
                             for ann in matlabfile.load(f'{seqpath}/annot.mat')['univ_annot3']]
        cameras = load_cameras(f'{seqpath}/camera.calibration')

        examples_container = train_examples if i_subject in train_subjects else valid_examples
        frame_step = 5

        prev_coords = None
        camera = cameras[i_cam]
        n_frames = num_frames[i_subject, i_seq]

        if i_subject == 5 and i_seq == 1 and i_cam == 2:
            # This video is shorter for some reason
            n_frames = 3911

        for i_frame in util.progressbar(range(0, n_frames, frame_step)):
            image_relpath = (
                    f'3dhp/S{i_subject + 1}/Seq{i_seq + 1}/'
                    f'imageSequence/img_{i_cam}_{i_frame:06d}.jpg')

            cam_coords = cam3d_coords[i_cam][i_frame]
            world_coords = cameras[i_cam].camera_to_world(cam_coords)

            univ_camcoords = univ_cam3d_coords[i_cam][i_frame]
            univ_world_coords = cameras[i_cam].camera_to_world(univ_camcoords)

            # Check if the joints are within the image frame bounds
            if not np.all(camera.is_visible(world_coords, [2048, 2048])):
                continue

            im_coords = camera.camera_to_image(cam_coords)
            bbox = get_bbox(im_coords, image_relpath, detections_all)

            # Adaptive temporal sampling
            if (prev_coords is not None and
                    np.all(np.linalg.norm(world_coords - prev_coords, axis=1) < 100)):
                continue
            prev_coords = world_coords

            mask_path = image_relpath.replace('imageSequence', 'FGmasks')
            new_image_relpath = image_relpath.replace('3dhp', '3dhp_downscaled')
            ex = p3ds.Pose3DExample(
                image_relpath, world_coords, bbox, camera, mask=mask_path,
                univ_coords=univ_world_coords)

            pool.apply_async(make_efficient_example, (ex, new_image_relpath, 1, True),
                             callback=examples_container.append)

    print('Waiting for tasks...')
    pool.close()
    pool.join()
    print('Done...')
    #################################
    # TEST SET
    #################################
    test_examples = []

    cam1_4 = make_3dhp_test_camera(
        sensor_size=np.array([10, 10]), im_size=np.array([2048, 2048]), focal_length=7.32506,
        pixel_aspect=1.00044, center_offset=np.array([-0.0322884, 0.0929296]), distortion=None,
        origin=np.array([3427.28, 1387.86, 309.42]), up=np.array([-0.208215, 0.976233, 0.06014]),
        right=np.array([0.000575281, 0.0616098, -0.9981]))

    cam5_6 = make_3dhp_test_camera(
        sensor_size=np.array([10, 5.625]), im_size=np.array([1920, 1080]), focal_length=8.770747185,
        pixel_aspect=0.993236423, center_offset=np.array([-0.104908645, 0.104899704]),
        distortion=np.array([-0.276859611, 0.131125256, -0.000360494, -0.001149441, -0.049318332]),
        origin=np.array([-2104.3074, 1038.6707, -4596.6367]),
        up=np.array([0.025272345, 0.995038509, 0.096227370]),
        right=np.array([-0.939647257, -0.009210289, 0.342020929]))

    activity_names = [
        'Stand/Walk', 'Exercise', 'Sit on Chair', 'Reach/Crouch', 'On Floor', 'Sports', 'Misc.']
    for i_subject in range(1, 7):
        seqpath = f'{root_3dhp}/TS{i_subject}'
        annotation_path = f'{seqpath}/annot_data.mat'

        with h5py.File(annotation_path, 'r') as m:
            cam3d_coords = np.array(m['annot3'])[:, 0, test_set_selected_joints]
            univ_cam3d_coords = np.array(m['univ_annot3'])[:, 0, test_set_selected_joints]
            valid_frames = np.where(m['valid_frame'][:, 0])[0]
            activity_ids = m['activity_annotation'][:, 0].astype(int) - 1

        camera = cam1_4 if i_subject <= 4 else cam5_6
        scene = ['green-screen', 'no-green-screen', 'outdoor'][(i_subject - 1) // 2]

        for i_frame in valid_frames:
            image_relpath = f'3dhp/TS{i_subject}/imageSequence/img_{i_frame + 1:06d}.jpg'
            cam_coords = cam3d_coords[i_frame]
            univ_camcoords = univ_cam3d_coords[i_frame]
            activity = activity_names[activity_ids[i_frame]]
            world_coords = camera.camera_to_world(cam_coords)
            univ_world_coords = camera.camera_to_world(univ_camcoords)
            im_coords = camera.camera_to_image(cam_coords)
            bbox = get_bbox(im_coords, image_relpath, detections_all)

            ex = p3ds.Pose3DExample(
                image_relpath, world_coords, bbox, camera, activity_name=activity,
                scene_name=scene, univ_coords=univ_world_coords)
            test_examples.append(ex)

    train_examples.sort(key=lambda x: x.image_path)
    valid_examples.sort(key=lambda x: x.image_path)
    test_examples.sort(key=lambda x: x.image_path)
    return p3ds.Pose3DDataset(joint_info, train_examples, valid_examples, test_examples)
Esempio n. 6
0
def make_h36m(train_subjects=(1, 5, 6, 7, 8),
              valid_subjects=(),
              test_subjects=(9, 11),
              correct_S9=True,
              partial_visibility=False):
    joint_names = ('rhip,rkne,rank,lhip,lkne,lank,tors,neck,head,htop,'
                   'lsho,lelb,lwri,rsho,relb,rwri,pelv'.split(','))
    edges = ('htop-head-neck-lsho-lelb-lwri,neck-rsho-relb-rwri,'
             'neck-tors-pelv-lhip-lkne-lank,pelv-rhip-rkne-rank')
    joint_info = ps3d.JointInfo(joint_names, edges)

    if not util.all_disjoint(train_subjects, valid_subjects, test_subjects):
        raise Exception('Set of train, val and test subject must be disjoint.')

    # use last subject of the non-test subjects for validation
    train_examples = []
    test_examples = []
    valid_examples = []
    pool = util.BoundedPool(None, 120)

    if partial_visibility:
        dir_suffix = '_partial'
        further_expansion_factor = 1.8
    else:
        dir_suffix = '' if correct_S9 else 'incorrect_S9'
        further_expansion_factor = 1

    for i_subject in [*test_subjects, *train_subjects, *valid_subjects]:
        if i_subject in train_subjects:
            examples_container = train_examples
        elif i_subject in valid_subjects:
            examples_container = valid_examples
        else:
            examples_container = test_examples

        frame_step = 5 if i_subject in train_subjects else 64

        for activity_name, camera_id in itertools.product(
                get_activity_names(i_subject), range(4)):
            print(f'Processing S{i_subject} {activity_name} {camera_id}')
            image_relpaths, world_coords_all, bboxes, camera = get_examples(
                i_subject,
                activity_name,
                camera_id,
                frame_step=frame_step,
                correct_S9=correct_S9)
            prev_coords = None
            for image_relpath, world_coords, bbox in zip(
                    util.progressbar(image_relpaths), world_coords_all,
                    bboxes):
                # Using very similar examples is wasteful when training. Therefore:
                # skip frame if all keypoints are within a distance compared to last stored frame.
                # This is not done when testing, as it would change the results.
                if (i_subject in train_subjects and prev_coords is not None
                        and np.all(
                            np.linalg.norm(world_coords -
                                           prev_coords, axis=1) < 100)):
                    continue
                prev_coords = world_coords
                activity_name = activity_name.split(' ')[0]
                ex = ps3d.Pose3DExample(image_relpath,
                                        world_coords,
                                        bbox,
                                        camera,
                                        activity_name=activity_name)
                new_image_relpath = image_relpath.replace(
                    'h36m', f'h36m_downscaled{dir_suffix}')
                pool.apply_async(
                    make_efficient_example,
                    (ex, new_image_relpath, further_expansion_factor),
                    callback=examples_container.append)

    print('Waiting for tasks...')
    pool.close()
    pool.join()
    print('Done...')
    train_examples.sort(key=lambda x: x.image_path)
    valid_examples.sort(key=lambda x: x.image_path)
    test_examples.sort(key=lambda x: x.image_path)
    return ps3d.Pose3DDataset(joint_info, train_examples, valid_examples,
                              test_examples)
Esempio n. 7
0
def make_mupots_yolo():
    all_short_names = (
        'thor,spi4,spi2,spin,pelv,neck,head,htop,lcla,lsho,lelb,lwri,lhan,rcla,rsho,relb,rwri,'
        'rhan,lhip,lkne,lank,lfoo,ltoe,rhip,rkne,rank,rfoo,rtoe'.split(','))

    # originally: [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 4, 3, 6]
    selected_joints = [
        7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 3, 6, 4
    ]
    order_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 14]
    joint_names = [all_short_names[j] for j in selected_joints]
    j = p3ds.JointInfo.make_id_map(joint_names)
    edges = [(j.htop, j.head), (j.head, j.neck), (j.neck, j.lsho),
             (j.lsho, j.lelb), (j.lelb, j.lwri), (j.neck, j.rsho),
             (j.rsho, j.relb), (j.relb, j.rwri), (j.neck, j.spin),
             (j.spin, j.pelv), (j.pelv, j.lhip), (j.lhip, j.lkne),
             (j.lkne, j.lank), (j.pelv, j.rhip), (j.rhip, j.rkne),
             (j.rkne, j.rank)]
    joint_info = p3ds.JointInfo(j, edges)

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')

    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []
    for i_seq in range(1, 21):
        annotations = matlabfile.load(
            f'{root}/TS{i_seq}/annot.mat')['annotations']
        intrinsic_matrix = intrinsic_matrices[f'TS{i_seq}']
        camera = cameralib.Camera(np.zeros(3),
                                  np.eye(3),
                                  intrinsic_matrix,
                                  distortion_coeffs=None,
                                  world_up=(0, -1, 0))

        n_people = annotations.shape[1]
        n_frames = annotations.shape[0]
        for i_frame in range(n_frames):

            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'
            for detection in detections_frame:
                if detection[4] > 0.1:
                    ex = p3ds.Pose3DExample(image_path,
                                            dummy_coords,
                                            detection[:4],
                                            camera,
                                            mask=None,
                                            univ_coords=dummy_coords,
                                            scene_name=f'TS{i_seq}')
                    examples_test.append(ex)

            gt_people = []

            for i_person in range(n_people):
                world_coords = np.array(
                    annotations[i_frame, i_person].annot3.T[order_joints],
                    dtype=np.float32)
                univ_world_coords = np.array(
                    annotations[i_frame, i_person].univ_annot3.T[order_joints],
                    dtype=np.float32)
                im_coords = camera.world_to_image(world_coords)
                gt_box = boxlib.expand(boxlib.bb_of_points(im_coords), 1.1)
                ex = p3ds.Pose3DExample(image_path,
                                        world_coords,
                                        gt_box,
                                        camera,
                                        mask=None,
                                        univ_coords=univ_world_coords,
                                        scene_name=f'TS{i_seq}')
                gt_people.append(ex)

            confident_detections = [
                det for det in detections_frame if det[-1] > 0.1
            ]
            if confident_detections:
                iou_matrix = np.array([[
                    boxlib.iou(gt_person.bbox, box[:4])
                    for box in confident_detections
                ] for gt_person in gt_people])
                gt_indices, detection_indices = scipy.optimize.linear_sum_assignment(
                    -iou_matrix)
                for i_gt, i_det in zip(gt_indices, detection_indices):
                    if iou_matrix[i_gt, i_det] > 0.1:
                        ex = gt_people[i_gt]
                        ex.bbox = np.array(confident_detections[i_det][:4])
                        examples_val.append(ex)

    return p3ds.Pose3DDataset(joint_info,
                              valid_examples=examples_val,
                              test_examples=examples_test)