def prepareVideoSample(info, images, cameras, bboxes, subject = 'S1', imageShape = [384, 384], scaleBox = 1.0, crop = True, normImage = False):
    """Build a multi-view sample dict from pre-loaded per-camera data.

    Args:
        idx-free variant of the dataset __getitem__: everything is passed in.
        info: dict with 'subject_names' and 'camera_names' lists.
        images: dict mapping camera name -> image array.
        cameras: dict mapping camera name -> dict with 'R', 't', 'K', 'dist'.
        bboxes: dict mapping camera name -> bounding box in TLBR order.
        subject: subject name; must appear in info['subject_names'].
        imageShape: target (height, width) for resizing, or None to skip.
        scaleBox: scale factor applied to each bounding box.
        crop: if True, crop each image to its (scaled) bbox.
        normImage: if True, normalize each image via normalize_image().

    Returns:
        dict with parallel lists 'images', 'cameras', 'proj_matrices'
        (one entry per camera whose bbox is non-empty).
    """
    sample = defaultdict(list)  # return value
    # Validate the subject name early (raises ValueError if unknown);
    # the index itself is not needed here.
    info['subject_names'].index(subject)
    for camera_idx, camera_name in enumerate(info['camera_names']):
        bbox = bboxes[camera_name][[1, 0, 3, 2]]  # TLBR to LTRB
        bbox_height = bbox[2] - bbox[0]

        if bbox_height == 0:
            # convention: if the bbox is empty, then this view is missing
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, scaleBox)

        # load camera and image for this view
        shot_camera = cameras[camera_name]
        image = images[camera_name]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'], shot_camera['K'], shot_camera['dist'], camera_name)

        if crop:
            # crop image and update the camera intrinsics accordingly
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if imageShape is not None:
            # resize and update the camera intrinsics accordingly
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, imageShape)
            retval_camera.update_after_resize(image_shape_before_resize, imageShape)

        if normImage:
            # fix: this flag was previously accepted but silently ignored;
            # sibling loaders apply normalize_image() under the same flag
            image = normalize_image(image)

        sample['images'].append(image)
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)

    # disable defaultdict behaviour so missing keys raise KeyError downstream
    sample.default_factory = None
    return sample
    def __getitem__(self, idx):
        """Load one multi-view Human3.6M frame: images, cameras, 3D keypoints."""
        out = defaultdict(list)
        row = self.labels['table'][idx]

        subject = self.labels['subject_names'][row['subject_idx']]
        action = self.labels['action_names'][row['action_idx']]
        frame_idx = row['frame_idx']

        for cam_idx, cam_name in enumerate(self.labels['camera_names']):
            if cam_idx in self.ignore_cameras:
                continue

            # bounding box, reordered TLBR -> LTRB
            box = row['bbox_by_camera_tlbr'][cam_idx][[1, 0, 3, 2]]
            if box[2] - box[0] == 0:
                # convention: an empty box marks a missing view
                continue

            box = scale_bbox(box, self.scale_bbox)

            # read the frame image from disk
            img_path = os.path.join(
                self.h36m_root, subject, action,
                'imageSequence' + '-undistorted' * self.undistort_images,
                cam_name, 'img_%06d.jpg' % (frame_idx + 1))
            assert os.path.isfile(img_path), '%s doesn\'t exist' % img_path
            img = cv2.imread(img_path)

            # build the camera for this (subject, view) pair
            cam_record = self.labels['cameras'][row['subject_idx'], cam_idx]
            cam = Camera(cam_record['R'], cam_record['t'], cam_record['K'],
                         cam_record['dist'], cam_name)

            if self.crop:
                # crop and keep the intrinsics consistent with the crop
                img = crop_image(img, box)
                cam.update_after_crop(box)

            if self.image_shape is not None:
                # resize and keep the intrinsics consistent with the resize
                pre_resize_shape = img.shape[:2]
                img = resize_image(img, self.image_shape)
                cam.update_after_resize(pre_resize_shape, self.image_shape)

                out['image_shapes_before_resize'].append(pre_resize_shape)

            if self.norm_image:
                img = normalize_image(img)

            out['images'].append(img)
            out['detections'].append(box + (1.0, ))  # TODO add real confidences
            out['cameras'].append(cam)
            out['proj_matrices'].append(cam.projection)

        # 3D keypoints, padded with a dummy confidence column of ones
        out['keypoints_3d'] = np.pad(row['keypoints'][:self.num_keypoints],
                                     ((0, 0), (0, 1)),
                                     'constant',
                                     constant_values=1.0)

        # save sample's index
        out['indexes'] = idx

        if self.keypoints_3d_pred is not None:
            out['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

        out.default_factory = None
        return out
# Exemple #3  (scraped-page example separator; the following "0" was its vote
# count — commented out so these artifact lines no longer read as code)
# 0
    def __getitem__(self, idx):
        """Load the single-camera sample at *idx*.

        Unlike the multi-view variants, ``self.camera_idxes[idx]`` selects
        exactly one camera per index, so the per-view lists in the returned
        sample hold at most one entry each.
        """
        camera_idx = self.camera_idxes[idx]

        sample = defaultdict(list)  # return value
        shot = self.labels['table'][idx]

        subject = self.labels['subject_names'][shot['subject_idx']]
        sample['subject'] = subject
        action = self.labels['action_names'][shot['action_idx']]
        frame_idx = shot['frame_idx']

        # Single-element loop keeps the body shape identical to the
        # multi-view __getitem__ implementations.
        for camera_idx, camera_name in [[
                camera_idx, self.labels['camera_names'][camera_idx]
        ]]:
            if camera_idx in self.ignore_cameras:
                continue

            # load bounding box (reorder TLBR -> LTRB)
            bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3,
                                                            2]]  # TLBR to LTRB
            bbox_height = bbox[2] - bbox[0]
            if bbox_height == 0:
                # convention: if the bbox is empty, then this view is missing
                continue

            # scale the bounding box
            bbox = scale_bbox(bbox, self.scale_bbox)
            # NOTE(review): 150.0 looks like a reference box size used to
            # express the box extent as a relative scale — confirm upstream
            scale = ((bbox[2] - bbox[0]) / 150.0, (bbox[3] - bbox[1]) / 150.0)

            # load image, either from a zip archive or from plain files
            image_dir = self.root.replace(
                'processed',
                'processed.zip@') if self.data_format == 'zip' else self.root
            image_path = os.path.join(
                image_dir, subject, action,
                'imageSequence' + '-undistorted' * self.undistort_images,
                camera_name, 'img_%06d.jpg' % (frame_idx + 1))
            if self.data_format == 'zip':
                from mvn.datasets.utils import zipreader_imread
                image = zipreader_imread(image_path)
            else:
                image = cv2.imread(image_path)
            if image is None:
                assert os.path.isfile(
                    image_path), '%s doesn\'t exist' % image_path
                # fix: previously a present-but-unreadable file slipped
                # through as image=None and crashed later in crop/resize
                raise IOError('failed to read %s' % image_path)

            # load camera
            shot_camera = self.labels['cameras'][shot['subject_idx'],
                                                 camera_idx]
            retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                                   shot_camera['K'], shot_camera['dist'],
                                   camera_name)

            if self.crop:
                # crop image and update the camera intrinsics accordingly
                image = crop_image(image, bbox)
                retval_camera.update_after_crop(bbox)

            if self.image_shape is not None:
                # resize and update the camera intrinsics accordingly
                image_shape_before_resize = image.shape[:2]
                image = resize_image(image, self.image_shape)
                retval_camera.update_after_resize(image_shape_before_resize,
                                                  self.image_shape)

                sample['image_shapes_before_resize'].append(
                    image_shape_before_resize)

            if self.norm_image:
                image = normalize_image(image)

            if self.erase:
                # erase selected joints from the image (data augmentation):
                # project GT 3D joints into this view, then blank them out
                keypoints_3d_gt = shot['keypoints'][:self.num_keypoints]
                keypoints_2d_gt = project_3d_points_to_image_plane_without_distortion(
                    retval_camera.projection, keypoints_3d_gt)
                erase_joints = [6, 1, 4, 11, 14]
                image = erase_image(
                    image, [keypoints_2d_gt[joint] for joint in erase_joints])

            sample['images'].append(image)
            sample['detections'].append(bbox +
                                        (1.0, ))  # TODO add real confidences
            sample['scale'].append(scale)
            sample['cameras'].append(retval_camera)
            sample['proj_matrices'].append(retval_camera.projection)

        # 3D keypoints, padded with a dummy confidence column of ones
        sample['keypoints_3d'] = np.pad(shot['keypoints'][:self.num_keypoints],
                                        ((0, 0), (0, 1)),
                                        'constant',
                                        constant_values=1.0)

        # save sample's index
        sample['indexes'] = idx

        if self.keypoints_3d_pred is not None:
            sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

        # disable defaultdict behaviour so missing keys raise KeyError
        sample.default_factory = None
        return sample
def prepareSample(idx, labels, human36mRoot, keyPoint3d = None , imageShape = None, scaleBox = 1.0, crop = True, normImage = False, numKeypoints = 17):
    """Load one multi-view Human3.6M sample (images read from disk).

    Args:
        idx: row index into labels['table'].
        labels: dataset index with 'table', 'subject_names', 'action_names',
            'camera_names' and 'cameras'.
        human36mRoot: root directory of the extracted image sequences.
        keyPoint3d: optional predicted 3D keypoints; when given,
            keyPoint3d[idx] is stored under 'pred_keypoints_3d'.
        imageShape: target (height, width) for resizing, or None to skip.
        scaleBox: scale factor applied to each bounding box.
        crop: if True, crop each image to its (scaled) bbox.
        normImage: if True, normalize images via normalize_image().
        numKeypoints: number of 3D keypoints to keep (default 17, the
            previously hard-coded value).

    Returns:
        dict with per-view lists ('images', 'detections', 'cameras',
        'proj_matrices', 'action', 'subject', 'frameId') and sample-level
        entries ('keypoints_3d', 'indexes', optionally 'pred_keypoints_3d').
    """
    sample = defaultdict(list)  # return value
    shot = labels['table'][idx]
    subject = labels['subject_names'][shot['subject_idx']]
    action = labels['action_names'][shot['action_idx']]
    frame_idx = shot['frame_idx']

    for camera_idx, camera_name in enumerate(labels['camera_names']):
        bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]]  # TLBR to LTRB
        bbox_height = bbox[2] - bbox[0]

        if bbox_height == 0:
            # convention: if the bbox is empty, then this view is missing
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, scaleBox)

        # load image
        image_path = os.path.join(human36mRoot, subject, action, 'imageSequence', camera_name, 'img_%06d.jpg' % (frame_idx+1))
        assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path
        image = cv2.imread(image_path)

        # load camera
        shot_camera = labels['cameras'][shot['subject_idx'], camera_idx]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'], shot_camera['K'], shot_camera['dist'], camera_name)

        if crop:
            # crop image and update the camera intrinsics accordingly
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if imageShape is not None:
            # resize and update the camera intrinsics accordingly
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, imageShape)
            retval_camera.update_after_resize(image_shape_before_resize, imageShape)

            sample['image_shapes_before_resize'].append(image_shape_before_resize)

        if normImage:
            image = normalize_image(image)

        sample['images'].append(image)
        sample['detections'].append(bbox + (1.0,))  # TODO add real confidences
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)
        sample["action"].append(action)
        sample["subject"].append(subject)
        sample["frameId"].append(frame_idx)

    # 3D keypoints with a dummy confidence column appended.
    # fix: previously recomputed inside the per-camera loop (and never set at
    # all when every view was missing); hoisted out to match the sibling
    # __getitem__ implementations, which set it once at sample level.
    sample['keypoints_3d'] = np.pad(
        shot['keypoints'][:numKeypoints],
        ((0, 0), (0, 1)), 'constant', constant_values=1.0)

    # save sample's index
    sample['indexes'] = idx

    if keyPoint3d is not None:
        sample['pred_keypoints_3d'] = keyPoint3d[idx]

    # disable defaultdict behaviour so missing keys raise KeyError downstream
    sample.default_factory = None
    return sample
    def __getitem__(self, idx):
        """Assemble one multi-view sample (images, detections, cameras)."""
        out = defaultdict(list)
        record = self.labels['table'][idx]

        person = record['person_id']  # looked up but not stored in the sample

        action_idx = record['action_idx']
        action = self.labels['action_names'][action_idx]

        frame_idx = record['frame_name']

        for cam_idx, cam_name in enumerate(self.labels['camera_names']):
            chosen = cam_idx in self.choose_cameras and cam_idx not in self.ignore_cameras
            if not chosen:
                continue

            # NOTE(review): the key name says "tlbr" but values are unpacked
            # as left/top/right/bottom — confirm the stored order matches.
            left, top, right, bottom, bbox_confidence = record[
                'bbox_by_camera_tlbr'][cam_idx]
            box = (left, top, right, bottom)

            if top - bottom == 0 or left - right == 0:
                # convention: a degenerate box marks a missing view
                continue

            # optionally square, then scale, the bounding box
            if self.square_bbox:
                box = get_square_bbox(box)

            box = scale_bbox(box, self.scale_bbox)

            # image path layout:
            # $ROOT/[ACTION]/hdImgs/[VIEW]/[VIEW]_[FRAME:08].jpg
            img_path = os.path.join(self.example_root, action, 'hdImgs',
                                    cam_name,
                                    f'{cam_name}_{frame_idx:08}.jpg')
            assert os.path.isfile(img_path), '%s doesn\'t exist' % img_path
            img = cv2.imread(img_path)

            # build the camera for this (action, view) pair
            cam_record = self.labels['cameras'][action_idx, cam_idx]
            cam = Camera(cam_record['R'], cam_record['t'], cam_record['K'],
                         cam_record['dist'], cam_name)

            if self.crop:
                # crop and keep the intrinsics consistent with the crop
                img = crop_image(img, box)
                cam.update_after_crop(box)

            if self.image_shape is not None:
                # resize and keep the intrinsics consistent with the resize
                pre_resize_shape = img.shape[:2]
                img = resize_image(img, self.image_shape)
                cam.update_after_resize(pre_resize_shape, self.image_shape)

                out['image_shapes_before_resize'].append(pre_resize_shape)

            if self.norm_image:
                img = normalize_image(img)

            out['images'].append(img)
            out['detections'].append(box)
            out['cameras'].append(cam)
            # projection matrices intentionally not stored (saves space)

        # ground-truth 3D keypoints, when this dataset provides them
        if 'keypoints' in record:
            out['keypoints_3d'] = np.array(
                record['keypoints'][:self.num_keypoints])

        # save sample's index
        out['indexes'] = idx

        if self.keypoints_3d_pred is not None:
            out['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

        out.default_factory = None
        return out