Example #1
    def render(self, proj_matrix, canvas):
        point_2d = multiview.project_3d_points_to_image_plane_without_distortion(
            proj_matrix, np.array([self.point]))[0]

        point_2d = tuple(map(int, point_2d))
        cv2.circle(canvas, point_2d, self.size, self.color, self.size)

        return canvas
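The render() method above delegates the 3D-to-2D step to multiview.project_3d_points_to_image_plane_without_distortion. A minimal NumPy sketch of such a distortion-free pinhole projection (illustrative only; the multiview helper itself is not reproduced here):

import numpy as np

def project_points(proj_matrix, points_3d):
    # proj_matrix: 3x4 camera projection matrix, points_3d: Nx3 world coordinates
    points_h = np.hstack([points_3d, np.ones((points_3d.shape[0], 1))])  # Nx4 homogeneous points
    projected = points_h @ proj_matrix.T                                 # Nx3 homogeneous image points
    return projected[:, :2] / projected[:, 2:3]                          # divide by depth -> Nx2 pixels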
Example #2
    def render(self, proj_matrix, canvas):
        start_point_2d, end_point_2d = multiview.project_3d_points_to_image_plane_without_distortion(
            proj_matrix, np.array([self.start_point, self.end_point]))

        start_point_2d = tuple(map(int, start_point_2d))
        end_point_2d = tuple(map(int, end_point_2d))

        cv2.line(canvas, start_point_2d, end_point_2d, self.color, self.size)

        return canvas
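Both primitives then draw with plain OpenCV calls. The drawing part can be exercised on a blank canvas with toy values (the Point/Line classes themselves are not shown here):

import cv2
import numpy as np

canvas = np.zeros((480, 640, 3), dtype=np.uint8)           # blank BGR canvas
cv2.circle(canvas, (320, 240), 5, (0, 0, 255), 5)          # red dot: radius and thickness both equal to size
cv2.line(canvas, (100, 100), (540, 380), (0, 255, 0), 2)   # green segment between two projected endpoints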
Example #3
def unproject_heatmaps(heatmaps,
                       proj_matricies,
                       coord_volumes,
                       volume_aggregation_method='sum',
                       vol_confidences=None):
    device = heatmaps.device
    batch_size, n_views, n_joints, heatmap_shape = heatmaps.shape[
        0], heatmaps.shape[1], heatmaps.shape[2], tuple(heatmaps.shape[3:])
    volume_shape = coord_volumes.shape[1:4]

    volume_batch = torch.zeros(batch_size,
                               n_joints,
                               *volume_shape,
                               device=device)

    # TODO: speed up this loop
    for batch_i in range(batch_size):
        coord_volume = coord_volumes[batch_i]
        grid_coord = coord_volume.reshape((-1, 3))

        volume_batch_to_aggregate = torch.zeros(n_views,
                                                n_joints,
                                                *volume_shape,
                                                device=device)

        for view_i in range(n_views):
            heatmap = heatmaps[batch_i, view_i]
            heatmap = heatmap.unsqueeze(0)

            grid_coord_proj = multiview.project_3d_points_to_image_plane_without_distortion(
                proj_matricies[batch_i, view_i],
                grid_coord,
                convert_back_to_euclidean=False)

            invalid_mask = grid_coord_proj[:, 2] <= 0.0  # depth must be larger than 0.0

            grid_coord_proj[grid_coord_proj[:, 2] == 0.0, 2] = 1.0  # not to divide by zero
            grid_coord_proj = multiview.homogeneous_to_euclidean(
                grid_coord_proj)

            # transform to [-1.0, 1.0] range
            grid_coord_proj_transformed = torch.zeros_like(grid_coord_proj)
            grid_coord_proj_transformed[:, 0] = 2 * (
                grid_coord_proj[:, 0] / heatmap_shape[0] - 0.5)
            grid_coord_proj_transformed[:, 1] = 2 * (
                grid_coord_proj[:, 1] / heatmap_shape[1] - 0.5)
            grid_coord_proj = grid_coord_proj_transformed

            # prepare for F.grid_sample
            grid_coord_proj = grid_coord_proj.unsqueeze(1).unsqueeze(0)
            try:
                current_volume = F.grid_sample(heatmap,
                                               grid_coord_proj,
                                               align_corners=True)
            except TypeError:  # old PyTorch
                current_volume = F.grid_sample(heatmap, grid_coord_proj)

            # zero out non-valid points
            current_volume = current_volume.view(n_joints, -1)
            current_volume[:, invalid_mask] = 0.0

            # reshape back to volume
            current_volume = current_volume.view(n_joints, *volume_shape)

            # collect
            volume_batch_to_aggregate[view_i] = current_volume

        # aggregate resulting volume
        if volume_aggregation_method.startswith('conf'):
            volume_batch[batch_i] = (volume_batch_to_aggregate *
                                     vol_confidences[batch_i].view(
                                         n_views, n_joints, 1, 1, 1)).sum(0)
        elif volume_aggregation_method == 'sum':
            volume_batch[batch_i] = volume_batch_to_aggregate.sum(0)
        elif volume_aggregation_method == 'max':
            volume_batch[batch_i] = volume_batch_to_aggregate.max(0)[0]
        elif volume_aggregation_method == 'softmax':
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate.clone()
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate_softmin.view(
                n_views, -1)
            volume_batch_to_aggregate_softmin = nn.functional.softmax(
                volume_batch_to_aggregate_softmin, dim=0)
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate_softmin.view(
                n_views, n_joints, *volume_shape)

            volume_batch[batch_i] = (volume_batch_to_aggregate *
                                     volume_batch_to_aggregate_softmin).sum(0)
        else:
            raise ValueError("Unknown volume_aggregation_method: {}".format(
                volume_aggregation_method))

    return volume_batch
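The core of unproject_heatmaps is the grid_sample trick: every voxel center is projected into a view, the resulting pixel coordinates are rescaled to [-1.0, 1.0], and the joint heatmaps are sampled bilinearly at those locations. A self-contained sketch with toy shapes (no multiview module; all names and sizes here are illustrative):

import torch
import torch.nn.functional as F

n_joints, heatmap_size, volume_size = 17, 96, 8
heatmap = torch.rand(1, n_joints, heatmap_size, heatmap_size)            # 1 x J x H x W (one view)
proj = torch.tensor([[1., 0., 0., 0.],
                     [0., 1., 0., 0.],
                     [0., 0., 1., 5.]])                                  # toy 3x4 projection matrix
grid = torch.rand(volume_size ** 3, 3) * 10.0                            # voxel centers, N x 3

grid_h = torch.cat([grid, torch.ones(grid.shape[0], 1)], dim=1)          # N x 4 homogeneous coordinates
proj_pts = grid_h @ proj.t()                                             # N x 3 homogeneous image points
xy = proj_pts[:, :2] / proj_pts[:, 2:3]                                  # divide by depth -> pixel coordinates
xy = 2.0 * (xy / heatmap_size - 0.5)                                     # map [0, H] pixels to [-1, 1]

sample_grid = xy.view(1, -1, 1, 2)                                       # 1 x N x 1 x 2, as grid_sample expects
volume = F.grid_sample(heatmap, sample_grid, align_corners=True)         # 1 x J x N x 1
volume = volume.view(n_joints, volume_size, volume_size, volume_size)    # J x V x V x V single-view volume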
Example #4
def visualize_batch(images_batch,
                    heatmaps_batch,
                    keypoints_2d_batch,
                    proj_matricies_batch,
                    keypoints_3d_batch_gt,
                    keypoints_3d_batch_pred,
                    kind="cmu",
                    cuboids_batch=None,
                    confidences_batch=None,
                    batch_index=0,
                    size=5,
                    max_n_cols=10,
                    pred_kind=None):
    if pred_kind is None:
        pred_kind = kind

    n_views, n_joints = heatmaps_batch.shape[1], heatmaps_batch.shape[2]

    n_rows = 3
    n_rows = n_rows + 1 if keypoints_2d_batch is not None else n_rows
    n_rows = n_rows + 1 if cuboids_batch is not None else n_rows
    n_rows = n_rows + 1 if confidences_batch is not None else n_rows

    n_cols = min(n_views, max_n_cols)
    fig, axes = plt.subplots(ncols=n_cols,
                             nrows=n_rows,
                             figsize=(n_cols * size, n_rows * size))
    axes = axes.reshape(n_rows, n_cols)

    image_shape = images_batch.shape[3:]
    heatmap_shape = heatmaps_batch.shape[3:]

    row_i = 0

    # images
    axes[row_i, 0].set_ylabel("image", size='large')

    images = image_batch_to_numpy(images_batch[batch_index])
    images = denormalize_image(images).astype(np.uint8)
    images = images[..., ::-1]  # bgr -> rgb

    for view_i in range(n_cols):
        axes[row_i][view_i].imshow(images[view_i])
    row_i += 1

    # 2D keypoints (pred)
    if keypoints_2d_batch is not None:
        axes[row_i, 0].set_ylabel("2d keypoints (pred)", size='large')

        keypoints_2d = to_numpy(keypoints_2d_batch)[batch_index]
        for view_i in range(n_cols):
            axes[row_i][view_i].imshow(images[view_i])
            draw_2d_pose(keypoints_2d[view_i], axes[row_i][view_i], kind=kind)
        row_i += 1

    # 2D keypoints (gt projected)
    axes[row_i, 0].set_ylabel("2d keypoints (gt projected)", size='large')

    for view_i in range(n_cols):
        axes[row_i][view_i].imshow(images[view_i])
        keypoints_2d_gt_proj = project_3d_points_to_image_plane_without_distortion(
            proj_matricies_batch[batch_index, view_i].detach().cpu().numpy(),
            keypoints_3d_batch_gt[batch_index].detach().cpu().numpy())
        draw_2d_pose(keypoints_2d_gt_proj, axes[row_i][view_i], kind=kind)
    row_i += 1

    # 2D keypoints (pred projected)
    axes[row_i, 0].set_ylabel("2d keypoints (pred projected)", size='large')

    for view_i in range(n_cols):
        axes[row_i][view_i].imshow(images[view_i])
        keypoints_2d_pred_proj = project_3d_points_to_image_plane_without_distortion(
            proj_matricies_batch[batch_index, view_i].detach().cpu().numpy(),
            keypoints_3d_batch_pred[batch_index].detach().cpu().numpy())
        draw_2d_pose(keypoints_2d_pred_proj,
                     axes[row_i][view_i],
                     kind=pred_kind)
    row_i += 1

    # cuboids
    if cuboids_batch is not None:
        axes[row_i, 0].set_ylabel("cuboid", size='large')

        for view_i in range(n_cols):
            cuboid = cuboids_batch[batch_index]
            axes[row_i][view_i].imshow(
                cuboid.render(
                    proj_matricies_batch[batch_index,
                                         view_i].detach().cpu().numpy(),
                    images[view_i].copy()))
        row_i += 1

    # confidences
    if confidences_batch is not None:
        axes[row_i, 0].set_ylabel("confidences", size='large')

        for view_i in range(n_cols):
            confidences = to_numpy(confidences_batch[batch_index, view_i])
            xs = np.arange(len(confidences))

            axes[row_i, view_i].bar(xs, confidences, color='green')
            axes[row_i, view_i].set_xticks(xs)
            if torch.max(confidences_batch).item() <= 1.0:
                axes[row_i, view_i].set_ylim(0.0, 1.0)

    fig.tight_layout()

    fig_image = fig_to_array(fig)

    plt.close('all')

    return fig_image
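visualize_batch returns the figure as a NumPy image via fig_to_array, which is not shown here. A plausible equivalent, assuming the Agg backend (hypothetical name fig_to_rgb_array, not the source implementation):

import numpy as np

def fig_to_rgb_array(fig):
    fig.canvas.draw()                            # rasterize the figure (assumes an Agg-based canvas)
    rgba = np.asarray(fig.canvas.buffer_rgba())  # H x W x 4 uint8 view of the canvas
    return rgba[..., :3].copy()                  # drop the alpha channel -> H x W x 3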
Example #5
    def __getitem__(self, idx):
        camera_idx = self.camera_idxes[idx]

        sample = defaultdict(list)  # return value
        shot = self.labels['table'][idx]

        subject = self.labels['subject_names'][shot['subject_idx']]
        sample['subject'] = subject
        action = self.labels['action_names'][shot['action_idx']]
        frame_idx = shot['frame_idx']

        for camera_idx, camera_name in [[
                camera_idx, self.labels['camera_names'][camera_idx]
        ]]:
            if camera_idx in self.ignore_cameras:
                continue

            # load bounding box
            bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]]  # TLBR to LTRB
            bbox_height = bbox[2] - bbox[0]
            if bbox_height == 0:
                # convention: if the bbox is empty, then this view is missing
                continue

            # scale the bounding box
            bbox = scale_bbox(bbox, self.scale_bbox)
            scale = ((bbox[2] - bbox[0]) / 150.0, (bbox[3] - bbox[1]) / 150.0)

            # load image
            image_dir = self.root.replace(
                'processed',
                'processed.zip@') if self.data_format == 'zip' else self.root
            image_path = os.path.join(
                image_dir, subject, action,
                'imageSequence' + '-undistorted' * self.undistort_images,
                camera_name, 'img_%06d.jpg' % (frame_idx + 1))
            if self.data_format == 'zip':
                from mvn.datasets.utils import zipreader_imread
                image = zipreader_imread(image_path)
            else:
                image = cv2.imread(image_path)
            if image is None:
                assert os.path.isfile(
                    image_path), '%s doesn\'t exist' % image_path

            # load camera
            shot_camera = self.labels['cameras'][shot['subject_idx'],
                                                 camera_idx]
            retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                                   shot_camera['K'], shot_camera['dist'],
                                   camera_name)

            if self.crop:
                # crop image
                image = crop_image(image, bbox)
                retval_camera.update_after_crop(bbox)

            if self.image_shape is not None:
                # resize
                image_shape_before_resize = image.shape[:2]
                image = resize_image(image, self.image_shape)
                retval_camera.update_after_resize(image_shape_before_resize,
                                                  self.image_shape)

                sample['image_shapes_before_resize'].append(
                    image_shape_before_resize)

            if self.norm_image:
                image = normalize_image(image)

            if self.erase:
                # erase image
                keypoints_3d_gt = shot['keypoints'][:self.num_keypoints]
                keypoints_2d_gt = project_3d_points_to_image_plane_without_distortion(
                    retval_camera.projection, keypoints_3d_gt)
                erase_joints = [6, 1, 4, 11, 14]
                image = erase_image(
                    image, [keypoints_2d_gt[joint] for joint in erase_joints])

            sample['images'].append(image)
            sample['detections'].append(bbox +
                                        (1.0, ))  # TODO add real confidences
            sample['scale'].append(scale)
            sample['cameras'].append(retval_camera)
            sample['proj_matrices'].append(retval_camera.projection)

        # 3D keypoints
        # add dummy confidences
        sample['keypoints_3d'] = np.pad(shot['keypoints'][:self.num_keypoints],
                                        ((0, 0), (0, 1)),
                                        'constant',
                                        constant_values=1.0)

        # save sample's index
        sample['indexes'] = idx

        if self.keypoints_3d_pred is not None:
            sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

        sample.default_factory = None
        return sample
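A tiny worked example of the np.pad call above that adds dummy confidences: it appends a fourth column of 1.0 to an (N, 3) keypoint array.

import numpy as np

keypoints_3d = np.zeros((17, 3))                                  # N x 3 (x, y, z)
keypoints_3d_padded = np.pad(keypoints_3d, ((0, 0), (0, 1)),
                             'constant', constant_values=1.0)     # pad one column on the right
print(keypoints_3d_padded.shape)                                  # (17, 4); last column is all 1.0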
Example #6
    def evaluate(self,
                 keypoints_gt,
                 keypoints_3d_predicted,
                 proj_matricies_batch=None,
                 config=None,
                 split_by_subject=False,
                 transfer_cmu_to_human36m=False,
                 transfer_human36m_to_human36m=False):
        #keypoints_gt = self.labels['table']['keypoints'][:, :self.num_keypoints]
        if keypoints_3d_predicted.shape != keypoints_gt.shape:
            raise ValueError(
                '`keypoints_3d_predicted` shape should be %s, got %s' % \
                (keypoints_gt.shape, keypoints_3d_predicted.shape))

        if transfer_cmu_to_human36m or transfer_human36m_to_human36m:
            human36m_joints = [10, 11, 15, 14, 1, 4]
            if transfer_human36m_to_human36m:
                cmu_joints = [10, 11, 15, 14, 1, 4]
            else:
                cmu_joints = [10, 8, 9, 7, 14, 13]

            keypoints_gt = keypoints_gt[:, human36m_joints]
            keypoints_3d_predicted = keypoints_3d_predicted[:, cmu_joints]

        # mean error per 16/17 joints in mm, for each pose
        per_pose_error = np.sqrt(
            ((keypoints_gt - keypoints_3d_predicted)**2).sum(2)).mean(1)

        # relative mean error per 16/17 joints in mm, for each pose
        if not (transfer_cmu_to_human36m or transfer_human36m_to_human36m):
            root_index = 6 if self.kind == "mpii" else 6
        else:
            root_index = 0

        keypoints_gt_relative = keypoints_gt - keypoints_gt[:, root_index:root_index + 1, :]
        keypoints_3d_predicted_relative = keypoints_3d_predicted - keypoints_3d_predicted[:, root_index:root_index + 1, :]

        per_pose_error_relative = np.sqrt(
            ((keypoints_gt_relative -
              keypoints_3d_predicted_relative)**2).sum(2)).mean(1)

        result = {
            'per_pose_error':
            self.evaluate_using_per_pose_error(per_pose_error,
                                               split_by_subject),
            'per_pose_error_relative':
            self.evaluate_using_per_pose_error(per_pose_error_relative,
                                               split_by_subject)
        }

        # project to 2d, evaluate 2d
        if proj_matricies_batch is not None:
            nsamples = keypoints_gt.shape[0]
            keypoints_2d_gt = np.array([
                project_3d_points_to_image_plane_without_distortion(
                    proj_matricies_batch[n, 0], keypoints_gt[n])
                for n in range(nsamples)
            ])[:, np.newaxis]
            keypoints_2d_pred = np.array([
                project_3d_points_to_image_plane_without_distortion(
                    proj_matricies_batch[n, 0], keypoints_3d_predicted[n])
                for n in range(nsamples)
            ])[:, np.newaxis]
            name_values_dict, mean_rate_dict = self.evaluate2d(
                keypoints_2d_gt, keypoints_2d_pred, [0.1, 0.2, 0.3, 0.4, 0.5],
                config)

            result.update({
                'pose_2d_metric': {
                    'per_joint_pckh': name_values_dict,
                    'avg_joint_pckh': mean_rate_dict
                }
            })

        return result['per_pose_error_relative']['Average']['Average'], result
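A minimal numeric check of the per_pose_error formula above (MPJPE): the per-joint Euclidean distance averaged over joints, giving one value per pose.

import numpy as np

keypoints_gt = np.zeros((2, 17, 3))                               # poses x joints x xyz, in mm
keypoints_pred = keypoints_gt + np.array([3.0, 0.0, 4.0])         # every joint shifted by 5 mm
per_pose_error = np.sqrt(((keypoints_gt - keypoints_pred) ** 2).sum(2)).mean(1)
print(per_pose_error)                                             # [5. 5.]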
Example #7
def unproject_heatmaps(heatmaps, proj_matricies, coord_volumes, volume_aggregation_method='sum', vol_confidences=None):
    device = heatmaps.device
    batch_size, n_views, n_joints, heatmap_shape = heatmaps.shape[0], heatmaps.shape[1], heatmaps.shape[2], tuple(heatmaps.shape[3:]) # 1,4,32,96x96
    volume_shape = coord_volumes.shape[1:4] #64x64x64

    volume_batch = torch.zeros(batch_size, n_joints, *volume_shape, device=device)  # tensor of shape 1x32x64x64x64

    # TODO: speed up this loop
    for batch_i in range(batch_size):
        coord_volume = coord_volumes[batch_i] # Bx64x64x64x3 -> 64x64x64x3
        grid_coord = coord_volume.reshape((-1, 3)) # 262144x3

        volume_batch_to_aggregate = torch.zeros(n_views, n_joints, *volume_shape, device=device) # 4x32x64x64x64

        for view_i in range(n_views):
            heatmap = heatmaps[batch_i, view_i] # 1x4x32x96x96 -> 32x96x96
            heatmap = heatmap.unsqueeze(0) # 1x32x96x96 (adds a dimension at the very front)

            grid_coord_proj = multiview.project_3d_points_to_image_plane_without_distortion( # 262144x3
                proj_matricies[batch_i, view_i], grid_coord, convert_back_to_euclidean=False
            )

            invalid_mask = grid_coord_proj[:, 2] <= 0.0  # depth must be larger than 0.0 # can happen when the person gets too close to the camera??

            grid_coord_proj[grid_coord_proj[:, 2] == 0.0, 2] = 1.0  # not to divide by zero
            grid_coord_proj = multiview.homogeneous_to_euclidean(grid_coord_proj)

            # transform to [-1.0, 1.0] range
            grid_coord_proj_transformed = torch.zeros_like(grid_coord_proj) # 262144x2
            grid_coord_proj_transformed[:, 0] = 2 * (grid_coord_proj[:, 0] / heatmap_shape[0] - 0.5) # convert (0,0)-(96,96) pixel coordinates to relative coordinates with the center at (0,0), top-left at (-1,-1), bottom-right at (1,1)
            grid_coord_proj_transformed[:, 1] = 2 * (grid_coord_proj[:, 1] / heatmap_shape[1] - 0.5)
            grid_coord_proj = grid_coord_proj_transformed

            # prepare for F.grid_sample
            grid_coord_proj = grid_coord_proj.unsqueeze(1).unsqueeze(0) # unsqueeze apparently adds one dimension at the given position -> 1x262144x1x2; heatmap is 1x32x96x96
            try:
                current_volume = F.grid_sample(heatmap, grid_coord_proj, align_corners=True) # 1x32x262144x1 = Heatmap(1x32x96x96), grid_coord_proj(1x262144x1x2)
            except TypeError: # old PyTorch
                current_volume = F.grid_sample(heatmap, grid_coord_proj)

            # zero out non-valid points
            current_volume = current_volume.view(n_joints, -1) #32x262144
            current_volume[:, invalid_mask] = 0.0

            # reshape back to volume
            current_volume = current_volume.view(n_joints, *volume_shape) #32x64x64x64

            # collect
            volume_batch_to_aggregate[view_i] = current_volume

        # aggregate resulting volume
        if volume_aggregation_method.startswith('conf'):
            volume_batch[batch_i] = (volume_batch_to_aggregate * vol_confidences[batch_i].view(n_views, n_joints, 1, 1, 1)).sum(0)
        elif volume_aggregation_method == 'sum':
            volume_batch[batch_i] = volume_batch_to_aggregate.sum(0)
        elif volume_aggregation_method == 'max':
            volume_batch[batch_i] = volume_batch_to_aggregate.max(0)[0]
        elif volume_aggregation_method == 'softmax':
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate.clone() # 2x32x64x64x64(n_views, n_joints, *volume_shape)
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate_softmin.view(n_views, -1) # reshape
            volume_batch_to_aggregate_softmin = nn.functional.softmax(volume_batch_to_aggregate_softmin, dim=0)
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate_softmin.view(n_views, n_joints, *volume_shape) #reshape back

            volume_batch[batch_i] = (volume_batch_to_aggregate * volume_batch_to_aggregate_softmin).sum(0)
        else:
            raise ValueError("Unknown volume_aggregation_method: {}".format(volume_aggregation_method))

    return volume_batch