def render(self, proj_matrix, canvas):
    """Draw this 3D point onto ``canvas`` as a circle.

    ``self.point`` is projected with ``proj_matrix`` via
    ``multiview.project_3d_points_to_image_plane_without_distortion``, the
    resulting 2D coordinates are truncated to integers, and a circle is
    drawn with OpenCV using ``self.size`` for both radius and thickness.

    Args:
        proj_matrix: projection matrix forwarded to the ``multiview`` helper.
        canvas: image that ``cv2.circle`` draws into (modified in place).

    Returns:
        The same ``canvas`` object, after drawing.
    """
    # The helper takes a batch of points, so wrap the single point in an array.
    points_3d = np.array([self.point])
    projected = multiview.project_3d_points_to_image_plane_without_distortion(
        proj_matrix, points_3d)

    # OpenCV needs integer pixel coordinates as a plain tuple.
    center = tuple(int(coord) for coord in projected[0])

    cv2.circle(canvas, center, self.size, self.color, self.size)
    return canvas
def render(self, proj_matrix, canvas):
    """Draw this 3D line segment onto ``canvas``.

    Both endpoints (``self.start_point`` and ``self.end_point``) are
    projected in a single call to
    ``multiview.project_3d_points_to_image_plane_without_distortion``,
    truncated to integer pixel coordinates, and joined with ``cv2.line``
    using ``self.color`` and a thickness of ``self.size``.

    Args:
        proj_matrix: projection matrix forwarded to the ``multiview`` helper.
        canvas: image that ``cv2.line`` draws into (modified in place).

    Returns:
        The same ``canvas`` object, after drawing.
    """
    endpoints_3d = np.array([self.start_point, self.end_point])
    projected = multiview.project_3d_points_to_image_plane_without_distortion(
        proj_matrix, endpoints_3d)

    # Exactly two rows are expected: one per endpoint.
    start_2d, end_2d = projected
    start_pixel = tuple(int(coord) for coord in start_2d)
    end_pixel = tuple(int(coord) for coord in end_2d)

    cv2.line(canvas, start_pixel, end_pixel, self.color, self.size)
    return canvas
def unproject_heatmaps(heatmaps, proj_matricies, coord_volumes,
                       volume_aggregation_method='sum', vol_confidences=None):
    """Back-project per-view 2D heatmaps into 3D volumes and merge the views.

    For every batch element and every view, the 3D grid points taken from
    ``coord_volumes`` are projected with that view's projection matrix, the
    view's heatmap is sampled at the projected locations with
    ``F.grid_sample``, grid points whose projected depth is not positive
    are zeroed, and the per-view volumes are combined.

    Args:
        heatmaps: tensor indexed as (batch, view, joint, height, width).
        proj_matricies: indexable as ``proj_matricies[batch_i, view_i]``;
            each entry is handed to the ``multiview`` projection helper.
        coord_volumes: tensor whose ``shape[1:4]`` is the volume shape and
            whose last dimension holds the 3 coordinates of each grid point.
        volume_aggregation_method: ``'sum'``, ``'max'``, ``'softmax'`` or
            any string starting with ``'conf'`` (confidence-weighted sum).
        vol_confidences: required for the ``'conf*'`` methods; for each
            batch element it must be viewable as ``(n_views, n_joints)``.

    Returns:
        Tensor of shape ``(batch_size, n_joints, *volume_shape)`` on the
        same device as ``heatmaps``.

    Raises:
        ValueError: if the aggregation method is unknown, or a ``'conf*'``
            method is requested without ``vol_confidences``.
    """
    device = heatmaps.device
    batch_size, n_views, n_joints = heatmaps.shape[0], heatmaps.shape[1], heatmaps.shape[2]
    heatmap_shape = tuple(heatmaps.shape[3:])
    heatmap_height, heatmap_width = heatmap_shape[0], heatmap_shape[1]
    volume_shape = coord_volumes.shape[1:4]

    volume_batch = torch.zeros(batch_size, n_joints, *volume_shape, device=device)

    # TODO: speed up this loop
    for batch_i in range(batch_size):
        coord_volume = coord_volumes[batch_i]
        grid_coord = coord_volume.reshape((-1, 3))

        volume_batch_to_aggregate = torch.zeros(n_views, n_joints, *volume_shape, device=device)

        for view_i in range(n_views):
            # grid_sample expects a leading batch dimension on its input.
            heatmap = heatmaps[batch_i, view_i].unsqueeze(0)

            grid_coord_proj = multiview.project_3d_points_to_image_plane_without_distortion(
                proj_matricies[batch_i, view_i], grid_coord, convert_back_to_euclidean=False
            )

            # Depth must be larger than 0.0; remember offenders before dividing.
            invalid_mask = grid_coord_proj[:, 2] <= 0.0
            # Not to divide by zero (these points are zeroed out below anyway).
            grid_coord_proj[grid_coord_proj[:, 2] == 0.0, 2] = 1.0

            grid_coord_proj = multiview.homogeneous_to_euclidean(grid_coord_proj)

            # Transform pixel coordinates to the [-1.0, 1.0] range.
            # grid_sample reads grid[..., 0] as x (along width) and
            # grid[..., 1] as y (along height), so x is normalised by the
            # width and y by the height. The previous code had the two
            # swapped, which only went unnoticed for square heatmaps.
            grid_coord_proj_transformed = torch.zeros_like(grid_coord_proj)
            grid_coord_proj_transformed[:, 0] = 2 * (grid_coord_proj[:, 0] / heatmap_width - 0.5)
            grid_coord_proj_transformed[:, 1] = 2 * (grid_coord_proj[:, 1] / heatmap_height - 0.5)
            grid_coord_proj = grid_coord_proj_transformed

            # Prepare for F.grid_sample: (n_points, 2) -> (1, n_points, 1, 2).
            grid_coord_proj = grid_coord_proj.unsqueeze(1).unsqueeze(0)
            try:
                current_volume = F.grid_sample(heatmap, grid_coord_proj, align_corners=True)
            except TypeError:  # old PyTorch without the align_corners keyword
                current_volume = F.grid_sample(heatmap, grid_coord_proj)

            # Zero out non-valid points.
            current_volume = current_volume.view(n_joints, -1)
            current_volume[:, invalid_mask] = 0.0

            # Reshape back to volume and collect.
            current_volume = current_volume.view(n_joints, *volume_shape)
            volume_batch_to_aggregate[view_i] = current_volume

        # Aggregate the per-view volumes.
        if volume_aggregation_method.startswith('conf'):
            if vol_confidences is None:
                raise ValueError(
                    "vol_confidences is required for volume_aggregation_method: {}".format(
                        volume_aggregation_method))
            view_weights = vol_confidences[batch_i].view(n_views, n_joints, 1, 1, 1)
            volume_batch[batch_i] = (volume_batch_to_aggregate * view_weights).sum(0)
        elif volume_aggregation_method == 'sum':
            volume_batch[batch_i] = volume_batch_to_aggregate.sum(0)
        elif volume_aggregation_method == 'max':
            volume_batch[batch_i] = volume_batch_to_aggregate.max(0)[0]
        elif volume_aggregation_method == 'softmax':
            # Softmax across views for every (joint, voxel) entry; softmax is
            # out-of-place, so no defensive clone of the volumes is needed.
            view_weights = nn.functional.softmax(
                volume_batch_to_aggregate.view(n_views, -1), dim=0)
            view_weights = view_weights.view(n_views, n_joints, *volume_shape)
            volume_batch[batch_i] = (volume_batch_to_aggregate * view_weights).sum(0)
        else:
            raise ValueError("Unknown volume_aggregation_method: {}".format(
                volume_aggregation_method))

    return volume_batch
def visualize_batch(images_batch, heatmaps_batch, keypoints_2d_batch, proj_matricies_batch,
                    keypoints_3d_batch_gt, keypoints_3d_batch_pred,
                    kind="cmu", cuboids_batch=None, confidences_batch=None,
                    batch_index=0, size=5, max_n_cols=10, pred_kind=None):
    """Render one batch element as a grid of matplotlib panels.

    One column is drawn per view (at most ``max_n_cols``). Rows, top to
    bottom: input images; predicted 2D keypoints (only if
    ``keypoints_2d_batch`` is given); ground-truth 3D keypoints projected
    into each view; predicted 3D keypoints projected into each view;
    cuboid overlay (only if ``cuboids_batch`` is given); per-view
    confidence bar chart (only if ``confidences_batch`` is given).

    Args:
        images_batch: batch of multi-view images; element ``batch_index``
            is converted with ``image_batch_to_numpy`` / ``denormalize_image``.
        heatmaps_batch: only ``shape[1]`` is read, as the number of views.
        keypoints_2d_batch: optional predicted 2D keypoints, indexed
            ``[batch_index][view_i]`` after ``to_numpy``.
        proj_matricies_batch: tensor indexed ``[batch_index, view_i]``.
        keypoints_3d_batch_gt: tensor of ground-truth 3D keypoints.
        keypoints_3d_batch_pred: tensor of predicted 3D keypoints.
        kind: skeleton kind passed to ``draw_2d_pose`` for the predicted-2D
            and GT rows.
        cuboids_batch: optional sequence of objects with a
            ``render(proj_matrix, image)`` method.
        confidences_batch: optional tensor indexed ``[batch_index, view_i]``.
        batch_index: which batch element to draw.
        size: size of each panel, passed through to ``figsize``.
        max_n_cols: maximum number of views to draw.
        pred_kind: skeleton kind for the projected-prediction row; defaults
            to ``kind``.

    Returns:
        Whatever ``fig_to_array`` returns for the assembled figure.
    """
    if pred_kind is None:
        pred_kind = kind

    n_views = heatmaps_batch.shape[1]

    # Three rows are always present (images, GT projection, prediction
    # projection); each optional input contributes one more.
    optional_inputs = (keypoints_2d_batch, cuboids_batch, confidences_batch)
    n_rows = 3 + sum(item is not None for item in optional_inputs)
    n_cols = min(n_views, max_n_cols)

    fig, axes = plt.subplots(ncols=n_cols, nrows=n_rows,
                             figsize=(n_cols * size, n_rows * size))
    # Guarantee 2D indexing even when there is a single column.
    axes = axes.reshape(n_rows, n_cols)

    # Loop-invariant conversions, done once instead of once per view per row.
    proj_matrices = [
        proj_matricies_batch[batch_index, view_i].detach().cpu().numpy()
        for view_i in range(n_cols)
    ]
    keypoints_3d_gt = keypoints_3d_batch_gt[batch_index].detach().cpu().numpy()
    keypoints_3d_pred = keypoints_3d_batch_pred[batch_index].detach().cpu().numpy()

    row_i = 0

    # images
    axes[row_i, 0].set_ylabel("image", size='large')

    images = image_batch_to_numpy(images_batch[batch_index])
    images = denormalize_image(images).astype(np.uint8)
    images = images[..., ::-1]  # bgr -> rgb

    for view_i in range(n_cols):
        axes[row_i][view_i].imshow(images[view_i])
    row_i += 1

    # 2D keypoints (pred)
    if keypoints_2d_batch is not None:
        axes[row_i, 0].set_ylabel("2d keypoints (pred)", size='large')

        keypoints_2d = to_numpy(keypoints_2d_batch)[batch_index]
        for view_i in range(n_cols):
            axes[row_i][view_i].imshow(images[view_i])
            draw_2d_pose(keypoints_2d[view_i], axes[row_i][view_i], kind=kind)
        row_i += 1

    # 2D keypoints (gt projected)
    axes[row_i, 0].set_ylabel("2d keypoints (gt projected)", size='large')

    for view_i in range(n_cols):
        axes[row_i][view_i].imshow(images[view_i])
        keypoints_2d_gt_proj = project_3d_points_to_image_plane_without_distortion(
            proj_matrices[view_i], keypoints_3d_gt)
        draw_2d_pose(keypoints_2d_gt_proj, axes[row_i][view_i], kind=kind)
    row_i += 1

    # 2D keypoints (pred projected)
    axes[row_i, 0].set_ylabel("2d keypoints (pred projected)", size='large')

    for view_i in range(n_cols):
        axes[row_i][view_i].imshow(images[view_i])
        keypoints_2d_pred_proj = project_3d_points_to_image_plane_without_distortion(
            proj_matrices[view_i], keypoints_3d_pred)
        draw_2d_pose(keypoints_2d_pred_proj, axes[row_i][view_i], kind=pred_kind)
    row_i += 1

    # cuboids
    if cuboids_batch is not None:
        axes[row_i, 0].set_ylabel("cuboid", size='large')

        cuboid = cuboids_batch[batch_index]
        for view_i in range(n_cols):
            # Render onto a copy so the base image stays clean.
            axes[row_i][view_i].imshow(
                cuboid.render(proj_matrices[view_i], images[view_i].copy()))
        row_i += 1

    # confidences
    if confidences_batch is not None:
        axes[row_i, 0].set_ylabel("confidences", size='large')

        # Fix the y-range to [0, 1] only when every confidence in the whole
        # batch fits in it; computed once rather than once per view.
        clamp_ylim = torch.max(confidences_batch).item() <= 1.0

        for view_i in range(n_cols):
            confidences = to_numpy(confidences_batch[batch_index, view_i])
            xs = np.arange(len(confidences))

            axes[row_i, view_i].bar(xs, confidences, color='green')
            axes[row_i, view_i].set_xticks(xs)
            if clamp_ylim:
                axes[row_i, view_i].set_ylim(0.0, 1.0)

    fig.tight_layout()

    fig_image = fig_to_array(fig)

    # NOTE: closes every open pyplot figure, not only the one created here.
    plt.close('all')

    return fig_image
def __getitem__(self, idx):
    """Build the single-view sample for dataset index ``idx``.

    The camera for this index comes from ``self.camera_idxes``; the shot
    record comes from ``self.labels['table']``. If the camera is listed in
    ``self.ignore_cameras`` or its bounding box is empty, the sample is
    returned without any per-view entries (``images``, ``detections``,
    ``scale``, ``cameras``, ``proj_matrices``).

    Args:
        idx: integer index into ``self.labels['table']`` and
            ``self.camera_idxes``.

    Returns:
        A dict (a ``defaultdict`` with its default factory disabled) that
        always contains ``subject``, ``keypoints_3d`` and ``indexes``,
        contains ``pred_keypoints_3d`` when ``self.keypoints_3d_pred`` is
        set, and contains the per-view lists when the view was loaded.

    Raises:
        AssertionError: if the image could not be loaded and no file exists
            at the computed path.
        IOError: if the file exists but could not be decoded as an image.
    """
    camera_idx = self.camera_idxes[idx]

    sample = defaultdict(list)  # return value
    shot = self.labels['table'][idx]

    subject = self.labels['subject_names'][shot['subject_idx']]
    sample['subject'] = subject
    action = self.labels['action_names'][shot['action_idx']]
    frame_idx = shot['frame_idx']

    # Single-iteration loop: `continue` is used to skip the view entirely.
    for camera_idx, camera_name in [[camera_idx, self.labels['camera_names'][camera_idx]]]:
        if camera_idx in self.ignore_cameras:
            continue

        # load bounding box
        bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]]  # TLBR to LTRB
        bbox_height = bbox[2] - bbox[0]
        if bbox_height == 0:
            # convention: if the bbox is empty, then this view is missing
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, self.scale_bbox)
        scale = ((bbox[2] - bbox[0]) / 150.0, (bbox[3] - bbox[1]) / 150.0)

        # load image
        if self.data_format == 'zip':
            image_dir = self.root.replace('processed', 'processed.zip@')
        else:
            image_dir = self.root
        image_path = os.path.join(
            image_dir, subject, action,
            'imageSequence' + '-undistorted' * self.undistort_images,
            camera_name, 'img_%06d.jpg' % (frame_idx + 1))

        if self.data_format == 'zip':
            from mvn.datasets.utils import zipreader_imread
            image = zipreader_imread(image_path)
        else:
            image = cv2.imread(image_path)

        if image is None:
            # Raised explicitly (instead of via `assert`) so the check is
            # not stripped under `python -O`; the exception type is kept.
            if not os.path.isfile(image_path):
                raise AssertionError('%s doesn\'t exist' % image_path)
            # The file is there but the reader returned nothing: fail here
            # rather than letting `None` propagate into later processing.
            raise IOError('%s exists but could not be read as an image' % image_path)

        # load camera
        shot_camera = self.labels['cameras'][shot['subject_idx'], camera_idx]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'], shot_camera['K'],
                               shot_camera['dist'], camera_name)

        if self.crop:
            # crop image and keep the camera consistent with the crop
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if self.image_shape is not None:
            # resize and keep the camera consistent with the new size
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, self.image_shape)
            retval_camera.update_after_resize(image_shape_before_resize, self.image_shape)

            sample['image_shapes_before_resize'].append(image_shape_before_resize)

        if self.norm_image:
            image = normalize_image(image)

        if self.erase:
            # erase image regions around a fixed set of projected GT joints
            keypoints_3d_gt = shot['keypoints'][:self.num_keypoints]
            keypoints_2d_gt = project_3d_points_to_image_plane_without_distortion(
                retval_camera.projection, keypoints_3d_gt)
            erase_joints = [6, 1, 4, 11, 14]
            image = erase_image(image, [keypoints_2d_gt[joint] for joint in erase_joints])

        sample['images'].append(image)
        sample['detections'].append(bbox + (1.0, ))  # TODO add real confidences
        sample['scale'].append(scale)
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)

    # 3D keypoints, with a dummy confidence of 1.0 appended as an extra column
    sample['keypoints_3d'] = np.pad(
        shot['keypoints'][:self.num_keypoints],
        ((0, 0), (0, 1)), 'constant', constant_values=1.0)

    # save sample's index
    sample['indexes'] = idx

    if self.keypoints_3d_pred is not None:
        sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

    # Disable auto-creation so missing keys raise KeyError for consumers.
    sample.default_factory = None
    return sample
def evaluate(self, keypoints_gt, keypoints_3d_predicted, proj_matricies_batch=None, config=None,
             split_by_subject=False, transfer_cmu_to_human36m=False,
             transfer_human36m_to_human36m=False):
    """Compute absolute and root-relative 3D pose errors, plus optional 2D metrics.

    Args:
        keypoints_gt: array of ground-truth 3D keypoints, indexed as
            (pose, joint, coordinate).
        keypoints_3d_predicted: array of predictions with the same shape.
        proj_matricies_batch: optional; when given, entry ``[n, 0]`` is used
            to project pose ``n`` to 2D and ``self.evaluate2d`` is run.
        config: forwarded unchanged to ``self.evaluate2d``.
        split_by_subject: forwarded to ``self.evaluate_using_per_pose_error``.
        transfer_cmu_to_human36m: compare only a fixed 6-joint subset, taking
            the predicted joints from the CMU index list.
        transfer_human36m_to_human36m: same subset, with identical index
            lists for ground truth and prediction (takes precedence over
            ``transfer_cmu_to_human36m`` when both are set).

    Returns:
        Tuple ``(score, result)`` where ``score`` is
        ``result['per_pose_error_relative']['Average']['Average']`` and
        ``result`` holds ``per_pose_error``, ``per_pose_error_relative`` and,
        if 2D evaluation ran, ``pose_2d_metric``.

    Raises:
        ValueError: if the two keypoint arrays differ in shape.
    """
    if keypoints_3d_predicted.shape != keypoints_gt.shape:
        raise ValueError(
            '`keypoints_3d_predicted` shape should be %s, got %s' %
            (keypoints_gt.shape, keypoints_3d_predicted.shape))

    joints_remapped = transfer_cmu_to_human36m or transfer_human36m_to_human36m
    if joints_remapped:
        human36m_joints = [10, 11, 15, 14, 1, 4]
        if transfer_human36m_to_human36m:
            cmu_joints = [10, 11, 15, 14, 1, 4]
        else:
            cmu_joints = [10, 8, 9, 7, 14, 13]

        keypoints_gt = keypoints_gt[:, human36m_joints]
        keypoints_3d_predicted = keypoints_3d_predicted[:, cmu_joints]

    # Mean per-joint Euclidean error for each pose (the original comment
    # states millimetres, i.e. it assumes the inputs are in mm).
    per_pose_error = np.sqrt(
        ((keypoints_gt - keypoints_3d_predicted) ** 2).sum(2)).mean(1)

    # Root joint used for the relative error: index 6 in the full skeleton
    # (the old `6 if self.kind == "mpii" else 6` chose 6 either way), and
    # index 0 of the reduced joint subset after remapping.
    root_index = 0 if joints_remapped else 6

    keypoints_gt_relative = keypoints_gt - keypoints_gt[:, root_index:root_index + 1, :]
    keypoints_3d_predicted_relative = (
        keypoints_3d_predicted - keypoints_3d_predicted[:, root_index:root_index + 1, :])

    per_pose_error_relative = np.sqrt(
        ((keypoints_gt_relative - keypoints_3d_predicted_relative) ** 2).sum(2)).mean(1)

    result = {
        'per_pose_error':
            self.evaluate_using_per_pose_error(per_pose_error, split_by_subject),
        'per_pose_error_relative':
            self.evaluate_using_per_pose_error(per_pose_error_relative, split_by_subject)
    }

    # project to 2d, evaluate 2d
    if proj_matricies_batch is not None:
        nsamples = keypoints_gt.shape[0]
        # Only the first projection matrix of each sample is used; a
        # singleton axis is inserted after the sample axis.
        keypoints_2d_gt = np.array([
            project_3d_points_to_image_plane_without_distortion(
                proj_matricies_batch[n, 0], keypoints_gt[n])
            for n in range(nsamples)
        ])[:, np.newaxis]
        keypoints_2d_pred = np.array([
            project_3d_points_to_image_plane_without_distortion(
                proj_matricies_batch[n, 0], keypoints_3d_predicted[n])
            for n in range(nsamples)
        ])[:, np.newaxis]

        name_values_dict, mean_rate_dict = self.evaluate2d(
            keypoints_2d_gt, keypoints_2d_pred, [0.1, 0.2, 0.3, 0.4, 0.5], config)
        result.update({
            'pose_2d_metric': {
                'per_joint_pckh': name_values_dict,
                'avg_joint_pckh': mean_rate_dict
            }
        })

    return result['per_pose_error_relative']['Average']['Average'], result
def unproject_heatmaps(heatmaps, proj_matricies, coord_volumes,
                       volume_aggregation_method='sum', vol_confidences=None):
    """Back-project per-view 2D heatmaps into 3D volumes and merge the views.

    For every batch element and every view, the 3D grid points taken from
    ``coord_volumes`` are projected with that view's projection matrix, the
    view's heatmap is sampled at the projected locations with
    ``F.grid_sample``, grid points whose projected depth is not positive
    are zeroed, and the per-view volumes are combined.

    Shape comments below that quote concrete numbers are examples carried
    over from the original annotations (1 batch, 4 views, 32 joints, 96x96
    heatmaps, 64^3 volume); the code itself does not depend on them.

    Args:
        heatmaps: tensor indexed as (batch, view, joint, height, width).
        proj_matricies: indexable as ``proj_matricies[batch_i, view_i]``;
            each entry is handed to the ``multiview`` projection helper.
        coord_volumes: tensor whose ``shape[1:4]`` is the volume shape and
            whose last dimension holds the 3 coordinates of each grid point.
        volume_aggregation_method: ``'sum'``, ``'max'``, ``'softmax'`` or
            any string starting with ``'conf'`` (confidence-weighted sum).
        vol_confidences: required for the ``'conf*'`` methods; for each
            batch element it must be viewable as ``(n_views, n_joints)``.

    Returns:
        Tensor of shape ``(batch_size, n_joints, *volume_shape)`` on the
        same device as ``heatmaps``.

    Raises:
        ValueError: if the aggregation method is unknown, or a ``'conf*'``
            method is requested without ``vol_confidences``.
    """
    device = heatmaps.device
    # e.g. 1, 4, 32, (96, 96)
    batch_size, n_views, n_joints = heatmaps.shape[0], heatmaps.shape[1], heatmaps.shape[2]
    heatmap_shape = tuple(heatmaps.shape[3:])
    heatmap_height, heatmap_width = heatmap_shape[0], heatmap_shape[1]
    volume_shape = coord_volumes.shape[1:4]  # e.g. 64x64x64

    # e.g. a 1x32x64x64x64 tensor
    volume_batch = torch.zeros(batch_size, n_joints, *volume_shape, device=device)

    # TODO: speed up this loop
    for batch_i in range(batch_size):
        coord_volume = coord_volumes[batch_i]  # e.g. Bx64x64x64x3 -> 64x64x64x3
        grid_coord = coord_volume.reshape((-1, 3))  # e.g. 262144x3

        # e.g. 4x32x64x64x64
        volume_batch_to_aggregate = torch.zeros(n_views, n_joints, *volume_shape, device=device)

        for view_i in range(n_views):
            # e.g. 32x96x96 -> 1x32x96x96 (a dimension is added at the front,
            # because grid_sample expects a leading batch dimension).
            heatmap = heatmaps[batch_i, view_i].unsqueeze(0)

            # e.g. 262144x3 (kept homogeneous: the third column is the depth)
            grid_coord_proj = multiview.project_3d_points_to_image_plane_without_distortion(
                proj_matricies[batch_i, view_i], grid_coord, convert_back_to_euclidean=False
            )

            # Depth must be larger than 0.0.
            # (Original author's open question: does this happen when the
            # person gets too close to the camera?)
            invalid_mask = grid_coord_proj[:, 2] <= 0.0
            # Not to divide by zero (these points are zeroed out below anyway).
            grid_coord_proj[grid_coord_proj[:, 2] == 0.0, 2] = 1.0

            grid_coord_proj = multiview.homogeneous_to_euclidean(grid_coord_proj)

            # Transform pixel coordinates to the [-1.0, 1.0] range: e.g.
            # coordinates in (0,0)-(96,96) become relative coordinates with
            # the centre at (0,0), top-left at (-1,-1), bottom-right at (1,1).
            # grid_sample reads grid[..., 0] as x (along width) and
            # grid[..., 1] as y (along height), so x is normalised by the
            # width and y by the height. The previous code had the two
            # swapped, which only went unnoticed for square heatmaps.
            grid_coord_proj_transformed = torch.zeros_like(grid_coord_proj)  # e.g. 262144x2
            grid_coord_proj_transformed[:, 0] = 2 * (grid_coord_proj[:, 0] / heatmap_width - 0.5)
            grid_coord_proj_transformed[:, 1] = 2 * (grid_coord_proj[:, 1] / heatmap_height - 0.5)
            grid_coord_proj = grid_coord_proj_transformed

            # Prepare for F.grid_sample: unsqueeze inserts a dimension at the
            # given position, giving e.g. 1x262144x1x2 for a 1x32x96x96 heatmap.
            grid_coord_proj = grid_coord_proj.unsqueeze(1).unsqueeze(0)
            try:
                # e.g. output 1x32x262144x1
                current_volume = F.grid_sample(heatmap, grid_coord_proj, align_corners=True)
            except TypeError:  # old PyTorch without the align_corners keyword
                current_volume = F.grid_sample(heatmap, grid_coord_proj)

            # Zero out non-valid points.
            current_volume = current_volume.view(n_joints, -1)  # e.g. 32x262144
            current_volume[:, invalid_mask] = 0.0

            # Reshape back to volume (e.g. 32x64x64x64) and collect.
            current_volume = current_volume.view(n_joints, *volume_shape)
            volume_batch_to_aggregate[view_i] = current_volume

        # Aggregate the per-view volumes.
        if volume_aggregation_method.startswith('conf'):
            if vol_confidences is None:
                raise ValueError(
                    "vol_confidences is required for volume_aggregation_method: {}".format(
                        volume_aggregation_method))
            view_weights = vol_confidences[batch_i].view(n_views, n_joints, 1, 1, 1)
            volume_batch[batch_i] = (volume_batch_to_aggregate * view_weights).sum(0)
        elif volume_aggregation_method == 'sum':
            volume_batch[batch_i] = volume_batch_to_aggregate.sum(0)
        elif volume_aggregation_method == 'max':
            volume_batch[batch_i] = volume_batch_to_aggregate.max(0)[0]
        elif volume_aggregation_method == 'softmax':
            # Softmax across views for every (joint, voxel) entry; softmax is
            # out-of-place, so no defensive clone of the volumes is needed.
            view_weights = nn.functional.softmax(
                volume_batch_to_aggregate.view(n_views, -1), dim=0)
            # Reshape back to (n_views, n_joints, *volume_shape).
            view_weights = view_weights.view(n_views, n_joints, *volume_shape)
            volume_batch[batch_i] = (volume_batch_to_aggregate * view_weights).sum(0)
        else:
            raise ValueError("Unknown volume_aggregation_method: {}".format(
                volume_aggregation_method))

    return volume_batch