def prepareVideoSample(info, images, cameras, bboxes, subject='S1', imageShape=[384, 384], scaleBox=1.0, crop=True, normImage=False):
    """Build a multi-view sample from frames that are already in memory.

    Every camera named in ``info['camera_names']`` is visited. A view whose
    bounding box is empty is skipped; otherwise the box is scaled, the image
    is optionally cropped, resized and normalised, and a ``Camera`` object is
    updated to stay consistent with those image operations.

    Args:
        info: mapping providing ``'subject_names'`` and ``'camera_names'``.
        images: mapping from camera name to the image of that view.
        cameras: mapping from camera name to a record exposing ``'R'``,
            ``'t'``, ``'K'`` and ``'dist'``.
        bboxes: mapping from camera name to a box that supports index-list
            indexing (``box[[1, 0, 3, 2]]``); stored as TLBR according to the
            inline comment.
        subject: must be an element of ``info['subject_names']``.
        imageShape: target shape handed to ``resize_image``; ``None`` skips
            resizing. The default list is only read, never mutated.
        scaleBox: factor handed to ``scale_bbox``.
        crop: when true, crop each image to its (scaled) box.
        normImage: when true, pass each image through ``normalize_image``
            (the same step ``prepareSample`` performs).

    Returns:
        A dict-like object (a ``defaultdict`` whose ``default_factory`` has
        been cleared) with the list-valued keys ``'images'``, ``'cameras'``
        and ``'proj_matrices'``, one entry per kept view. The keys are absent
        when no view was kept.

    Raises:
        ValueError: if ``subject`` is not in ``info['subject_names']``.
    """
    # The index itself is not needed below; the lookup is kept so that an
    # unknown subject still fails with ValueError.
    info['subject_names'].index(subject)

    sample = defaultdict(list)  # return value

    for camera_name in info['camera_names']:
        bbox = bboxes[camera_name][[1, 0, 3, 2]]  # TLBR to LTRB

        # convention: if the bbox is empty, then this view is missing
        if bbox[2] - bbox[0] == 0:
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, scaleBox)

        # load camera
        shot_camera = cameras[camera_name]
        image = images[camera_name]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                               shot_camera['K'], shot_camera['dist'],
                               camera_name)

        if crop:
            # crop image and keep the camera in sync with the crop
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if imageShape is not None:
            # resize and keep the camera in sync with the resize
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, imageShape)
            retval_camera.update_after_resize(image_shape_before_resize,
                                              imageShape)

        if normImage:
            # previously this flag was accepted but never applied
            image = normalize_image(image)

        sample['images'].append(image)
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)  # projection matrices

    # Freeze the mapping: later lookups of missing keys raise KeyError
    # instead of silently creating empty lists.
    sample.default_factory = None
    return sample
def __getitem__(self, idx):
    """Assemble the multi-view sample for the shot stored at ``idx``.

    Each camera in ``self.labels['camera_names']`` is visited unless its
    index is in ``self.ignore_cameras`` or its bounding box is empty. For a
    kept view the image is read from disk, optionally cropped, resized and
    normalised, and a ``Camera`` object is updated to match.

    Args:
        idx: position of the shot in ``self.labels['table']``.

    Returns:
        A dict-like object (a ``defaultdict`` whose ``default_factory`` has
        been cleared). Per-view lists: ``'images'``, ``'detections'``,
        ``'cameras'``, ``'proj_matrices'`` and, when resizing is enabled,
        ``'image_shapes_before_resize'``. Per-sample entries:
        ``'keypoints_3d'`` (the shot's keypoints with a constant 1.0 column
        appended), ``'indexes'`` and, if predictions are available,
        ``'pred_keypoints_3d'``.

    Raises:
        AssertionError: if an image file does not exist.
        OSError: if an image file exists but ``cv2.imread`` cannot read it.
    """
    sample = defaultdict(list)  # return value

    shot = self.labels['table'][idx]
    subject = self.labels['subject_names'][shot['subject_idx']]
    action = self.labels['action_names'][shot['action_idx']]
    frame_idx = shot['frame_idx']

    for camera_idx, camera_name in enumerate(self.labels['camera_names']):
        if camera_idx in self.ignore_cameras:
            continue

        # load bounding box
        bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]]  # TLBR to LTRB

        # convention: if the bbox is empty, then this view is missing
        if bbox[2] - bbox[0] == 0:
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, self.scale_bbox)

        # load image; the '-undistorted' suffix is appended only when
        # self.undistort_images is truthy (string * bool)
        image_path = os.path.join(
            self.h36m_root, subject, action,
            'imageSequence' + '-undistorted' * self.undistort_images,
            camera_name, 'img_%06d.jpg' % (frame_idx + 1))
        assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None;
            # fail here instead of deep inside crop/resize.
            raise OSError('failed to read image %s' % image_path)

        # load camera
        shot_camera = self.labels['cameras'][shot['subject_idx'], camera_idx]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                               shot_camera['K'], shot_camera['dist'],
                               camera_name)

        if self.crop:
            # crop image
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if self.image_shape is not None:
            # resize
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, self.image_shape)
            retval_camera.update_after_resize(image_shape_before_resize,
                                              self.image_shape)

            sample['image_shapes_before_resize'].append(
                image_shape_before_resize)

        if self.norm_image:
            image = normalize_image(image)

        sample['images'].append(image)
        sample['detections'].append(bbox + (1.0, ))  # TODO add real confidences
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)

    # 3D keypoints
    # add dummy confidences
    sample['keypoints_3d'] = np.pad(shot['keypoints'][:self.num_keypoints],
                                    ((0, 0), (0, 1)),
                                    'constant',
                                    constant_values=1.0)

    # build cuboid
    # base_point = sample['keypoints_3d'][6, :3]
    # sides = np.array([self.cuboid_side, self.cuboid_side, self.cuboid_side])
    # position = base_point - sides / 2
    # sample['cuboids'] = volumetric.Cuboid3D(position, sides)

    # save sample's index
    sample['indexes'] = idx

    if self.keypoints_3d_pred is not None:
        sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

    # Freeze the mapping so missing keys raise KeyError from now on.
    sample.default_factory = None
    return sample
def __getitem__(self, idx):
    """Assemble a single-view sample for the shot stored at ``idx``.

    The camera used is ``self.camera_idxes[idx]``. If that camera is in
    ``self.ignore_cameras`` or its bounding box is empty, no view is added
    and only the per-sample entries are returned.

    Args:
        idx: position of the shot in ``self.labels['table']`` (and of the
            camera choice in ``self.camera_idxes``).

    Returns:
        A dict-like object (a ``defaultdict`` whose ``default_factory`` has
        been cleared). Per-view lists (zero or one entry): ``'images'``,
        ``'detections'``, ``'scale'``, ``'cameras'``, ``'proj_matrices'``
        and, when resizing is enabled, ``'image_shapes_before_resize'``.
        Per-sample entries: ``'subject'``, ``'keypoints_3d'`` (the shot's
        keypoints with a constant 1.0 column appended), ``'indexes'`` and,
        if predictions are available, ``'pred_keypoints_3d'``.

    Raises:
        AssertionError: if the image could not be read and the path is not
            an existing file.
        OSError: if the image could not be read although the path exists
            (or assertions are disabled).
    """
    camera_idx = self.camera_idxes[idx]

    sample = defaultdict(list)  # return value

    shot = self.labels['table'][idx]
    subject = self.labels['subject_names'][shot['subject_idx']]
    sample['subject'] = subject
    action = self.labels['action_names'][shot['action_idx']]
    frame_idx = shot['frame_idx']

    # One-iteration loop: kept so that `continue` can skip the view while
    # the per-sample entries below are still filled in.
    for camera_idx, camera_name in [
            (camera_idx, self.labels['camera_names'][camera_idx])]:
        if camera_idx in self.ignore_cameras:
            continue

        # load bounding box
        bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]]  # TLBR to LTRB

        # convention: if the bbox is empty, then this view is missing
        if bbox[2] - bbox[0] == 0:
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, self.scale_bbox)
        scale = ((bbox[2] - bbox[0]) / 150.0, (bbox[3] - bbox[1]) / 150.0)

        # load image
        if self.data_format == 'zip':
            image_dir = self.root.replace('processed', 'processed.zip@')
        else:
            image_dir = self.root
        image_path = os.path.join(
            image_dir, subject, action,
            'imageSequence' + '-undistorted' * self.undistort_images,
            camera_name, 'img_%06d.jpg' % (frame_idx + 1))
        if self.data_format == 'zip':
            from mvn.datasets.utils import zipreader_imread
            image = zipreader_imread(image_path)
        else:
            image = cv2.imread(image_path)
        if image is None:
            assert os.path.isfile(
                image_path), '%s doesn\'t exist' % image_path
            # The path exists (or asserts are off) but nothing was decoded:
            # stop here rather than passing None on to crop/resize.
            raise OSError('failed to read image %s' % image_path)

        # load camera
        shot_camera = self.labels['cameras'][shot['subject_idx'], camera_idx]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                               shot_camera['K'], shot_camera['dist'],
                               camera_name)

        if self.crop:
            # crop image
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if self.image_shape is not None:
            # resize
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, self.image_shape)
            retval_camera.update_after_resize(image_shape_before_resize,
                                              self.image_shape)

            sample['image_shapes_before_resize'].append(
                image_shape_before_resize)

        if self.norm_image:
            image = normalize_image(image)

        if self.erase:
            # erase image around the projections of a fixed set of joints
            keypoints_3d_gt = shot['keypoints'][:self.num_keypoints]
            keypoints_2d_gt = project_3d_points_to_image_plane_without_distortion(
                retval_camera.projection, keypoints_3d_gt)
            erase_joints = [6, 1, 4, 11, 14]
            image = erase_image(
                image, [keypoints_2d_gt[joint] for joint in erase_joints])

        sample['images'].append(image)
        sample['detections'].append(bbox + (1.0, ))  # TODO add real confidences
        sample['scale'].append(scale)
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)

    # 3D keypoints
    # add dummy confidences
    sample['keypoints_3d'] = np.pad(shot['keypoints'][:self.num_keypoints],
                                    ((0, 0), (0, 1)),
                                    'constant',
                                    constant_values=1.0)

    # save sample's index
    sample['indexes'] = idx

    if self.keypoints_3d_pred is not None:
        sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

    # Freeze the mapping so missing keys raise KeyError from now on.
    sample.default_factory = None
    return sample
def prepareSample(idx, labels, human36mRoot, keyPoint3d=None, imageShape=None, scaleBox=1.0, crop=True, normImage=False, numKeypoints=17):
    """Build the multi-view sample for the shot stored at ``labels['table'][idx]``.

    Each camera in ``labels['camera_names']`` is visited; a view whose
    bounding box is empty is skipped. For a kept view the image is read from
    ``human36mRoot/<subject>/<action>/imageSequence/<camera>/img_XXXXXX.jpg``
    (frame index plus one, zero-padded to six digits), optionally cropped,
    resized and normalised, and a ``Camera`` object is updated to match.

    Args:
        idx: position of the shot in ``labels['table']``.
        labels: mapping providing ``'table'``, ``'subject_names'``,
            ``'action_names'``, ``'camera_names'`` and ``'cameras'``.
        human36mRoot: root directory of the image tree.
        keyPoint3d: optional indexable of predicted keypoints; entry ``idx``
            is stored under ``'pred_keypoints_3d'``.
        imageShape: target shape handed to ``resize_image``; ``None`` skips
            resizing.
        scaleBox: factor handed to ``scale_bbox``.
        crop: when true, crop each image to its (scaled) box.
        normImage: when true, pass each image through ``normalize_image``.
        numKeypoints: how many leading keypoints of the shot to keep
            (previously hard-coded to 17).

    Returns:
        A dict-like object (a ``defaultdict`` whose ``default_factory`` has
        been cleared). Per-view lists: ``'images'``, ``'detections'``,
        ``'cameras'``, ``'proj_matrices'``, ``'action'``, ``'subject'``,
        ``'frameId'`` and, when resizing is enabled,
        ``'image_shapes_before_resize'``. Per-sample entries:
        ``'keypoints_3d'`` (keypoints with a constant 1.0 column appended),
        ``'indexes'`` and, if ``keyPoint3d`` is given,
        ``'pred_keypoints_3d'``.

    Raises:
        AssertionError: if an image file does not exist.
        OSError: if an image file exists but ``cv2.imread`` cannot read it.
    """
    sample = defaultdict(list)  # return value

    shot = labels['table'][idx]
    subject = labels['subject_names'][shot['subject_idx']]
    action = labels['action_names'][shot['action_idx']]
    frame_idx = shot['frame_idx']

    for camera_idx, camera_name in enumerate(labels['camera_names']):
        bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]]  # TLBR to LTRB

        # convention: if the bbox is empty, then this view is missing
        if bbox[2] - bbox[0] == 0:
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, scaleBox)

        # load image
        image_path = os.path.join(human36mRoot, subject, action,
                                  'imageSequence', camera_name,
                                  'img_%06d.jpg' % (frame_idx + 1))
        assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None;
            # fail here instead of deep inside crop/resize.
            raise OSError('failed to read image %s' % image_path)

        # load camera
        shot_camera = labels['cameras'][shot['subject_idx'], camera_idx]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                               shot_camera['K'], shot_camera['dist'],
                               camera_name)

        if crop:
            # crop image
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if imageShape is not None:
            # resize
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, imageShape)
            retval_camera.update_after_resize(image_shape_before_resize,
                                              imageShape)

            sample['image_shapes_before_resize'].append(
                image_shape_before_resize)

        if normImage:
            image = normalize_image(image)

        sample['images'].append(image)
        sample['detections'].append(bbox + (1.0,))  # TODO add real confidences
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)
        # NOTE(review): assumed to be recorded once per kept view, like the
        # lists above - confirm against callers.
        sample["action"].append(action)
        sample["subject"].append(subject)
        sample["frameId"].append(frame_idx)

    # 3D keypoints
    # add dummy confidences
    sample['keypoints_3d'] = np.pad(shot['keypoints'][:numKeypoints],
                                    ((0, 0), (0, 1)),
                                    'constant',
                                    constant_values=1.0)

    # build cuboid
    # base_point = sample['keypoints_3d'][6, :3]
    # sides = np.array([self.cuboid_side, self.cuboid_side, self.cuboid_side])
    # position = base_point - sides / 2
    # sample['cuboids'] = volumetric.Cuboid3D(position, sides)

    # save sample's index
    sample['indexes'] = idx

    if keyPoint3d is not None:
        sample['pred_keypoints_3d'] = keyPoint3d[idx]

    # Freeze the mapping so missing keys raise KeyError from now on.
    sample.default_factory = None
    return sample
def __getitem__(self, idx):
    """Assemble the multi-view sample for the shot stored at ``idx``.

    A camera is used only if its index is in ``self.choose_cameras`` and not
    in ``self.ignore_cameras``, and its bounding box has non-zero extent in
    both directions. For a kept view the image is read from
    ``<example_root>/<action>/hdImgs/<camera>/<camera>_<frame>.jpg`` (frame
    zero-padded to eight characters), optionally cropped, resized and
    normalised, and a ``Camera`` object is updated to match.

    Args:
        idx: position of the shot in ``self.labels['table']``.

    Returns:
        A dict-like object (a ``defaultdict`` whose ``default_factory`` has
        been cleared). Per-view lists: ``'images'``, ``'detections'``,
        ``'cameras'`` and, when resizing is enabled,
        ``'image_shapes_before_resize'``. Per-sample entries: ``'indexes'``,
        ``'keypoints_3d'`` when the shot carries ``'keypoints'``, and
        ``'pred_keypoints_3d'`` when predictions are available.

    Raises:
        AssertionError: if an image file does not exist.
        OSError: if an image file exists but ``cv2.imread`` cannot read it.
    """
    # TODO: Change according to naming conventions
    sample = defaultdict(list)  # return value

    shot = self.labels['table'][idx]
    action_idx = shot['action_idx']
    action = self.labels['action_names'][action_idx]
    frame_idx = shot['frame_name']

    for camera_idx, camera_name in enumerate(self.labels['camera_names']):
        if camera_idx not in self.choose_cameras or camera_idx in self.ignore_cameras:
            continue

        # load bounding box; the stored confidence is not used below
        left, top, right, bottom, _bbox_confidence = shot[
            'bbox_by_camera_tlbr'][camera_idx]
        bbox = (left, top, right, bottom)

        if top - bottom == 0 or left - right == 0:
            # convention: if the bbox is empty, then this view is missing
            continue

        # square and scale the bounding box
        if self.square_bbox:
            bbox = get_square_bbox(bbox)
        bbox = scale_bbox(bbox, self.scale_bbox)

        # TODO: Change according to dataset paths
        # load image
        # $DIR_ROOT/[action_NAME]/hdImgs/[VIEW_ID]/[VIEW_ID]_[FRAME_ID].jpg
        # NOTE: pad with 0s using {frame_idx:08}
        image_path = os.path.join(self.example_root, action, 'hdImgs',
                                  camera_name,
                                  f'{camera_name}_{frame_idx:08}.jpg')
        assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None;
            # fail here instead of deep inside crop/resize.
            raise OSError('failed to read image %s' % image_path)

        # load camera
        shot_camera = self.labels['cameras'][action_idx, camera_idx]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                               shot_camera['K'], shot_camera['dist'],
                               camera_name)

        if self.crop:
            # crop image
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if self.image_shape is not None:
            # rescale_size
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, self.image_shape)
            retval_camera.update_after_resize(image_shape_before_resize,
                                              self.image_shape)

            sample['image_shapes_before_resize'].append(
                image_shape_before_resize)

        if self.norm_image:
            image = normalize_image(image)

        sample['images'].append(image)
        sample['detections'].append(bbox)
        sample['cameras'].append(retval_camera)
        # Maybe remove to save space?
        # sample['proj_matrices'].append(retval_camera.projection)

    # TODO: Can remove or modify depending on whether your dataset has ground truth
    # 3D keypoints (with real confidences, cos CMU)
    if 'keypoints' in shot:
        sample['keypoints_3d'] = np.array(
            shot['keypoints'][:self.num_keypoints])

    # build cuboid
    # base_point = sample['keypoints_3d'][6, :3]
    # sides = np.array([self.cuboid_side, self.cuboid_side, self.cuboid_side])
    # position = base_point - sides / 2
    # sample['cuboids'] = volumetric.Cuboid3D(position, sides)

    # save sample's index
    sample['indexes'] = idx

    # TODO: Check this? Keypoints are different
    if self.keypoints_3d_pred is not None:
        sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

    # Freeze the mapping so missing keys raise KeyError from now on.
    sample.default_factory = None
    return sample