def prepareVideoSample(info, images, cameras, bboxes, subject='S1',
                       imageShape=(384, 384), scaleBox=1.0, crop=True,
                       normImage=False):
    """Build a multi-view sample dict from already-loaded frames and cameras.

    Args:
        info: dict with 'subject_names' and 'camera_names' lists.
        images: mapping camera_name -> image array (H, W, C).
        cameras: mapping camera_name -> dict with 'R', 't', 'K', 'dist'.
        bboxes: mapping camera_name -> TLBR box (indexable with a list, so
            presumably a numpy array — TODO confirm).
        subject: subject name; must appear in info['subject_names'].
        imageShape: (height, width) to resize to, or None to skip resizing.
            (Changed from a mutable list default to a tuple — same values.)
        scaleBox: bbox scaling factor passed to scale_bbox.
        crop: whether to crop each view to its bbox.
        normImage: whether to run normalize_image on each view.
            BUG FIX: this flag was previously accepted but silently ignored;
            it is now applied, matching prepareSample.

    Returns:
        dict with per-camera lists: 'images', 'cameras', 'proj_matrices'.
        Views with an empty bbox are skipped.
    """
    sample = defaultdict(list)  # return value

    # Validate the subject early: raises ValueError if it is unknown.
    # (The index itself was never used by the original code.)
    info['subject_names'].index(subject)

    for camera_idx, camera_name in enumerate(info['camera_names']):
        bbox = bboxes[camera_name][[1, 0, 3, 2]]  # TLBR to LTRB
        # NOTE(review): in LTRB order bbox[2]-bbox[0] is the width, despite
        # the original "height" naming; only the zero test matters here.
        if bbox[2] - bbox[0] == 0:
            # convention: if the bbox is empty, then this view is missing
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, scaleBox)

        # load camera
        shot_camera = cameras[camera_name]
        image = images[camera_name]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                               shot_camera['K'], shot_camera['dist'],
                               camera_name)

        if crop:
            # crop image and keep the camera model consistent with the crop
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if imageShape is not None:
            # resize and adjust the intrinsics accordingly
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, imageShape)
            retval_camera.update_after_resize(image_shape_before_resize,
                                              imageShape)

        if normImage:
            image = normalize_image(image)

        sample['images'].append(image)
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)

    # freeze: missing keys now raise KeyError instead of silently appearing
    sample.default_factory = None
    return sample
def __getitem__(self, idx): sample = defaultdict(list) # return value shot = self.labels['table'][idx] subject = self.labels['subject_names'][shot['subject_idx']] action = self.labels['action_names'][shot['action_idx']] frame_idx = shot['frame_idx'] for camera_idx, camera_name in enumerate(self.labels['camera_names']): if camera_idx in self.ignore_cameras: continue # load bounding box bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]] # TLBR to LTRB bbox_height = bbox[2] - bbox[0] if bbox_height == 0: # convention: if the bbox is empty, then this view is missing continue # scale the bounding box bbox = scale_bbox(bbox, self.scale_bbox) # load image image_path = os.path.join( self.h36m_root, subject, action, 'imageSequence' + '-undistorted' * self.undistort_images, camera_name, 'img_%06d.jpg' % (frame_idx + 1)) assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path image = cv2.imread(image_path) # load camera shot_camera = self.labels['cameras'][shot['subject_idx'], camera_idx] retval_camera = Camera(shot_camera['R'], shot_camera['t'], shot_camera['K'], shot_camera['dist'], camera_name) if self.crop: # crop image image = crop_image(image, bbox) retval_camera.update_after_crop(bbox) if self.image_shape is not None: # resize image_shape_before_resize = image.shape[:2] image = resize_image(image, self.image_shape) retval_camera.update_after_resize(image_shape_before_resize, self.image_shape) sample['image_shapes_before_resize'].append( image_shape_before_resize) if self.norm_image: image = normalize_image(image) sample['images'].append(image) sample['detections'].append(bbox + (1.0, )) # TODO add real confidences sample['cameras'].append(retval_camera) sample['proj_matrices'].append(retval_camera.projection) # 3D keypoints # add dummy confidences sample['keypoints_3d'] = np.pad(shot['keypoints'][:self.num_keypoints], ((0, 0), (0, 1)), 'constant', constant_values=1.0) # build cuboid # base_point = sample['keypoints_3d'][6, :3] # sides = 
np.array([self.cuboid_side, self.cuboid_side, self.cuboid_side]) # position = base_point - sides / 2 # sample['cuboids'] = volumetric.Cuboid3D(position, sides) # save sample's index sample['indexes'] = idx if self.keypoints_3d_pred is not None: sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx] sample.default_factory = None return sample
def prepareSample(idx, labels, human36mRoot, keyPoint3d=None, imageShape=None,
                  scaleBox=1.0, crop=True, normImage=False, numKeypoints=17):
    """Standalone (non-class) version of the Human3.6M sample loader.

    Args:
        idx: row index into labels['table'].
        labels: Human3.6M labels dict ('table', 'subject_names',
            'action_names', 'camera_names', 'cameras').
        human36mRoot: dataset root directory containing the image sequences.
        keyPoint3d: optional array of predicted 3D keypoints, indexed by idx.
        imageShape: (height, width) to resize to, or None to skip resizing.
        scaleBox: bbox scaling factor passed to scale_bbox.
        crop: whether to crop each view to its bbox.
        normImage: whether to run normalize_image on each view.
        numKeypoints: number of keypoints to keep (generalized from the
            previously hard-coded 17; default preserves old behavior).

    Returns:
        dict with per-camera lists ('images', 'detections', 'cameras',
        'proj_matrices', 'action', 'subject', 'frameId', and
        'image_shapes_before_resize' when resizing) plus 'keypoints_3d'
        (dummy confidence column appended), 'indexes', and optionally
        'pred_keypoints_3d'.
    """
    sample = defaultdict(list)  # return value
    shot = labels['table'][idx]

    subject = labels['subject_names'][shot['subject_idx']]
    action = labels['action_names'][shot['action_idx']]
    frame_idx = shot['frame_idx']

    for camera_idx, camera_name in enumerate(labels['camera_names']):
        bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]]  # TLBR to LTRB
        # NOTE(review): in LTRB order this is the width, not the height;
        # only the zero test matters.
        bbox_height = bbox[2] - bbox[0]
        if bbox_height == 0:
            # convention: if the bbox is empty, then this view is missing
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, scaleBox)

        # load image
        image_path = os.path.join(human36mRoot, subject, action,
                                  'imageSequence', camera_name,
                                  'img_%06d.jpg' % (frame_idx + 1))
        assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path
        image = cv2.imread(image_path)

        # load camera for this subject/view pair
        shot_camera = labels['cameras'][shot['subject_idx'], camera_idx]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                               shot_camera['K'], shot_camera['dist'],
                               camera_name)

        if crop:
            # crop image and keep the camera model consistent with the crop
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if imageShape is not None:
            # resize and adjust the intrinsics accordingly
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, imageShape)
            retval_camera.update_after_resize(image_shape_before_resize,
                                              imageShape)
            sample['image_shapes_before_resize'].append(
                image_shape_before_resize)

        if normImage:
            image = normalize_image(image)

        sample['images'].append(image)
        sample['detections'].append(bbox + (1.0,))  # TODO add real confidences
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)
        sample["action"].append(action)
        sample["subject"].append(subject)
        sample["frameId"].append(frame_idx)

    # 3D keypoints
    # add dummy confidences (pad a column of ones on the right)
    sample['keypoints_3d'] = np.pad(shot['keypoints'][:numKeypoints],
                                    ((0, 0), (0, 1)),
                                    'constant',
                                    constant_values=1.0)

    # build cuboid
    # base_point = sample['keypoints_3d'][6, :3]
    # sides = np.array([self.cuboid_side, self.cuboid_side, self.cuboid_side])
    # position = base_point - sides / 2
    # sample['cuboids'] = volumetric.Cuboid3D(position, sides)

    # save sample's index
    sample['indexes'] = idx

    if keyPoint3d is not None:
        sample['pred_keypoints_3d'] = keyPoint3d[idx]

    # freeze: missing keys now raise KeyError instead of silently appearing
    sample.default_factory = None
    return sample
def __getitem__(self, idx):
    """Load the single camera view assigned to dataset row *idx*.

    Unlike the multi-view loader, the camera is fixed per index via
    self.camera_idxes, so the loop below runs (at most) once. Supports
    reading images out of a zip archive (self.data_format == 'zip') and
    optional keypoint-guided erasing (self.erase).
    """
    camera_idx = self.camera_idxes[idx]
    sample = defaultdict(list)  # return value
    shot = self.labels['table'][idx]

    subject = self.labels['subject_names'][shot['subject_idx']]
    sample['subject'] = subject
    action = self.labels['action_names'][shot['action_idx']]
    frame_idx = shot['frame_idx']

    # Single-element loop: keeps the body identical in shape to the
    # multi-view __getitem__ while visiting only the chosen camera.
    for camera_idx, camera_name in [[
            camera_idx, self.labels['camera_names'][camera_idx]
    ]]:
        if camera_idx in self.ignore_cameras:
            continue

        # load bounding box
        bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]]  # TLBR to LTRB
        # NOTE(review): this is the width in LTRB order despite the name;
        # only the zero test matters.
        bbox_height = bbox[2] - bbox[0]
        if bbox_height == 0:
            # convention: if the bbox is empty, then this view is missing
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, self.scale_bbox)
        # normalized box size; 150.0 is presumably a reference bbox side in
        # pixels — TODO confirm against the consumer of sample['scale']
        scale = ((bbox[2] - bbox[0]) / 150.0, (bbox[3] - bbox[1]) / 150.0)

        # load image; the 'processed.zip@' infix routes the path through the
        # zip reader when data is stored as an archive
        image_dir = self.root.replace(
            'processed', 'processed.zip@') if self.data_format == 'zip' else self.root
        image_path = os.path.join(
            image_dir, subject, action,
            'imageSequence' + '-undistorted' * self.undistort_images,
            camera_name, 'img_%06d.jpg' % (frame_idx + 1))
        if self.data_format == 'zip':
            from mvn.datasets.utils import zipreader_imread
            image = zipreader_imread(image_path)
        else:
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returns None on failure; distinguish a missing
                # file from an unreadable one
                assert os.path.isfile(
                    image_path), '%s doesn\'t exist' % image_path

        # load camera for this subject/view pair
        shot_camera = self.labels['cameras'][shot['subject_idx'], camera_idx]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                               shot_camera['K'], shot_camera['dist'],
                               camera_name)

        if self.crop:
            # crop image and keep the camera model consistent with the crop
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if self.image_shape is not None:
            # resize and adjust the intrinsics accordingly
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, self.image_shape)
            retval_camera.update_after_resize(image_shape_before_resize,
                                              self.image_shape)
            sample['image_shapes_before_resize'].append(
                image_shape_before_resize)

        if self.norm_image:
            image = normalize_image(image)

        if self.erase:
            # erase image patches around selected projected GT joints
            # (augmentation); joint ids 6, 1, 4, 11, 14 — presumably hips/
            # knees/shoulders in this skeleton, TODO confirm ordering
            keypoints_3d_gt = shot['keypoints'][:self.num_keypoints]
            keypoints_2d_gt = project_3d_points_to_image_plane_without_distortion(
                retval_camera.projection, keypoints_3d_gt)
            erase_joints = [6, 1, 4, 11, 14]
            image = erase_image(
                image, [keypoints_2d_gt[joint] for joint in erase_joints])

        sample['images'].append(image)
        sample['detections'].append(bbox + (1.0, ))  # TODO add real confidences
        sample['scale'].append(scale)
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)

    # 3D keypoints
    # add dummy confidences (pad a column of ones on the right)
    sample['keypoints_3d'] = np.pad(shot['keypoints'][:self.num_keypoints],
                                    ((0, 0), (0, 1)),
                                    'constant',
                                    constant_values=1.0)

    # save sample's index
    sample['indexes'] = idx

    if self.keypoints_3d_pred is not None:
        sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

    # freeze: missing keys now raise KeyError instead of silently appearing
    sample.default_factory = None
    return sample
def makeNViewImagesAndCameras():
    """Build a multi-view sample from cell-phone test photos.

    Calibrates camera intrinsics from a set of ChArUco board photos, then for
    each test photo: estimates extrinsics from the board, detects the first
    human to get a bbox, crops/resizes/normalizes the image, and collects the
    view into the same sample-dict format the dataset loaders produce.

    Returns:
        dict with 'images', 'cameras', 'proj_matrices' lists and a zeroed
        'pred_keypoints_3d' placeholder of shape (17, 3).

    Changes vs. original: large blocks of commented-out hard-coded H36M
    calibration data and debug display code were removed; the redundant
    enumerate-then-reindex loop was simplified; the human detector is now
    constructed once instead of once per image; the target resolution is
    named instead of repeated.
    """
    IMAGE_SHAPE = [384, 384]  # network input resolution (H, W)

    sample = defaultdict(list)  # return value
    calib = ArucoCalibrator()

    # Photos of the calibration board used to recover the intrinsics.
    calibration_image_paths = [
        "testdata/IMG_20210208_135527.jpg",
        "testdata/IMG_20210208_135532.jpg",
        "testdata/IMG_20210209_114242.jpg",
        "testdata/IMG_20210209_114246.jpg",
        "testdata/IMG_20210209_114249.jpg",
        "testdata/IMG_20210209_114255.jpg",
        "testdata/IMG_20210209_114300.jpg",
        "testdata/IMG_20210209_114305.jpg",
        "testdata/IMG_20210209_114311.jpg",
        "testdata/IMG_20210209_114318.jpg",
        "testdata/IMG_20210209_114323.jpg"
    ]
    calibration, cameraMatrix, distCoeffs, nView_rvecs, nView_tvecs = \
        calib.getCameraIntrinsicFromImages(calibration_image_paths)
    print(cameraMatrix)
    print(distCoeffs)

    # Photos of the scene/subject to assemble into the sample.
    image_paths_test = [
        "testdata/IMG_20210210_202957.jpg",
        "testdata/IMG_20210210_203002.jpg",
        "testdata/IMG_20210210_203017.jpg"
    ]

    # Human detector is loop-invariant; construct it once.
    det2u = Detectron2util()

    for image_path in image_paths_test:
        image = cv2.imread(image_path)

        # Extrinsics from the ChArUco board visible in the photo; skip views
        # where the board was not found.
        retval, rmat, tvecs = calib.getRmatAndTvecFromImgWithCharuco(
            image, cameraMatrix, distCoeffs)
        if not retval:
            continue
        retval_camera = Camera(rmat, tvecs, cameraMatrix, distCoeffs,
                               "asaba_cell_photo")  # R, t, K, dist

        # Debug overlay of the world box; kept because drawWorldBox may draw
        # in place — TODO confirm it has no side effect on `image`.
        rvecs, jacob = cv2.Rodrigues(cv2.UMat(np.array(rmat)))
        _overlay = calib.drawWorldBox(image, rvecs, tvecs, cameraMatrix,
                                      distCoeffs)

        # Get bbox of the first detected person.
        bbox = det2u.getBboxOfFirstHuman(image)

        # crop; bbox is a tuple of (left, upper, right, lower)
        image = crop_image(image, bbox)
        retval_camera.update_after_crop(bbox)

        # resize and adjust the intrinsics accordingly
        image_shape_before_resize = image.shape[:2]
        image = resize_image(image, IMAGE_SHAPE)
        retval_camera.update_after_resize(image_shape_before_resize,
                                          IMAGE_SHAPE)

        # Normalize
        image = normalize_image(image)

        sample['images'].append(image)
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)

    # Placeholder predictions; consumers expect a (17, 3) array.
    sample['pred_keypoints_3d'] = np.zeros([17, 3])
    return sample
def __getitem__(self, idx):
    """Collect every chosen camera view and its annotations for row *idx*.

    Builds a dict of per-view lists ('images', 'detections', 'cameras',
    and 'image_shapes_before_resize' when resizing) plus sample-wide
    entries: 'keypoints_3d' when ground truth exists (real confidences,
    cos CMU), 'indexes', and optionally 'pred_keypoints_3d'.
    """
    # TOOD: Change according to naming conventions
    sample = defaultdict(list)  # return value

    record = self.labels['table'][idx]
    _person = record['person_id']  # looked up but unused, kept for parity
    action_idx = record['action_idx']
    action = self.labels['action_names'][action_idx]
    frame_idx = record['frame_name']

    for cam_idx, cam_name in enumerate(self.labels['camera_names']):
        # Only visit cameras that are both chosen and not ignored.
        if cam_idx not in self.choose_cameras or cam_idx in self.ignore_cameras:
            continue

        # Unpack the stored box; a degenerate box in either dimension means
        # this view is missing by convention.
        left, top, right, bottom, bbox_confidence = record[
            'bbox_by_camera_tlbr'][cam_idx]
        bbox = (left, top, right, bottom)
        if top - bottom == 0 or left - right == 0:
            continue

        # Optionally square, then scale the bounding box.
        if self.square_bbox:
            bbox = get_square_bbox(bbox)
        bbox = scale_bbox(bbox, self.scale_bbox)

        # TODO: Change according to dataset paths
        # $DIR_ROOT/[action_NAME]/hdImgs/[VIEW_ID]/[VIEW_ID]_[FRAME_ID].jpg
        # NOTE: frame id is zero-padded to 8 digits
        image_path = os.path.join(self.example_root, action, 'hdImgs',
                                  cam_name, f'{cam_name}_{frame_idx:08}.jpg')
        assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path
        image = cv2.imread(image_path)

        # Camera parameters for this action/view pair.
        cam_params = self.labels['cameras'][action_idx, cam_idx]
        view_camera = Camera(cam_params['R'], cam_params['t'],
                             cam_params['K'], cam_params['dist'], cam_name)

        if self.crop:
            # Crop and keep the camera model consistent with the crop.
            image = crop_image(image, bbox)
            view_camera.update_after_crop(bbox)

        if self.image_shape is not None:
            # Resize and adjust the intrinsics accordingly.
            pre_resize_shape = image.shape[:2]
            image = resize_image(image, self.image_shape)
            view_camera.update_after_resize(pre_resize_shape,
                                            self.image_shape)
            sample['image_shapes_before_resize'].append(pre_resize_shape)

        if self.norm_image:
            image = normalize_image(image)

        sample['images'].append(image)
        sample['detections'].append(bbox)
        sample['cameras'].append(view_camera)
        # Maybe remove to save space?
        # sample['proj_matrices'].append(view_camera.projection)

    # TODO: Can remove or modify depending on whether your dataset has ground truth
    # 3D keypoints (with real confidences, cos CMU)
    if 'keypoints' in record:
        sample['keypoints_3d'] = np.array(
            record['keypoints'][:self.num_keypoints])

    # build cuboid
    # base_point = sample['keypoints_3d'][6, :3]
    # sides = np.array([self.cuboid_side, self.cuboid_side, self.cuboid_side])
    # position = base_point - sides / 2
    # sample['cuboids'] = volumetric.Cuboid3D(position, sides)

    # save sample's index
    sample['indexes'] = idx

    # TODO: Check this? Keypoints are different
    if self.keypoints_3d_pred is not None:
        sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

    # Freeze the dict: unknown keys now raise instead of silently appearing.
    sample.default_factory = None
    return sample