def prepare_supervised_sample(
    self, sample: dict, augmenter: SampleAugmenter
) -> dict:
    """Prepares an augmented sample for the supervised keypoint experiment.

    The image and its 2.5D joints are passed through ``augmenter`` and the
    transformation matrix it returns is left-multiplied onto the camera
    intrinsics.

    Note:
        ``sample`` is modified in place: ``sample["K"]`` is overwritten with
        the transformed intrinsics and, when ``self.config.use_palm`` is
        truthy, ``sample["joints3D"]`` is overwritten with the output of
        ``self.move_wrist_to_palm``.

    Args:
        sample (dict): Underlying data from dataloader class. The keys
            'image', 'K', 'joints3D' and 'joints_valid' are read;
            'joints_raw' is used when present.
        augmenter (SampleAugmenter): Augmenter used to transform sample.

    Returns:
        dict: sample containing following elements
            'image'
            'joints'
            'joints3D'
            'K'
            'scale'
            'joints3D_recreated'
            'joints_valid'
            'joints_raw'
            'T' (augmentation matrix as a tensor)
    """
    raw_joints25D, scale = convert_to_2_5D(sample["K"], sample["joints3D"])

    # Fall back to a copy of the 3D joints when no raw joints are supplied.
    if "joints_raw" in sample:
        joints_raw = sample["joints_raw"]
    else:
        joints_raw = sample["joints3D"].clone()

    image, joints25D, transformation_matrix = augmenter.transform_sample(
        sample["image"], raw_joints25D
    )
    # Fold the augmentation into the intrinsics.
    sample["K"] = torch.Tensor(transformation_matrix) @ sample["K"]

    if self.config.use_palm:
        # The 2.5D joints and the scale are recomputed from the updated
        # 3D joints and the updated intrinsics.
        sample["joints3D"] = self.move_wrist_to_palm(sample["joints3D"])
        joints25D, scale = convert_to_2_5D(sample["K"], sample["joints3D"])

    joints3D_recreated = convert_2_5D_to_3D(joints25D, scale, sample["K"])

    # joints_raw is for procrustes analysis, only relevant when youtube data
    # is used.
    if self.config.use_palm:
        joints_raw = self.move_wrist_to_palm(joints_raw)

    if self.transform:
        image = self.transform(image)

    prepared = {}
    prepared["image"] = image
    prepared["joints"] = joints25D
    prepared["joints3D"] = sample["joints3D"]
    prepared["K"] = sample["K"]
    prepared["scale"] = scale
    prepared["joints3D_recreated"] = joints3D_recreated
    prepared["joints_valid"] = sample["joints_valid"]
    prepared["joints_raw"] = joints_raw
    prepared["T"] = torch.Tensor(transformation_matrix)
    return prepared
def prepare_hybrid2_sample(self, sample: dict, augmenter: SampleAugmenter) -> dict:
    """Prepares two augmented views of an image plus their augmentation parameters.

    The same image is transformed twice. After each transformation the
    augmentation parameters are collected with
    ``self.get_random_augment_param`` and stored under keys suffixed with
    ``_1`` (first view) and ``_2`` (second view). Parameters whose value is
    ``None`` are left out.

    Args:
        sample (dict): Underlying data from dataloader class. The keys
            'image', 'K' and 'joints3D' are read.
        augmenter (SampleAugmenter): Augmenter used to transform sample.

    Returns:
        dict: sample containing 'transformed_image1', 'transformed_image2'
            and one '<param>_1' / '<param>_2' entry per non-None parameter.
    """
    joints25D, _ = convert_to_2_5D(sample["K"], sample["joints3D"])

    # Zero jitter is added in case the cropping is off. It is done to trigger
    # the cropping but always with no translation in image.
    override_jitter = None if augmenter.crop else [0, 0]

    # The parameters are queried right after each transformation and before
    # the next one, exactly as in the original ordering.
    # NOTE(review): presumably get_random_augment_param reads the values
    # drawn by the preceding transform_sample call - confirm.
    img1, _, _ = augmenter.transform_sample(
        sample["image"], joints25D.clone(), None, override_jitter
    )
    param1 = self.get_random_augment_param(augmenter)

    img2, _, _ = augmenter.transform_sample(
        sample["image"], joints25D.clone(), None, override_jitter
    )
    param2 = self.get_random_augment_param(augmenter)

    # Applying only image related transform
    if self.transform:
        img1 = self.transform(img1)
        img2 = self.transform(img2)

    return {
        "transformed_image1": img1,
        "transformed_image2": img2,
        **{f"{k}_1": v for k, v in param1.items() if v is not None},
        **{f"{k}_2": v for k, v in param2.items() if v is not None},
    }
def prepare_simclr_sample(self, sample: dict, augmenter: SampleAugmenter) -> dict:
    """Builds the two image views used by the SimCLR experiment.

    The image is transformed twice. The second transformation is called
    with the ``angle`` and ``jitter`` read from the augmenter after the
    first one.

    Note:
        Rotation and jitter is kept same in both the transformations.

    Args:
        sample (dict): Underlying data from dataloader class. The keys
            'image', 'K' and 'joints3D' are read.
        augmenter (SampleAugmenter): Augmenter used to transform sample.

    Returns:
        dict: sample containing 'transformed_image1' and 'transformed_image2'
    """
    source_image = sample["image"]
    joints25D, _ = convert_to_2_5D(sample["K"], sample["joints3D"])

    first_view, _, _ = augmenter.transform_sample(source_image, joints25D.clone())

    # Read the augmenter's angle and jitter after the first pass so the
    # second pass is called with the same values.
    shared_angle, shared_jitter = augmenter.angle, augmenter.jitter
    second_view, _, _ = augmenter.transform_sample(
        source_image, joints25D.clone(), shared_angle, shared_jitter
    )

    # Only the image-level transform is applied here.
    if self.transform:
        first_view = self.transform(first_view)
        second_view = self.transform(second_view)

    views = {"transformed_image1": first_view}
    views["transformed_image2"] = second_view
    return views
def prepare_pairwise_ablative(
    self, sample: dict, augmenter: SampleAugmenter
) -> dict:
    """Prepares samples according to pairwise experiment, i.e. transforming the
    image and keeping track of the relative parameters. Augmentations are
    isolated.

    Args:
        sample (dict): Underlying data from dataloader class. The keys
            'image', 'K' and 'joints3D' are read.
        augmenter (SampleAugmenter): Augmenter used to transform sample.

    Returns:
        dict: sample containing following elements
            'transformed_image1'
            'transformed_image2'
            'joints1' (2.5D joints)
            'joints2' (2.5D joints)
            plus every entry returned by ``self.get_relative_param``
            (e.g. 'rotation', 'jitter', ...)
    """
    joints25D, _ = convert_to_2_5D(sample["K"], sample["joints3D"])

    # Zero jitter is added in case the cropping is off. It is done to trigger
    # the cropping but always with no translation in image.
    override_jitter = None if augmenter.crop else [0, 0]

    # No angle override is passed, whether or not rotation is enabled on the
    # augmenter.
    # NOTE: to apply one shared rotation to both views when rotation is
    # disabled, set this to a fixed angle (e.g. random.uniform(1, 360)).
    override_angle = None

    # Parameters are collected right after each transformation, before the
    # next call to transform_sample.
    img1, joints1, _ = augmenter.transform_sample(
        sample["image"], joints25D.clone(), override_angle, override_jitter
    )
    param1 = self.get_random_augment_param(augmenter)

    img2, joints2, _ = augmenter.transform_sample(
        sample["image"], joints25D.clone(), override_angle, override_jitter
    )
    param2 = self.get_random_augment_param(augmenter)

    # relative transform calculation.
    rel_param = self.get_relative_param(augmenter, param1, param2)

    # Applying only image related transform
    if self.transform:
        img1 = self.transform(img1)
        img2 = self.transform(img2)

    return {
        "transformed_image1": img1,
        "transformed_image2": img2,
        "joints1": joints1,
        "joints2": joints2,
        **rel_param,
    }
def model_refined_inference(
    model: Union[BaselineModel, DenoisedBaselineModel],
    sample: dict,
    augmenter: SampleAugmenter,
    transform: transforms.Compose,
    is_palm_trained: bool,
) -> torch.Tensor:
    """Runs a two-pass inference with a refined bound box.

    The first pass processes the image with the box returned by
    ``create_sudo_bound_box(BBOX_SCALE)`` and predicts joints. Those joints
    are mapped back through the inverse of the first pass's
    'transformation_matrix' and used as the bound box of the second pass,
    whose prediction is returned.

    Args:
        model (Union[BaselineModel, DenoisedBaselineModel]): Trained model.
            Its output is reshaped to (21, 3) and ``model.device`` is used
            for placing tensors.
        sample (dict): image and camera intrinsics dictionary ('image', 'K').
            Copies of both are handed to ``process_data`` on each pass.
        augmenter (SampleAugmenter): augmenter for processing image
            (cropping and resizing).
        transform (transforms.Compose): Transforms on image, normalization
            and tensor conversion.
        is_palm_trained (bool): True when palm is regressed during training.

    Returns:
        torch.Tensor: predicted 3D joints from ``compute_refined_3d``, passed
            through ``move_palm_to_wrist`` when ``is_palm_trained`` is True.
    """
    original_image = sample["image"]
    original_K = sample["K"]

    # ---- Pass 1: initial bound box ----
    initial_bbox = create_sudo_bound_box(BBOX_SCALE)
    first_pass = process_data(
        {"image": original_image.copy(), "K": original_K.clone()},
        initial_bbox,
        augmenter,
        transform,
        1,
    )
    joints25d = model(first_pass["image"].to(model.device)).view(21, 3)

    if is_palm_trained:
        # this step is done to ensure image is cropped properly by using wrist.
        joints3d = compute_refined_3d(joints25d, model, first_pass["K"].clone())
        joints3d = move_palm_to_wrist(joints3d)
        joints25d, _ = convert_to_2_5D(first_pass["K"].clone(), joints3d)

    # Overwrite the last coordinate with 1 before multiplying by the inverse
    # transformation matrix.
    joints25d[..., -1] = 1.0
    inverse_transform = torch.inverse(
        first_pass["transformation_matrix"].to(model.device)
    )
    refined_bbox = joints25d.to(model.device) @ inverse_transform.T

    # ---- Pass 2: cropping image with refined crop box ----
    second_pass = process_data(
        {"image": original_image.copy(), "K": original_K.clone()},
        refined_bbox,
        augmenter,
        transform,
        step=2,
    )
    joints25d = model(second_pass["image"].to(model.device)).view(21, 3)
    joints3d = compute_refined_3d(joints25d, model, second_pass["K"])

    if not is_palm_trained:
        return joints3d
    return move_palm_to_wrist(joints3d)
def prepare_pairwise_sample(self, sample: dict, augmenter: SampleAugmenter) -> dict:
    """Prepares samples according to pairwise experiment, i.e. transforming the
    image and keeping track of the relative parameters.

    Note:
        Gaussian blur and Flip are treated as boolean. Also it was decided not
        to use them for experiment. The effects of transformations are
        isolated.

    Args:
        sample (dict): Underlying data from dataloader class. The keys
            'image', 'K' and 'joints3D' are read.
        augmenter (SampleAugmenter): Augmenter used to transform sample.

    Returns:
        dict: sample containing following elements
            'transformed_image1'
            'transformed_image2'
            'joints1' (2.5D joints)
            'joints2' (2.5D joints)
            plus every entry returned by ``self.get_relative_param``
            (e.g. 'rotation', 'jitter', ...)
    """
    source_image = sample["image"]
    joints25D, _ = convert_to_2_5D(sample["K"], sample["joints3D"])

    # First view; parameters are collected before the augmenter is used again.
    view_a, joints_a, _ = augmenter.transform_sample(source_image, joints25D.clone())
    params_a = self.get_random_augment_param(augmenter)

    # Second view.
    view_b, joints_b, _ = augmenter.transform_sample(source_image, joints25D.clone())
    params_b = self.get_random_augment_param(augmenter)

    # Relative transform between the two views.
    relative = self.get_relative_param(augmenter, params_a, params_b)

    # Only the image-level transform is applied here.
    if self.transform:
        view_a = self.transform(view_a)
        view_b = self.transform(view_b)

    result = {
        "transformed_image1": view_a,
        "transformed_image2": view_b,
        "joints1": joints_a,
        "joints2": joints_b,
    }
    result.update(relative)
    return result
def prepare_experiment4_pretraining(
    self, sample: dict, augmenter: SampleAugmenter
) -> dict:
    """Prepares samples for ablative studies on Simclr.

    This function isolates the effect of each transform. Make sure no other
    transformation is applied except the one you want to isolate. (Resize is
    allowed). Samples are not artificially increased by changing rotation and
    jitter for both samples.

    Args:
        sample (dict): Underlying data from dataloader class. The keys
            'image', 'K' and 'joints3D' are read.
        augmenter (SampleAugmenter): Augmenter used to transform sample.

    Returns:
        dict: sample containing 'transformed_image1' and 'transformed_image2'
    """
    joints25D, _ = convert_to_2_5D(sample["K"], sample["joints3D"])

    # Zero jitter is added in case the cropping is off. It is done to trigger
    # the cropping but always with no translation in image.
    override_jitter = None if augmenter.crop else [0, 0]

    # No angle override is passed, whether or not rotation is enabled on the
    # augmenter.
    # NOTE: to apply one shared rotation to both views when rotation is
    # disabled, set this to a fixed angle (e.g. random.uniform(1, 360)).
    override_angle = None

    img1, _, _ = augmenter.transform_sample(
        sample["image"], joints25D.clone(), override_angle, override_jitter
    )
    img2, _, _ = augmenter.transform_sample(
        sample["image"], joints25D.clone(), override_angle, override_jitter
    )

    # Applying only image related transform
    if self.transform:
        img1 = self.transform(img1)
        img2 = self.transform(img2)

    return {"transformed_image1": img1, "transformed_image2": img2}