def __init__(
    self,
    root_dir: str,
    split: str = "train",
    seed: int = 5,
    train_ratio: float = 0.9,
):
    """Set up paths, file listings, labels and split indices for MPII.

    Joint layout reference:
    https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/output.md#hand-output-format

    Args:
        root_dir (str): Directory that holds the ``manual_<split>`` folders.
        split (str): 'train', 'val' or 'test'. 'train' and 'val' both read
            from the ``manual_train`` folder.
        seed (int): Stored as ``self.seed``.
        train_ratio (float): Stored as ``self.train_ratio``.
    """
    self.root_dir = root_dir
    self.split = split
    self.seed = seed
    self.train_ratio = train_ratio
    # To convert from MPII to AIT format.
    self.joints = Joints()

    # 'val' is carved out of the training folder, so both map to "train".
    if self.split in ("train", "val"):
        split_set = "train"
    else:
        split_set = split
    # Images and labels live side by side in the same folder.
    data_dir = os.path.join(self.root_dir, f"manual_{split_set}")
    self.image_dir_path = data_dir
    self.label_dir_path = data_dir

    self.img_names = self.get_image_names()
    self.labels = self.get_labels()
    self.indices = self.create_train_val_split()
def __init__(
    self,
    root_dir: str,
    split: str,
    annotor: str = "all",
    seed: int = 5,
    train_ratio: float = 0.9,
):
    """Set up folder names, meta information and split indices for InterHand.

    Args:
        root_dir (str): Path to the directory with image samples.
        split (str): 'train', 'val' or 'test'. 'train' and 'val' both load
            the "train" files; the requested value is kept in ``_split``.
        annotor (str, optional): Annotation sub-folder. Defaults to 'all'.
            Other options are 'human_annot' and 'machine_annot'.
        seed (int): Stored as ``self.seed``.
        train_ratio (float): Stored as ``self.train_ratio``.
    """
    self.root_dir = root_dir
    self.annotor = annotor  # "human_annot" and "machine_annot" possible.
    self.seed = seed
    self.train_ratio = train_ratio
    # To convert from interhand to AIT format.
    self.joints = Joints()

    self.annotation_sampling_folder = "InterHand2.6M.annotations.5.fps"
    self.image_sampling_folder = "InterHand2.6M_5fps_batch0/images"

    # `_split` remembers what the caller asked for; `split` names the files
    # to read (validation data comes out of the training files).
    self._split = split
    if split in ("train", "val"):
        self.split = "train"
    else:
        self.split = split

    meta_info = self.get_meta_info()
    (
        self.image_info,
        self.annotations_info,
        self.camera_info,
        self.joints_dict,
    ) = meta_info
    self.indices = self.create_train_val_split()
def __init__(self, root_dir: str, split: str = "train"):
    """Load joint and image listings and build the index array.

    Args:
        root_dir (str): Directory holding the youtube json files.
        split (str): Split name stored as ``self.split``. Defaults to 'train'.
    """
    self.root_dir = root_dir
    self.split = split
    self.joints = Joints()

    joints_list, img_list = self.get_joints_labels_and_images()
    self.joints_list = joints_list
    self.img_list = img_list

    # Lookup table from image id to its image entry.
    img_dict = {}
    for item in self.img_list:
        img_dict[item["id"]] = item
    self.img_dict = img_dict

    self.indices = self.create_train_val_split()
def __init__(
    self, root_dir: str, split: str, seed: int = 5, train_ratio: float = 0.9
):
    """Load labels, scales, camera parameters and image names for Freihand.

    Args:
        root_dir (str): Path to the directory with image samples.
        split (str): Split name stored as ``self.split``.
        seed (int): Stored as ``self.seed``.
        train_ratio (float): Stored as ``self.train_ratio``.
    """
    self.root_dir = root_dir
    self.split = split
    self.seed = seed
    self.train_ratio = train_ratio
    # To convert from freihand to AIT format.
    self.joints = Joints()

    self.labels = self.get_labels()
    self.scale = self.get_scale()
    self.camera_param = self.get_camera_param()

    image_names, image_dir = self.get_image_names()
    self.img_names = image_names
    self.img_path = image_dir

    self.indices = self.create_train_val_split()
import copy from math import cos, pi, sin from typing import Tuple, Union import numpy as np import torch from PIL import Image from src.data_loader.joints import Joints from src.types import CAMERA_PARAM, JOINTS_3D, JOINTS_25D, SCALE from src.constants import MANO_MAT from torch.utils.data import ConcatDataset, DataLoader, Dataset, WeightedRandomSampler from torchvision import transforms JOINTS = Joints() PARENT_JOINT = JOINTS.mapping.ait.wrist CHILD_JOINT = JOINTS.mapping.ait.index_mcp def convert_to_2_5D(K: CAMERA_PARAM, joints_3D: JOINTS_3D) -> Tuple[JOINTS_25D, SCALE]: """Converts coordinates from 3D to 2.5D Refer: https://arxiv.org/pdf/1804.09534.pdf Args: K (CAMERA_PARAM):3x3 Matrix with camera parameters. joints_3D (JOINTS_3D): Original 3D coordinates unscaled. Returns: Tuple[JOINTS_25D, SCALE]: 2.5 D coordinates and scale information. """ scale = (((joints_3D[CHILD_JOINT] -
class IH_DB(Dataset):
    """Dataset of single-hand samples from InterHand2.6M.

    https://mks0601.github.io/InterHand2.6M/
    Inherits from the Dataset class in torch.utils.data.
    Note: The keypoints are mapped to format used at AIT.
    Refer to joint_mapping.json in src/data_loader/utils.
    """

    IS_LEFT = True

    def __init__(
        self,
        root_dir: str,
        split: str,
        annotor: str = "all",
        seed: int = 5,
        train_ratio: float = 0.9,
    ):
        """Set up folder names, meta information and split indices.

        Args:
            root_dir (str): Path to the directory with image samples.
            split (str): 'train', 'val' or 'test'. 'train' and 'val' both
                load the "train" files; the requested value is kept in
                ``_split`` and used to pick the index subset.
            annotor (str, optional): Annotation sub-folder. Defaults to
                'all'. Other options are 'human_annot' and 'machine_annot'.
            seed (int): Random state for the train/val split.
            train_ratio (float): Train fraction for the train/val split.
        """
        self.root_dir = root_dir
        self.annotor = annotor  # "human_annot" and "machine_annot" possible.
        self.seed = seed
        self.train_ratio = train_ratio
        # To convert from interhand to AIT format.
        self.joints = Joints()

        self.annotation_sampling_folder = "InterHand2.6M.annotations.5.fps"
        self.image_sampling_folder = "InterHand2.6M_5fps_batch0/images"

        self._split = split
        if split in ("train", "val"):
            self.split = "train"
        else:
            self.split = split

        meta_info = self.get_meta_info()
        (
            self.image_info,
            self.annotations_info,
            self.camera_info,
            self.joints_dict,
        ) = meta_info
        self.indices = self.create_train_val_split()

    def get_meta_info(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict]:
        """Read the data, camera and 3D-joint json files of the split.

        Returns:
            Tuple: ``image_info`` (indexed by image id), ``annotations_info``
            (rows with hand_type "interacting" removed, re-indexed from 0),
            ``camera_info`` (transposed camera json) and the raw joints dict.
        """
        annotation_dir = os.path.join(
            self.root_dir, self.annotation_sampling_folder, self.annotor
        )
        data = read_json(
            os.path.join(annotation_dir, f"InterHand2.6M_{self.split}_data.json")
        )
        camera_json = read_json(
            os.path.join(annotation_dir, f"InterHand2.6M_{self.split}_camera.json")
        )
        camera_info = pd.DataFrame(camera_json).T
        joints_dict = read_json(
            os.path.join(annotation_dir, f"InterHand2.6M_{self.split}_joint_3d.json")
        )

        all_annotations = pd.DataFrame(data["annotations"])
        # selecting only single hand images
        single_hand = all_annotations[all_annotations["hand_type"] != "interacting"]
        annotations_info = single_hand.set_index(np.arange(len(single_hand)))

        image_info = pd.DataFrame(data["images"]).set_index("id")
        return image_info, annotations_info, camera_info, joints_dict

    def get_camera_params(
        self, camera, capture_id
    ) -> Tuple[np.array, np.array, np.array]:
        """Look up the parameters of one camera in one capture.

        Args:
            camera: Key of the camera inside the capture's entries.
            capture_id: Capture identifier; converted to ``str`` for lookup.

        Returns:
            Tuple: 3x3 intrinsic matrix, ``camrot`` entry and ``campos``
            entry, each as a numpy array.
        """
        capture_row = self.camera_info.loc[str(capture_id)]
        translation = capture_row.campos[camera]
        rotation = capture_row.camrot[camera]
        fx, fy = capture_row.focal[camera]
        px, py = capture_row.princpt[camera]
        intrinsic_param = np.array([[fx, 0, px], [0, fy, py], [0, 0, 1.0]])
        return intrinsic_param, np.array(rotation), np.array(translation)

    def get_joints(
        self, capture_id: Union[int, str], frame_idx: Union[int, str]
    ) -> Tuple[np.array, np.array, bool]:
        """Fetch the 21 joints of the annotated hand for one frame.

        The right hand occupies the first 21 entries of ``world_coord`` and
        ``joint_valid``; the left hand occupies the last 21.

        Args:
            capture_id: Capture identifier (stringified for lookup).
            frame_idx: Frame identifier (stringified for lookup).

        Raises:
            NotImplementedError: If hand_type is neither "left" nor "right".

        Returns:
            Tuple: joint coordinates, joint validity and a left-hand flag.
        """
        joint_item = self.joints_dict[str(capture_id)][str(frame_idx)]
        hand_type = joint_item["hand_type"]
        if hand_type == "left":
            selection = slice(-21, None)
            is_left = IH_DB.IS_LEFT
        elif hand_type == "right":
            selection = slice(None, 21)
            is_left = not IH_DB.IS_LEFT
        else:
            raise NotImplementedError
        coords = np.array(joint_item["world_coord"][selection])
        valid = np.array(joint_item["joint_valid"][selection])
        return coords, valid, is_left

    def create_train_val_split(self) -> np.array:
        """Creates split for train and val data in interhand.

        Raises:
            NotImplementedError: In case the split doesn't match test, train
                or val.

        Returns:
            np.array: array of indices into ``annotations_info``.
        """
        all_indices = np.arange(len(self.annotations_info))
        train_indices, val_indices = train_test_split(
            all_indices, train_size=self.train_ratio, random_state=self.seed
        )
        if self._split == "train":
            return np.sort(train_indices)
        if self._split == "val":
            return np.sort(val_indices)
        if self._split == "test":
            return np.arange(len(self.annotations_info))
        raise NotImplementedError

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.indices)

    def __getitem__(self, idx: int) -> dict:
        """Returns a sample corresponding to the index.

        Left-hand samples are mirrored horizontally and the intrinsic matrix
        is mirrored to match.

        Args:
            idx (int): index

        Returns:
            dict: item with following elements.
                "image" in opencv bgr format.
                "K": camera params
                "joints3D": joints in the camera frame, AIT format, divided
                    by 1000 (presumably mm to m; confirm against the dataset).
                "joints_valid": per-joint validity in AIT format.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        annotation_idx = self.indices[idx]
        image_id = self.annotations_info.loc[annotation_idx]["image_id"]
        image_item = self.image_info.loc[image_id]

        image_path = os.path.join(
            self.root_dir,
            self.image_sampling_folder,
            self.split,
            image_item.file_name,
        )
        image = cv2.imread(image_path)

        raw_joints, raw_valid, is_left = self.get_joints(
            image_item.capture, image_item.frame_idx
        )
        joints = self.joints.interhand_to_ait(raw_joints)
        joints_valid = self.joints.interhand_to_ait(raw_valid)

        intrinsic_camera_matrix, camera_rot, camera_t = self.get_camera_params(
            image_item.camera, image_item.capture
        )
        if is_left:
            image = cv2.flip(image, 1)
            width = image.shape[1]
            # Maps pixel column u to (width - 1 - u), matching the flip.
            mirror = np.float32([[-1, 0, width - 1], [0, 1, 0], [0, 0, 1]])
            intrinsic_camera_matrix = mirror @ intrinsic_camera_matrix

        joints_camera_frame = (joints - camera_t) @ camera_rot.T
        # To avoid division by zero.
        joints_camera_frame[:, -1] += 1e-5

        return {
            "image": image,
            "K": torch.tensor(intrinsic_camera_matrix).float(),
            "joints3D": torch.tensor(joints_camera_frame).float() / 1000.0,
            "joints_valid": torch.tensor(joints_valid),
        }
import os from typing import Union import cv2 import matplotlib.pyplot as plt import numpy as np import torch from comet_ml import Experiment from pytorch_lightning.loggers import comet from src.constants import MASTER_THESIS_DIR from src.data_loader.joints import Joints from src.types import JOINTS_3D, JOINTS_25D from src.utils import read_json from torchvision import transforms joints = Joints() def plot_hand( axis: plt.Axes, coords_hand: np.array, plot_3d: bool = False, linewidth: str = "1", linestyle: str = "-", alpha: float = 1.0, ms=1, ): """Makes a hand stick figure from the coordinates wither in uv plane or xyz plane on the passed axes object. Code adapted from: https://github.com/lmb-freiburg/freihand/blob/master/utils/eval_util.py Args:
class MPII_DB(Dataset):
    """Class to load samples from the MPII hand dataset.

    Inherits from the Dataset class in torch.utils.data.
    Images and their json labels are read from ``<root_dir>/manual_<split>``.
    Left hands are mirrored so every returned sample shows a right hand.
    """

    # Images with annotations out of image bounds; dropped from the listing.
    _OUT_OF_FRAME_IMAGES = (
        "Ricki_unit_8.flv_000003_l.jpg",
        "Ricki_unit_8.flv_000002_l.jpg",
    )

    def __init__(
        self,
        root_dir: str,
        split: str = "train",
        seed: int = 5,
        train_ratio: float = 0.9,
    ):
        """Set up paths, file listings, labels and split indices for MPII.

        Joint layout reference:
        https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/output.md#hand-output-format

        Args:
            root_dir (str): Directory that holds the ``manual_<split>`` folders.
            split (str): 'train', 'val' or 'test'. 'train' and 'val' both
                read from the ``manual_train`` folder.
            seed (int): Random state for the train/val split.
            train_ratio (float): Train fraction for the train/val split.
        """
        self.root_dir = root_dir
        self.split = split
        self.seed = seed
        self.train_ratio = train_ratio
        split_set = "train" if self.split in ["train", "val"] else split
        self.image_dir_path = os.path.join(self.root_dir, f"manual_{split_set}")
        self.label_dir_path = os.path.join(self.root_dir, f"manual_{split_set}")
        self.img_names = self.get_image_names()
        self.labels = self.get_labels()
        self.indices = self.create_train_val_split()
        # To convert from MPII to AIT format.
        self.joints = Joints()

    def get_image_names(self) -> List[str]:
        """Gets the sorted names of all ``.jpg`` files in the image directory.

        Known out-of-frame images are removed. Each one is handled on its own,
        so a missing entry does not stop the others from being removed.

        Returns:
            List[str]: List of image names.
        """
        img_names = sorted(
            file_name
            for file_name in next(os.walk(self.image_dir_path))[2]
            if file_name.endswith(".jpg")
        )
        for bad_name in self._OUT_OF_FRAME_IMAGES:
            try:
                img_names.remove(bad_name)
            except ValueError as e:
                print(f"Out of frame images not found {e}")
        return img_names

    def get_labels(self) -> Dict[str, dict]:
        """Reads every ``.json`` label file in the label directory.

        Returns:
            Dict[str, dict]: label contents keyed by file name without the
            ``.json`` extension.
        """
        label_file_names = [
            file_name
            for file_name in next(os.walk(self.label_dir_path))[2]
            if file_name.endswith(".json")
        ]
        return {
            file_name.replace(".json", ""): read_json(
                os.path.join(self.label_dir_path, file_name)
            )
            for file_name in label_file_names
        }

    def create_train_val_split(self) -> np.ndarray:
        """Creates split for train and val data in mpii.

        The test split uses every image and needs no random split, so it is
        resolved before ``train_test_split`` is called.

        Raises:
            NotImplementedError: In case the split doesn't match test, train
                or val.

        Returns:
            np.ndarray: array of indices into ``img_names``.
        """
        num_unique_images = len(self.img_names)
        if self.split == "test":
            return np.arange(num_unique_images)
        if self.split not in ("train", "val"):
            raise NotImplementedError
        train_indices, val_indices = train_test_split(
            np.arange(num_unique_images),
            train_size=self.train_ratio,
            random_state=self.seed,
        )
        if self.split == "train":
            return np.sort(train_indices)
        return np.sort(val_indices)

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.indices)

    def __getitem__(self, idx: int) -> Dict[str, Union[np.ndarray, torch.Tensor]]:
        """Returns a sample corresponding to the index.

        Args:
            idx (int): index

        Returns:
            dict: item with following elements.
                "image": image converted from opencv BGR to RGB.
                "K": camera params (identity matrix in this case).
                "joints3D": joints in AIT format, taken from the label's
                    "hand_pts" (the third column is passed through as stored).
                "joints_valid": tensor of ones, one per joint.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        idx_ = self.indices[idx]
        img_name = os.path.join(self.image_dir_path, self.img_names[idx_])
        img = cv2.cvtColor(cv2.imread(img_name), cv2.COLOR_BGR2RGB)
        # mpii follow the same strategy as the freihand for joint naming.
        label = self.labels[self.img_names[idx_].replace(".jpg", "")]
        joints3D = self.joints.freihand_to_ait(
            torch.tensor(label["hand_pts"]).float()
        )
        if label["is_left"] == 1:
            # flipping horizontally to make it right hand
            img = cv2.flip(img, 1)
            # width - x coord
            joints3D[:, 0] = img.shape[1] - joints3D[:, 0]
        camera_param = torch.eye(3).float()
        joints_valid = torch.ones_like(joints3D[..., -1:])
        sample = {
            "image": img,
            "K": camera_param,
            "joints3D": joints3D,
            "joints_valid": joints_valid,
        }
        return sample
class YTB_DB(Dataset):
    """Dataset of samples from the youtube hands data.

    Inherits from the Dataset class in torch.utils.data.
    Not be used for supervised learning!!
    Camera matrix is unity to fit with the sample augmenter.
    """

    def __init__(self, root_dir: str, split: str = "train"):
        """Load joint and image listings and build the index array.

        Args:
            root_dir (str): Directory holding the youtube json files.
            split (str): 'train', 'val' or 'test'. Defaults to 'train'.
        """
        self.root_dir = root_dir
        self.split = split
        self.joints = Joints()

        joints_list, img_list = self.get_joints_labels_and_images()
        self.joints_list = joints_list
        self.img_list = img_list

        # Lookup table from image id to its image entry.
        img_dict = {}
        for item in self.img_list:
            img_dict[item["id"]] = item
        self.img_dict = img_dict

        self.indices = self.create_train_val_split()

    def get_joints_labels_and_images(self) -> Tuple[dict, dict]:
        """Returns the joint annotations and the image information.

        Cached ``*_joints.json`` / ``*_images.json`` files are used when both
        exist; otherwise they are derived from the full data json and saved.

        Returns:
            Tuple[dict, dict]: joints, image_dict
            image_dict
                - `name` - Image name in the form
                  of `youtube/VIDEO_ID/video/frames/FRAME_ID.png`.
                - `width` - Width of the image.
                - `height` - Height of the image.
                - `id` - Image ID.
            joints
                - `joints` - 21 joints computed from the mesh vertices.
                - `is_left` - Binary value indicating a right/left hand side.
                - `image_id` - ID to the corresponding entry in `images`.
                - `id` - Annotation ID (an image can contain multiple hands).
        """
        data_json_path = os.path.join(self.root_dir, f"youtube_{self.split}.json")
        joints_path = os.path.join(self.root_dir, f"youtube_{self.split}_joints.json")
        images_json_path = os.path.join(
            self.root_dir, f"youtube_{self.split}_images.json"
        )

        cache_ready = os.path.exists(joints_path) and os.path.exists(images_json_path)
        if cache_ready:
            return read_json(joints_path), read_json(images_json_path)

        data_json = read_json(data_json_path)
        images_dict = data_json["images"]
        save_json(images_dict, images_json_path)
        joints = self.get_joints_from_annotations(data_json["annotations"])
        save_json(joints, joints_path)
        return joints, images_dict

    def get_joints_from_annotations(self, annotations: dict) -> dict:
        """Converts the mano mesh vertices of each annotation to 21 joints.

        Args:
            annotations (dict): annotations, each carrying a 'vertices' entry.

        Returns:
            dict: the same entries as annotations except 'vertices' is removed
                and a 'joints' key is added.
        """
        mano_matrix = torch.load(MANO_MAT)
        converted = []
        for elem in tqdm(annotations):
            joints_21 = get_joints_from_mano_mesh(
                torch.tensor(elem["vertices"]), mano_matrix
            )
            entry = {key: val for key, val in elem.items() if key != "vertices"}
            entry["joints"] = joints_21.tolist()
            converted.append(entry)
        return converted

    def create_train_val_split(self) -> np.array:
        """Selects the sample indices for the configured split.

        'train' uses every annotation; 'val' and 'test' keep the rows marked
        valid in ``youtube_<split>_invalid_index.csv``.

        Raises:
            NotImplementedError: In case the split doesn't match test, train
                or val.

        Returns:
            np.array: array of indices
        """
        if self.split == "train":
            return np.arange(len(self.joints_list))
        if self.split in ("val", "test"):
            index_csv = os.path.join(
                self.root_dir, f"youtube_{self.split}_invalid_index.csv"
            )
            valid_index_df = pd.read_csv(index_csv)
            return valid_index_df[valid_index_df.valid]["joint_idx"].values
        raise NotImplementedError

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.indices)

    def __getitem__(self, idx: int) -> dict:
        """Returns a sample corresponding to the index.

        Args:
            idx (int): index

        Returns:
            dict: item with following elements.
                "image": image converted from opencv BGR to RGB.
                "K": camera params (identity matrix).
                "joints3D": joints in AIT format with depth set to 1.0.
                "joints_valid": tensor of zeros, one per joint.
                "joints_raw": joints before the depth is overwritten.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        idx_ = self.indices[idx]
        annotation = self.joints_list[idx_]

        img_name = os.path.join(
            self.root_dir, self.img_dict[annotation["image_id"]]["name"]
        )
        bgr_image = cv2.imread(img_name.replace(".png", ".jpg"))
        img = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

        joints3D = self.joints.mano_to_ait(torch.tensor(annotation["joints"]).float())
        if annotation["is_left"] == 1:
            # flipping horizontally to make it right hand
            img = cv2.flip(img, 1)
            # width - x coord
            joints3D[:, 0] = img.shape[1] - joints3D[:, 0]

        joints_raw = joints3D.clone()
        # because image is cropped and rotated with the 2d projections of these
        # coordinates. It needs to have depth as 1.0 to not cause problems.
        # For procrustes use "joints_raw"
        joints3D[..., -1] = 1.0

        return {
            "image": img,
            "K": torch.eye(3).float(),
            "joints3D": joints3D,
            "joints_valid": torch.zeros_like(joints3D[..., -1:]),
            "joints_raw": joints_raw,
        }
class F_DB(Dataset):
    """Class to load samples from the Freihand dataset.

    Inherits from the Dataset class in torch.utils.data.
    Note: The keypoints are mapped to format used at AIT.
    Refer to joint_mapping.json in src/data_loader/utils.
    """

    # The train/val index arithmetic addresses this many image versions per
    # annotation entry (offsets of 0..3 times the number of unique images).
    _NUM_IMAGE_VERSIONS = 4

    def __init__(
        self, root_dir: str, split: str, seed: int = 5, train_ratio: float = 0.9
    ):
        """Load labels, scales, camera parameters and image names.

        Args:
            root_dir (str): Path to the directory with image samples.
            split (str): 'train', 'val' or 'test'.
            seed (int): Random state for the train/val split.
            train_ratio (float): Train fraction for the train/val split.
        """
        self.root_dir = root_dir
        self.split = split
        self.seed = seed
        self.train_ratio = train_ratio
        self.labels = self.get_labels()
        self.scale = self.get_scale()
        self.camera_param = self.get_camera_param()
        self.img_names, self.img_path = self.get_image_names()
        self.indices = self.create_train_val_split()
        # To convert from freihand to AIT format.
        self.joints = Joints()

    def _base_index(self, image_idx: int) -> int:
        """Maps an image index onto its annotation/camera entry.

        Several image versions share one annotation, so the index wraps
        around the number of camera entries instead of a hard-coded count.
        """
        return image_idx % len(self.camera_param)

    def _expand_to_all_versions(self, base_indices: np.ndarray) -> np.ndarray:
        """Repeats sorted base indices once per image version."""
        num_unique_images = len(self.camera_param)
        base_indices = np.sort(base_indices)
        return np.concatenate(
            [
                base_indices + num_unique_images * version
                for version in range(self._NUM_IMAGE_VERSIONS)
            ],
            axis=0,
        )

    def create_train_val_split(self) -> np.ndarray:
        """Creates split for train and val data in freihand.

        The split is drawn over unique images and then expanded to all image
        versions, so versions of one image never straddle train and val.
        The test split uses every entry and skips the random split.

        Raises:
            NotImplementedError: In case the split doesn't match test, train
                or val.

        Returns:
            np.ndarray: array of indices
        """
        num_unique_images = len(self.camera_param)
        if self.split == "test":
            return np.arange(num_unique_images)
        if self.split not in ("train", "val"):
            raise NotImplementedError
        train_indices, val_indices = train_test_split(
            np.arange(num_unique_images),
            train_size=self.train_ratio,
            random_state=self.seed,
        )
        if self.split == "train":
            return self._expand_to_all_versions(train_indices)
        return self._expand_to_all_versions(val_indices)

    def get_image_names(self) -> Tuple[List[str], str]:
        """Gets the sorted names of all the files in the image directory.

        Make sure there are only images in that directory as it reads all the
        file names.

        Returns:
            List[str]: List of image names.
            str: base path for image directory
        """
        if self.split in ["train", "val"]:
            img_path = os.path.join(self.root_dir, "training", "rgb")
        else:
            img_path = os.path.join(self.root_dir, "evaluation", "rgb")
        img_names = sorted(next(os.walk(img_path))[2])
        return img_names, img_path

    def get_labels(self) -> list:
        """Extracts the labels (joint coordinates) from ``training_xyz.json``.

        Returns:
            list: joint coordinates per annotation entry, or None for any
                split other than train/val.
        """
        if self.split in ["train", "val"]:
            labels_path = os.path.join(self.root_dir, "training_xyz.json")
            return read_json(labels_path)
        return None

    def get_scale(self) -> list:
        """Extracts the scale from freihand data."""
        if self.split in ["train", "val"]:
            labels_path = os.path.join(self.root_dir, "training_scale.json")
        else:
            labels_path = os.path.join(self.root_dir, "evaluation_scale.json")
        return read_json(labels_path)

    def get_camera_param(self) -> list:
        """Extracts the camera parameters from the split's ``*_K.json`` file.

        Returns:
            list: List of camera parameters, one per annotation entry.
        """
        if self.split in ["train", "val"]:
            camera_param_path = os.path.join(self.root_dir, "training_K.json")
        else:
            camera_param_path = os.path.join(self.root_dir, "evaluation_K.json")
        return read_json(camera_param_path)

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.indices)

    def create_sudo_bound_box(self, scale) -> torch.Tensor:
        """Builds 21 pseudo joints that mark a box around the image centre.

        Row 0 is the centre with depth 0. The remaining 20 rows are the four
        box corners, each repeated five times, with depth 1. The box is a
        224x224 frame scaled by ``scale`` about its centre.

        Args:
            scale: Multiplier applied to the half extent of the frame.

        Returns:
            torch.Tensor: float tensor of shape (21, 3).
        """
        max_bound = torch.tensor([224.0, 224.0])
        min_bound = torch.tensor([0.0, 0.0])
        center = (max_bound + min_bound) / 2.0
        half_extent = ((max_bound - min_bound) / 2.0) * scale
        # Plain floats keep the nested-list tensor construction unambiguous.
        sx, sy = half_extent.tolist()
        cx, cy = center.tolist()
        corners = (
            [[0, 0, 0]]
            + [[sx, sy, 1]] * 5
            + [[-sx, sy, 1]] * 5
            + [[sx, -sy, 1]] * 5
            + [[-sx, -sy, 1]] * 5
        )
        bound_box = torch.tensor(corners) + torch.tensor([cx, cy, 0])
        return bound_box.float()

    def __getitem__(self, idx: int) -> dict:
        """Returns a sample corresponding to the index.

        Args:
            idx (int): index

        Returns:
            dict: item with following elements.
                "image": image converted from opencv BGR to RGB.
                "K": camera params
                "joints3D": 3D coordinates of joints in AIT format. Without
                    labels (test split) these are pseudo joints built from a
                    bounding box.
                "joints_valid": tensor of ones, one per joint.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        idx_ = self.indices[idx]
        img_name = os.path.join(self.img_path, self.img_names[idx_])
        img = cv2.cvtColor(cv2.imread(img_name), cv2.COLOR_BGR2RGB)
        if self.labels is not None:
            base_idx = self._base_index(idx_)
            camera_param = torch.tensor(self.camera_param[base_idx]).float()
            joints3D = self.joints.freihand_to_ait(
                torch.tensor(self.labels[base_idx]).float()
            )
        else:
            camera_param = torch.tensor(self.camera_param[idx_]).float()
            joints2d_orthogonal = self.create_sudo_bound_box(scale=BOUND_BOX_SCALE)
            joints3D = convert_2_5D_to_3D(
                joints2d_orthogonal, scale=1.0, K=camera_param.clone()
            )
        joints_valid = torch.ones_like(joints3D[..., -1:])
        sample = {
            "image": img,
            "K": camera_param,
            "joints3D": joints3D,
            "joints_valid": joints_valid,
        }
        return sample