def __getitem__(self, index):
    """Load one sample for the human-parsing dataset.

    Reads ``<root>/<dataset>_images/<name>.jpg`` and (for non-test splits)
    the matching grayscale parsing annotation PNG, applies train-time
    augmentation (scale/rotation jitter, horizontal flip with left/right
    class swapping), and crops/warps everything to ``self.crop_size``.

    Returns:
        (input_image, meta) for any split other than 'train', or
        (input_image, label_parsing, label_edge, meta) for 'train'.
    """
    im_name = self.im_list[index]
    im_path = os.path.join(self.root, self.dataset + '_images', im_name + '.jpg')
    parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', im_name + '.png')

    im = cv2.imread(im_path, cv2.IMREAD_COLOR)
    h, w, _ = im.shape
    # FIX: np.long was removed in NumPy 1.24; np.int64 is the equivalent dtype.
    parsing_anno = np.zeros((h, w), dtype=np.int64)

    # Person center and scale derived from the whole-image bounding box.
    center, s = self._box2cs([0, 0, w - 1, h - 1])
    r = 0

    if self.dataset != 'test':
        parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)

        if self.dataset == 'train' or self.dataset == 'trainval':
            sf = self.scale_factor
            rf = self.rotation_factor
            # Random scale jitter, and random rotation 60% of the time.
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if random.random() <= self.flip_prob:
                # Horizontal flip: mirror image and label, move the center,
                # and swap left/right paired part labels so semantics stay
                # consistent with the mirrored image.
                im = im[:, ::-1, :]
                parsing_anno = parsing_anno[:, ::-1]
                center[0] = im.shape[1] - center[0] - 1
                right_idx = [15, 17, 19]
                left_idx = [14, 16, 18]
                for i in range(0, 3):
                    right_pos = np.where(parsing_anno == right_idx[i])
                    left_pos = np.where(parsing_anno == left_idx[i])
                    parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                    parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

    trans = get_affine_transform(center, s, r, self.crop_size)
    # Renamed from 'input' to avoid shadowing the builtin.
    input_image = cv2.warpAffine(
        im,
        trans,
        (int(self.crop_size[1]), int(self.crop_size[0])),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0))

    if self.transform:
        input_image = self.transform(input_image)

    meta = {
        'name': im_name,
        'center': center,
        'height': h,
        'width': w,
        'scale': s,
        'rotation': r
    }

    if self.dataset != 'train':
        # NOTE(review): 'trainval' is augmented above but still takes this
        # branch, so its labels are never returned — confirm this is intended.
        return input_image, meta
    else:
        # Nearest-neighbour keeps label ids intact; 255 fills out-of-image
        # pixels (ignore index).
        label_parsing = cv2.warpAffine(
            parsing_anno,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_NEAREST,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255))

        label_edge = generate_edge(label_parsing)
        label_parsing = torch.from_numpy(label_parsing)
        label_edge = torch.from_numpy(label_edge)

        return input_image, label_parsing, label_edge, meta
def __getitem__(self, index):
    """Load one sample for the parsing + pose dataset.

    Reads the image and (for non-test splits) the parsing annotation,
    loads 2-D joint coordinates from ``self.pose_info`` for 'train'/'val',
    applies train-time augmentation (scale/rotation jitter, horizontal flip
    with left/right label and joint swapping), and warps everything to
    ``self.crop_size``.

    Returns:
        (input_image, meta) for the 'test' split, or
        (input_image, label_parsing, label_pose, label_edge, meta) otherwise.
    """
    im_name = self.im_list[index]
    im_path = os.path.join(self.root, self.dataset + '_images', im_name + '.jpg')
    parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', im_name + '.png')

    im = cv2.imread(im_path, cv2.IMREAD_COLOR)
    h, w, _ = im.shape
    # FIX: np.long was removed in NumPy 1.24; np.int64 is the equivalent dtype.
    parsing_anno = np.zeros((h, w), dtype=np.int64)

    # Pose annotation: joint locations come from columns 0-1; a joint whose
    # coordinates sum to zero is treated as invisible.
    # NOTE(review): joints are only loaded for 'train'/'val', yet the
    # 'trainval' augmentation and the non-test label path below reference
    # them — 'trainval' would raise NameError here. Confirm supported splits.
    if self.dataset == 'train' or self.dataset == 'val':
        joints_all_info = np.array(self.pose_info[im_name])
        joints_loc = np.zeros((joints_all_info.shape[0], 2))
        joints_loc[:, :] = joints_all_info[:, 0:2]  # 1st and 2nd column

        # get visibility of joints
        coord_sum = np.sum(joints_loc, axis=1)
        visibility = coord_sum != 0

    # Person center and scale derived from the whole-image bounding box.
    center, s = self._box2cs([0, 0, w - 1, h - 1])
    r = 0

    if self.dataset != 'test':
        parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)

        if self.dataset == 'train' or self.dataset == 'trainval':
            sf = self.scale_factor
            rf = self.rotation_factor
            # Random scale jitter, and random rotation 60% of the time.
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if random.random() <= self.flip_prob:
                # Horizontal flip: mirror image and label, move the center,
                # and swap left/right paired part labels.
                im = im[:, ::-1, :]
                parsing_anno = parsing_anno[:, ::-1]
                center[0] = im.shape[1] - center[0] - 1
                right_idx = [15, 17, 19]
                left_idx = [14, 16, 18]
                for i in range(0, 3):
                    right_pos = np.where(parsing_anno == right_idx[i])
                    left_pos = np.where(parsing_anno == left_idx[i])
                    parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                    parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

                # Mirror the joint coordinates as well.
                joints_loc = flip_joints(joints_loc, w)

                # Swap the visibility flags of left and right joints so they
                # stay paired with the mirrored coordinates.
                r_joint = [0, 1, 2, 10, 11, 12]
                l_joint = [3, 4, 5, 13, 14, 15]
                for i in range(0, 6):
                    temp_visibility = visibility[r_joint[i]]
                    visibility[r_joint[i]] = visibility[l_joint[i]]
                    visibility[l_joint[i]] = temp_visibility

    trans = get_affine_transform(center, s, r, self.crop_size)
    # Renamed from 'input' to avoid shadowing the builtin.
    input_image = cv2.warpAffine(
        im,
        trans,
        (int(self.crop_size[1]), int(self.crop_size[0])),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0))

    if self.transform:
        input_image = self.transform(input_image)

    meta = {
        'name': im_name,
        'center': center,
        'height': h,
        'width': w,
        'scale': s,
        'rotation': r
    }

    if self.dataset == 'test':
        return input_image, meta
    else:
        # Nearest-neighbour keeps label ids intact; 255 fills out-of-image
        # pixels (ignore index).
        label_parsing = cv2.warpAffine(
            parsing_anno,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_NEAREST,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255))

        # Pose heatmap grid is the crop size downsampled by the network stride.
        grid_x = int(self.crop_size[1] / self.pose_net_stride)
        grid_y = int(self.crop_size[0] / self.pose_net_stride)

        # Map visible joints through the same affine transform as the image.
        for i in range(joints_all_info.shape[0]):
            if visibility[i] > 0:
                joints_loc[i, 0:2] = self.affine_trans(
                    joints_loc[i, 0:2], trans)

        label_pose = generate_pose(joints_loc, visibility, trans,
                                   grid_x, grid_y,
                                   self.pose_net_stride, self.sigma)
        label_edge = generate_edge(label_parsing)

        return input_image, label_parsing, label_pose, label_edge, meta