def __getitem__(self, index):
    """Return one cropped, colour-normalized image and its angle target.

    The sample id is taken from ``self.train`` when ``self.is_train`` is
    set and from ``self.valid`` otherwise.  The crop is centred on the
    bounding box of the object mask selected by ``self.color``.  In
    training mode the centre, scale, rotation, LAB channel 0 and
    horizontal orientation are randomly perturbed.

    Args:
        index: Position inside the active split.

    Returns:
        Tuple ``(inp, target)``.  ``inp`` is the output of
        ``color_normalize`` applied to the ``self.inp_res`` x
        ``self.inp_res`` crop; ``target`` is whatever
        ``self.load_angles`` returns for the image path.
    """
    ids = self.train[index] if self.is_train else self.valid[index]

    img_path = self.dataset.get_image([self.cam_name], [ids])[0]
    img = load_image(img_path)  # CxHxW
    target = self.load_angles(img_path)

    # The bounding box of the object mask drives crop centre and scale.
    seg_path = self.dataset.get_seg([self.cam_name], [ids])[0]
    binary_arm = vdb.get_obj_mask(io.imread(seg_path), self.color)
    x0, x1, y0, y1 = vdb.seg2bb(binary_arm)
    c = np.array([(x0 + x1), (y0 + y1)]) / 2
    s = np.sqrt((y1 - y0) * (x1 - x0)) / 60.0
    r = 0

    if self.is_train:
        # Random shift of the crop centre by up to +/-30 in x and y.
        c = c + np.array([
            -30 + 60 * random.random(),
            -30 + 60 * random.random(),
        ])
        s *= 0.6 * (1 + 2 * random.random())  # random scale
        rf = 15
        r = -rf + 2 * random.random() * rf  # random rotation in [-rf, rf)

        # Colour jitter: rescale channel 0 of the LAB representation.
        im_lab = cv2.cvtColor(im_to_numpy(img), cv2.COLOR_RGB2LAB)
        im_lab[:, :, 0] = np.clip(
            im_lab[:, :, 0] * random.uniform(0.3, 1.3), 0, 255)
        img = im_to_torch(cv2.cvtColor(im_lab, cv2.COLOR_LAB2RGB))

        if random.random() <= 0.5:
            img = torch.from_numpy(fliplr(img.numpy())).float()
            # Mirror the crop centre together with the image; otherwise the
            # crop is taken where the object was before the flip.
            c[0] = img.size(2) - c[0]
            # NOTE(review): `target` is not adjusted for the flip - confirm
            # that the values from load_angles are mirror-invariant.

    inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
    inp = color_normalize(inp, self.mean, self.std)
    return inp, target
def __getitem__(self, index):
    """Return one network input together with its heatmap target and meta.

    Behaviour depends on ``self.anno_type``:

    * ``'3d'`` - keypoints come from ``get_joint_vertex_2d`` (the first
      joint is dropped) followed by the averaged vertex groups listed in
      ``vertex.json``; the crop box comes from the segmentation mask.
    * ``'2d'`` - keypoints come from the ``reprojection`` entry of the
      per-image JSON in ``d3_preds``; the crop box comes from
      ``self.bbox_anno``.
    * ``'none'`` - no keypoints; the crop box comes from ``self.anno``
      when available, otherwise the image centre with a fixed scale.

    When ``self.multi_scale`` is set, ``index`` enumerates
    (sample, scale) pairs: ``index % len(self.scales)`` selects the scale
    and ``index // len(self.scales)`` the sample.

    Args:
        index: Dataset index (see the multi-scale note above).

    Returns:
        ``(inp, target, meta)`` for ``'3d'``/``'2d'`` where ``target`` is
        a ``(nparts, out_res, out_res)`` tensor of label maps, or
        ``(inp, [], meta)`` for ``'none'``.

    Raises:
        ValueError: If ``self.anno_type`` is not one of the values above.
    """
    scale_factor = 60.0

    if self.multi_scale:
        scale = self.scales[index % len(self.scales)]
        index = index // len(self.scales)

    if self.anno_type in ('3d', '2d'):
        ids = self.train[index] if self.is_train else self.valid[index]

        if self.anno_type == '3d':
            joint_2d, vertex_2d, img_path = get_joint_vertex_2d(
                self.dataset, ids, self.cam_name, self.actor_name)
            # Discard the first joint, as we do not predict it.
            joint_2d = joint_2d[1:]

            # From raw vertices to final keypoints: every entry of
            # vertex.json indexes a group of vertices that is averaged
            # into a single point.
            with open(os.path.join(self.meta_dir, 'vertex.json'), 'r') as f:
                vertex_seq = json.load(f)
            pts = np.zeros((len(vertex_seq), 2))
            for i, vertex_ids in enumerate(vertex_seq):
                pts[i] = np.average(vertex_2d[vertex_ids], axis=0)
            pts = np.concatenate((joint_2d, pts), axis=0)
        else:
            # Data with only 2d annotations.
            # NOTE(review): this path indexes self.dataset with `index`
            # rather than `ids` - confirm that this is intended.
            img_path = os.path.join(self.img_folder, 'imgs',
                                    self.dataset[index])
            pred_name = (
                os.path.splitext(os.path.basename(img_path))[0] + '.json')
            pred_path = os.path.join(self.img_folder, 'd3_preds', pred_name)
            with open(pred_path, 'r') as f:
                obj = json.load(f)
            pts = np.transpose(np.array(obj['reprojection']))
            if self.ignore_invis_pts and 'visibility' in obj:
                # Cast to bool before negating: on 0/1 integers np.invert
                # is a bitwise NOT and would index rows -1/-2 instead of
                # acting as a mask.
                visible = np.asarray(obj['visibility'][:-2], dtype=bool)
                pts[~visible, :] = -1.0

        # For single-person pose estimation with a centered/scaled figure
        nparts = pts.shape[0]
        img = self._load_input_image(img_path)
        original_size = np.array((img.shape[2], img.shape[1]))

        if self.anno_type == '3d':
            seg_path = self.dataset.get_seg([self.cam_name], [ids])[0]
            binary_arm = vdb.get_obj_mask(io.imread(seg_path), self.color)
            x0, x1, y0, y1 = vdb.seg2bb(binary_arm)
        else:
            bb = self.bbox_anno[os.path.basename(img_path)]
            x0, x1, y0, y1 = bb[0][0], bb[1][0], bb[0][1], bb[1][1]

        c = np.array([(x0 + x1), (y0 + y1)]) / 2
        s = np.sqrt((y1 - y0) * (x1 - x0)) / scale_factor
        r = 0

        if self.is_train:
            # Random shift of the crop centre by up to +/-30 in x and y.
            c = c + np.array([
                -30 + 60 * random.random(),
                -30 + 60 * random.random(),
            ])
            s *= 0.6 * (1 + 2 * random.random())  # random scale
            rf = 15
            r = -rf + 2 * random.random() * rf  # random rotation

            # Colour jitter: rescale channel 0 of the LAB representation.
            im_lab = cv2.cvtColor(im_to_numpy(img), cv2.COLOR_RGB2LAB)
            im_lab[:, :, 0] = np.clip(
                im_lab[:, :, 0] * random.uniform(0.3, 1.3), 0, 255)
            img = im_to_torch(cv2.cvtColor(im_lab, cv2.COLOR_LAB2RGB))

            if random.random() <= 0.5:
                # Horizontal flip: mirror x coordinates, swap the
                # left/right keypoint pairs and mirror the crop centre.
                img = torch.from_numpy(fliplr(img.numpy())).float()
                pts[:, 0] = img.size(2) - pts[:, 0]
                for pair in self.lr_pairs:
                    pts[[pair[0], pair[1]]] = pts[[pair[1], pair[0]]]
                c[0] = img.size(2) - c[0]

        if self.multi_scale:
            s = s * scale

        inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
        inp = color_normalize(inp, self.mean, self.std)

        tpts = pts.copy()
        target = torch.zeros(nparts, self.out_res, self.out_res)
        for i in range(nparts):
            # Only points with a positive y coordinate get a label map
            # (ignored points were set to -1 above).
            if tpts[i, 1] > 0:
                tpts[i, 0:2] = to_torch(
                    transform(tpts[i, 0:2], c, s,
                              [self.out_res, self.out_res], rot=r))
                target[i] = draw_labelmap(target[i], tpts[i], self.sigma,
                                          type=self.label_type)

        # Meta info
        meta = {
            'index': index,
            'pts': pts,
            'tpts': tpts,
            'center': c,
            'original_size': original_size,
            'scale': s,
            'img_name': os.path.splitext(os.path.basename(img_path))[0]
        }
        return inp, target, meta

    if self.anno_type == 'none':
        img_path = self.dataset[index]
        img = self._load_input_image(img_path)
        original_size = np.array((img.shape[2], img.shape[1]))

        if self.anno is not None:
            joints = self.anno[self.F[index]]
            x0, y0 = joints[0][0], joints[0][1]
            x1, y1 = joints[1][0], joints[1][1]
            c = np.array([(x0 + x1), (y0 + y1)]) / 2
            s = np.sqrt((y1 - y0) * (x1 - x0)) / scale_factor
            if self.multi_scale:
                s = s * scale
        else:
            # No box available: crop around the image centre.
            c = np.array([img.shape[2] / 2, img.shape[1] / 2])
            s = 5.0

        inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=0)
        inp = color_normalize(inp, self.mean, self.std)

        meta = {
            'index': index,
            'pts': [],
            'tpts': [],
            'center': c,
            'original_size': original_size,
            'scale': s,
            'img_name': os.path.splitext(os.path.basename(img_path))[0]
        }
        return inp, [], meta

    # Previously an unknown anno_type fell through and returned None.
    raise ValueError(
        'unsupported anno_type: {!r}'.format(self.anno_type))


def _load_input_image(self, img_path):
    """Load ``img_path`` as an image tensor, honouring ``self.replace_bg``.

    Without background replacement the image is read with ``load_image``.
    Otherwise it is read with ``cv2.imread``, passed through
    ``self.background_replace.replace(..., 'white')``, converted from BGR
    to RGB and wrapped with ``im_to_torch``.
    """
    if not self.replace_bg:
        return load_image(img_path)  # CxHxW
    replaced = self.background_replace.replace(cv2.imread(img_path), 'white')
    return im_to_torch(cv2.cvtColor(replaced, cv2.COLOR_BGR2RGB))