def __getitem__(self, idx):
    """Load sample ``idx``, crop it with an affine warp and build its target.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta)`` where ``input`` is
        the warped image (after ``self.transform`` when one is set),
        ``target`` and ``target_weight`` are torch tensors built from the
        output of ``self.generate_target``, and ``meta`` is a dict with the
        crop parameters and joint annotations.

    Raises:
        ValueError: If the image could not be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    # For zipped data the archive marker is inserted into the path that is
    # handed to zipreader.
    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # cv2.imread signals failure by returning None; fail here with the file
    # name instead of letting warpAffine raise an opaque error later.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_2d'].copy()
    joints_vis = db_rec['joints_vis'].copy()
    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train:
        # Random scale is always applied; random rotation with probability 0.6.
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            # Joints that land outside the crop are marked as not visible.
            if (np.min(joints[i, :2]) < 0 or
                    joints[i, 0] >= self.image_size[0] or
                    joints[i, 1] >= self.image_size[1]):
                joints_vis[i, :] = 0

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source']
    }
    return input, target, target_weight, meta
def __getitem__(self, index):
    """Return the RGB image (and, for training data, its annotations).

    Args:
        index (int): Index into ``self.ids``.

    Returns:
        When ``'train'`` is contained in ``self.dataset``: a tuple
        ``(image, annotations, image_info)`` where ``annotations`` is a list
        of the objects returned by ``coco.loadAnns`` and ``image_info`` is
        the first record returned by ``coco.loadImgs``. Otherwise only the
        image is returned.

    Raises:
        ValueError: If the image could not be read.
    """
    coco = self.coco
    img_id = self.ids[index]
    ann_ids = coco.getAnnIds(imgIds=img_id)
    target = coco.loadAnns(ann_ids)
    image_info = coco.loadImgs(img_id)[0]

    file_name = image_info['file_name']
    image_path = self._get_image_path(file_name)

    if self.data_format == 'zip':
        img = zipreader.imread(
            image_path,
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        img = cv2.imread(
            image_path,
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # cv2.imread returns None on failure; report the offending path rather
    # than letting cvtColor fail with an opaque assertion.
    if img is None:
        raise ValueError('Fail to read {}'.format(image_path))

    # The image is read in BGR channel order and converted to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if 'train' in self.dataset:
        return img, list(target), image_info
    else:
        return img
def __getitem__(self, index):
    """Return the RGB image and annotations for one COCO image id.

    Args:
        index (int): Index into ``self.ids``.

    Returns:
        tuple: ``(image, target)``. ``target`` is the object returned by
        ``coco.loadAnns``, passed through ``self.target_transform`` when one
        is set; ``image`` is passed through ``self.transform`` when one is
        set.

    Raises:
        ValueError: If the image could not be read.
    """
    coco = self.coco
    img_id = self.ids[index]
    ann_ids = coco.getAnnIds(imgIds=img_id)
    target = coco.loadAnns(ann_ids)

    file_name = coco.loadImgs(img_id)[0]['file_name']
    image_path = self._get_image_path(file_name)

    if self.data_format == 'zip':
        img = zipreader.imread(
            image_path,
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        img = cv2.imread(
            image_path,
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # cv2.imread returns None on failure; raise with the path so the broken
    # sample can be identified.
    if img is None:
        raise ValueError('Fail to read {}'.format(image_path))

    # The image is read in BGR channel order and converted to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if self.transform is not None:
        img = self.transform(img)

    if self.target_transform is not None:
        target = self.target_transform(target)

    return img, target
def getitem(self):
    """Build the cropped network input and metadata for the first record.

    The first record returned by
    ``self._load_coco_keypoint_annotation_kernal()`` is used.

    Returns:
        tuple: ``(input, meta)`` - the affine-warped image (after
        ``self.transform`` when one is set) and a dict describing the crop.

    Raises:
        ValueError: If the image could not be read.
    """
    record = self._load_coco_keypoint_annotation_kernal()[0]

    image_file = record['image']
    filename = ''
    if 'filename' in record:
        filename = record['filename']
    imgnum = ''
    if 'imgnum' in record:
        imgnum = record['imgnum']

    # imread arguments are (file name, flags).
    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        image = zipreader.imread(image_file, read_flags)
    else:
        image = cv2.imread(image_file, read_flags)

    if image is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    center = record['center']
    scale = record['scale']
    score = 1
    if 'score' in record:
        score = record['score']
    rotation = 0

    # Affine matrix mapping the (center, scale, rotation) box onto image_size.
    trans = get_affine_transform(center, scale, rotation, self.image_size)

    # Output size is (width, height) taken from self.image_size
    # (192x256 according to the original author's note - TODO confirm).
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    net_input = cv2.warpAffine(image, trans, out_size,
                               flags=cv2.INTER_LINEAR)

    if self.transform:
        net_input = self.transform(net_input)

    # One (1, 1, 0) visibility triple per joint, 17 joints, wrapped in an
    # outer list.
    all_visible = [[(1, 1, 0)] * 17]

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'center': center,
        'scale': scale,
        'score': score,
        'joints_vis': all_visible,
    }

    return net_input, meta
def __getitem__(self, idx):
    """Load sample ``idx`` and crop it to the network input size.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, meta)`` - the affine-warped image (after
        ``self.transform`` when one is set) and a dict with the image path,
        center, scale, rotation and score.

    Raises:
        ValueError: If the image could not be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # The read must be validated BEFORE any further OpenCV call: cvtColor on
    # a None image raises its own error and would hide the message below.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    meta = {
        'image': image_file,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, meta
def save_all_preds(pose2d_gt, preds, detected, names, source, save_dir,
                   max_images=200):
    """Draw ground-truth and predicted joints on images and save them.

    Images are read from ``<root>/data/<source>/images.zip@/images/<name>``
    where ``<root>`` is the ``pose_unsupervised`` directory found in this
    file's path, and written to ``<save_dir>/debug/NNNNN.jpg``.

    Args:
        pose2d_gt: [N, j, 2] ground-truth joint coordinates.
        preds: [N, j, 2] predicted joint coordinates.
        detected: [N, j], whether this joint is within the threshold.
        names: a list of [N] image names.
        source: dataset sub-directory under ``data``.
        save_dir: directory under which the ``debug`` folder is created.
        max_images: maximum number of images to write (default 200, the
            previously hard-coded limit).

    Raises:
        ValueError: If an image could not be read.
    """
    assert len(pose2d_gt) == len(names)
    file_path = os.path.dirname(os.path.realpath(__file__))
    begin = file_path.find('pose_unsupervised')
    assert begin >= 0, 'not find pose_unsupervised'
    root_path = os.path.join(file_path[:begin], 'pose_unsupervised')

    save_path = os.path.join(save_dir, 'debug')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_path, exist_ok=True)

    from utils import zipreader

    # Colours are given in OpenCV's BGR channel order.
    red = (0, 0, 255)
    green = (0, 255, 0)
    blue = (255, 0, 0)

    for img_idx in range(min(len(pose2d_gt), max_images)):
        image_name = names[img_idx]
        image_file = os.path.join(root_path, 'data', source, 'images.zip@',
                                  'images', image_name)
        org_img = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if org_img is None:
            raise ValueError('Fail to read {}'.format(image_file))

        label = pose2d_gt[img_idx]  # [j, 2]
        pred = preds[img_idx]  # [j, 2]
        detected_one_image = detected[img_idx]  # [j]

        # Start from the untouched image so a sample without joints is still
        # written instead of hitting an undefined name.
        img = org_img
        # gt -> red, detected prediction -> green, missed prediction -> blue
        for joint_idx in range(len(label)):
            img = cv2.circle(
                org_img,
                (int(label[joint_idx][0]), int(label[joint_idx][1])), 5, red,
                -1)
            img = cv2.drawMarker(
                img, (int(pred[joint_idx][0]), int(pred[joint_idx][1])),
                green if detected_one_image[joint_idx] else blue,
                cv2.MARKER_CROSS, 10)

        cv2.imwrite(os.path.join(save_path, '%05d.jpg' % img_idx), img)
def __getitem__(self, index):
    """Return the RGB image with either joint data or raw annotations.

    Args:
        index (int): Index into ``self.ids``.

    Returns:
        tuple: ``(img, joints, human_mask, area)`` when ``'test'`` is
        contained in ``self.dataset`` or ``self.get_rescore_data`` is truthy;
        otherwise ``(img, anno)`` where ``anno`` is the list of annotations
        (after ``self.target_transform`` when one is set).

    Raises:
        ValueError: If the image could not be read.
    """
    coco = self.coco
    img_id = self.ids[index]
    ann_ids = coco.getAnnIds(imgIds=img_id)
    target = coco.loadAnns(ann_ids)

    file_name = coco.loadImgs(img_id)[0]['file_name']
    image_path = self._get_image_path(file_name)

    if self.data_format == 'zip':
        img = zipreader.imread(
            image_path,
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )
    else:
        img = cv2.imread(
            image_path,
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )

    # cv2.imread returns None on failure; raise with the path so the broken
    # sample can be identified.
    if img is None:
        raise ValueError('Fail to read {}'.format(image_path))

    # The image is read in BGR channel order and converted to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Height/width are taken before self.transform can change the image.
    img_h, img_w = img.shape[:2]

    if self.transform is not None:
        img = self.transform(img)

    if self.target_transform is not None:
        target = self.target_transform(target)

    anno = list(target)
    joints, area = self.get_joints(anno)
    human_mask = self.get_human_mask(anno, img_h, img_w)

    if 'test' in self.dataset or self.get_rescore_data:
        return img, joints, human_mask, area
    else:
        return img, anno
def __getitem__(self, index):
    """Return the RGB image and annotations, logging which file is loaded.

    Args:
        index (int): Index into ``self.ids``.

    Returns:
        tuple: ``(image, target)``. ``target`` is the object returned by
        ``coco.loadAnns``, passed through ``self.target_transform`` when one
        is set; ``image`` is passed through ``self.transform`` when one is
        set.

    Raises:
        ValueError: If the image could not be read.
    """
    coco = self.coco
    img_id = self.ids[index]
    ann_ids = coco.getAnnIds(imgIds=img_id)
    target = coco.loadAnns(ann_ids)

    file_name = coco.loadImgs(img_id)[0]['file_name']
    # Resolve the path once; it is needed for reading, logging and errors.
    image_path = self._get_image_path(file_name)

    if self.data_format == 'zip':
        img = zipreader.imread(
            image_path,
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        img = cv2.imread(
            image_path,
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Debug trace of every file that is loaded.
    # NOTE(review): the log location is a hard-coded absolute path and the
    # open() fails if that directory does not exist - consider making it
    # configurable.
    write_path = "/home/student_2/HRNet2/output/coco_kpt/pose_higher_hrnet/file.txt"
    # Append mode already creates the file when it is missing, so no
    # existence check is needed.
    with open(write_path, 'a') as f:
        f.write(image_path + "\n")

    print("Now analyzing file: ", image_path)

    # Checked after logging so that the unreadable file is still recorded.
    if img is None:
        raise ValueError('Fail to read {}'.format(image_path))

    # The image is read in BGR channel order and converted to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if self.transform is not None:
        img = self.transform(img)

    if self.target_transform is not None:
        target = self.target_transform(target)

    return img, target
def __getitem__(self, idx):
    """Load sample ``idx`` and build its heatmap and vector-map targets.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input_t, target_heatmap, target_vectormap, meta)``. The two
        targets are torch tensors created from the arrays returned by
        ``self.generate_target``.

    Raises:
        ValueError: If the image could not be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = ''
    if 'filename' in record:
        filename = record['filename']
    imgnum = ''
    if 'imgnum' in record:
        imgnum = record['imgnum']

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        image = zipreader.imread(image_file, read_flags)
    else:
        image = cv2.imread(image_file, read_flags)

    if image is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints_xyv = record['joints_xyv']
    center = record['center']
    scale = record['scale']
    score = 1
    if 'score' in record:
        score = record['score']
    rotation = 0

    if self.is_train:
        # Scale jitter is always applied; rotation jitter with probability 0.5.
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if random.random() <= 0.5:
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        else:
            rotation = 0

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input_t = cv2.warpAffine(image, trans, out_size, flags=cv2.INTER_LINEAR)

    if self.transform:
        # The transform is fed a PIL image rather than the raw array.
        input_t = Image.fromarray(input_t)
        input_t = self.transform(input_t)

    # joints_xyv is indexed [joint, instance, 5] with entries
    # (x0, y0, x1, y1, v). Per the original author's note the instance count
    # is 3 and missing instances have v == 0 - TODO confirm.
    num_instances = joints_xyv.shape[1]
    for joint_idx in range(self.num_joints):
        for inst_idx in range(num_instances):
            if joints_xyv[joint_idx][inst_idx][4] > 0:
                # Both point pairs of a labelled entry follow the image warp.
                joints_xyv[joint_idx, inst_idx, 0:2] = affine_transform(
                    joints_xyv[joint_idx, inst_idx, 0:2], trans)
                joints_xyv[joint_idx, inst_idx, 2:4] = affine_transform(
                    joints_xyv[joint_idx, inst_idx, 2:4], trans)

    heatmap_np, vectormap_np = self.generate_target(joints_xyv)
    target_heatmap = torch.from_numpy(heatmap_np)
    target_vectormap = torch.from_numpy(vectormap_np)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'center': center,
        'scale': scale,
        'rotation': rotation,
        'score': score,
        'joints_xyv': joints_xyv,
    }

    return input_t, target_heatmap, target_vectormap, meta
def __getitem__(self, idx):
    """Load sample ``idx``, augment it when training and build its target.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. ``target`` and
        ``target_weight`` are torch tensors created from the arrays returned
        by ``self.generate_target``.

    Raises:
        ValueError: If the image could not be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = ''
    if 'filename' in record:
        filename = record['filename']
    imgnum = ''
    if 'imgnum' in record:
        imgnum = record['imgnum']

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        image = zipreader.imread(image_file, read_flags)
    else:
        image = cv2.imread(image_file, read_flags)

    if image is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']

    center = record['center']
    scale = record['scale']
    score = 1
    if 'score' in record:
        score = record['score']
    rotation = 0

    # ---- data augmentation (training only) ----
    if self.is_train:
        # Scale jitter is always applied; rotation jitter with probability 0.6.
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if random.random() <= 0.6:
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        else:
            rotation = 0

        # Horizontal flip with probability 0.5; joints and the crop centre
        # are mirrored to match.
        if self.flip and random.random() <= 0.5:
            image = image[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, image.shape[1], self.flip_pairs)
            center[0] = image.shape[1] - center[0] - 1

        # A random brightness shift was tried here by the original author
        # and left disabled.

    trans = get_affine_transform(center, scale, rotation, self.image_size)

    # The warp scales and crops the image to self.image_size (256*256 per
    # the original note). When 'TEST_MODE' is present in self.cfg the
    # unwarped image is used instead.
    if 'TEST_MODE' in self.cfg:
        net_input = image
    else:
        net_input = cv2.warpAffine(
            image,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

    if self.transform:
        net_input = self.transform(net_input)

    # Joints flagged in joints_vis follow the same affine map as the image.
    for joint_idx in range(self.num_joints):
        if joints_vis[joint_idx, 0] > 0.0:
            joints[joint_idx, 0:2] = affine_transform(
                joints[joint_idx, 0:2], trans)

    target_np, weight_np = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(target_np)
    target_weight = torch.from_numpy(weight_np)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': center,
        'scale': scale,
        'rotation': rotation,
        'score': score,
    }

    return net_input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx``, augment it when training and build its target.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. ``target`` and
        ``target_weight`` are torch tensors created from the arrays returned
        by ``self.generate_target``.

    Raises:
        ValueError: If the image could not be read.
    """
    # Per the original note a record carries the keys 'image', 'center',
    # 'scale', 'joints_3d', 'joints_3d_vis', 'filename' and 'imgnum'.
    sample = copy.deepcopy(self.db[idx])

    image_file = sample['image']
    filename = ''
    if 'filename' in sample:
        filename = sample['filename']
    imgnum = ''
    if 'imgnum' in sample:
        imgnum = sample['imgnum']

    flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        frame = zipreader.imread(image_file, flags)
    else:
        # e.g. frame.shape == (426, 640, 3) according to the original note
        frame = cv2.imread(image_file, flags)

    if frame is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = sample['joints_3d']
    joints_vis = sample['joints_3d_vis']

    center = sample['center']
    scale = sample['scale']
    score = 1
    if 'score' in sample:
        score = sample['score']
    rotation = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        # Scale augmentation, always applied.
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation augmentation, applied with probability 0.6.
        if random.random() <= 0.6:
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        else:
            rotation = 0

        # Horizontal flip, applied with probability 0.5 when enabled.
        if self.flip and random.random() <= 0.5:
            frame = frame[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, frame.shape[1], self.flip_pairs)
            # Mirror the crop centre as well.
            center[0] = frame.shape[1] - center[0] - 1

    # Affine transform from the (center, scale, rotation) box to image_size.
    trans = get_affine_transform(center, scale, rotation, self.image_size)
    width, height = int(self.image_size[0]), int(self.image_size[1])
    # Warping also resizes the crop to (width, height).
    net_input = cv2.warpAffine(frame, trans, (width, height),
                               flags=cv2.INTER_LINEAR)

    if self.transform:
        # Described as normalisation by the original author.
        net_input = self.transform(net_input)

    for joint_idx in range(self.num_joints):
        if joints_vis[joint_idx, 0] > 0.0:
            joints[joint_idx, 0:2] = affine_transform(
                joints[joint_idx, 0:2], trans)

    # Per the original note: target is the heatmap and target_weight marks
    # entries to ignore - TODO confirm against generate_target.
    heatmap_np, weight_np = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(heatmap_np)
    target_weight = torch.from_numpy(weight_np)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': center,
        'scale': scale,
        'rotation': rotation,
        'score': score,
    }

    return net_input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx``, augment it when training and build its target.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. ``target`` and
        ``target_weight`` are torch tensors created from the arrays returned
        by ``self.generate_target``; ``meta`` records the file, joints and
        the crop parameters that were used.

    Raises:
        ValueError: If the image could not be read.
    """
    # Work on a deep copy so the augmentation below never mutates self.db.
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        # Imported lazily: only needed for zipped datasets.
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Validate the read BEFORE the colour conversion: cvtColor on a None
    # image raises its own error and would hide the message below.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        # The image is read in BGR channel order; convert to RGB on request.
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    # Center and scale as stored in the database record.
    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: only when enough joints are flagged in
        # joints_vis, and then only with probability prob_half_body.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            # Keep the annotated center/scale when no half-body box was
            # produced.
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        # Scale jitter is always applied; rotation jitter with probability 0.6.
        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            # Mirror the image horizontally, then the joints and the center.
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    # The warp crops/scales/rotates the image to self.image_size.
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    # Joints flagged in joints_vis follow the same affine map as the image.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Build one sample dict for record ``idx`` of ``self.db``.

    Unless ``cfg.DATASETS.TASK`` is one of the lifting tasks, the image is
    read, cropped to its first 1000 rows, undistorted (unless the data format
    is ``'undistoredzip'``) and warped to ``self.image_size``. The 2D joints
    and the crop centre are undistorted, the joints are mapped into the crop,
    and a heatmap is produced with ``self.heatmapcreator``.

    When ``cfg.VIS.H36M`` is set, intermediate results are plotted with
    matplotlib. When ``cfg.DATALOADER.BENCHMARK`` is set, timings are printed.

    NOTE(review): the statement nesting below was reconstructed from a copy
    of this function that had lost its indentation - confirm against the
    original file before relying on it.

    Args:
        idx: Index into ``self.db``.

    Returns:
        dict: Always contains 'heatmap', 'visibility', 'KRT', 'points-2d',
        'points-3d', 'camera-points-3d', 'normed-points-3d', 'scale',
        'action', 'img-path', 'K' and 'RT'. 'img' is added for non-lifting
        tasks; 'T', 'R' and 'original_image' when
        ``cfg.VIS.MULTIVIEWH36M`` is set; 'origK', 'crop_center' and
        'crop_scale' for 'rpsm' triangulation outside training.
    """
    if cfg.DATALOADER.BENCHMARK:
        self.timer0.tic()
    db_rec = copy.deepcopy(self.db[idx])
    # Image loading is skipped entirely for the lifting tasks.
    if cfg.DATASETS.TASK not in [
            'lifting', 'lifting_direct', 'lifting_rot'
    ]:
        if cfg.VIS.H36M:
            #seq = (db_rec['subject'], db_rec['action'], db_rec['subaction'])
            #if not seq in self.checked:
            #    print(seq)
            #    print(self.isdamaged(db_rec))
            #    self.checked.append(seq)
            #else:
            #    return np.ones(2)

            print(db_rec['image'])
        # print(db_rec['image'])

        # Pick the archive marker that is inserted into the image path.
        if self.data_format == 'undistoredzip':
            image_dir = 'undistoredimages.zip@'
        elif self.data_format == 'zip':
            image_dir = 'images.zip@'
        else:
            image_dir = ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])
        if 'zip' in self.data_format:
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        # NOTE(review): a failed read (None) is not checked here and would
        # fail on the slice below.
        # crop image from 1002 x 1000 to 1000 x 1000
        data_numpy = data_numpy[:1000]
        assert data_numpy.shape == (1000, 1000, 3), data_numpy.shape

    joints = db_rec['joints_2d'].copy()
    joints_3d = db_rec['joints_3d'].copy()
    joints_3d_camera = db_rec['joints_3d_camera'].copy()

    # Camera-space joints relative to joint 0, divided by the distance
    # between joints 8 and 0.
    joints_3d_camera_normed = joints_3d_camera - joints_3d_camera[0]
    keypoint_scale = np.linalg.norm(joints_3d_camera_normed[8] -
                                    joints_3d_camera_normed[0])
    joints_3d_camera_normed /= keypoint_scale

    if cfg.DATALOADER.BENCHMARK:
        assert joints.shape[0] == cfg.KEYPOINT.NUM_PTS, joints.shape[0]
    #assert db_rec['joints_3d'].shape[0] == cfg.KEYPOINT.NUM_PTS,db_rec['joints_3d'].shape[0]
    center = np.array(db_rec['center']).copy()
    joints_vis = db_rec['joints_vis'].copy()
    scale = np.array(db_rec['scale']).copy()

    #undistort
    camera = db_rec['camera']
    R = camera['R'].copy()
    rotation = 0
    # 3x3 intrinsic matrix assembled from the camera's fx, fy, cx, cy.
    K = np.array([
        [float(camera['fx']), 0, float(camera['cx'])],
        [0, float(camera['fy']), float(camera['cy'])],
        [0, 0, 1.],
    ])
    T = camera['T'].copy()
    world3d = (R.T @ joints_3d_camera.T + T).T
    # 3x4 matrix [R | -R @ T].
    Rt = np.zeros((3, 4))
    Rt[:, :3] = R
    Rt[:, 3] = -R @ T.squeeze()
    # Rt[:, :3] = R.T
    # Rt[:, 3] = T.squeeze()

    if cfg.DATASETS.TASK not in [
            'lifting', 'lifting_direct', 'lifting_rot'
    ]:
        if cfg.VIS.H36M:
            # Sanity check: world points rebuilt from camera space should
            # match the stored joints_3d.
            if not np.isclose(world3d, joints_3d).all():
                print('world3d difference')
                print(world3d)
                print('joints_3d')
                print(joints_3d)
            from IPython import embed
            import matplotlib.pyplot as plt
            import matplotlib.patches as patches
            fig = plt.figure(1)
            ax1 = fig.add_subplot(231)
            ax2 = fig.add_subplot(232)
            ax3 = fig.add_subplot(233)
            ax4 = fig.add_subplot(234)
            ax5 = fig.add_subplot(235)
            ax6 = fig.add_subplot(236)
            ax1.imshow(data_numpy[..., ::-1])
            ax1.set_title('raw')

    #0.058 s
    # Distortion coefficients ordered (k1, k2, p1, p2, k3).
    distCoeffs = np.array([
        float(i) for i in [
            camera['k'][0], camera['k'][1], camera['p'][0], camera['p'][1],
            camera['k'][2]
        ]
    ])

    if cfg.DATASETS.TASK not in [
            'lifting', 'lifting_direct', 'lifting_rot'
    ]:
        # Images from the 'undistoredzip' format are not undistorted again.
        if self.data_format != 'undistoredzip':
            data_numpy = cv2.undistort(data_numpy, K, distCoeffs)

    #0.30 s
    if cfg.DATALOADER.BENCHMARK:
        print('timer0', self.timer0.toc())
    if cfg.DATALOADER.BENCHMARK:
        self.timer.tic()

    # NOTE(review): this plotting block uses ax1, which is only created for
    # non-lifting tasks above.
    if cfg.VIS.H36M:
        ax1.scatter(joints[:, 0], joints[:, 1], color='green')
        imagePoints, _ = cv2.projectPoints(joints_3d[:, None, :], (0, 0, 0),
                                           (0, 0, 0), K, distCoeffs)
        imagePoints = imagePoints.squeeze()
        ax1.scatter(imagePoints[:, 0], imagePoints[:, 1], color='yellow')
        from vision.multiview import project_point_radial
        camera = db_rec['camera']
        f = (K[0, 0] + K[1, 1]) / 2.
        c = K[:2, 2].reshape((2, 1))
        iccv19Points = project_point_radial(joints_3d_camera, f, c,
                                            camera['k'], camera['p'])
        ax1.scatter(iccv19Points[:, 0], iccv19Points[:, 1], color='blue')
        # trans1 = get_affine_transform(center, scale, rotation, self.image_size)
        # box1 = affine_transform(np.array([[0, 0], [999, 999]]), trans1)
        # print(box1)
        # rect1 = patches.Rectangle(box1[0],box1[1][0] - box1[0][0],box1[1][1] - box1[0][1],linewidth=1,edgecolor='r',facecolor='none')
        # ax1.add_patch(rect1)
        # print(joints, joints.shape, center.shape)

    # Undistort the 2D joints and the crop centre; P=K keeps the result in
    # pixel coordinates.
    joints = cv2.undistortPoints(joints[:, None, :], K, distCoeffs,
                                 P=K).squeeze()
    center = cv2.undistortPoints(np.array(center)[None, None, :], K,
                                 distCoeffs, P=K).squeeze()

    #data_numpy = self.compute_distorted_meshgrid(data_numpy ,
    #        float(camera['fx']),
    #        float(camera['fy']),
    #        float(camera['cx']),
    #        float(camera['cy']),
    #        np.array([float(i) for i in camera['k']]),
    #        np.array([float(i) for i in camera['p']]))

    if self.is_train:
        # Scale jitter is always applied; rotation jitter with probability 0.6.
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    if cfg.DATASETS.TASK not in [
            'lifting', 'lifting_direct', 'lifting_rot'
    ]:
        if cfg.VIS.H36M:
            # print(joints.shape, center.shape)
            # print(trans)
            ax2.imshow(data_numpy[..., ::-1])
            projected2d = K.dot(joints_3d_camera.T)
            projected2d[:2] = projected2d[:2] / projected2d[-1]
            ax1.scatter(projected2d[0], projected2d[1], color='red')
            ax2.scatter(joints[:, 0], joints[:, 1], color='green')
            ax2.scatter(projected2d[0], projected2d[1], color='red')
            # box1 = affine_transform(np.array([[0, 0], [999, 999]]), trans)
            # rect1 = patches.Rectangle(box1[0],box1[1][0] - box1[0][0],box1[1][1] - box1[0][1],linewidth=1,edgecolor='r',facecolor='none')
            # ax2.add_patch(rect1)
            ax2.set_title('undistort')

    #input = data_numpy
    trans = get_affine_transform(center, scale, rotation, self.image_size)
    # Crop-adjusted intrinsics: the 2x3 warp extended to 3x3, applied to K.
    cropK = np.concatenate((trans, np.array([[0., 0., 1.]])), 0).dot(K)
    KRT = cropK.dot(Rt)

    if cfg.DATASETS.TASK not in [
            'lifting', 'lifting_direct', 'lifting_rot'
    ]:
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

    # 0.31 s
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            # Joints that land outside the crop are marked as not visible.
            if (np.min(joints[i, :2]) < 0 or
                    joints[i, 0] >= self.image_size[0] or
                    joints[i, 1] >= self.image_size[1]):
                joints_vis[i, :] = 0

    if cfg.DATASETS.TASK not in [
            'lifting', 'lifting_direct', 'lifting_rot'
    ]:
        if cfg.VIS.H36M:
            ax3.imshow(input[..., ::-1])
            # ax3.scatter(joints[:, 0], joints[:, 1])
            # projected2d = KRT.dot(np.concatenate((db_rec['joints_3d'], np.ones( (len(db_rec['joints_3d']), 1))), 1).T)
            ax3.scatter(joints[:, 0], joints[:, 1])
            ax3.set_title('cropped')
            ax4.imshow(input[..., ::-1])
            # ax4.scatter(joints[:, 0], joints[:, 1])
            # projected2d = KRT.dot(np.concatenate((db_rec['joints_3d'], np.ones( (len(db_rec['joints_3d']), 1))), 1).T)
            projected2d = cropK.dot(joints_3d_camera.T)
            projected2d[:2] = projected2d[:2] / projected2d[-1]
            #ax4.scatter(joints[:, 0], joints[:, 1], color='green')
            #ax4.scatter(projected2d[0], projected2d[1], color='red')
            ax4.scatter(joints[-2:, 0], joints[-2:, 1], color='green')
            ax4.scatter(projected2d[0, -2:], projected2d[1, -2:], color='red')
            ax4.set_title('cropped, project 3d to 2d')

        # NOTE(review): placed inside the non-lifting branch because `input`
        # is only assigned there - confirm against the original file.
        if self.transform:
            input = self.transform(input)

    target = self.heatmapcreator.get(joints)
    target = target.reshape((-1, target.shape[1], target.shape[2]))
    # First visibility column, kept as a column vector.
    target_weight = joints_vis[:, 0, None]
    ## inaccurate heatmap
    #target, target_weight = self.generate_target(joints, joints_vis)
    # target = torch.from_numpy(target).float()
    # target_weight = torch.from_numpy(target_weight)

    if cfg.VIS.H36M:
        #ax5.imshow(target.max(0)[0])
        #ax5.scatter(coord2pix(joints[:, 0], 4), coord2pix(joints[:, 1], 4), color='green')
        from modeling.backbones.basic_batch import find_tensor_peak_batch
        # pred_joints, _ = find_tensor_peak_batch(target, self.sigma, cfg.BACKBONE.DOWNSAMPLE)
        # ax5.scatter(coord2pix(pred_joints[:, 0], 4), coord2pix(pred_joints[:, 1], 4), color='blue')
        # ax6.scatter(coord2pix(pred_joints[:, 0], 4), coord2pix(pred_joints[:, 1], 4), color='blue')

        heatmap_by_creator = self.heatmapcreator.get(joints)
        heatmap_by_creator = heatmap_by_creator.reshape(
            (-1, heatmap_by_creator.shape[1], heatmap_by_creator.shape[2]))
        ax6.imshow(heatmap_by_creator.max(0))
        ax6.scatter(coord2pix(joints[:, 0], 4), coord2pix(joints[:, 1], 4),
                    color='green')
        # pred_joints, _ = find_tensor_peak_batch(torch.from_numpy(heatmap_by_creator).float(), self.sigma, cfg.BACKBONE.DOWNSAMPLE)
        # print('creator found', pred_joints)
        # ax5.scatter(coord2pix(pred_joints[:, 0], 4), coord2pix(pred_joints[:, 1], 4), color='red')
        # ax6.scatter(coord2pix(pred_joints[:, 0], 4), coord2pix(pred_joints[:, 1], 4), color='red')
        plt.show()

    ret = {
        'heatmap': target,
        'visibility': target_weight,
        'KRT': KRT,
        'points-2d': joints,
        'points-3d': world3d.astype(np.double)
        if 'lifting' not in cfg.DATASETS.TASK else world3d,
        'camera-points-3d': joints_3d_camera,
        'normed-points-3d': joints_3d_camera_normed,
        'scale': keypoint_scale,
        'action': torch.tensor([db_rec['action']]),
        'img-path': db_rec['image'],
    }
    if cfg.DATASETS.TASK not in [
            'lifting', 'lifting_direct', 'lifting_rot'
    ]:
        ret['img'] = input
    ret['K'] = cropK
    ret['RT'] = Rt
    if cfg.VIS.MULTIVIEWH36M:
        ret['T'] = T
        ret['R'] = R
        ret['original_image'] = data_numpy
    if cfg.KEYPOINT.TRIANGULATION == 'rpsm' and not self.is_train:
        ret['origK'] = K
        ret['crop_center'] = center
        ret['crop_scale'] = scale

    if cfg.DATALOADER.BENCHMARK:
        print('timer1', self.timer.toc())
    return ret
def __getitem__(self, idx):
    """Load sample ``idx``, augment it when training and build its target.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. ``target`` and
        ``target_weight`` are torch tensors created from the arrays returned
        by ``self.generate_target``.

    Raises:
        ValueError: If the image could not be read.
    """
    entry = copy.deepcopy(self.db[idx])

    image_file = entry["image"]
    filename = ""
    if "filename" in entry:
        filename = entry["filename"]
    imgnum = ""
    if "imgnum" in entry:
        imgnum = entry["imgnum"]

    imread_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == "zip":
        from utils import zipreader

        pixels = zipreader.imread(image_file, imread_flags)
    else:
        pixels = cv2.imread(image_file, imread_flags)

    if pixels is None:
        logger.error("=> fail to read {}".format(image_file))
        raise ValueError("Fail to read {}".format(image_file))

    joints = entry["joints_3d"]
    joints_vis = entry["joints_3d_vis"]

    center = entry["center"]
    scale = entry["scale"]
    score = 1
    if "score" in entry:
        score = entry["score"]
    rotation = 0

    if self.is_train:
        # Scale jitter is always applied; rotation jitter with probability 0.6.
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if random.random() <= 0.6:
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        else:
            rotation = 0

        # Horizontal flip with probability 0.5; joints and the crop centre
        # are mirrored to match.
        if self.flip and random.random() <= 0.5:
            pixels = pixels[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, pixels.shape[1], self.flip_pairs
            )
            center[0] = pixels.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    net_input = cv2.warpAffine(
        pixels, trans, out_size, flags=cv2.INTER_LINEAR
    )

    if self.transform:
        net_input = self.transform(net_input)

    # Joints flagged in joints_vis follow the same affine map as the image.
    for joint_idx in range(self.num_joints):
        if joints_vis[joint_idx, 0] > 0.0:
            joints[joint_idx, 0:2] = affine_transform(
                joints[joint_idx, 0:2], trans
            )

    target_np, weight_np = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(target_np)
    target_weight = torch.from_numpy(weight_np)

    meta = {
        "image": image_file,
        "filename": filename,
        "imgnum": imgnum,
        "joints": joints,
        "joints_vis": joints_vis,
        "center": center,
        "scale": scale,
        "rotation": rotation,
        "score": score,
    }

    return net_input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx``, augment it when training and build its target.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. ``target`` and
        ``target_weight`` are torch tensors created from the arrays returned
        by ``self.generate_target`` (heatmap of shape [17, 64, 48] and weight
        of shape [17, 1] according to the original author's note - TODO
        confirm); ``meta`` records the file, joints and crop parameters.

    Raises:
        ValueError: If the image could not be read.
    """
    # Fetch the sample record for idx; deep-copied so augmentation never
    # mutates self.db.
    db_rec = copy.deepcopy(self.db[idx])

    # Image path and optional bookkeeping fields.
    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    # Zipped datasets are read through zipreader, otherwise straight from
    # disk.
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Validate the read BEFORE the colour conversion: cvtColor on a None
    # image raises its own error and would hide the message below.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # Convert from BGR to RGB when requested.
    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    # Joint coordinates and their visibility flags.
    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    # Center and scale stored with the sample.
    c = db_rec['center']
    s = db_rec['scale']
    # Score defaults to 1 when the record does not provide one.
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: only when more joints than
        # num_joints_half_body are flagged, and then only with probability
        # prob_half_body.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            # Replace center/scale only when a half-body box was produced.
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor

        # Scale multiplier is clipped to [1 - sf, 1 + sf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        # Rotation is clipped to [-2 * rf, 2 * rf] and applied with
        # probability 0.6.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        # Horizontal flip with probability 0.5; joints and the center are
        # mirrored to match.
        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    # Affine matrix for the (center, scale, rotation) crop.
    trans = get_affine_transform(c, s, r, self.image_size)

    # Warp the image with that matrix to cut out the instance crop.
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    # Optional transform (normalisation, layout change, ... per the original
    # note).
    if self.transform:
        input = self.transform(input)

    # Joints flagged in joints_vis follow the same affine map as the image.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Ground-truth target and its per-joint weight.
    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return one sample together with a rendered one-hot joint heatmap.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta, onehot_heatmap)``.
        ``onehot_heatmap`` is whatever ``self.render_onehot_heatmap``
        produces for the transformed joints.

    Raises:
        ValueError: If the image cannot be read.
    """
    rec = copy.deepcopy(self.db[idx])

    img_path = rec['image']
    filename = rec.get('filename', '')
    imgnum = rec.get('imgnum', '')

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format != 'zip':
        img = cv2.imread(img_path, read_flags)
    else:
        from utils import zipreader
        img = zipreader.imread(img_path, read_flags)

    if img is None:
        logger.error('=> fail to read {}'.format(img_path))
        raise ValueError('Fail to read {}'.format(img_path))

    kpts = rec['joints_3d']
    kpts_vis = rec['joints_3d_vis']
    center = rec['center']
    scale = rec['scale']
    score = rec.get('score', 1)
    rot = 0

    if self.is_train:
        sf, rf = self.scale_factor, self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # The gate is drawn first; the angle is only sampled when it passes.
        if random.random() <= 0.6:
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if self.flip and random.random() <= 0.5:
            img = img[:, ::-1, :]
            img_width = img.shape[1]
            kpts, kpts_vis = fliplr_joints(
                kpts, kpts_vis, img_width, self.flip_pairs)
            center[0] = img_width - center[0] - 1

    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    trans = get_affine_transform(center, scale, rot, self.image_size)
    net_input = cv2.warpAffine(img, trans, out_size, flags=cv2.INTER_LINEAR)

    if self.transform:
        net_input = self.transform(net_input)

    # Only joints flagged visible are mapped into the crop.
    visible = [j for j in range(self.num_joints) if kpts_vis[j, 0] > 0.0]
    for j in visible:
        kpts[j, 0:2] = affine_transform(kpts[j, 0:2], trans)

    heatmaps, weights = self.generate_target(kpts, kpts_vis)
    heatmaps = torch.from_numpy(heatmaps)
    weights = torch.from_numpy(weights)

    meta = {
        'image': img_path,
        'filename': filename,
        'imgnum': imgnum,
        'joints': kpts,
        'joints_vis': kpts_vis,
        'center': center,
        'scale': scale,
        'rotation': rot,
        'score': score
    }

    onehot_heatmap = self.render_onehot_heatmap(kpts, net_input.shape[1])

    return net_input, heatmaps, weights, meta, onehot_heatmap
# Undistort every image listed in the annotation pickle using the camera
# intrinsics stored with each record.
src = os.path.expanduser(args.src)
dst = os.path.expanduser(args.dst)
anno_path = os.path.expanduser(args.anno)
# NOTE(review): pickle.load executes arbitrary code from the file -- only
# point this at annotation files you trust.
with open(anno_path, 'rb') as f:
    data = pickle.load(f)

for db_rec in tqdm(data):
    path = db_rec['image']
    image_dir = 'images.zip@'
    image_file = os.path.join(
        src, db_rec['source'], image_dir, 'images', db_rec['image'])
    output_path = os.path.join(dst, path)
    # Skip anything that already exists at the destination.
    if os.path.exists(output_path):
        continue
    output_dir = os.path.dirname(output_path)
    os.makedirs(output_dir, exist_ok=True)

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    data_numpy = zipreader.imread(image_file, read_flags)

    camera = db_rec['camera']
    fx, cx = float(camera['fx']), float(camera['cx'])
    fy, cy = float(camera['fy']), float(camera['cy'])
    K = np.array([
        [fx, 0, cx],
        [0, fy, cy],
        [0, 0, 1.],
    ])
    # Coefficient order passed to cv2.undistort: k[0], k[1], p[0], p[1], k[2].
    radial = camera['k']
    tangential = camera['p']
    distCoeffs = np.array(list(map(
        float,
        (radial[0], radial[1], tangential[0], tangential[1], radial[2]))))

    data_numpy = cv2.undistort(data_numpy, K, distCoeffs)
    # NOTE(review): both writes below are commented out in the original, so
    # the undistorted image is computed but never saved to output_path.
    #cv2.imwrite(output_path, data_numpy, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    #cv2.imwrite(output_path, data_numpy)
def __getitem__(self, idx):
    """Return one sample, optionally with a neighbouring "supporting" frame.

    When warping is enabled for the current mode (``use_warping_train``
    while training, ``use_warping_test`` otherwise) a second frame from the
    same sequence is loaded and put through the same flip, affine warp and
    transform as the main frame.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, input_sup, target, target_weight, meta)`` when
        warping is enabled, else ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the main or the supporting image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    # One switch gates every supporting-frame step below.
    use_warping = ((self.is_train and self.use_warping_train)
                   or (not self.is_train and self.use_warping_test))

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = self.read_image(image_file)

    # Validate before any pixel processing so a failed read reports the
    # file name instead of surfacing as an OpenCV error in cvtColor.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if use_warping:
        # The frame number is taken from the file name, e.g. ".../000012.jpg".
        prev_nm = image_file.split('/')[-1]
        ref_idx = int(prev_nm.replace('.jpg', ''))

        if self.timestep_delta_rand:
            T = self.timestep_delta_range
            # Uniform integer offset in [-T, T].
            delta = -T + np.random.randint(T * 2 + 1)
        else:
            delta = self.timestep_delta

        sup_idx = ref_idx + delta

        if 'nframes' in db_rec:
            nframes = db_rec['nframes']
            # Clipped to [0, nframes - 1] for posetrack18, [1, nframes]
            # otherwise.
            if not self.is_posetrack18:
                sup_idx = np.clip(sup_idx, 1, nframes)
            else:
                sup_idx = np.clip(sup_idx, 0, nframes - 1)

        # File names are zero-padded to 6 digits for posetrack18, else 8.
        if not self.is_posetrack18:
            new_sup_image_file = image_file.replace(
                prev_nm, str(sup_idx).zfill(8) + '.jpg')
        else:
            new_sup_image_file = image_file.replace(
                prev_nm, str(sup_idx).zfill(6) + '.jpg')

        # Fall back to the main frame when the neighbour does not exist.
        if os.path.exists(new_sup_image_file):
            sup_image_file = new_sup_image_file
        else:
            sup_image_file = image_file

        data_numpy_sup = self.read_image(sup_image_file)
        if data_numpy_sup is None:
            logger.error('=> SUP: fail to read {}'.format(sup_image_file))
            raise ValueError(
                'SUP: Fail to read {}'.format(sup_image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        if use_warping:
            data_numpy_sup = cv2.cvtColor(data_numpy_sup, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            # The supporting frame must be mirrored with the main frame.
            if use_warping:
                data_numpy_sup = data_numpy_sup[:, ::-1, :]

            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = cv2.warpAffine(
        data_numpy, trans, out_size, flags=cv2.INTER_LINEAR)

    # The same affine matrix is applied to the supporting frame.
    if use_warping:
        input_sup = cv2.warpAffine(
            data_numpy_sup, trans, out_size, flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)
        if use_warping:
            input_sup = self.transform(input_sup)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    if use_warping:
        meta = {
            'image': image_file,
            'sup_image': sup_image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }
        return input, input_sup, target, target_weight, meta

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }
    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return one sample from the mixed-source database.

    Random scale, rotation and flip augmentation is applied only while
    training and only to records whose ``source`` is ``'mpii'``.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    # In zip mode the images live inside "<source>/images.zip".
    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # A failed read returns None; fail here with the file name rather than
    # later inside the flip or warpAffine.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
    joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
    assert len(joints) == self.num_joints
    assert len(joints_vis) == self.num_joints

    # Crop and scale according to the ground truth.
    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train and db_rec['source'] == 'mpii':
        sf = self.mpii_scale_factor
        rf = self.mpii_rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.mpii_flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1],
                self.mpii_flip_pairs)
            center[0] = data_numpy.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    # All visible joints are transformed in a single call.
    visible_joints = joints_vis[:, 0] > 0
    if np.any(visible_joints):
        joints[visible_joints, :2] = affine_transform(
            joints[visible_joints, :2], trans)
    # Joints that land outside the crop keep their visibility flag: the
    # masking step that once followed is disabled in the original code.

    target, target_weight = self.generate_target(
        joints, joints_vis, db_rec['source'])

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source']
    }
    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return one sample; for the ``'ssm'`` task also add heatmap-grid joints.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. When
        ``cf.args.task == 'ssm'`` the dict ``meta`` additionally carries
        ``'joints_h'``: the joints divided by the image/heatmap stride,
        offset by 0.5 and cast to ``int32``.

    Raises:
        ValueError: If the image cannot be read.
    """
    rec = copy.deepcopy(self.db[idx])

    img_path = rec['image']
    filename = rec.get('filename', '')
    imgnum = rec.get('imgnum', '')

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format != 'zip':
        img = cv2.imread(img_path, read_flags)
    else:
        from utils import zipreader
        img = zipreader.imread(img_path, read_flags)

    if img is None:
        logger.error('=> fail to read {}'.format(img_path))
        raise ValueError('Fail to read {}'.format(img_path))

    kpts = rec['joints_3d']
    kpts_vis = rec['joints_3d_vis']
    center = rec['center']
    scale = rec['scale']
    score = rec.get('score', 1)
    rot = 0

    if self.is_train:
        sf, rf = self.scale_factor, self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # The gate is drawn first; the angle is only sampled when it passes.
        if random.random() <= 0.6:
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if self.flip and random.random() <= 0.5:
            img = img[:, ::-1, :]
            img_width = img.shape[1]
            kpts, kpts_vis = fliplr_joints(
                kpts, kpts_vis, img_width, self.flip_pairs)
            center[0] = img_width - center[0] - 1

    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    trans = get_affine_transform(center, scale, rot, self.image_size)
    net_input = cv2.warpAffine(img, trans, out_size, flags=cv2.INTER_LINEAR)

    if self.transform:
        net_input = self.transform(net_input)

    visible = [j for j in range(self.num_joints) if kpts_vis[j, 0] > 0.0]
    for j in visible:
        kpts[j, 0:2] = affine_transform(kpts[j, 0:2], trans)

    heatmaps, weights = self.generate_target(kpts, kpts_vis)
    heatmaps = torch.from_numpy(heatmaps)
    weights = torch.from_numpy(weights)

    meta = {
        'image': img_path,
        'filename': filename,
        'imgnum': imgnum,
        'joints': kpts,
        'joints_vis': kpts_vis,
        'center': center,
        'scale': scale,
        'rotation': rot,
        'score': score
    }

    from boxx import cf
    if cf.args.task == 'ssm':
        stride = self.image_size / self.heatmap_size
        grid_joints = copy.deepcopy(kpts)
        # TODO: reduce the quantisation loss of this rounding step.
        for axis in (0, 1):
            grid_joints[:, axis] = (
                grid_joints[:, axis] / stride[axis] + 0.5)
        meta['joints_h'] = grid_joints.astype(np.int32)

    return net_input, heatmaps, weights, meta
def __getitem__(self, idx, source='h36m', **kwargs):
    """Return one sample plus the camera/3D ground truth for ``source``.

    Args:
        idx: Index into ``self.db``.
        source: Dataset the record belongs to. One of ``'totalcapture'``,
            ``'h36m'``, ``'panoptic'`` or ``'unrealcv'``; it selects how
            ``meta['joints_gt']`` is obtained.
        **kwargs: Accepted for call compatibility; unused here.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. ``meta`` includes
        ``'aug_trans'``, a 3x3 matrix combining the crop affine with the
        image-to-heatmap scaling.

    Raises:
        ValueError: If the image cannot be read.
        AssertionError: If ``source`` is not a supported dataset name.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # A failed read returns None; report the file instead of crashing later
    # inside warpAffine.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_2d'].copy()
    joints_vis = db_rec['joints_vis'].copy()

    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    # NOTE: ``trans`` maps the full image to the cropped image, not the
    # full image to the heatmap.
    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            # Joints that leave the crop are marked invisible.
            if (np.min(joints[i, :2]) < 0 or
                    joints[i, 0] >= self.image_size[0] or
                    joints[i, 1] >= self.image_size[1]):
                joints_vis[i, :] = 0

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    # 3x3 transform: the augmentation affine (full image -> cropped image)
    # followed by the cropped image -> heatmap scaling.
    aug_trans = np.eye(3, 3)
    aug_trans[0:2] = trans  # full img -> cropped img
    hm_scale = self.heatmap_size / self.image_size
    scale_trans = np.eye(3, 3)  # cropped img -> heatmap
    # NOTE(review): the x entry uses hm_scale[1] and the y entry uses
    # hm_scale[0]. That is only harmless if both ratios are equal --
    # confirm the intended index order for non-square sizes.
    scale_trans[0, 0] = hm_scale[1]
    scale_trans[1, 1] = hm_scale[0]
    aug_trans = scale_trans @ aug_trans

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source'],
        'heatmap_size': self.heatmap_size,
        'aug_trans': aug_trans,
    }

    if source == 'totalcapture':
        meta['joints_gt'] = db_rec['joints_gt']
        meta['camera'] = db_rec['camera']
    elif source == 'h36m':
        meta['camera'] = db_rec['camera']
        # This branch derives joints_gt from joints_3d and the camera R/T.
        meta['joints_gt'] = cam_utils.camera_to_world_frame(
            db_rec['joints_3d'], db_rec['camera']['R'],
            db_rec['camera']['T'])
    elif source in ('panoptic', 'unrealcv'):
        meta['camera'] = db_rec['camera']
        meta['joints_gt'] = db_rec['joints_gt']
    else:
        # Explicit raise: an ``assert`` would be stripped under
        # ``python -O`` and let an unknown source through silently.
        raise AssertionError(
            'No such dataset definition in JointDataset')

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return one sample, optionally with four neighbouring frames.

    When warping is enabled for the current mode (``use_warping_train``
    while training, ``use_warping_test`` otherwise) the frames at offsets
    -1, -2, +1 and +2 from the main frame are loaded and put through the
    same flip, affine warp and transform as the main frame.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, input_prev1, input_prev2, input_next1,
        input_next2, target, target_weight, meta)`` when warping is
        enabled, else ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the main image or any supporting image cannot be
            read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    # One switch gates every supporting-frame step below.
    use_warping = ((self.is_train and self.use_warping_train)
                   or (not self.is_train and self.use_warping_test))

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = self.read_image(image_file)

    # Validate before any pixel processing so a failed read reports the
    # file name instead of surfacing as an OpenCV error in cvtColor.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # Supporting frames in output order: prev1, prev2, next1, next2.
    # Both lists stay empty when warping is disabled.
    sup_files = []
    sup_frames = []
    if use_warping:
        # The frame number is taken from the file name, e.g. ".../000012.jpg".
        prev_nm = image_file.split('/')[-1]
        ref_idx = int(prev_nm.replace('.jpg', ''))
        # File names are zero-padded to 6 digits for posetrack18, else 8.
        z = 6 if self.is_posetrack18 else 8

        for label, delta in (('PREV', -1), ('PREV', -2),
                             ('NEXT', 1), ('NEXT', 2)):
            sup_idx = ref_idx + delta
            if 'nframes' in db_rec:
                nframes = db_rec['nframes']
                # Clipped to [0, nframes - 1] for posetrack18, [1, nframes]
                # otherwise.
                if not self.is_posetrack18:
                    sup_idx = np.clip(sup_idx, 1, nframes)
                else:
                    sup_idx = np.clip(sup_idx, 0, nframes - 1)

            candidate = image_file.replace(
                prev_nm, str(sup_idx).zfill(z) + '.jpg')
            # Fall back to the main frame when the neighbour is missing.
            sup_file = candidate if os.path.exists(candidate) else image_file

            frame = self.read_image(sup_file)
            if frame is None:
                logger.error(
                    '=> {} SUP: fail to read {}'.format(label, sup_file))
                raise ValueError(
                    '{} SUP: Fail to read {}'.format(label, sup_file))

            sup_files.append(sup_file)
            sup_frames.append(frame)

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        sup_frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                      for frame in sup_frames]

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            # Supporting frames must be mirrored with the main frame.
            sup_frames = [frame[:, ::-1, :] for frame in sup_frames]

            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = cv2.warpAffine(
        data_numpy, trans, out_size, flags=cv2.INTER_LINEAR)
    # The same affine matrix is applied to every supporting frame.
    sup_inputs = [
        cv2.warpAffine(frame, trans, out_size, flags=cv2.INTER_LINEAR)
        for frame in sup_frames
    ]

    if self.transform:
        input = self.transform(input)
        sup_inputs = [self.transform(frame) for frame in sup_inputs]

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    if use_warping:
        meta = {
            'image': image_file,
            # Only the first previous frame is recorded in the metadata.
            'sup_image': sup_files[0],
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }
        input_prev1, input_prev2, input_next1, input_next2 = sup_inputs
        return (input, input_prev1, input_prev2, input_next1, input_next2,
                target, target_weight, meta)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }
    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return one sample with its image id and bounding box in ``meta``.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. ``meta`` carries
        ``'image_id'`` (0 when the record has none) and ``'im_bbox'``
        (``[0, 0, 0, 0]`` when the record has none).

    Raises:
        ValueError: If the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    im_id = db_rec['image_id'] if 'image_id' in db_rec else 0
    im_bbox = np.array(db_rec['bbox']) if 'bbox' in db_rec else np.array(
        [0, 0, 0, 0])
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # A failed read returns None; fail here with the file name rather than
    # later inside the flip or warpAffine.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    # Move every visible joint into the cropped image's coordinates.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'image_id': im_id,
        'im_bbox': im_bbox,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return one multi-person sample with 2D heatmaps and 3D targets.

    The whole image is warped to ``self.image_size`` (centred, no
    rotation). Per-person arrays are zero-padded to
    ``self.maximum_person`` entries so every sample has the same shape.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, target_heatmap, target_weight, target_3d, meta,
        input_heatmap)``. If the image cannot be read, a tuple of six
        ``None`` values is returned instead of raising.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Deliberate best-effort: unreadable images produce an all-None sample
    # for the caller to filter out, rather than an exception.
    if data_numpy is None:
        return None, None, None, None, None, None

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_2d']
    joints_vis = db_rec['joints_2d_vis']
    joints_3d = db_rec['joints_3d']
    joints_3d_vis = db_rec['joints_3d_vis']

    nposes = len(joints)
    assert nposes <= self.maximum_person, 'too many persons'

    height, width, _ = data_numpy.shape
    c = np.array([width / 2.0, height / 2.0])
    s = get_scale((width, height), self.image_size)
    r = 0

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for n in range(nposes):
        for i in range(len(joints[0])):
            if joints_vis[n][i, 0] > 0.0:
                joints[n][i, 0:2] = affine_transform(
                    joints[n][i, 0:2], trans)
                # Joints that leave the warped image are marked invisible.
                if (np.min(joints[n][i, :2]) < 0 or
                        joints[n][i, 0] >= self.image_size[0] or
                        joints[n][i, 1] >= self.image_size[1]):
                    joints_vis[n][i, :] = 0

    # ``is not None`` rather than ``!= None``: the latter compares
    # element-wise when the value is a numpy array, which makes the
    # condition ambiguous.
    if 'pred_pose2d' in db_rec and db_rec['pred_pose2d'] is not None:
        # For convenience, predicted poses (and their values on the
        # original heatmaps) are used to generate the 2D heatmaps for the
        # Campus and Shelf datasets. A 2D backbone trained on COCO could
        # generate the heatmaps directly instead.
        pred_pose2d = db_rec['pred_pose2d']
        for n in range(len(pred_pose2d)):
            for i in range(len(pred_pose2d[n])):
                pred_pose2d[n][i, 0:2] = affine_transform(
                    pred_pose2d[n][i, 0:2], trans)

        input_heatmap = self.generate_input_heatmap(pred_pose2d)
        input_heatmap = torch.from_numpy(input_heatmap)
    else:
        input_heatmap = torch.zeros(
            self.cfg.NETWORK.NUM_JOINTS,
            self.heatmap_size[1], self.heatmap_size[0])

    target_heatmap, target_weight = self.generate_target_heatmap(
        joints, joints_vis)
    target_heatmap = torch.from_numpy(target_heatmap)
    target_weight = torch.from_numpy(target_weight)

    # Pad joints and joints_vis to a fixed number of persons.
    joints_u = np.zeros((self.maximum_person, self.num_joints, 2))
    joints_vis_u = np.zeros((self.maximum_person, self.num_joints, 2))
    for i in range(nposes):
        joints_u[i] = joints[i]
        joints_vis_u[i] = joints_vis[i]

    joints_3d_u = np.zeros((self.maximum_person, self.num_joints, 3))
    joints_3d_vis_u = np.zeros((self.maximum_person, self.num_joints, 3))
    for i in range(nposes):
        joints_3d_u[i] = joints_3d[i][:, 0:3]
        joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3]

    target_3d = self.generate_3d_target(joints_3d)
    target_3d = torch.from_numpy(target_3d)

    # The root is one joint, or the mean of several joints.
    # NOTE(review): any other ``root_id`` type leaves ``roots_3d`` unbound
    # and fails when ``meta`` is built below.
    if isinstance(self.root_id, int):
        roots_3d = joints_3d_u[:, self.root_id]
    elif isinstance(self.root_id, list):
        roots_3d = np.mean(
            [joints_3d_u[:, j] for j in self.root_id], axis=0)

    meta = {
        'image': image_file,
        'num_person': nposes,
        'joints_3d': joints_3d_u,
        'joints_3d_vis': joints_3d_vis_u,
        'roots_3d': roots_3d,
        'joints': joints_u,
        'joints_vis': joints_vis_u,
        'center': c,
        'scale': s,
        'rotation': r,
        'camera': db_rec['camera']
    }

    return input, target_heatmap, target_weight, target_3d, meta, \
        input_heatmap
def __getitem__(self, idx):
    """Return one sample with joint targets and per-limb "body" targets.

    In addition to the joint heatmaps, every entry of ``self.skeletons``
    (a pair of joint indices) is encoded as a normalised length and a
    signed angle relative to ``self.axis_y``.

    Args:
        idx: Index into ``self.db``.

    Returns:
        tuple: ``(input, joint_target, joint_target_weight, body_target,
        body_target_weight, body, body_vis, meta)``.

    Raises:
        ValueError: If the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )

    # Validate before any pixel processing so a failed read reports the
    # file name instead of surfacing as an OpenCV error in cvtColor.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    # Separate copy of the joints stored in the record; it is not
    # transformed here and its third column gates each limb below.
    joints_copy = db_rec['joints_3d_copy']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis
            )

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        # Random scale factor.
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Random rotation.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            # Re-derive the centre after mirroring.
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # ``np.float`` was removed from NumPy; ``np.float64`` is the dtype it
    # used to alias.
    body = np.zeros((self.num_body, 3), dtype=np.float64)
    body_vis = np.zeros((self.num_body, 3), dtype=np.float64)

    for idbody, skeleton in enumerate(self.skeletons):
        point_a = joints[skeleton[0]]
        point_b = joints[skeleton[1]]
        # Skip limbs where either endpoint's third column is 0 in the
        # untransformed copy.
        if (joints_copy[skeleton[0]][2] == 0
                or joints_copy[skeleton[1]][2] == 0):
            continue

        # Limb vector with the last component dropped.
        axis_x = (point_b - point_a)[:-1]
        lx = np.sqrt(axis_x.dot(axis_x))
        if lx == 0:
            # Zero-length limb: the angle is undefined.
            continue
        # NOTE(review): assumes self.axis_y has unit length -- confirm.
        ly = 1
        cos_angle = axis_x.dot(self.axis_y) / (lx * ly)
        # Unsigned angle, divided by pi.
        angle = np.arccos(cos_angle) / np.pi
        # The sign is taken from the limb vector's second component.
        if axis_x[1] < 0:
            angle = -angle

        # 332.55 is the length normaliser used by the original code; its
        # origin is not documented here.
        body[idbody] = [lx / 332.55, angle, 1]
        body_vis[idbody] = [1, 1, 0]

    joint_target, joint_target_weight = self.generate_target(
        joints, joints_vis)
    body_target, body_target_weight = self.generate_body_target(
        joints, joints_copy, body_vis)

    joint_target = torch.from_numpy(joint_target)
    joint_target_weight = torch.from_numpy(joint_target_weight)
    body_target = torch.from_numpy(body_target)
    body_target_weight = torch.from_numpy(body_target_weight)
    body = torch.from_numpy(body)
    body_vis = torch.from_numpy(body_vis)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'body': body,
        'body_vis': body_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return (input, joint_target, joint_target_weight, body_target,
            body_target_weight, body, body_vis, meta)
def __getitem__(self, idx):
    """Load sample ``idx`` from the mixed-source database.

    The image path is built from ``self.root``, the record's ``source``
    and, in zip mode, a ``*.zip@`` component. Random scale, rotation and
    flip are applied only during training and never to ``'h36m'``
    records; their parameters come from ``self.aug_param_dict[source]``.

    Args:
        idx: Index into ``self.db``.

    Returns:
        Tuple ``(input, target, target_weight, meta)`` where ``target``
        and ``target_weight`` are torch tensors and ``meta`` is a dict
        describing the crop and the transformed joints.

    Raises:
        ValueError: If the image could not be read.
    """
    db_rec = copy.deepcopy(self.db[idx])
    source = db_rec['source']

    if source == 'h36m' and self.no_distortion:
        image_dir_zip = 'images_nodistortion.zip@'
    else:
        image_dir_zip = 'images.zip@'
    image_dir = image_dir_zip if self.data_format == 'zip' else ''
    # special process for coco dataset: no zip component in its path
    # NOTE(review): in zip mode coco paths still go through zipreader
    # below even though they contain no '.zip@' -- confirm zipreader
    # accepts plain paths.
    if source == 'coco':
        image_dir = ''
    image_file = osp.join(self.root, source, image_dir, 'images',
                          db_rec['image'])

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    # Fail with the offending path instead of an opaque error later in
    # the flip slicing or in warpAffine.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    if source == 'h36m' and self.pseudo_label:
        joints = db_rec['joints_2d_pseudo'].copy()  # [union_joints, 2]
        joints_vis = db_rec['joints_vis_pseudo'].copy()[:, :2]  # [union_joints, 2]
    else:
        joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
        joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
    assert len(joints) == self.num_joints
    assert len(joints_vis) == self.num_joints

    # crop and scale according to ground truth
    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train and source != 'h36m':
        aug = self.aug_param_dict[source]
        sf = aug['scale_factor']
        rf = aug['rotation_factor']
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if aug['flip'] and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            # Mirror the crop centre to match the flipped image.
            center[0] = data_numpy.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        if self.color_jitter:
            input = input[:, :, ::-1]  # BGR -> RGB
            input = self.color_jitter(input)
            # The result is split/merged via the PIL API, so
            # color_jitter is expected to return a PIL image.
            r, g, b = input.split()
            input = Image.merge("RGB", (b, g, r))  # RGB -> BGR
        input = self.transform(input)

    # Map all visible joints into the cropped image in one call.
    visible_joints = joints_vis[:, 0] > 0
    if np.any(visible_joints):
        joints[visible_joints, :2] = affine_transform(
            joints[visible_joints, :2], trans)

    target, target_weight = self.generate_target(joints, joints_vis, source)
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': source,
        # Only h36m records carry a subject id; -1 marks "not available".
        'subject': db_rec['subject'] if source == 'h36m' else -1
    }
    return input, target, target_weight, meta
def __getitem__(self, idx, source='h36m', **kwargs):
    """Load sample ``idx`` and attach source-specific 3D metadata.

    Args:
        idx: Index into ``self.db``.
        source: Selects which extra keys are added to ``meta``. Note this
            is the caller-supplied value, not ``db_rec['source']``.
            ``'totalcapture'`` copies ground truth, bone vectors and
            camera from the record; ``'h36m'`` converts ``joints_3d`` to
            the world frame; any other value copies the fields from
            ``self.totalcapture_template_meta``.
        **kwargs: Must contain ``tc_imubone_map`` when ``source`` is
            ``'totalcapture'``; its keys index the record's ``bone_vec``
            and its values become the keys of ``meta['bone_vectors']``.

    Returns:
        Tuple ``(input, target, target_weight, meta)`` where ``target``
        and ``target_weight`` are torch tensors.

    Raises:
        ValueError: If the image could not be read.
        KeyError: If ``source == 'totalcapture'`` and ``tc_imubone_map``
            was not supplied.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    # Fail with the offending path instead of an opaque warpAffine error.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_2d'].copy()
    joints_vis = db_rec['joints_vis'].copy()

    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            # A joint that lands outside the crop is marked invisible.
            if (np.min(joints[i, :2]) < 0 or
                    joints[i, 0] >= self.image_size[0] or
                    joints[i, 1] >= self.image_size[1]):
                joints_vis[i, :] = 0

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source'],
        'heatmap_size': self.heatmap_size
    }

    if source == 'totalcapture':
        imubone_mapping = kwargs['tc_imubone_map']
        meta['joints_gt'] = db_rec['joints_gt']
        meta['bone_vec'] = db_rec['bone_vec']
        meta['camera'] = db_rec['camera']
        bone_vec_tc = meta['bone_vec']
        # Re-key the record's bone vectors through the mapping.
        meta['bone_vectors'] = {
            imubone_mapping[bone_name]: bone_vec_tc[bone_name]
            for bone_name in imubone_mapping
        }
    elif source == 'h36m':
        meta['camera'] = db_rec['camera']
        meta['joints_gt'] = cam_utils.camera_to_world_frame(
            db_rec['joints_3d'], db_rec['camera']['R'],
            db_rec['camera']['T'])
    else:
        # since tc is mixed with mpii, they should have same keys in meta,
        # otherwise will lead to error when collate data in dataloader
        # NOTE(review): self.totalcapture_template_meta is not assigned
        # in this method; it must be populated elsewhere before this
        # branch runs.
        template = self.totalcapture_template_meta
        meta['joints_gt'] = template['joints_gt']
        meta['bone_vec'] = template['bone_vec']
        meta['camera'] = template['camera']
        meta['bone_vectors'] = template['bone_vectors']

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx``, augment it and build the training target.

    The record is deep-copied from ``self.db`` so augmentation never
    mutates the stored annotation. During training the crop may be
    replaced by a half-body crop, randomly scaled/rotated and mirrored.
    Visible joints are mapped through the same affine transform as the
    image.

    Args:
        idx: Index into ``self.db``.

    Returns:
        Tuple ``(input, target, target_weight, meta)`` where ``target``
        and ``target_weight`` are torch tensors and ``meta`` is a dict
        describing the sample and the augmentation that was applied.

    Raises:
        ValueError: If the image could not be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec.get('filename', '')
    imgnum = db_rec.get('imgnum', '')

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    # Validate BEFORE the colour conversion: cvtColor on a failed read
    # would raise an opaque OpenCV error and hide the file name.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is applied to roughly 60% of the samples.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            # Mirror the crop centre to match the flipped image.
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx`` and, when training, its memory bookkeeping.

    Args:
        idx: Index into ``self.db``.

    Returns:
        ``(input, target, target_weight, info)`` outside training, or
        ``(input, target, target_weight, info, meta)`` during training,
        where ``meta`` holds the sample index, an initial difficulty
        score and an initial forget degree.

    Raises:
        ValueError: If the image could not be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = record.get('filename', '')
    imgnum = record.get('imgnum', '')

    imread_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, imread_flags)
    else:
        data_numpy = cv2.imread(image_file, imread_flags)

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']
    c = record['center']
    s = record['scale']
    score = record.get('score', 1)
    r = 0

    if self.is_train:
        sf, rf = self.scale_factor, self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotate roughly 60% of the samples; the rest keep r == 0.
        if random.random() <= 0.6:
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        else:
            r = 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            width = data_numpy.shape[1]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, width, self.flip_pairs)
            # Mirror the crop centre to match the flipped image.
            c[0] = width - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = cv2.warpAffine(data_numpy, trans, out_size,
                           flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if not joints_vis[i, 0] > 0.0:
            continue
        joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    info = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    if not self.is_train:
        return input, target, target_weight, info

    ################################################################
    # @yangsen
    # Initialise the difficulty coefficient and the forget degree.
    noise = record['invisible_keypoints']  # labelled but not visible points
    w = np.array(record['num_keypoints'])
    decay_beta = 2  # controls the weight decay
    # Chosen so the difficulty equals 1 when exactly one point counts
    # (w - noise == 1).
    lamda = np.exp(1 / decay_beta) + 1
    squashed = 1 / (1 + np.exp(-np.sqrt(w - noise) / decay_beta))
    initial_difficult = lamda * (1 - squashed)  # author's intended range: (0, 1]

    meta = {
        'index': idx,
        'memory_difficult': int(100 * initial_difficult),  # intended (0, 100]
        # [0, 100]: 0 represents remembered, 100 represents forgotten
        'forget_degree': 100,
    }
    # yangsen
    ###############################################################
    return input, target, target_weight, info, meta