def __getitem__(self, idx):
    """Load sample ``idx`` and render one heatmap per landmark.

    Row ``idx`` of ``self.landmarks_frame`` supplies, in column order, the
    image path (relative to ``self.data_root``), the scale, the centre x/y
    and the flattened landmark coordinates. In training mode a random
    scale jitter, an optional random rotation and an optional horizontal
    flip are applied before the image is passed through ``crop``.

    Returns:
        tuple: ``(img, target, meta)``. ``img`` is the mean/std-normalised
        image transposed to channel-first order, ``target`` is a tensor
        with one heatmap per landmark, and ``meta`` carries ``index``,
        ``center``, ``scale``, the landmarks in ``pts`` and their
        transformed counterparts in ``tpts``.
    """
    frame = self.landmarks_frame
    image_path = os.path.join(self.data_root, frame.iloc[idx, 0])
    scale = frame.iloc[idx, 1]
    center = torch.Tensor([frame.iloc[idx, 2], frame.iloc[idx, 3]])

    # Columns 4.. hold x0, y0, x1, y1, ...; reshape to (num_landmarks, 2).
    pts = frame.iloc[idx, 4:].values.astype('float').reshape(-1, 2)
    num_landmarks = pts.shape[0]

    scale = scale * 1.25
    img = np.array(Image.open(image_path).convert('RGB'), dtype=np.float32)

    rot = 0
    if self.is_train:
        jitter = random.uniform(1 - self.scale_factor, 1 + self.scale_factor)
        scale = scale * jitter
        # Rotation is only drawn for roughly 60% of the samples.
        if random.random() <= 0.6:
            rot = random.uniform(-self.rot_factor, self.rot_factor)
        # The coin is tossed before self.flip is consulted so the random
        # stream advances identically whether or not flipping is enabled.
        if random.random() <= 0.5 and self.flip:
            img = np.fliplr(img)
            width = img.shape[1]
            pts = fliplr_joints(pts, width=width, dataset='WFLW')
            center[0] = width - center[0]

    img = crop(img, center, scale, self.input_size, rot=rot)

    heatmaps = np.zeros(
        (num_landmarks, self.output_size[0], self.output_size[1]))
    tpts = pts.copy()
    for k in range(num_landmarks):
        if not (tpts[k, 1] > 0):
            continue
        # transform_pixel is fed 1-based coordinates; the result is
        # shifted back by one before the heatmap is drawn.
        tpts[k, 0:2] = transform_pixel(
            tpts[k, 0:2] + 1, center, scale, self.output_size, rot=rot)
        heatmaps[k] = generate_target(
            heatmaps[k], tpts[k] - 1, self.sigma,
            label_type=self.label_type)

    img = img.astype(np.float32)
    img = (img / 255.0 - self.mean) / self.std
    img = img.transpose([2, 0, 1])

    target = torch.Tensor(heatmaps)
    tpts = torch.Tensor(tpts)
    center = torch.Tensor(center)

    meta = {
        'index': idx,
        'center': center,
        'scale': scale,
        'pts': torch.Tensor(pts),
        'tpts': tpts,
    }
    return img, target, meta
def data_augmentation(sample, is_train):
    """Read a sample's image, augment it and build its heatmap target.

    Args:
        sample: mapping with at least ``image``, ``joints_3d``,
            ``joints_3d_vis``, ``center`` and ``scale``; ``filename`` and
            ``score`` are optional.
        is_train: when true, random scale / rotation / horizontal flip are
            applied using the factors in ``cfg``.

    Returns:
        ``(input, target, target_weight)`` when ``is_train`` is true,
        otherwise ``(input, target, target_weight, c, s, score,
        image_file)``. ``input`` is the float32, channel-first image after
        division by 255 and mean/std normalisation.

    Raises:
        ValueError: if the image file cannot be read.
    """
    import copy

    image_file = sample['image']
    filename = sample['filename'] if 'filename' in sample else ''
    # Work on private copies: the flip and the affine loop below write into
    # these objects, and writing through to ``sample`` would make a second
    # call on the same sample start from already-transformed values.
    joints = copy.deepcopy(sample['joints_3d'])
    joints_vis = copy.deepcopy(sample['joints_3d_vis'])
    c = copy.deepcopy(sample['center'])
    s = sample['scale']
    score = sample['score'] if 'score' in sample else 1
    r = 0

    # used for ce: fix both RNGs so the run is reproducible
    if 'ce_mode' in os.environ:
        random.seed(0)
        np.random.seed(0)

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    # cv2.imread signals failure by returning None instead of raising.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    if is_train:
        sf = cfg.SCALE_FACTOR
        rf = cfg.ROT_FACTOR
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if cfg.FLIP and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], cfg.FLIP_PAIRS)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, cfg.IMAGE_SIZE)
    image = cv2.warpAffine(
        data_numpy,
        trans,
        (int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])),
        flags=cv2.INTER_LINEAR)

    # Apply the same affine map to every joint flagged in joints_vis.
    for i in range(cfg.NUM_JOINTS):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Numpy target
    target, target_weight = generate_target(cfg, joints, joints_vis)

    if cfg.DEBUG:
        visualize(cfg, filename, data_numpy, image.copy(), joints, target)

    # Normalization
    image = image.astype('float32').transpose((2, 0, 1)) / 255
    image -= np.array(cfg.MEAN).reshape((3, 1, 1))
    image /= np.array(cfg.STD).reshape((3, 1, 1))

    if is_train:
        return image, target, target_weight
    return image, target, target_weight, c, s, score, image_file
def __getitem__(self, idx):
    """Load sample ``idx`` and render one heatmap per landmark.

    The image comes from ``self.images[idx][0]`` (single-channel images
    are replicated to three channels). The first 58 values of
    ``self.pts[idx]`` are read as all x coordinates followed by all y
    coordinates. Centre and scale are derived from the landmarks'
    bounding box. In training mode random scale, rotation and horizontal
    flip are applied.

    Returns:
        tuple: ``(img, target, meta)``. ``img`` is the mean/std-normalised
        image in channel-first order, ``target`` is a tensor with one
        heatmap per landmark, and ``meta`` carries ``index``, ``center``,
        ``scale``, ``pts`` and ``tpts``.
    """
    img = self.images[idx][0]

    if len(img.shape) == 2:
        img = img.reshape(img.shape[0], img.shape[1], 1)
        img = np.repeat(img, 3, axis=2)

    # Slice/reshape/transpose only produce a view into self.pts. Copy it so
    # that nothing downstream can write through to the stored annotations.
    # NOTE(review): fliplr_joints is defined elsewhere; if it flips in
    # place, the uncopied view would corrupt the dataset -- confirm.
    pts = self.pts[idx][0:58].reshape(2, -1).transpose().copy()

    xmin = np.min(pts[:, 0])
    xmax = np.max(pts[:, 0])
    ymin = np.min(pts[:, 1])
    ymax = np.max(pts[:, 1])

    # Bounding box snapped outward to integer pixels.
    center_w = (math.floor(xmin) + math.ceil(xmax)) / 2.0
    center_h = (math.floor(ymin) + math.ceil(ymax)) / 2.0

    scale = max(math.ceil(xmax) - math.floor(xmin),
                math.ceil(ymax) - math.floor(ymin)) / 200.0
    center = torch.Tensor([center_w, center_h])

    scale *= 1.25
    nparts = pts.shape[0]

    r = 0
    if self.is_train:
        scale = scale * (random.uniform(1 - self.scale_factor,
                                        1 + self.scale_factor))
        r = random.uniform(-self.rot_factor, self.rot_factor) \
            if random.random() <= 0.6 else 0

        if random.random() <= 0.5 and self.flip:
            img = np.fliplr(img)
            pts = fliplr_joints(pts, width=img.shape[1], dataset='COFW')
            center[0] = img.shape[1] - center[0]

    img = crop(img, center, scale, self.input_size, rot=r)

    target = np.zeros((nparts, self.output_size[0], self.output_size[1]))
    tpts = pts.copy()

    for i in range(nparts):
        if tpts[i, 1] > 0:
            # transform_pixel is fed 1-based coordinates; shift back by
            # one before drawing the heatmap.
            tpts[i, 0:2] = transform_pixel(tpts[i, 0:2] + 1, center,
                                           scale, self.output_size, rot=r)
            target[i] = generate_target(target[i], tpts[i] - 1, self.sigma,
                                        label_type=self.label_type)

    img = img.astype(np.float32)
    img = (img / 255 - self.mean) / self.std
    img = img.transpose([2, 0, 1])
    target = torch.Tensor(target)
    tpts = torch.Tensor(tpts)
    center = torch.Tensor(center)

    meta = {'index': idx, 'center': center, 'scale': scale,
            'pts': torch.Tensor(pts), 'tpts': tpts}

    return img, target, meta
def get_final_preds_match(config, outputs, center, scale, flip_pairs=None):
    """Match queries to joints one-to-one and map them to image space.

    For every batch element the per-query class probabilities are used as
    a (negated) cost matrix for ``linear_sum_assignment`` so that each
    joint class is assigned exactly one query.

    Args:
        config: configuration exposing ``TEST.INCLUDE_BG_LOGIT`` and
            ``MODEL.IMAGE_SIZE``.
        outputs: mapping with tensors ``pred_logits`` and ``pred_coords``.
            The last logit channel is dropped from the scores (it is the
            background channel according to ``INCLUDE_BG_LOGIT``).
        center, scale: per-sample values forwarded to ``transform_preds``.
        flip_pairs: when given, coordinates and scores are passed through
            ``fliplr_joints`` before rescaling.

    Returns:
        tuple: ``(matched_coord, matched_score, orig_coord)`` stacked over
        the batch: ``transform_preds`` output, matched probabilities with
        a trailing singleton axis, and the coordinates after scaling by
        ``MODEL.IMAGE_SIZE``.
    """
    logits = outputs['pred_logits'].detach().cpu()
    coords = outputs['pred_coords'].detach().cpu()

    num_joints = logits.shape[-1] - 1

    # Either normalise over all classes and then drop the last channel, or
    # drop it first and normalise over the joint classes only.
    if config.TEST.INCLUDE_BG_LOGIT:
        prob = F.softmax(logits, dim=-1)[..., :-1]
    else:
        prob = F.softmax(logits[..., :-1], dim=-1)

    joint_ind = list(np.arange(num_joints))
    patch_size = config.MODEL.IMAGE_SIZE

    scores, preds, raw_preds = [], [], []
    for b, cost in enumerate(prob):
        # Cost matrix is [num_joints, num_queries]; negate to maximise.
        _, query_ind = linear_sum_assignment(-cost.transpose(0, 1))

        matched = prob[b, query_ind, joint_ind]
        score = matched[..., None].numpy()
        pred_raw = coords[b, query_ind].numpy()

        if flip_pairs is not None:
            pred_raw, score = fliplr_joints(
                pred_raw, score, 1, flip_pairs,
                pixel_align=False, is_vis_logit=True)

        # scale to the whole patch (in place, so the dtype is kept)
        pred_raw *= np.array(patch_size)
        # transform back w.r.t. the entire img
        pred = transform_preds(pred_raw, center[b], scale[b], patch_size)

        raw_preds.append(pred_raw)
        scores.append(score)
        preds.append(pred)

    matched_score = np.stack(scores)
    matched_coord = np.stack(preds)
    return matched_coord, matched_score, np.stack(raw_preds)
def __getitem__(self, idx):
    """Load, augment and encode the sample at position ``idx``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. ``input`` is the
        affine-warped image (after ``self.transform`` when one is set),
        ``target`` / ``target_weight`` are the tensors built from
        ``self.generate_target``, and ``meta`` records the file names,
        transformed joints, centre, scale, rotation and score.

    Raises:
        ValueError: if the image file cannot be read.
    """
    # Deep copy so the in-place edits below never touch self.db.
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        # Imported lazily: only needed when images live in a zip archive.
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # A failed read yields None without raising. Check before any further
    # cv2 call: cvtColor on None raises its own cv2.error, which used to
    # hide this log message and ValueError.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        # cv2 decodes to BGR channel order; convert to RGB.
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    # Centre and scale as annotated in the dataset record.
    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: only when enough joints are flagged in
        # joints_vis, and then only with probability prob_half_body.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                # Replace the annotated centre/scale with the half-body
                # ones.
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        # Gaussian scale jitter, clipped to [1 - sf, 1 + sf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Gaussian rotation clipped to [-2 * rf, 2 * rf], applied to
        # roughly 60% of the samples.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            # Mirror the image horizontally, then mirror the annotations.
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            # The last column index is width - 1, hence the extra -1.
            c[0] = data_numpy.shape[1] - c[0] - 1

    # One affine matrix encodes crop, scale and rotation.
    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    # Move the flagged ground-truth joints with the same affine matrix.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return the augmented image and heatmap target for sample ``idx``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``. ``input`` is the
        affine-warped image (after ``self.transform`` when one is set),
        ``target`` / ``target_weight`` come from ``self.generate_target``
        as tensors, and ``meta`` records the file names, transformed
        joints, centre, scale, rotation and score.

    Raises:
        ValueError: if the image file cannot be read.
    """
    # Fetch the sample record for idx; copy it so self.db stays pristine.
    db_rec = copy.deepcopy(self.db[idx])

    # Image path and optional bookkeeping fields.
    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        # Read the image out of a zip archive.
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        # Otherwise read the pixels straight from disk.
        data_numpy = cv2.imread(image_file, read_flags)

    # A failed read returns None. This check must precede cvtColor, which
    # raises its own cv2.error on None and used to make the branch below
    # unreachable whenever color_rgb was enabled.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # Convert to RGB channel order when requested.
    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    # Keypoint coordinates and their visibility flags.
    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    # Centre and scale stored with the sample.
    c = db_rec['center']
    s = db_rec['scale']

    # Fall back to a score of 1 when the record does not provide one.
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: needs more than num_joints_half_body
        # flagged joints and a random draw below prob_half_body.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            # Re-derive centre and scale from one half of the body.
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor

        # Scale multiplier lies in [1 - sf, 1 + sf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        # Rotation lies in [-2 * rf, 2 * rf]; about 40% of samples keep 0.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        # Random horizontal flip of image, joints and centre.
        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    # Build the affine matrix for this centre / scale / rotation ...
    trans = get_affine_transform(c, s, r, self.image_size)

    # ... and use it to cut the instance out of the full image.
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    # Optional caller-supplied transform on the warped image.
    if self.transform:
        input = self.transform(input)

    # The keypoints go through the same affine transform as the pixels.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Ground-truth heatmaps and their per-joint weights.
    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load, augment and encode sample ``idx`` of a multi-source dataset.

    The image path is assembled from ``self.root``, the record's
    ``source`` and, for zip storage, an archive prefix. Random scale /
    rotation / flip use the per-source entries of ``self.aug_param_dict``
    and are skipped for records whose source is ``'h36m'``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: if the image file cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])
    source = db_rec['source']

    if source == 'h36m' and self.no_distortion:
        image_dir_zip = 'images_nodistortion.zip@'
    else:
        image_dir_zip = 'images.zip@'
    image_dir = image_dir_zip if self.data_format == 'zip' else ''
    # special process for coco dataset
    if source == 'coco':
        image_dir = ''
    image_file = osp.join(self.root, source, image_dir, 'images',
                          db_rec['image'])

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # A failed read returns None without raising; fail here with the file
    # name instead of with an opaque error further down.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    if source == 'h36m' and self.pseudo_label:
        joints = db_rec['joints_2d_pseudo'].copy()  # [union_joints, 2]
        joints_vis = db_rec['joints_vis_pseudo'].copy()[:, :2]
    else:
        joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
        joints_vis = db_rec['joints_vis'].copy()[:, :2]
    assert len(joints) == self.num_joints
    assert len(joints_vis) == self.num_joints

    # crop and scale according to ground truth
    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train and source != 'h36m':
        aug = self.aug_param_dict[source]
        sf = aug['scale_factor']
        rf = aug['rotation_factor']
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if aug['flip'] and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            center[0] = data_numpy.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        if self.color_jitter:
            # The jitter runs on RGB; the rest of the pipeline stays BGR.
            input = input[:, :, ::-1]  # BGR -> RGB
            input = self.color_jitter(input)
            r, g, b = input.split()
            input = Image.merge("RGB", (b, g, r))  # RGB -> BGR
        input = self.transform(input)

    # Transform all flagged joints in a single call.
    visible_joints = joints_vis[:, 0] > 0
    if np.any(visible_joints):
        joints[visible_joints, :2] = affine_transform(
            joints[visible_joints, :2], trans)

    target, target_weight = self.generate_target(joints, joints_vis, source)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': source,
        'subject': db_rec['subject'] if source == 'h36m' else -1
    }
    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx`` with heatmaps for the instance and its neighbours.

    Besides the instance's own joints, the record may list
    ``interference`` joints (other people inside the crop). All of them
    are rendered and merged with an element-wise maximum.

    Returns:
        tuple: ``(input, all_ins_target, all_ins_target_weight,
        ae_targets, meta)``. ``meta['interference_maps']`` holds the
        interference-only heatmaps as a numpy array.

    Raises:
        ValueError: if the image file cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Check before cvtColor: on a failed read (None) cvtColor raises its
    # own cv2.error and this log message / ValueError were never reached.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    if 'interference' in db_rec.keys():
        interference_joints = db_rec['interference']
        interference_joints_vis = db_rec['interference_vis']
    else:
        # Fall back to the instance itself, but as independent copies.
        # Sharing the arrays made the two in-place affine loops below
        # transform the same joints twice.
        interference_joints = [copy.deepcopy(joints)]
        interference_joints_vis = [copy.deepcopy(joints_vis)]

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1
            # Mirror every interfering instance the same way.
            for i in range(len(interference_joints)):
                interference_joints[i], interference_joints_vis[i] = \
                    fliplr_joints(interference_joints[i],
                                  interference_joints_vis[i],
                                  data_numpy.shape[1], self.flip_pairs)

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    # interference joints heatmaps
    inter_target = np.zeros_like(target)
    inter_target_weight = np.zeros_like(target_weight)

    for i in range(len(interference_joints)):
        inter_joints = interference_joints[i]
        inter_joints_vis = interference_joints_vis[i]
        for j in range(self.num_joints):
            if inter_joints_vis[j, 0] > 0.0:
                inter_joints[j, 0:2] = affine_transform(
                    inter_joints[j, 0:2], trans)
        _inter_target, _inter_target_weight = self.generate_target(
            inter_joints, inter_joints_vis)
        # Accumulate over instances with an element-wise maximum.
        inter_target = np.maximum(inter_target, _inter_target)
        inter_target_weight = np.maximum(inter_target_weight,
                                         _inter_target_weight)

    all_ins_target = np.maximum(inter_target, target)
    all_ins_target_weight = np.maximum(inter_target_weight, target_weight)

    # AE labels
    All_joints = [joints] + interference_joints
    ae_targets = self.generate_joints_ae_targets(All_joints)

    # Convert the numpy results to tensors.
    all_ins_target = torch.from_numpy(all_ins_target)
    all_ins_target_weight = torch.from_numpy(all_ins_target_weight)
    ae_targets = torch.from_numpy(ae_targets)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score,
        'interference_maps': inter_target,
    }

    return input, all_ins_target, all_ins_target_weight, ae_targets, meta
def __getitem__(self, idx):
    """Load, augment and encode sample ``idx`` of a multi-source dataset.

    Random scale / rotation / flip are applied only in training mode and
    only to records whose ``source`` is ``'mpii'``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: if the image file cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # A failed read returns None without raising; fail here with the file
    # name instead of with an opaque error further down.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
    joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
    assert len(joints) == self.num_joints
    assert len(joints_vis) == self.num_joints

    # crop and scale according to ground truth
    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train and db_rec['source'] == 'mpii':
        sf = self.mpii_scale_factor
        rf = self.mpii_rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.mpii_flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1],
                self.mpii_flip_pairs)
            center[0] = data_numpy.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    # Transform all flagged joints in a single call.
    visible_joints = joints_vis[:, 0] > 0
    if np.any(visible_joints):
        joints[visible_joints, :2] = affine_transform(
            joints[visible_joints, :2], trans)

    target, target_weight = self.generate_target(joints, joints_vis,
                                                 db_rec['source'])

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source']
    }
    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx`` and, when warping is enabled, a supporting frame.

    Warping is enabled by ``use_warping_train`` in training mode and by
    ``use_warping_test`` otherwise. The supporting frame is a neighbouring
    frame of the same clip; its index is the reference frame's number
    (parsed from the file name) plus a fixed or random offset. It receives
    exactly the same flip and affine warp as the reference frame.

    Returns:
        tuple: ``(input, input_sup, target, target_weight, meta)`` when
        warping is enabled, otherwise ``(input, target, target_weight,
        meta)``.

    Raises:
        ValueError: if the reference or supporting image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    # Evaluated once; the original repeated this predicate at every use.
    use_sup = (self.is_train and self.use_warping_train) or \
        (not self.is_train and self.use_warping_test)

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = self.read_image(image_file)

    ##### supporting frame
    if use_sup:
        T = self.timestep_delta_range
        prev_nm = image_file.split('/')[-1]
        ref_idx = int(prev_nm.replace('.jpg', ''))

        if self.timestep_delta_rand:
            # Uniform integer offset in [-T, T].
            delta = -T + np.random.randint(T * 2 + 1)
        else:
            delta = self.timestep_delta

        sup_idx = ref_idx + delta

        if 'nframes' in db_rec:
            nframes = db_rec['nframes']
            # The non-posetrack18 branch clips to 1..nframes, the
            # posetrack18 branch to 0..nframes-1.
            if not self.is_posetrack18:
                sup_idx = np.clip(sup_idx, 1, nframes)
            else:
                sup_idx = np.clip(sup_idx, 0, nframes - 1)

        # File names are zero-padded: 8 digits, or 6 for posetrack18.
        if not self.is_posetrack18:
            new_sup_image_file = image_file.replace(
                prev_nm, str(sup_idx).zfill(8) + '.jpg')
        else:
            new_sup_image_file = image_file.replace(
                prev_nm, str(sup_idx).zfill(6) + '.jpg')

        # Fall back to the reference frame when the neighbour is missing.
        if os.path.exists(new_sup_image_file):
            sup_image_file = new_sup_image_file
        else:
            sup_image_file = image_file

        data_numpy_sup = self.read_image(sup_image_file)

    # Validate the reads BEFORE colour conversion: cvtColor on None raises
    # its own cv2.error, which used to make these checks unreachable
    # whenever color_rgb was enabled.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if use_sup and data_numpy_sup is None:
        logger.error('=> SUP: fail to read {}'.format(sup_image_file))
        raise ValueError('SUP: Fail to read {}'.format(sup_image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        if use_sup:
            data_numpy_sup = cv2.cvtColor(data_numpy_sup, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            if use_sup:
                data_numpy_sup = data_numpy_sup[:, ::-1, :]

            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = cv2.warpAffine(
        data_numpy, trans, out_size, flags=cv2.INTER_LINEAR)

    ##### supporting image
    if use_sup:
        input_sup = cv2.warpAffine(
            data_numpy_sup, trans, out_size, flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)
        if use_sup:
            input_sup = self.transform(input_sup)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    if use_sup:
        meta = {
            'image': image_file,
            'sup_image': sup_image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }
        return input, input_sup, target, target_weight, meta

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }
    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return image, heatmap target and a one-hot heatmap for ``idx``.

    Returns:
        tuple: ``(input, target, target_weight, meta, onehot_heatmap)``.
        The last element is produced by ``self.render_onehot_heatmap``
        from the transformed joints and ``input.shape[1]``.

    Raises:
        ValueError: if the image file cannot be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = record.get('filename', '')
    imgnum = record.get('imgnum', '')

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']
    center = record['center']
    scale = record['scale']
    score = record.get('score', 1)
    rotation = 0

    if self.is_train:
        sf, rf = self.scale_factor, self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is drawn for about 60% of samples; the rest keep 0.
        if random.random() <= 0.6:
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            width = data_numpy.shape[1]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, width, self.flip_pairs)
            center[0] = width - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    image = cv2.warpAffine(
        data_numpy, trans, out_size, flags=cv2.INTER_LINEAR)

    if self.transform:
        image = self.transform(image)

    # Only joints flagged in joints_vis are moved into the crop frame.
    flagged = [j for j in range(self.num_joints) if joints_vis[j, 0] > 0.0]
    for j in flagged:
        joints[j, 0:2] = affine_transform(joints[j, 0:2], trans)

    heatmaps, weights = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(heatmaps)
    target_weight = torch.from_numpy(weights)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': center,
        'scale': scale,
        'rotation': rotation,
        'score': score,
    }

    onehot_heatmap = self.render_onehot_heatmap(joints, image.shape[1])

    return image, target, target_weight, meta, onehot_heatmap
def __getitem__(self, idx):
    """Load, augment and encode the sample at position ``idx``.

    Two affine matrices are built from the same centre / scale /
    rotation: one targeting ``self.image_size`` (for the image and for
    ``meta["joints"]``) and one targeting ``self.heatmap_size`` (for the
    joints handed to ``self.generate_target``).

    Returns:
        tuple: ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: if the image file cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec["image"]
    filename = db_rec["filename"] if "filename" in db_rec else ""
    imgnum = db_rec["imgnum"] if "imgnum" in db_rec else ""

    if self.data_format == "zip":
        from utils import zipreader

        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Check before cvtColor: on a failed read (None) cvtColor raises its
    # own cv2.error and this log message / ValueError were never reached.
    if data_numpy is None:
        logger.error("=> fail to read {}".format(image_file))
        raise ValueError("Fail to read {}".format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec["joints_3d"]
    joints_vis = db_rec["joints_3d_vis"]

    c = db_rec["center"]
    s = db_rec["scale"]
    score = db_rec["score"] if "score" in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = (np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
             if random.random() <= 0.6 else 0)

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    # Taken after the flip, before either affine map is applied.
    joints_heatmap = joints.copy()
    trans = get_affine_transform(c, s, r, self.image_size)
    trans_heatmap = get_affine_transform(c, s, r, self.heatmap_size)

    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            joints_heatmap[i, 0:2] = affine_transform(
                joints_heatmap[i, 0:2], trans_heatmap)

    target, target_weight = self.generate_target(joints_heatmap, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        "image": image_file,
        "filename": filename,
        "imgnum": imgnum,
        "joints": joints,
        "joints_vis": joints_vis,
        "center": c,
        "scale": s,
        "rotation": r,
        "score": score,
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx`` for a five-landmark dataset.

    Two versions of the sample are produced:

    * a plain 256x256 resize with rescaled landmarks (``joints_256``) and
      their heatmaps (``target_256_64``), stored in ``meta`` only;
    * the training input: the image resized to 250x250, warped with a
      randomly drifted centre (plus random scale / rotation / flip in
      training mode) to ``self.image_size``, then passed through
      ``augmentation`` and ``self.transform``.

    Returns:
        tuple: ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: if the image file cannot be read.
    """
    # Record holds the image path and the landmark coordinates.
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Must come first: imwrite raises its own cv2.error on None, so the
    # check used to be unreachable.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # NOTE(review): this rewrites the same file in the working directory
    # for every sample and looks like a debugging leftover -- consider
    # removing it.
    cv2.imwrite("data_numpy.jpg", data_numpy)
    img_raw = copy.deepcopy(data_numpy)

    joints = db_rec['joints']
    joints_raw = copy.deepcopy(db_rec['joints'])
    joints_vis = db_rec['joints_vis']

    raw_h, raw_w = img_raw.shape[0], img_raw.shape[1]

    # Plain 256x256 resize; only reported through meta.
    img_resize256 = cv2.resize(data_numpy, (256, 256))
    # Integer array for five landmarks: assigned values are truncated.
    joints_256 = np.zeros((5, 2), dtype=int)
    # x scales with the image width, y with the height. The original
    # divided x by the height, which is only right for square images.
    joints_256[:, 0] = joints[:, 0] * 256 / raw_w
    joints_256[:, 1] = joints[:, 1] * 256 / raw_h
    target_256_64, target_weight = self.generate_target(
        joints_256, joints_vis)

    # Bring image and landmarks to the 250x250 working resolution.
    data_numpy = cv2.resize(data_numpy, (250, 250))
    # Same axis fix: the original had width and height swapped here.
    joints[:, 0] = joints[:, 0] * 250 / raw_w
    joints[:, 1] = joints[:, 1] * 250 / raw_h

    # Crop centre: middle of the 250x250 image with a random drift of up
    # to 30 px per axis (applied in every mode, not just training).
    c = np.array([
        125.0 + random.uniform(-30.0, 30.0),
        125.0 + random.uniform(-30.0, 30.0)
    ])
    s = 1.0
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    # Scale / rotation / flip are training-only.
    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        # Scale multiplier clipped to a fixed [0.7, 1.2] range.
        s = s * np.clip(np.random.randn() * sf + 1, 0.7, 1.2)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    # Affine map from the 250x250 image to self.image_size.
    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)
    img_256 = copy.deepcopy(input)

    # Move the landmarks with the same affine map.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # NOTE(review): presumably adds image noise; augmentation is defined
    # elsewhere.
    input = augmentation(input)

    if self.transform:
        input = self.transform(input)
        img_resize256_BN = self.transform(img_resize256)
    else:
        # Was left undefined without a transform, so building meta raised
        # NameError; fall back to the untransformed resize.
        img_resize256_BN = img_resize256

    # Heatmaps for the warped landmarks.
    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'img_raw': img_raw,
        'img_resize256': img_resize256,
        'img_resize256_BN': img_resize256_BN,
        'img_256': img_256,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_raw': joints_raw,
        'joints_256': joints_256,
        'target_256_64': target_256_64,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return the network input, heatmap target, target weight and metadata.

    The record at ``self.db[idx]`` is deep-copied, its image is read, and an
    affine warp onto ``self.image_size`` is applied to the image and to the
    visible joints.  In training mode the scale, rotation and horizontal
    flip are randomised first.

    Raises:
        ValueError: If the image cannot be read.
    """
    # Record keys, e.g.: 'image', 'center', 'scale', 'joints_3d',
    # 'joints_3d_vis', 'filename', 'imgnum'.
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = record.get('filename', '')
    imgnum = record.get('imgnum', '')

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        image = zipreader.imread(image_file, read_flags)
    else:
        image = cv2.imread(image_file, read_flags)  # e.g. shape (426, 640, 3)

    if image is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']
    center = record['center']
    scale = record['scale']
    score = record.get('score', 1)
    rotation = 0

    if self.is_train:
        scale_f = self.scale_factor
        rot_f = self.rotation_factor

        # Scale augmentation, always applied.
        scale = scale * np.clip(np.random.randn() * scale_f + 1,
                                1 - scale_f, 1 + scale_f)

        # Rotation augmentation, applied with probability 0.6.
        if random.random() <= 0.6:
            rotation = np.clip(np.random.randn() * rot_f,
                               -rot_f * 2, rot_f * 2)

        # Horizontal flip, applied with probability 0.5 when enabled.
        if self.flip:
            if random.random() <= 0.5:
                image = image[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, image.shape[1], self.flip_pairs)
                # Mirror the centre as well.
                center[0] = image.shape[1] - center[0] - 1

    # Affine transform onto the network input size.
    trans = get_affine_transform(center, scale, rotation, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    net_input = cv2.warpAffine(image, trans, out_size,
                               flags=cv2.INTER_LINEAR)

    if self.transform:
        net_input = self.transform(net_input)  # normalisation

    for joint_id in range(self.num_joints):
        if joints_vis[joint_id, 0] > 0.0:
            joints[joint_id, 0:2] = affine_transform(
                joints[joint_id, 0:2], trans)

    # target: heatmaps; target_weight: per-joint weight used to ignore joints.
    heatmaps, heatmap_weights = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(heatmaps)
    target_weight = torch.from_numpy(heatmap_weights)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': center,
        'scale': scale,
        'rotation': rotation,
        'score': score
    }

    return net_input, target, target_weight, meta
def __getitem__(self, idx):
    """Return image, per-scale point targets, per-scale offsets and metadata.

    Row ``idx`` of ``self.landmarks_frame`` supplies the image name (column
    0), scale (column 1), centre (columns 2-3) and the landmark coordinates
    (columns 4 onward, reshaped to ``(-1, 2)``).  The image is cropped around
    the centre; in training mode scale, rotation, translation and flip are
    randomised, followed by optional blur and occlusion.  For every entry of
    ``self.output_size`` a one-hot target map and an offset pair are built.

    Args:
        idx: Row index into ``self.landmarks_frame``.

    Returns:
        Tuple ``(img, targets, offsets, meta)`` where ``targets`` and
        ``offsets`` hold one entry per element of ``self.output_size``.
    """
    # One listed name is rewritten to a spelling containing a space
    # (presumably the spelling of the file on disk -- confirm).  The original
    # used ``str.find`` as a boolean, which is truthy for "not found" (-1)
    # and falsy for a match at index 0; a substring test states the effective
    # behaviour explicitly and also covers the match-at-index-0 case.  The
    # frame is only written back when the name actually changes.
    image_name = self.landmarks_frame.iloc[idx, 0]
    if 'image_092_01.jpg' in image_name:
        image_name = image_name.replace('image_092_01.jpg',
                                        'image_092 _01.jpg')
        self.landmarks_frame.iloc[idx, 0] = image_name

    image_path = os.path.join(self.data_root, image_name)
    scale = self.landmarks_frame.iloc[idx, 1]

    center_w = self.landmarks_frame.iloc[idx, 2]
    center_h = self.landmarks_frame.iloc[idx, 3]
    center = torch.Tensor([center_w, center_h])

    pts = self.landmarks_frame.iloc[idx, 4:].values
    pts = pts.astype('float').reshape(-1, 2)

    scale *= 1.3
    nparts = pts.shape[0]
    # Close the file handle deterministically once the pixels are copied.
    with Image.open(image_path) as pil_image:
        img = np.array(pil_image.convert('RGB'), dtype=np.float32)

    r = 0
    t_x, t_y = (1, 1)
    if self.is_train:
        scale = scale * (random.uniform(1 - self.scale_factor,
                                        1 + self.scale_factor))
        r = random.uniform(-self.rot_factor, self.rot_factor) \
            if random.random() <= 0.6 else 0
        t_x = random.uniform(1 - self.trans_factor, 1 + self.trans_factor) \
            if random.random() <= 0.5 else 1
        t_y = random.uniform(1 - self.trans_factor, 1 + self.trans_factor) \
            if random.random() <= 0.5 else 1
        if random.random() <= 0.5 and self.flip:
            img = np.fliplr(img)
            pts = fliplr_joints(pts, width=img.shape[1], dataset='300W')
            center[0] = img.shape[1] - center[0]

    img = crop(img, center, scale, self.input_size,
               translation=(t_x, t_y), rot=r)

    if self.is_train:
        if random.random() <= 0.3 and self.gaussian_blur:
            radius = random.choice([1, 3, 5])
            img = cv2.GaussianBlur(img, (radius, radius), sigmaX=1.0)
        # The probability test is always true; it is kept so the random
        # stream is consumed exactly as before.
        if random.random() <= 1.0 and self.occlusion:
            img = add_occlusion(img, max_size=102)

    targets = []
    offsets = []
    for output_size in self.output_size:
        target = np.zeros((nparts, output_size[0], output_size[1]))
        tpts = pts.copy()

        for i in range(nparts):
            if tpts[i, 1] > 0:
                tpts[i, 0:2] = transform_pixel(tpts[i, 0:2], center,
                                               scale, output_size,
                                               translation=(t_x, t_y),
                                               rot=r)
                x, y = tpts[i].astype(int)
                # Mark the landmark only when it falls inside the map.
                if 0 <= y < output_size[0] and 0 <= x < output_size[1]:
                    target[i, y, x] = 1

        xx_channel, yy_channel = self._generate_offset(
            nparts, tpts, output_size, self.offset_mode, self.offset_dim)
        offset = [torch.Tensor(xx_channel), torch.Tensor(yy_channel)]

        targets.append(torch.Tensor(target))
        offsets.append(offset)

    img = img.astype(np.float32)
    img = (img / 255.0 - self.mean) / self.std
    img = img.transpose([2, 0, 1])
    # tpts here belongs to the last entry of self.output_size.
    tpts = torch.Tensor(tpts)
    center = torch.Tensor(center)

    meta = {
        'index': idx,
        'center': center,
        'scale': scale,
        'pts': torch.Tensor(pts),
        'tpts': tpts
    }

    return img, targets, offsets, meta
def __getitem__(self, idx):
    """Return the network input, heatmap target, target weight and metadata.

    Reads the image of record ``idx``, applies random scale / rotation /
    flip in training mode, and transforms the visible joints with the
    resulting affine matrix.  The image itself is warped to
    ``self.image_size`` unless ``'TEST_MODE'`` is present in ``self.cfg``,
    in which case the unwarped image is used as input.

    Raises:
        ValueError: If the image cannot be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = record.get('filename', '')
    imgnum = record.get('imgnum', '')

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        image = zipreader.imread(image_file, read_flags)
    else:
        image = cv2.imread(image_file, read_flags)

    if image is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']
    center = record['center']
    scale = record['scale']
    score = record.get('score', 1)
    rotation = 0

    # ---- data augmentation (training only) ----
    if self.is_train:
        scale_f = self.scale_factor
        rot_f = self.rotation_factor

        # Scale and rotation augmentation.
        scale = scale * np.clip(np.random.randn() * scale_f + 1,
                                1 - scale_f, 1 + scale_f)
        if random.random() <= 0.6:
            rotation = np.clip(np.random.randn() * rot_f,
                               -rot_f * 2, rot_f * 2)

        # Horizontal flip.
        if self.flip:
            if random.random() <= 0.5:
                image = image[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, image.shape[1], self.flip_pairs)
                center[0] = image.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)

    # The warp scales and crops the image to self.image_size; with
    # 'TEST_MODE' configured the raw image is passed through instead.
    if 'TEST_MODE' in self.cfg:
        net_input = image
    else:
        out_size = (int(self.image_size[0]), int(self.image_size[1]))
        net_input = cv2.warpAffine(image, trans, out_size,
                                   flags=cv2.INTER_LINEAR)

    if self.transform:
        net_input = self.transform(net_input)

    for joint_id in range(self.num_joints):
        if joints_vis[joint_id, 0] > 0.0:
            joints[joint_id, 0:2] = affine_transform(
                joints[joint_id, 0:2], trans)

    heatmaps, heatmap_weights = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(heatmaps)
    target_weight = torch.from_numpy(heatmap_weights)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': center,
        'scale': scale,
        'rotation': rotation,
        'score': score
    }

    return net_input, target, target_weight, meta
def __getitem__(self, index):
    """Return an RGB + depth-line sample with keypoint heatmap targets.

    The RGB image and the depth image are cropped to the annotated bounding
    box and resized to 256x256.  The depth input keeps only column 128 of
    the resized depth image; the full depth image, resized to 64x64, is
    appended to the keypoint heatmaps as an extra target channel.

    Args:
        index: Index into ``self.files``.

    Returns:
        When ``'test'`` is in ``self.list_path``:
        ``(image_rgb, image_depth, size, image_id)``.
        Otherwise: ``(image, target, target_weight, size, image_id, joints,
        joints_vis)``.

    Raises:
        ValueError: If the RGB or depth image cannot be read.
    """
    item = self.files[index]
    image_id = item["name"]
    num = int(image_id.split('_')[1])

    image_rgb = cv2.imread(item["img_rgb"], cv2.IMREAD_COLOR)
    label = cv2.imread(item["img_depth"], cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None; fail with a clear
    # message instead of an attribute error on the next line.
    if image_rgb is None:
        raise ValueError('Fail to read {}'.format(item["img_rgb"]))
    if label is None:
        raise ValueError('Fail to read {}'.format(item["img_depth"]))
    size = image_rgb.shape

    # Annotations come from the train or the validation set depending on
    # the sample name.
    if 'train' in image_id:
        annotation = self.annot_t["annotations"][num]
    else:
        annotation = self.annot_v["annotations"][num]
    bbox = list(map(int, annotation["bbox"]))
    keypoints = annotation["keypoints"]

    # Crop both images to the bounding box (x, y, width, height).
    rows = slice(bbox[1], bbox[1] + bbox[3])
    cols = slice(bbox[0], bbox[0] + bbox[2])
    image_rgb = image_rgb[rows, cols]
    label = label[rows, cols]

    # Resize to the network resolution.
    image_rgb = cv2.resize(image_rgb, (256, 256),
                           interpolation=cv2.INTER_LINEAR)
    label = cv2.resize(label, (256, 256), interpolation=cv2.INTER_LINEAR)

    # Depth input: a single column (128) of the depth image, zero elsewhere.
    image_depth = np.zeros((256, 256))
    image_depth[:, 128] = label[:, 128]
    label = cv2.resize(label, (64, 64), interpolation=cv2.INTER_LINEAR)

    if 'test' in self.list_path:
        image_rgb = self.input_transform(image_rgb)
        image_depth = self.label_transform(image_depth)
        image_rgb = image_rgb.transpose((2, 0, 1))
        return image_rgb.copy(), image_depth.copy(), np.array(
            size), image_id

    image_rgb, image_depth, label = self.gen_sample(
        image_rgb, image_depth, label)

    # keypoints is a flat list of (x, y, visibility) triples.
    # NOTE(review): if the keypoints are integers these arrays are integer
    # typed and the rescaled coordinates below are truncated -- confirm.
    joints = np.array([[keypoints[3 * i], keypoints[3 * i + 1], 0]
                       for i in range(15)])
    joints_vis = np.array([[keypoints[3 * i + 2], keypoints[3 * i + 2], 0]
                           for i in range(15)])
    # Map the joints into the cropped-and-resized 256x256 frame.
    for i in range(15):
        joints[i, 0] = (joints[i, 0] - bbox[0]) * 256 / bbox[2]
        joints[i, 1] = (joints[i, 1] - bbox[1]) * 256 / bbox[3]

    image_depth = np.expand_dims(image_depth, axis=0)
    image = np.concatenate((image_rgb, image_depth), axis=0)
    label = np.expand_dims(label, axis=0)

    if self.flip:
        # Step of -1 mirrors the last axis, +1 leaves it untouched.
        flip = np.random.choice(2) * 2 - 1
        image = image[:, :, ::flip]
        label = label[:, :, ::flip]
        # Mirror the joints only when the image was actually mirrored;
        # previously they were flipped unconditionally, so half of the
        # samples paired an unflipped image with flipped keypoints.
        if flip == -1:
            joints, joints_vis = fliplr_joints(joints, joints_vis, 256,
                                               self.flip_pairs)

    target, target_weight = self.generate_target(joints, joints_vis)
    # Depth map becomes the last target channel.
    target = np.concatenate((target, label), axis=0)

    return image.copy(), target.copy(), target_weight.copy(), np.array(
        size), image_id, joints.copy(), joints_vis.copy()
def __getitem__(self, idx):
    """Return the network input, heatmap target, target weight and metadata.

    Same pipeline as the plain affine-warp loader; additionally, when
    ``cf.args.task == 'ssm'``, ``meta['joints_h']`` holds the joints divided
    by the feature stride, offset by 0.5 and cast to ``int32``.

    Raises:
        ValueError: If the image cannot be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = record.get('filename', '')
    imgnum = record.get('imgnum', '')

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        image = zipreader.imread(image_file, read_flags)
    else:
        image = cv2.imread(image_file, read_flags)

    if image is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']
    center = record['center']
    scale = record['scale']
    score = record.get('score', 1)
    rotation = 0

    if self.is_train:
        scale_f = self.scale_factor
        rot_f = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * scale_f + 1,
                                1 - scale_f, 1 + scale_f)
        if random.random() <= 0.6:
            rotation = np.clip(np.random.randn() * rot_f,
                               -rot_f * 2, rot_f * 2)

        if self.flip:
            if random.random() <= 0.5:
                image = image[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, image.shape[1], self.flip_pairs)
                center[0] = image.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    net_input = cv2.warpAffine(image, trans, out_size,
                               flags=cv2.INTER_LINEAR)

    if self.transform:
        net_input = self.transform(net_input)

    for joint_id in range(self.num_joints):
        if joints_vis[joint_id, 0] > 0.0:
            joints[joint_id, 0:2] = affine_transform(
                joints[joint_id, 0:2], trans)

    heatmaps, heatmap_weights = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(heatmaps)
    target_weight = torch.from_numpy(heatmap_weights)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': center,
        'scale': scale,
        'rotation': rotation,
        'score': score
    }

    from boxx import cf
    if cf.args.task == 'ssm':
        stride = self.image_size / self.heatmap_size
        # TODO: reduce the quantisation loss of this conversion.
        heatmap_joints = copy.deepcopy(joints)
        heatmap_joints[:, 0] = (heatmap_joints[:, 0] / stride[0] + 0.5)
        heatmap_joints[:, 1] = (heatmap_joints[:, 1] / stride[1] + 0.5)
        meta['joints_h'] = heatmap_joints.astype(np.int32)

    return net_input, target, target_weight, meta
def __getitem__(self, idx):
    """Return one sample, optionally with four supporting frames.

    When warping is enabled for the current mode (``use_warping_train`` in
    training, ``use_warping_test`` otherwise) the frames at offsets -1, -2,
    +1 and +2 from the reference frame are loaded as well and receive
    exactly the same flip, affine warp and ``self.transform`` as the
    reference image.  A supporting frame whose file does not exist falls
    back to the reference image.

    Args:
        idx: Index into ``self.db``.

    Returns:
        With warping: ``(input, input_prev1, input_prev2, input_next1,
        input_next2, target, target_weight, meta)``.
        Without warping: ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the reference image or a supporting frame cannot be
            read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    # Evaluated once; the original repeated this predicate at every step.
    use_warping = ((self.is_train and self.use_warping_train)
                   or (not self.is_train and self.use_warping_test))

    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = self.read_image(image_file)

    # Check every read before any colour conversion: cv2.cvtColor fails on
    # None with its own error, which used to pre-empt these messages.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # ---- supporting frames ----
    sup_files = []
    sup_frames = []
    if use_warping:
        # Frame files are named "<zero-padded index>.jpg".
        ref_name = image_file.split('/')[-1]
        ref_idx = int(ref_name.replace('.jpg', ''))
        digits = 6 if self.is_posetrack18 else 8

        # Order matters: prev1, prev2, next1, next2.
        for tag, delta in (('PREV', -1), ('PREV', -2),
                           ('NEXT', 1), ('NEXT', 2)):
            sup_idx = ref_idx + delta
            if 'nframes' in db_rec:
                nframes = db_rec['nframes']
                # Clamp to [0, nframes - 1] when is_posetrack18 is set,
                # otherwise to [1, nframes].
                if not self.is_posetrack18:
                    sup_idx = np.clip(sup_idx, 1, nframes)
                else:
                    sup_idx = np.clip(sup_idx, 0, nframes - 1)

            candidate = image_file.replace(
                ref_name, str(sup_idx).zfill(digits) + '.jpg')
            # Fall back to the reference frame when the neighbour is missing.
            sup_file = candidate if os.path.exists(candidate) else image_file

            sup_frame = self.read_image(sup_file)
            if sup_frame is None:
                logger.error(
                    '=> {} SUP: fail to read {}'.format(tag, sup_file))
                raise ValueError(
                    '{} SUP: Fail to read {}'.format(tag, sup_file))

            sup_files.append(sup_file)
            sup_frames.append(sup_frame)

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        sup_frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                      for frame in sup_frames]

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: may replace centre and scale.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            # Supporting frames must be mirrored together with the reference.
            sup_frames = [frame[:, ::-1, :] for frame in sup_frames]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = cv2.warpAffine(data_numpy, trans, out_size,
                           flags=cv2.INTER_LINEAR)
    # The same affine matrix is applied to every supporting frame.
    sup_inputs = [
        cv2.warpAffine(frame, trans, out_size, flags=cv2.INTER_LINEAR)
        for frame in sup_frames
    ]

    if self.transform:
        input = self.transform(input)
        sup_inputs = [self.transform(frame) for frame in sup_inputs]

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {'image': image_file}
    if use_warping:
        # Path actually used for the first previous frame.
        meta['sup_image'] = sup_files[0]
    meta.update({
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    })

    if use_warping:
        input_prev1, input_prev2, input_next1, input_next2 = sup_inputs
        return (input, input_prev1, input_prev2, input_next1, input_next2,
                target, target_weight, meta)

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return a sample with target heatmaps and interference heatmaps.

    Besides the heatmaps of the annotated instance, heatmaps are generated
    for every entry of the record's ``'interference'`` joints (falling back
    to the instance's own joints when the record has none) and merged with
    an element-wise maximum.  ``all_ins_target`` combines the target with
    the interference heatmaps scaled by 0.5.

    Args:
        idx: Index into ``self.db``.

    Returns:
        Tuple ``(input, target, target_weight, all_ins_target,
        all_ins_target_weight, meta)``.

    Raises:
        ValueError: If the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Check the read before converting colours: cv2.cvtColor fails on None
    # with its own error, which used to pre-empt this message.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']
    if 'interference' in db_rec.keys():
        interference_joints = db_rec['interference']
        interference_joints_vis = db_rec['interference_vis']
    else:
        # Use independent copies.  Sharing the arrays with ``joints`` made
        # the in-place affine transform below run twice on the same data,
        # which also corrupted the joints returned in ``meta``.
        interference_joints = [copy.deepcopy(joints)]
        interference_joints_vis = [copy.deepcopy(joints_vis)]

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: may replace centre and scale.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1
            # Interference joints are mirrored with the same width.
            for i in range(len(interference_joints)):
                interference_joints[i], interference_joints_vis[i] = \
                    fliplr_joints(interference_joints[i],
                                  interference_joints_vis[i],
                                  data_numpy.shape[1], self.flip_pairs)

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    # Interference heatmaps: element-wise maximum over all entries.
    inter_target = np.zeros_like(target)
    inter_target_weight = np.zeros_like(target_weight)
    for i in range(len(interference_joints)):
        inter_joints = interference_joints[i]
        inter_joints_vis = interference_joints_vis[i]
        for j in range(self.num_joints):
            if inter_joints_vis[j, 0] > 0.0:
                inter_joints[j, 0:2] = affine_transform(
                    inter_joints[j, 0:2], trans)
        _inter_target, _inter_target_weight = self.generate_target(
            inter_joints, inter_joints_vis)
        inter_target = np.maximum(inter_target, _inter_target)
        inter_target_weight = np.maximum(inter_target_weight,
                                         _inter_target_weight)

    # Interference responses enter the combined map at half strength.
    all_ins_target = np.maximum(inter_target * 0.5, target)
    all_ins_target_weight = np.maximum(inter_target_weight, target_weight)

    kpts_onehots = self.heatmap2onehot(target)

    # Heatmap labels as tensors.
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)
    all_ins_target = torch.from_numpy(all_ins_target)
    all_ins_target_weight = torch.from_numpy(all_ins_target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score,
        'interference_maps': inter_target,
        'kpt_cat_maps': kpts_onehots,
    }

    return (input, target, target_weight, all_ins_target,
            all_ins_target_weight, meta)
def __getitem__(self, idx):
    """Return a sample with joint heatmaps and per-skeleton body targets.

    After the usual augmentation and affine warp, a ``(length, angle, 1)``
    descriptor is computed for every entry of ``self.skeletons`` whose two
    end points are both labelled in ``joints_3d_copy``; separate heatmap
    targets are generated for joints and bodies.

    Args:
        idx: Index into ``self.db``.

    Returns:
        Tuple ``(input, joint_target, joint_target_weight, body_target,
        body_target_weight, body, body_vis, meta)``.

    Raises:
        ValueError: If the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )

    # Check the read before converting colours: cv2.cvtColor fails on None
    # with its own error, which used to pre-empt this message.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    # Untransformed copy; its third column decides whether a joint is usable.
    joints_copy = db_rec['joints_3d_copy']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: may replace centre and scale.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis
            )

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        # Random scale factor.
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Random rotation, applied with probability 0.6.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            # Re-derive the centre after mirroring.
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
    # type it aliased.
    body = np.zeros((self.num_body, 3), dtype=float)
    body_vis = np.zeros((self.num_body, 3), dtype=float)
    for idbody, skeleton in enumerate(self.skeletons):
        point_a = joints[skeleton[0]]
        point_b = joints[skeleton[1]]
        # Skip skeletons with an unlabelled end point.
        if joints_copy[skeleton[0]][2] == 0 or joints_copy[skeleton[1]][2] == 0:
            continue
        axis_x = (point_b - point_a)[:-1]  # drop the last component
        lx = np.sqrt(axis_x.dot(axis_x))
        if lx == 0:
            continue
        ly = 1  # presumably self.axis_y has unit length -- confirm
        cos_angle = axis_x.dot(self.axis_y) / (lx * ly)
        # Guard against rounding pushing the cosine just outside [-1, 1],
        # which would make arccos return NaN.
        angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
        angle = angle / np.pi  # normalised to [0, 1]
        if axis_x[1] < 0:
            angle = -angle
        # NOTE(review): 332.55 is an unexplained length normaliser.
        body[idbody] = [lx / 332.55, angle, 1]
        body_vis[idbody] = [1, 1, 0]

    joint_target, joint_target_weight = self.generate_target(
        joints, joints_vis)
    body_target, body_target_weight = self.generate_body_target(
        joints, joints_copy, body_vis)

    joint_target = torch.from_numpy(joint_target)
    joint_target_weight = torch.from_numpy(joint_target_weight)
    body_target = torch.from_numpy(body_target)
    body_target_weight = torch.from_numpy(body_target_weight)
    body = torch.from_numpy(body)
    body_vis = torch.from_numpy(body_vis)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'body': body,
        'body_vis': body_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return (input, joint_target, joint_target_weight, body_target,
            body_target_weight, body, body_vis, meta)
def __getitem__(self, idx):
    """Return the network input, heatmap target, target weight and metadata.

    Reads the image of record ``idx`` (optionally converted to RGB), applies
    half-body / scale / rotation / flip augmentation in training mode, warps
    the image to ``self.image_size`` and applies the same affine transform
    to the visible joints before generating the heatmap targets.

    Args:
        idx: Index into ``self.db``.

    Returns:
        Tuple ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Check the read before converting colours: cv2.cvtColor fails on None
    # with its own error, which used to pre-empt this message.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: may replace centre and scale.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is applied with probability 0.6.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    # Only visible joints are moved into the warped frame.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return input, heatmap target, target weight, info and (train) meta.

    The pipeline matches the plain affine-warp loader.  In training mode an
    extra ``meta`` dict is returned that carries the sample index, an
    initial difficulty derived from the record's ``'num_keypoints'`` and
    ``'invisible_keypoints'`` entries, and a forgetting degree of 100.

    Returns:
        Training: ``(input, target, target_weight, info, meta)``.
        Otherwise: ``(input, target, target_weight, info)``.

    Raises:
        ValueError: If the image cannot be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = record.get('filename', '')
    imgnum = record.get('imgnum', '')

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        image = zipreader.imread(image_file, read_flags)
    else:
        image = cv2.imread(image_file, read_flags)

    if image is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']
    center = record['center']
    scale = record['scale']
    score = record.get('score', 1)
    rotation = 0

    if self.is_train:
        scale_f = self.scale_factor
        rot_f = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * scale_f + 1,
                                1 - scale_f, 1 + scale_f)
        if random.random() <= 0.6:
            rotation = np.clip(np.random.randn() * rot_f,
                               -rot_f * 2, rot_f * 2)

        if self.flip:
            if random.random() <= 0.5:
                image = image[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, image.shape[1], self.flip_pairs)
                center[0] = image.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    net_input = cv2.warpAffine(image, trans, out_size,
                               flags=cv2.INTER_LINEAR)

    if self.transform:
        net_input = self.transform(net_input)

    for joint_id in range(self.num_joints):
        if joints_vis[joint_id, 0] > 0.0:
            joints[joint_id, 0:2] = affine_transform(
                joints[joint_id, 0:2], trans)

    heatmaps, heatmap_weights = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(heatmaps)
    target_weight = torch.from_numpy(heatmap_weights)

    info = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': center,
        'scale': scale,
        'rotation': rotation,
        'score': score
    }

    if not self.is_train:
        return net_input, target, target_weight, info

    # @yangsen: initialise the difficulty coefficient and forgetting degree.
    noise = record['invisible_keypoints']  # annotated but invisible points
    w = np.array(record['num_keypoints'])
    decay_beta = 2  # controls the weight decay
    # Chosen so that a sample with a single annotated point gets the
    # (intended) maximum difficulty of 1.
    lamda = np.exp(1 / decay_beta) + 1
    sigmoid = 1 / (1 + np.exp(-np.sqrt(w - noise) / decay_beta))
    initial_difficult = lamda * (1 - sigmoid)  # intended range (0, 1]

    meta = {
        'index': idx,
        'memory_difficult': int(100 * initial_difficult),  # intended (0, 100]
        # [0, 100]: 0 means remembered, 100 means forgotten.
        'forget_degree': 100,
    }
    return net_input, target, target_weight, info, meta