def __getitem__(self, index):
    val_item = self.val_list[index]

    # Load image
    im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg')
    im = cv2.imread(im_path, cv2.IMREAD_COLOR)
    h, w, _ = im.shape

    # Get person center and scale
    person_center, s = self._box2cs([0, 0, w - 1, h - 1])
    r = 0
    trans = get_affine_transform(person_center, s, r, self.crop_size)
    input = cv2.warpAffine(
        im,
        trans,
        (int(self.crop_size[1]), int(self.crop_size[0])),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0))

    input = self.transform(input)
    flip_input = input.flip(dims=[-1])
    if self.flip:
        batch_input_im = torch.stack([input, flip_input])
    else:
        batch_input_im = input

    meta = {
        'name': val_item,
        'center': person_center,
        'height': h,
        'width': w,
        'scale': s,
        'rotation': r
    }

    return batch_input_im, meta
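# Several of the parsing-style datasets in this section (the ones iterating
# val_list / file_list / im_list / train_list) call a `_box2cs` helper that is
# never shown. A minimal sketch of the usual implementation, assuming the
# SCHP/CIHP convention where the returned scale stays in pixels and the box is
# padded to the crop aspect ratio; `self.aspect_ratio` (crop width / crop
# height) is an assumed attribute, not confirmed by this excerpt:
import numpy as np

def _box2cs(self, box):
    # box is [x, y, w, h]; delegate to the xywh form.
    x, y, w, h = box[:4]
    return self._xywh2cs(x, y, w, h)

def _xywh2cs(self, x, y, w, h):
    # Center of the box.
    center = np.zeros((2,), dtype=np.float32)
    center[0] = x + w * 0.5
    center[1] = y + h * 0.5
    # Pad the box so it matches the crop aspect ratio, keeping scale in pixels.
    if w > self.aspect_ratio * h:
        h = w * 1.0 / self.aspect_ratio
    elif w < self.aspect_ratio * h:
        w = h * self.aspect_ratio
    scale = np.array([w, h], dtype=np.float32)
    return center, scale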
def get_image_info(self, index):
    info = self.gt_db[index]
    imgpath = info['image']
    image = cv2.imread(imgpath)[:, :, ::-1]  # BGR -> RGB
    joints = info['joints_3d']
    joints_vis = info['joints_3d_vis'][:, 0]

    c = info['center']
    s = info['scale']
    r = 0
    if self.train_flag:
        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    trans = get_affine_transform(c, s, r, (self.crop_size, self.crop_size))
    dst_image = cv2.warpAffine(image, trans,
                               (self.crop_size, self.crop_size),
                               flags=cv2.INTER_LINEAR)

    for i in range(self.num_joints):
        if joints_vis[i] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    kp2d = np.concatenate([joints[:, 0:2], joints_vis[:, None]], 1)[self.mpii_2_lsp14]
    result_dir = '{}/{}'.format(self.save_dir, os.path.basename(imgpath))
    metas = ('mpii', imgpath, result_dir, self.empty_kp3d, self.empty_kp3d,
             self.empty_param, self.empty_gr)
    return dst_image, kp2d, self.const_box, metas
def __getitem__(self, index):
    img_name = self.file_list[index]
    img_path = os.path.join(self.root, img_name)
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    h, w, _ = img.shape

    # Get person center and scale
    person_center, s = self._box2cs([0, 0, w - 1, h - 1])
    r = 0
    trans = get_affine_transform(person_center, s, r, self.input_size)
    input = cv2.warpAffine(
        img,
        trans,
        (int(self.input_size[1]), int(self.input_size[0])),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0))

    input = self.transform(input)

    meta = {
        'name': img_name,
        'center': person_center,
        'height': h,
        'width': w,
        'scale': s,
        'rotation': r
    }

    return input, meta
def get_pose_estimation_prediction(pose_model, image, center, scale):
    rotation = 0

    # pose estimation transformation
    trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE)
    model_input = cv2.warpAffine(
        image,
        trans,
        (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
        flags=cv2.INTER_LINEAR)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # pose estimation inference
    model_input = transform(model_input).unsqueeze(0)
    # switch to evaluate mode
    pose_model.eval()
    with torch.no_grad():
        # compute output heatmap
        output = pose_model(model_input.to(device))
        preds, _ = get_final_preds(
            cfg,
            output.clone().cpu().numpy(),
            np.asarray([center]),
            np.asarray([scale]))

        return preds
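# Every snippet in this section calls `get_affine_transform`, which is not
# defined here. A sketch of the HRNet/Simple-Baselines-style implementation,
# assuming the convention where `scale` is expressed in units of 200 pixels
# (parsing repos that keep scale in raw pixels drop the `* 200.0`); the
# `get_dir`/`get_3rd_point` helpers are included so the sketch is
# self-contained:
import cv2
import numpy as np

def get_dir(src_point, rot_rad):
    # Rotate a 2D point by rot_rad radians around the origin.
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    return [src_point[0] * cs - src_point[1] * sn,
            src_point[0] * sn + src_point[1] * cs]

def get_3rd_point(a, b):
    # Third point forming a right angle with the segment a -> b.
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)

def get_affine_transform(center, scale, rot, output_size,
                         shift=np.array([0, 0], dtype=np.float32), inv=0):
    if not isinstance(scale, np.ndarray) and not isinstance(scale, (list, tuple)):
        scale = np.array([scale, scale], dtype=np.float32)

    scale_tmp = np.array(scale) * 200.0  # assumed 200-px scale convention
    src_w = scale_tmp[0]
    dst_w, dst_h = output_size[0], output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = np.array(get_dir([0, src_w * -0.5], rot_rad), dtype=np.float32)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    # Three corresponding points in the source and destination images.
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir
    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    # inv=1 returns the inverse mapping (crop -> original image).
    if inv:
        return cv2.getAffineTransform(np.float32(dst), np.float32(src))
    return cv2.getAffineTransform(np.float32(src), np.float32(dst))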
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    joints = db_rec['joints_2d'].copy()
    joints_vis = db_rec['joints_vis'].copy()

    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            if (np.min(joints[i, :2]) < 0
                    or joints[i, 0] >= self.image_size[0]
                    or joints[i, 1] >= self.image_size[1]):
                joints_vis[i, :] = 0

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source']
    }
    return input, target, target_weight, meta
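# The per-joint warps above use an `affine_transform` helper that is also not
# shown. A minimal sketch of the standard single-point version; note that one
# snippet further down transforms all visible joints in a single call, so that
# repo's variant must accept an (N, 2) array instead:
import numpy as np

def affine_transform(pt, t):
    # Apply a 2x3 affine matrix t (as returned by get_affine_transform)
    # to a single (x, y) point via homogeneous coordinates.
    new_pt = np.array([pt[0], pt[1], 1.0]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]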
def get_pose_estimation_prediction(pose_model, image, centers, scales, transform):
    rotation = 0

    # pose estimation transformation
    model_inputs = []
    for center, scale in zip(centers, scales):
        trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE)
        # Crop smaller image of people
        model_input = cv2.warpAffine(
            image,
            trans,
            (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
            flags=cv2.INTER_LINEAR)

        # hwc -> 1chw
        model_input = transform(model_input)  # .unsqueeze(0)
        model_inputs.append(model_input)

    # n * 1chw -> nchw
    model_inputs = torch.stack(model_inputs)

    # compute output heatmap
    output = pose_model(model_inputs.to(CTX))
    coords, _ = get_final_preds(
        cfg,
        output.cpu().detach().numpy(),
        np.asarray(centers),
        np.asarray(scales))

    return coords
def __getitem__(self, index):
    # Load image
    im_name = self.im_list[index]
    im_path = os.path.join(self.root, im_name)
    im = cv2.imread(im_path, cv2.IMREAD_COLOR)
    h, w, _ = im.shape

    # Get center and scale
    center, s = self._box2cs([0, 0, w - 1, h - 1])
    r = 0
    trans = get_affine_transform(center, s, r, self.crop_size)
    input = cv2.warpAffine(
        im,
        trans,
        (int(self.crop_size[1]), int(self.crop_size[0])),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0))

    if self.transform:
        input = self.transform(input)

    meta = {
        'name': os.path.basename(im_name)[:-4],  # drop file extension such as ".jpg" and ".png"
        'center': center,
        'height': h,
        'width': w,
        'scale': s,
        'rotation': r
    }

    return input, meta
def data_augmentation(sample, is_train):
    image_file = sample['image']
    filename = sample['filename'] if 'filename' in sample else ''
    joints = sample['joints_3d']
    joints_vis = sample['joints_3d_vis']
    c = sample['center']
    s = sample['scale']
    score = sample['score'] if 'score' in sample else 1
    # imgnum = sample['imgnum'] if 'imgnum' in sample else ''
    r = 0

    # used for CE (continuous evaluation) mode: fix seeds for reproducibility
    if 'ce_mode' in os.environ:
        random.seed(0)
        np.random.seed(0)

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    if is_train:
        sf = cfg.SCALE_FACTOR
        rf = cfg.ROT_FACTOR
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if cfg.FLIP and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               cfg.FLIP_PAIRS)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, cfg.IMAGE_SIZE)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])),
        flags=cv2.INTER_LINEAR)

    for i in range(cfg.NUM_JOINTS):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Numpy target
    target, target_weight = generate_target(cfg, joints, joints_vis)

    if cfg.DEBUG:
        visualize(cfg, filename, data_numpy, input.copy(), joints, target)

    # Normalization
    input = input.astype('float32').transpose((2, 0, 1)) / 255
    input -= np.array(cfg.MEAN).reshape((3, 1, 1))
    input /= np.array(cfg.STD).reshape((3, 1, 1))

    if is_train:
        return input, target, target_weight
    else:
        return input, target, target_weight, c, s, score, image_file
def __getitem__(self, idx: int):
    data = self.data[idx]
    frame_idx = data["image_id"]
    x, y, w, h = data['bbox']
    # x1, y1, x2, y2 = data['orig_bbox']
    self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
    _, img = self.cap.read()

    aspect_ratio = self.cfg.MODEL.IMAGE_SIZE[1] / self.cfg.MODEL.IMAGE_SIZE[0]
    centre = np.array([x + w * .5, y + h * .5])
    # Pad the box to the model aspect ratio.
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    scale = np.array([w, h]) * 1.25
    rotation = 0

    trans = get_affine_transform(centre, scale, rotation,
                                 (self.cfg.MODEL.IMAGE_SIZE[1],
                                  self.cfg.MODEL.IMAGE_SIZE[0]))
    cropped_img = cv2.warpAffine(img, trans,
                                 (self.cfg.MODEL.IMAGE_SIZE[1],
                                  self.cfg.MODEL.IMAGE_SIZE[0]),
                                 flags=cv2.INTER_LINEAR)
    cropped_img = normalize_input(cropped_img, self.cfg)

    # cv2.imshow("orig", img)
    # cropped_show = denormalize_input(cropped_img, self.cfg).copy().astype(np.uint8)
    # cv2.imshow("crop", cropped_show)
    # cv2.waitKey()
    # cv2.destroyAllWindows()

    # np.float was removed in NumPy 1.24; use float64 explicitly.
    estimated_joints = np.zeros((self.cfg.MODEL.NUM_JOINTS, 3), dtype=np.float64)
    offsets = np.zeros((self.cfg.MODEL.NUM_JOINTS, 2), dtype=np.float64)
    offsets[:, 0] = self.frame_area[0]
    offsets[:, 1] = self.frame_area[1]
    estimated_joints[:, :2] = np.array(data['joints']).reshape(self.cfg.MODEL.NUM_JOINTS, 2)
    estimated_joints[:, :2] += offsets
    estimated_joints[:, 2] = np.array(data['score'])

    for j in range(self.cfg.MODEL.NUM_JOINTS):
        if estimated_joints[j, 2] > 0:
            estimated_joints[j, :2] = affine_transform(estimated_joints[j, :2], trans)
            # Zero the score of joints that fall outside the crop.
            estimated_joints[j, 2] *= ((estimated_joints[j, 0] >= 0) &
                                       (estimated_joints[j, 0] < self.cfg.MODEL.IMAGE_SIZE[1]) &
                                       (estimated_joints[j, 1] >= 0) &
                                       (estimated_joints[j, 1] < self.cfg.MODEL.IMAGE_SIZE[0]))

    input_pose_coord = estimated_joints[:, :2]
    input_pose_valid = np.array(
        [1 if i not in self.cfg.ignore_kps else 0
         for i in range(self.cfg.MODEL.NUM_JOINTS)])
    input_pose_score = estimated_joints[:, 2]

    crop_info = np.asarray([centre[0] - scale[0] * 0.5, centre[1] - scale[1] * 0.5,
                            centre[0] + scale[0] * 0.5, centre[1] + scale[1] * 0.5])

    return [torch.from_numpy(cropped_img).float().permute(2, 0, 1),
            input_pose_coord,
            input_pose_valid,
            input_pose_score,
            crop_info,
            frame_idx]
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    # Check for a failed read before converting, otherwise cvtColor raises first.
    if data_numpy is None:
        print('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))
    data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)
    target_map = torch.from_numpy(self.generate_paf(joints, joints_vis))

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, target_map, meta
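# `generate_target` is called throughout this section but never defined.
# A sketch of the standard MSRA-style Gaussian heatmap generator, assuming
# `self.image_size`/`self.heatmap_size` are (w, h) numpy arrays and
# `self.sigma` is the Gaussian radius; per-repo variants differ in details:
import numpy as np

def generate_target(self, joints, joints_vis):
    # One weight per joint: 1 if the joint is annotated/visible, else 0.
    target_weight = np.ones((self.num_joints, 1), dtype=np.float32)
    target_weight[:, 0] = joints_vis[:, 0]

    target = np.zeros((self.num_joints,
                       self.heatmap_size[1],
                       self.heatmap_size[0]), dtype=np.float32)
    tmp_size = self.sigma * 3
    feat_stride = self.image_size / self.heatmap_size

    for joint_id in range(self.num_joints):
        mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
        mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
        # Gaussian patch bounds; skip joints whose patch misses the heatmap.
        ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
        br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
        if (ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1]
                or br[0] < 0 or br[1] < 0):
            target_weight[joint_id] = 0
            continue

        # Unnormalized 2D Gaussian centered in a (size x size) patch.
        size = 2 * tmp_size + 1
        x = np.arange(0, size, 1, np.float32)
        y = x[:, np.newaxis]
        x0 = y0 = size // 2
        g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2))

        # Usable region of the Gaussian and of the heatmap.
        g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0]
        g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1]
        img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0])
        img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1])

        if target_weight[joint_id] > 0.5:
            target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                g[g_y[0]:g_y[1], g_x[0]:g_x[1]]

    return target, target_weight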
def getitem(self):
    # return input, meta
    db_rec = self._load_coco_keypoint_annotation_kernal()

    image_file = db_rec[0]['image']
    filename = db_rec[0]['filename'] if 'filename' in db_rec[0] else ''
    imgnum = db_rec[0]['imgnum'] if 'imgnum' in db_rec[0] else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(  # (filename, flags)
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    c = db_rec[0]['center']
    s = db_rec[0]['scale']
    score = db_rec[0]['score'] if 'score' in db_rec[0] else 1
    r = 0

    # 2x3 matrix mapping the box (scaled by s, rotated by r) to image_size
    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(  # input is the rotated and scaled original image
        data_numpy,          # source image
        trans,               # transform matrix
        (int(self.image_size[0]), int(self.image_size[1])),  # output size, e.g. 192x256
        flags=cv2.INTER_LINEAR)  # interpolation method

    if self.transform:
        input = self.transform(input)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'center': c,
        'scale': s,
        'score': score,
        'joints_vis': [[(1, 1, 0), (1, 1, 0), (1, 1, 0), (1, 1, 0), (1, 1, 0),
                        (1, 1, 0), (1, 1, 0), (1, 1, 0), (1, 1, 0), (1, 1, 0),
                        (1, 1, 0), (1, 1, 0), (1, 1, 0), (1, 1, 0), (1, 1, 0),
                        (1, 1, 0), (1, 1, 0)]]
    }

    return input, meta
def resize_align_multi_scale(image, input_size, current_scale, min_scale):
    size_resized, center, scale = get_multi_scale_size(
        image, input_size, current_scale, min_scale)

    trans = get_affine_transform(center, scale, 0, size_resized)
    image_resized = cv2.warpAffine(
        image,
        trans,
        size_resized)  # (int(w_resized), int(h_resized))

    return image_resized, center, scale
def get_humankeypoints(self, img_orig, objs, threshold=0.6):
    n_objs = len(objs)
    np_inputs = np.zeros(shape=(n_objs, 3, 256, 192), dtype=np.float32)
    # output_data = np.zeros(shape=(n_objs, 17, 64, 48), dtype=np.float32)

    for idx in range(n_objs):
        c = objs[idx]['center']
        s = objs[idx]['scale']
        trans = get_affine_transform(c, s, 0, (192, 256), inv=0)
        warp_img = cv2.warpAffine(img_orig, trans, (192, 256), flags=cv2.INTER_LINEAR)

        np_input = cv2.cvtColor(warp_img, cv2.COLOR_BGR2RGB)
        np_input = np.expand_dims(np_input, 0).astype(np.float32)
        np_inputs_nchw = np_input.transpose(0, 3, 1, 2) / 255
        np_inputs[idx] = self.standardization(np_inputs_nchw[0])

    output_data = self.rep.run(np_inputs)[0]

    list_c = [obj['center'] for obj in objs]
    list_s = [obj['scale'] for obj in objs]
    preds, maxvals = get_final_preds(output_data, list_c, list_s)

    annotations = []
    for obj_idx in range(len(objs)):
        keypoints = []
        cnt_num_point = 0  # reset per object: num_keypoints is per annotation
        for idx, ptval in enumerate(zip(preds[obj_idx], maxvals[obj_idx])):
            point, maxval = ptval
            # np.float was removed in NumPy 1.24; use the builtin float dtype.
            x, y = np.array(point, dtype=float)
            if maxval > threshold:
                keypoints.extend([x, y, 2])
                cnt_num_point += 1
            else:
                keypoints.extend([0, 0, 0])

        x, y, w, h = objs[obj_idx]['bbox']
        annotation = usrcoco.create_annotation_info(
            annotation_id=obj_idx + 1,
            image_id=1,
            category_info=1,
            keypoints=keypoints,
            num_keypoints=cnt_num_point,
            bounding_box=objs[obj_idx]['bbox'])
        annotations.append(annotation)

    return annotations
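# `get_final_preds` decodes heatmaps back to image coordinates. A sketch of
# the common MSRA-style decoding: take the per-joint argmax, nudge it a
# quarter pixel toward the larger neighboring bin, then map back through the
# inverse affine transform. This cfg-less signature matches the call above;
# the cfg-taking variants elsewhere in this section gate the quarter-offset
# step behind a post-processing flag. Uses the `get_affine_transform` and
# `affine_transform` sketches given earlier:
import numpy as np

def get_max_preds(batch_heatmaps):
    # batch_heatmaps: (N, K, H, W) -> coords (N, K, 2) and maxvals (N, K, 1).
    batch_size, num_joints, _, width = batch_heatmaps.shape
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2).reshape((batch_size, num_joints, 1))
    maxvals = np.amax(heatmaps_reshaped, 2).reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
    preds[:, :, 0] = preds[:, :, 0] % width            # x = flat index mod width
    preds[:, :, 1] = np.floor(preds[:, :, 1] / width)  # y = flat index // width

    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)).astype(np.float32)
    return preds * pred_mask, maxvals

def transform_preds(coords, center, scale, output_size):
    # Map heatmap-space coordinates back to the original image space.
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords

def get_final_preds(batch_heatmaps, centers, scales):
    coords, maxvals = get_max_preds(batch_heatmaps)
    heatmap_height, heatmap_width = batch_heatmaps.shape[2], batch_heatmaps.shape[3]

    # Quarter-offset refinement toward the higher neighboring activation.
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            hm = batch_heatmaps[n][p]
            px = int(np.floor(coords[n][p][0] + 0.5))
            py = int(np.floor(coords[n][p][1] + 0.5))
            if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                diff = np.array([hm[py][px + 1] - hm[py][px - 1],
                                 hm[py + 1][px] - hm[py - 1][px]])
                coords[n][p] += np.sign(diff) * .25

    preds = coords.copy()
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], centers[i], scales[i],
                                   [heatmap_width, heatmap_height])
    return preds, maxvals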
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    # filename = db_rec['filename'] if 'filename' in db_rec else ''
    # imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Check for a failed read before converting colour spaces.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))
    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    # joints = db_rec['joints_3d']
    # joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    meta = {
        'image': image_file,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, meta
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        self.root + image_file,
        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    # Check for a failed read before converting colour spaces.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))
    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    # input = np.swapaxes(input, 1, 2)
    # target = np.swapaxes(target, 1, 2)

    return input, meta
def get_pose_estimation_prediction(pose_model, image, centers, scales, box, transform):
    rotation = 0

    # pose estimation transformation
    model_inputs = []
    for center, scale in zip(centers, scales):
        cv2.imwrite('../data/nlos/nlos_result/first_input.jpg', image)
        trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE)
        # Crop smaller image of people
        model_input = cv2.warpAffine(
            image,
            trans,
            (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
            flags=cv2.INTER_LINEAR)
        # print('model_input(w/ trans)', model_input.shape)
        img = model_input
        cv2.imwrite('../data/nlos/nlos_result/trans_input.jpg', img)
        # inv_trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE, inv=1)

        # hwc -> 1chw
        model_input = transform(model_input)  # .unsqueeze(0)
        model_inputs.append(model_input)

    # n * 1chw -> nchw
    model_inputs = torch.stack(model_inputs)

    zero_heatmap = torch.cuda.FloatTensor(
        int(cfg.MODEL.HEATMAP_SIZE[0]),
        int(cfg.MODEL.HEATMAP_SIZE[1])).fill_(0)

    # compute output heatmap
    output = pose_model(model_inputs.to(CTX))

    # using heatmap, get inverse transformed coordinates
    coords, _ = get_final_preds(
        cfg,
        output.cpu().detach().numpy(),
        np.asarray(centers),
        np.asarray(scales))

    # Invalidate joints that fall outside the detection box.
    for idx1, mat in enumerate(coords[0]):
        x_coord, y_coord = int(mat[0]), int(mat[1])
        if not in_box(x_coord, y_coord, box):
            coords[0][idx1] = [-1, -1]
            output[0][idx1] = zero_heatmap

    return output, coords
def get_pose_estimation_prediction(pose_model, image, centers, scales, box, transform):
    rotation = 0
    # print("img shape ", image.shape)
    # print("centers ", centers)
    # print("scales ", scales)
    # print(box)

    # pose estimation transformation
    model_inputs = []
    for center, scale in zip(centers, scales):
        trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE)
        # print("trans", trans)
        # Crop smaller image of people
        model_input = cv2.warpAffine(
            image,
            trans,
            (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
            flags=cv2.INTER_LINEAR)
        img = model_input
        cv2.imwrite('trans_input.jpg', img)

        # hwc -> 1chw
        model_input = transform(model_input)  # .unsqueeze(0)
        model_inputs.append(model_input)

    # n * 1chw -> nchw
    model_inputs = torch.stack(model_inputs)

    # zero_heatmap = np.zeros((120, 120), dtype=np.float32)
    zero_heatmap = torch.cuda.FloatTensor(120, 120).fill_(0)

    # compute output heatmap
    output = pose_model(model_inputs.to(CTX))

    # decode heatmap output into coordinates
    coords, _ = get_final_preds(
        cfg,
        output.cpu().detach().numpy(),
        np.asarray(centers),
        np.asarray(scales))

    # Invalidate joints that fall outside the detection box.
    for idx1, mat in enumerate(coords[0]):
        x_coord, y_coord = int(mat[0]), int(mat[1])
        if not in_box(x_coord, y_coord, box):
            coords[0][idx1] = [-1, -1]
            output[0][idx1] = zero_heatmap

    return output, coords
def compute_unary_term(heatmap, grid, bbox2D, cam, imgSize):
    """
    Args:
        heatmap: array of size (n * k * h * w)
            -n: number of views, -k: number of joints
            -h: heatmap height, -w: heatmap width
        grid: list of k ndarrays of size (nbins * 3)
            -k: number of joints; 1 when the grid is shared in PSM
            -nbins: number of bins in the grid
        bbox2D: bounding box on which heatmap is computed
    Returns:
        unary_of_all_joints: a list of ndarray of size nbins
    """
    n, k = heatmap.shape[0], heatmap.shape[1]
    h, w = heatmap.shape[2], heatmap.shape[3]
    nbins = grid[0].shape[0]

    unary_of_all_joints = []
    for j in range(k):
        unary = np.zeros(nbins)
        for c in range(n):
            grid_id = 0 if len(grid) == 1 else j
            xy = cameras.project_pose(grid[grid_id], cam[c])
            trans = get_affine_transform(bbox2D[c]['center'],
                                         bbox2D[c]['scale'], 0, imgSize)

            xy = affine_transform_pts(xy, trans) * np.array([w, h]) / imgSize
            # for i in range(nbins):
            #     xy[i] = affine_transform(xy[i], trans) * np.array([w, h]) / imgSize

            hmap = heatmap[c, j, :, :]
            point_x, point_y = np.arange(hmap.shape[0]), np.arange(hmap.shape[1])
            rgi = RegularGridInterpolator(points=[point_x, point_y],
                                          values=hmap.transpose(),
                                          bounds_error=False,
                                          fill_value=0)
            score = rgi(xy)
            unary = unary + np.reshape(score, newshape=unary.shape)
        unary_of_all_joints.append(unary)

    return unary_of_all_joints
def __getitem__(self, idx):
    assert self.last_idx_read is None or self.last_idx_read == idx - 1, \
        "idx jump: %d -> %d" % (self.last_idx_read, idx)
    db_rec = copy.deepcopy(self.db[idx])
    self.last_idx_read = idx

    image_file = db_rec['image']
    frame = self._get_img(image_file)
    if frame is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        frame,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    meta = {
        'image': image_file,
        'origbox': db_rec['origbox'],
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, meta
def get_item(self, img):
    h, w, _ = img.shape
    person_center, s = self._box2cs([0, 0, w - 1, h - 1])
    r = 0
    trans = get_affine_transform(person_center, s, r, self.input_size)
    input = cv2.warpAffine(
        img,
        trans,
        (int(self.input_size[1]), int(self.input_size[0])),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0))

    input = self.transform(input)
    input = input.unsqueeze(0)

    meta = {
        'center': person_center,
        'height': h,
        'width': w,
        'scale': s,
        'rotation': r
    }

    return input, meta
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    # Check for a failed read before converting colour spaces.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))
    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']
    if 'interference' in db_rec.keys():
        interference_joints = db_rec['interference']
        interference_joints_vis = db_rec['interference_vis']
    else:
        interference_joints = [joints]
        interference_joints_vis = [joints_vis]

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(joints, joints_vis)
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1
            for i in range(len(interference_joints)):
                interference_joints[i], interference_joints_vis[i] = fliplr_joints(
                    interference_joints[i], interference_joints_vis[i],
                    data_numpy.shape[1], self.flip_pairs)

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)
    # cv2.imwrite('img.jpg', input[:, :, ::-1])

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
    target, target_weight = self.generate_target(joints, joints_vis)

    # interference joints heatmaps
    inter_target = np.zeros_like(target)
    inter_target_weight = np.zeros_like(target_weight)
    for i in range(len(interference_joints)):
        inter_joints = interference_joints[i]
        inter_joints_vis = interference_joints_vis[i]
        for j in range(self.num_joints):
            if inter_joints_vis[j, 0] > 0.0:
                inter_joints[j, 0:2] = affine_transform(inter_joints[j, 0:2], trans)
        _inter_target, _inter_target_weight = self.generate_target(
            inter_joints, inter_joints_vis)
        inter_target = np.maximum(inter_target, _inter_target)
        inter_target_weight = np.maximum(inter_target_weight, _inter_target_weight)

    all_ins_target = np.maximum(inter_target, target)
    all_ins_target_weight = np.maximum(inter_target_weight, target_weight)

    # AE labels
    All_joints = [joints] + interference_joints
    ae_targets = self.generate_joints_ae_targets(All_joints)

    # convert to tensors
    all_ins_target = torch.from_numpy(all_ins_target)
    all_ins_target_weight = torch.from_numpy(all_ins_target_weight)
    ae_targets = torch.from_numpy(ae_targets)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score,
        'interference_maps': inter_target,
    }

    return input, all_ins_target, all_ins_target_weight, ae_targets, meta
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    # Check for a failed read before converting colour spaces.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))
    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
    # cv2.imwrite('ori_img.jpg', data_numpy[:, :, ::-1])

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']
    if 'interference' in db_rec.keys():
        interference_joints = db_rec['interference']
        interference_joints_vis = db_rec['interference_vis']
    else:
        interference_joints = [joints]
        interference_joints_vis = [joints_vis]

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    size = db_rec['obj_size']
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(joints, joints_vis)
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1
            for i in range(len(interference_joints)):
                interference_joints[i], interference_joints_vis[i] = fliplr_joints(
                    interference_joints[i], interference_joints_vis[i],
                    data_numpy.shape[1], self.flip_pairs)

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)
    # cv2.imwrite('img.jpg', input[:, :, ::-1])

    if self.transform:
        input = self.transform(input)

    # relation_joints = []
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
    target, target_weight = self.generate_target(joints, joints_vis)
    # all_points = np.asarray(np.where(target == 1))[::-1].transpose()
    # for p in all_points:
    #     relation_joints += [c[0], c[1], size[0], size[1], p[0], p[1], p[2], 1]

    # interference joints heatmaps
    inter_target = np.zeros_like(target)
    inter_target_weight = np.zeros_like(target_weight)
    for i in range(len(interference_joints)):
        inter_joints = interference_joints[i]
        inter_joints_vis = interference_joints_vis[i]
        for j in range(self.num_joints):
            if inter_joints_vis[j, 0] > 0.0:
                inter_joints[j, 0:2] = affine_transform(inter_joints[j, 0:2], trans)
        _inter_target, _inter_target_weight = self.generate_target(
            inter_joints, inter_joints_vis)
        inter_target = np.maximum(inter_target, _inter_target)
        inter_target_weight = np.maximum(inter_target_weight, _inter_target_weight)
        # if inter_target.max() > 0:
        #     all_points = np.asarray(np.where(inter_target == 1))[::-1].transpose()
        #     for p in all_points:
        #         relation_joints += [c[0], c[1], size[0], size[1], p[0], p[1], p[2], 0]

    # all_ins_target = np.maximum(inter_target, target)
    all_ins_target = np.maximum(inter_target * 0.5, target)
    # points = self.generate_candidate_points_from_heatmaps(inter_target)
    all_ins_target_weight = np.maximum(inter_target_weight, target_weight)
    # cv2.imwrite('heatmap.jpg', np.max(target, axis=0) * 255)
    # cv2.imwrite('inter_heatmap.jpg', np.max(inter_target, axis=0) * 255)

    # relation labels
    # relation_joints = np.asarray(relation_joints).reshape((-1, 8))
    kpts_onehots = self.heatmap2onehot(target)
    # if kpts_onehots.shape[0] != 15:
    #     print(target.shape)
    # target_amaps, target_aweights = self.generate_association_map_from_gt_heatmaps(target, all_ins_target)
    # amaps = self.generate_association_map_from_labels(relation_joints)
    # max_points = self.num_joints * 5
    # num_points = len(relation_joints) if len(relation_joints) <= max_points else max_points
    # target_relation_points = np.zeros((max_points, 8))
    # target_amaps = np.zeros((max_points, max_points))
    # target_relation_points[:num_points] = relation_joints[:num_points]
    # target_amaps[:num_points, :num_points] = amaps[:num_points, :num_points]

    # heatmap labels
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)
    all_ins_target = torch.from_numpy(all_ins_target)
    all_ins_target_weight = torch.from_numpy(all_ins_target_weight)
    # target_amaps = torch.from_numpy(target_amaps)
    # target_aweights = torch.from_numpy(target_aweights)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score,
        # 'relation_joints': target_relation_points,
        # 'num_points': num_points,
        # 'association_maps': target_amaps,
        # 'association_weights': target_aweights,
        'interference_maps': inter_target,
        'kpt_cat_maps': kpts_onehots,
    }

    # return input, target, target_weight, meta
    return input, target, target_weight, all_ins_target, all_ins_target_weight, meta
def __getitem__(self, index):
    im_name = self.im_list[index]

    im_path = os.path.join(self.root, self.dataset + '_images', im_name + '.jpg')
    parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations',
                                     im_name + '.png')

    im = cv2.imread(im_path, cv2.IMREAD_COLOR)
    h, w, _ = im.shape
    # np.long was removed in NumPy 1.24; use int64 explicitly.
    parsing_anno = np.zeros((h, w), dtype=np.int64)

    # get pose anno
    if self.dataset == 'train' or self.dataset == 'val':
        joints_all_info = np.array(self.pose_info[im_name])
        joints_loc = np.zeros((joints_all_info.shape[0], 2))
        joints_loc[:, :] = joints_all_info[:, 0:2]  # 1st and 2nd column

        # get visibility of joints
        coord_sum = np.sum(joints_loc, axis=1)
        visibility = coord_sum != 0

    # Get center and scale
    center, s = self._box2cs([0, 0, w - 1, h - 1])
    r = 0

    if self.dataset != 'test':
        parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)

        if self.dataset == 'train' or self.dataset == 'trainval':
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if random.random() <= self.flip_prob:
                im = im[:, ::-1, :]
                parsing_anno = parsing_anno[:, ::-1]
                center[0] = im.shape[1] - center[0] - 1

                # swap left/right parsing labels
                right_idx = [15, 17, 19]
                left_idx = [14, 16, 18]
                for i in range(0, 3):
                    right_pos = np.where(parsing_anno == right_idx[i])
                    left_pos = np.where(parsing_anno == left_idx[i])
                    parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                    parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

                # flip the joints
                joints_loc = flip_joints(joints_loc, w)

                # swap the visibility of left and right joints
                r_joint = [0, 1, 2, 10, 11, 12]
                l_joint = [3, 4, 5, 13, 14, 15]
                for i in range(0, 6):
                    temp_visibility = visibility[r_joint[i]]
                    visibility[r_joint[i]] = visibility[l_joint[i]]
                    visibility[l_joint[i]] = temp_visibility

    trans = get_affine_transform(center, s, r, self.crop_size)
    input = cv2.warpAffine(
        im,
        trans,
        (int(self.crop_size[1]), int(self.crop_size[0])),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0))

    if self.transform:
        input = self.transform(input)

    meta = {
        'name': im_name,
        'center': center,
        'height': h,
        'width': w,
        'scale': s,
        'rotation': r
    }

    if self.dataset == 'test':
        return input, meta
    else:
        label_parsing = cv2.warpAffine(
            parsing_anno,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_NEAREST,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255))

        grid_x = int(self.crop_size[1] / self.pose_net_stride)
        grid_y = int(self.crop_size[0] / self.pose_net_stride)

        for i in range(joints_all_info.shape[0]):
            if visibility[i] > 0:
                joints_loc[i, 0:2] = self.affine_trans(joints_loc[i, 0:2], trans)

        label_pose = generate_pose(joints_loc, visibility, trans, grid_x, grid_y,
                                   self.pose_net_stride, self.sigma)
        label_edge = generate_edge(label_parsing)

        return input, label_parsing, label_pose, label_edge, meta
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
    joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
    assert len(joints) == self.num_joints
    assert len(joints_vis) == self.num_joints

    # crop and scale according to ground truth
    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train and db_rec['source'] == 'mpii':
        sf = self.mpii_scale_factor
        rf = self.mpii_rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.mpii_flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.mpii_flip_pairs)
            center[0] = data_numpy.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    # transform all visible joints in one vectorized call
    visible_joints = joints_vis[:, 0] > 0
    if np.any(visible_joints):
        joints[visible_joints, :2] = affine_transform(joints[visible_joints, :2], trans)
    # zero_indices = np.any(
    #     np.concatenate((joints[:, :2] < 0,
    #                     joints[:, [0]] >= self.image_size[0],
    #                     joints[:, [1]] >= self.image_size[1]),
    #                    axis=1),
    #     axis=1)
    # joints_vis[zero_indices, :] = 0

    target, target_weight = self.generate_target(joints, joints_vis, db_rec['source'])

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source']
    }
    return input, target, target_weight, meta
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    if data_numpy is None:
        # logger.error('=> fail to read {}'.format(image_file))
        # raise ValueError('Fail to read {}'.format(image_file))
        return None, None, None, None, None, None

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_2d']
    joints_vis = db_rec['joints_2d_vis']
    joints_3d = db_rec['joints_3d']
    joints_3d_vis = db_rec['joints_3d_vis']

    nposes = len(joints)
    assert nposes <= self.maximum_person, 'too many persons'

    height, width, _ = data_numpy.shape
    c = np.array([width / 2.0, height / 2.0])
    s = get_scale((width, height), self.image_size)
    r = 0

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for n in range(nposes):
        for i in range(len(joints[0])):
            if joints_vis[n][i, 0] > 0.0:
                joints[n][i, 0:2] = affine_transform(joints[n][i, 0:2], trans)
                if (np.min(joints[n][i, :2]) < 0
                        or joints[n][i, 0] >= self.image_size[0]
                        or joints[n][i, 1] >= self.image_size[1]):
                    joints_vis[n][i, :] = 0

    if 'pred_pose2d' in db_rec and db_rec['pred_pose2d'] is not None:
        # For convenience, we use predicted poses and corresponding values at the original heatmaps
        # to generate 2d heatmaps for Campus and Shelf dataset.
        # You can also use other 2d backbone trained on COCO to generate 2d heatmaps directly.
        pred_pose2d = db_rec['pred_pose2d']
        for n in range(len(pred_pose2d)):
            for i in range(len(pred_pose2d[n])):
                pred_pose2d[n][i, 0:2] = affine_transform(pred_pose2d[n][i, 0:2], trans)

        input_heatmap = self.generate_input_heatmap(pred_pose2d)
        input_heatmap = torch.from_numpy(input_heatmap)
    else:
        input_heatmap = torch.zeros(self.cfg.NETWORK.NUM_JOINTS,
                                    self.heatmap_size[1], self.heatmap_size[0])

    target_heatmap, target_weight = self.generate_target_heatmap(joints, joints_vis)
    target_heatmap = torch.from_numpy(target_heatmap)
    target_weight = torch.from_numpy(target_weight)

    # make joints and joints_vis having same shape
    joints_u = np.zeros((self.maximum_person, self.num_joints, 2))
    joints_vis_u = np.zeros((self.maximum_person, self.num_joints, 2))
    for i in range(nposes):
        joints_u[i] = joints[i]
        joints_vis_u[i] = joints_vis[i]

    joints_3d_u = np.zeros((self.maximum_person, self.num_joints, 3))
    joints_3d_vis_u = np.zeros((self.maximum_person, self.num_joints, 3))
    for i in range(nposes):
        joints_3d_u[i] = joints_3d[i][:, 0:3]
        joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3]

    target_3d = self.generate_3d_target(joints_3d)
    target_3d = torch.from_numpy(target_3d)

    if isinstance(self.root_id, int):
        roots_3d = joints_3d_u[:, self.root_id]
    elif isinstance(self.root_id, list):
        roots_3d = np.mean([joints_3d_u[:, j] for j in self.root_id], axis=0)

    meta = {
        'image': image_file,
        'num_person': nposes,
        'joints_3d': joints_3d_u,
        'joints_3d_vis': joints_3d_vis_u,
        'roots_3d': roots_3d,
        'joints': joints_u,
        'joints_vis': joints_vis_u,
        'center': c,
        'scale': s,
        'rotation': r,
        'camera': db_rec['camera']
    }

    return input, target_heatmap, target_weight, target_3d, meta, input_heatmap
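# `get_scale` above is another undefined helper. A sketch of the
# VoxelPose-style version, assuming the same 200-px scale convention as the
# get_affine_transform sketch: pad the raw image size to the network aspect
# ratio, then divide by 200 so the whole image maps into the network input.
import numpy as np

def get_scale(image_size, resized_size):
    w, h = image_size
    w_resized, h_resized = resized_size
    if w / w_resized < h / h_resized:
        w_pad = h / h_resized * w_resized
        h_pad = h
    else:
        w_pad = w
        h_pad = w / w_resized * h_resized
    return np.array([w_pad / 200.0, h_pad / 200.0])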
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    from boxx import cf
    if cf.args.task == 'ssm':
        feat_stride = self.image_size / self.heatmap_size
        joints_h = copy.deepcopy(joints)
        # TODO: reduce quantization loss
        joints_h[:, 0] = (joints_h[:, 0] / feat_stride[0] + 0.5)
        joints_h[:, 1] = (joints_h[:, 1] / feat_stride[1] + 0.5)
        joints_h = joints_h.astype(np.int32)
        meta['joints_h'] = joints_h

    return input, target, target_weight, meta
# Excerpt from a demo script: `image_file`, `img_files`, and `config` are
# defined in earlier, elided context.
image_names = []
for i in range(len(img_files['images'])):
    image_names.append(img_files['images'][i]['file_name'])

data_numpy = cv2.imread(image_file,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
if data_numpy is None:
    raise ValueError('Fail to read {}'.format(image_file))

# object detection box
box = [300, 100, 200, 250]
c, s = _box2cs(box, data_numpy.shape[0], data_numpy.shape[1])
r = 0

trans = get_affine_transform(c, s, r, config.MODEL.IMAGE_SIZE)
input = cv2.warpAffine(
    data_numpy,
    trans,
    (int(config.MODEL.IMAGE_SIZE[0]), int(config.MODEL.IMAGE_SIZE[1])),
    flags=cv2.INTER_LINEAR)

# vis transformed image
cv2.imshow('image', input)
cv2.waitKey(1000)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

input = transform(input).unsqueeze(0)
def __getitem__(self, index):
    train_item = self.train_list[index]

    im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg')
    parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations',
                                     train_item + '.png')

    im = cv2.imread(im_path, cv2.IMREAD_COLOR)
    h, w, _ = im.shape
    # np.long was removed in NumPy 1.24; use int64 explicitly.
    parsing_anno = np.zeros((h, w), dtype=np.int64)

    # Get person center and scale
    person_center, s = self._box2cs([0, 0, w - 1, h - 1])
    r = 0

    if self.dataset != 'test':
        # Get parsing annotation
        parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)

        if self.dataset == 'train' or self.dataset == 'trainval':
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if random.random() <= self.flip_prob:
                im = im[:, ::-1, :]
                parsing_anno = parsing_anno[:, ::-1]
                person_center[0] = im.shape[1] - person_center[0] - 1

                # swap left/right parsing labels
                right_idx = [15, 17, 19]
                left_idx = [14, 16, 18]
                for i in range(0, 3):
                    right_pos = np.where(parsing_anno == right_idx[i])
                    left_pos = np.where(parsing_anno == left_idx[i])
                    parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                    parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

    trans = get_affine_transform(person_center, s, r, self.crop_size)
    input = cv2.warpAffine(
        im,
        trans,
        (int(self.crop_size[1]), int(self.crop_size[0])),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0))

    if self.transform:
        input = self.transform(input)

    meta = {
        'name': train_item,
        'center': person_center,
        'height': h,
        'width': w,
        'scale': s,
        'rotation': r
    }

    if self.dataset == 'val' or self.dataset == 'test':
        return input, meta
    else:
        label_parsing = cv2.warpAffine(
            parsing_anno,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_NEAREST,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255))
        label_parsing = torch.from_numpy(label_parsing)

        return input, label_parsing, meta
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Check for a failed read before converting colour spaces.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))
    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(joints, joints_vis)
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    # -------------------------------------------------------- data augmentation
    if self.is_train:
        # scale and rotation augmentation
        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        # flip images
        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

        # brighten/darken image by shifting all pixels. not sure if this actually helps
        # if self.brighten and random.random() <= 0.5:
        #     shift = 2 * np.random.randn()
        #     data_numpy = np.clip(data_numpy + shift, 0, 255).astype(np.uint8)

    trans = get_affine_transform(c, s, r, self.image_size)
    # NOTE: This scales images and crops them to be 256*256. During eval,
    # replace with input = data_numpy
    input = data_numpy
    if 'TEST_MODE' not in self.cfg:
        input = cv2.warpAffine(
            data_numpy,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta