import copy
import logging
import os
import random

import cv2
import numpy as np
import torch
import torchvision.transforms as transforms

# Project-internal helpers such as get_affine_transform, affine_transform_modified,
# xywh2cs, get_max_preds, get_max_preds_soft(_pt), get_distance, fliplr_joints and
# affine_transform are assumed to be importable from this repository's utilities.

logger = logging.getLogger(__name__)


def test_transformation(meta_data, image_size=(288, 384)):
    joints_original = meta_data['j_original'].squeeze().data.cpu().numpy()
    # coordinates in the 384x288 input image box
    joints = meta_data['joints'].squeeze().data.cpu().numpy()
    center, scale = meta_data['center'], meta_data['scale']
    center = center.data.cpu().numpy().reshape(2)
    scale = scale.data.cpu().numpy().reshape(2)
    trans = get_affine_transform(center=center,
                                 scale=scale,
                                 rot=0.0,
                                 output_size=image_size,
                                 inv=0)
    trans_inv = get_affine_transform(center=center,
                                     scale=scale,
                                     rot=0.0,
                                     output_size=image_size,
                                     inv=1)
    # round-trip the joints and measure the residual distance in pixels
    transformed_coordinates = affine_transform_modified(joints_original, trans)
    transformed_coordinates2 = affine_transform_modified(joints, trans_inv)
    dif1 = joints - transformed_coordinates
    dif2 = joints_original - transformed_coordinates2
    # compute the inverse matrix explicitly: [A | t]^-1 = [A^-1 | -A^-1 t]
    inv_compute = np.zeros(trans.shape, trans.dtype)
    inv_compute[:2, :2] = np.linalg.inv(trans[:2, :2])
    inv_compute[:, 2] = -inv_compute[:2, :2] @ trans[:, 2]
    transformed_coordinates3 = affine_transform_modified(joints, inv_compute)
    dif3 = joints_original - transformed_coordinates3
    print(dif1, dif2, dif3)
    return
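
# The manual inverse computed in test_transformation relies on the identity
# [A | t]^-1 = [A^-1 | -A^-1 t]; negating only the translation column is not
# enough. A minimal self-contained check of that identity with a random affine
# matrix (illustrative only, independent of get_affine_transform):
def _check_affine_inverse_identity():
    A = np.random.randn(2, 2) + 2 * np.eye(2)   # well-conditioned linear part
    t = np.random.randn(2)
    trans = np.hstack([A, t.reshape(2, 1)])     # 2x3 affine matrix [A | t]
    inv = np.zeros_like(trans)
    inv[:2, :2] = np.linalg.inv(A)
    inv[:, 2] = -inv[:2, :2] @ t                # translation is rotated/scaled too
    p = np.random.randn(5, 2)                   # a few test points (row vectors)
    q = p @ A.T + t                             # forward transform
    p_back = q @ inv[:2, :2].T + inv[:, 2]      # apply the computed inverse
    assert np.allclose(p, p_back)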
def gather_inputs(args, logger, image_size=(288, 384)):
    root = args.data_path
    img_names = os.listdir(root)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    inputs = []
    # these testing images were cropped from videos of subjects 9 and 11
    for name in img_names:
        image_file = os.path.join(root, name)
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        c, s = xywh2cs(0, 0, data_numpy.shape[1], data_numpy.shape[0])
        r = 0
        trans = get_affine_transform(c, s, r, image_size)
        input = cv2.warpAffine(data_numpy,
                               trans, (image_size[0], image_size[1]),
                               flags=cv2.INTER_LINEAR)
        inputs.append(transform(input).unsqueeze(0))
    return torch.cat(inputs)
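
# gather_inputs relies on get_affine_transform(center, scale, rot, output_size)
# to build the 2x3 crop matrix. A minimal sketch of the common SimpleBaseline /
# HRNet-style construction follows, under the usual "scale is stored in units
# of 200 px" convention; the repo's actual implementation may differ in detail:
def _sketch_get_affine_transform(center, scale, rot, output_size, inv=0):
    def _third_point(a, b):
        # complete the triangle with a point perpendicular to the a->b direction
        d = a - b
        return b + np.array([-d[1], d[0]], dtype=np.float32)

    src_w = scale[0] * 200.0                     # assumed scale convention
    dst_w, dst_h = output_size
    rot_rad = np.deg2rad(rot)
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    # rotate the reference direction [0, -src_w/2] by rot_rad
    src_dir = np.array([src_w * 0.5 * sn, -src_w * 0.5 * cs], dtype=np.float32)
    dst_dir = np.array([0.0, -dst_w * 0.5], dtype=np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0], src[1] = center, center + src_dir
    dst[0] = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32)
    dst[1] = dst[0] + dst_dir
    src[2], dst[2] = _third_point(src[0], src[1]), _third_point(dst[0], dst[1])

    if inv:
        return cv2.getAffineTransform(dst, src)
    return cv2.getAffineTransform(src, dst)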
def accuracy_pixel(output, meta_data, image_size=(288.0, 384.0),
                   arg_max='hard'):
    '''
    Report errors in terms of pixels in the original image plane.
    '''
    if arg_max == 'soft':
        if isinstance(output, np.ndarray):
            pred, max_vals = get_max_preds_soft(output)
        else:
            pred, max_vals = get_max_preds_soft_pt(output)
    elif arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()
        pred, max_vals = get_max_preds(output)
    else:
        raise NotImplementedError
    # multiply by the down-sample ratio to map heatmap coordinates to input pixels
    if not isinstance(pred, np.ndarray):
        pred = pred.data.cpu().numpy()
        max_vals = max_vals.data.cpu().numpy()
    pred *= image_size[0] / output.shape[3]
    # inverse-transform the predictions and compare pixel distance
    centers, scales, rots = meta_data['center'], meta_data['scale'], \
        meta_data['rotation']
    centers = centers.data.cpu().numpy()
    scales = scales.data.cpu().numpy()
    rots = rots.data.cpu().numpy()
    joints_original_batch = meta_data['j_original'].data.cpu().numpy()
    distance_list = []
    all_src_coordinates = []
    for sample_idx in range(len(pred)):
        trans_inv = get_affine_transform(centers[sample_idx],
                                         scales[sample_idx],
                                         rots[sample_idx],
                                         image_size,
                                         inv=1)
        joints_original = joints_original_batch[sample_idx]
        pred_src_coordinates = affine_transform_modified(
            pred[sample_idx], trans_inv)
        all_src_coordinates.append(
            pred_src_coordinates.reshape(1, len(pred_src_coordinates), 2))
        distance_list.append(
            get_distance(joints_original, pred_src_coordinates))
    all_distance = np.hstack(distance_list)
    acc = all_distance
    avg_acc = all_distance.mean()
    cnt = len(all_distance)  # total number of distances evaluated
    return acc, avg_acc, cnt, np.concatenate(all_src_coordinates,
                                             axis=0), pred, max_vals
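
# accuracy_pixel's 'soft' branch calls get_max_preds_soft(_pt), presumably a
# soft-argmax (integral regression) over heatmaps. A minimal numpy sketch of
# that standard technique; the normalization and return convention here are
# assumptions, not this repository's verified implementation:
def _soft_argmax_sketch(heatmaps):
    """heatmaps: (N, K, H, W) array -> (N, K, 2) sub-pixel coordinates."""
    n, k, h, w = heatmaps.shape
    flat = heatmaps.reshape(n, k, -1)
    flat = np.exp(flat - flat.max(axis=2, keepdims=True))
    probs = (flat / flat.sum(axis=2, keepdims=True)).reshape(n, k, h, w)
    xs = probs.sum(axis=2) @ np.arange(w)   # expectation over columns -> x
    ys = probs.sum(axis=3) @ np.arange(h)   # expectation over rows -> y
    pred = np.stack([xs, ys], axis=-1)
    max_vals = heatmaps.reshape(n, k, -1).max(axis=2, keepdims=True)
    return pred, max_vals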
def __getitem__(self, idx):
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # half-body augmentation: occasionally crop to upper or lower body only
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        # random scale and rotation augmentation
        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        # random horizontal flip
        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
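
# generate_target typically renders one Gaussian heatmap per joint. A minimal
# sketch of that standard scheme; heatmap size, sigma, the 4x down-sample ratio
# and the weight handling are assumptions, and the class's generate_target may
# differ:
def _gaussian_target_sketch(joints, joints_vis, heatmap_size=(72, 96), sigma=2):
    """joints: (K, 2+) input-image coords; joints_vis: (K, 2+) visibility."""
    w, h = heatmap_size
    num_joints = joints.shape[0]
    stride = 4  # assumed input-to-heatmap ratio (288/72 = 384/96 = 4)
    target = np.zeros((num_joints, h, w), dtype=np.float32)
    target_weight = joints_vis[:, 0:1].astype(np.float32)
    yy, xx = np.mgrid[0:h, 0:w]
    for i in range(num_joints):
        if target_weight[i, 0] == 0:
            continue
        mu_x, mu_y = joints[i, 0] / stride, joints[i, 1] / stride
        if not (0 <= mu_x < w and 0 <= mu_y < h):
            target_weight[i, 0] = 0   # joint falls outside the heatmap
            continue
        target[i] = np.exp(-((xx - mu_x) ** 2 + (yy - mu_y) ** 2)
                           / (2 * sigma ** 2))
    return target, target_weight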