def do_validation_step(model, input, target, target_weight=None, flip=False):
    assert not model.training, 'model must be in evaluation mode.'
    assert len(input) == len(target), 'input and target must contain the same number of examples.'

    # Forward pass and loss calculation.
    output = model(input)
    loss = sum(joints_mse_loss(o, target, target_weight) for o in output)

    # Get the heatmaps.
    if flip:
        # If `flip` is true, perform horizontally flipped inference as well. This should
        # result in more robust predictions at the expense of additional compute.
        flip_input = fliplr(input.clone().cpu().numpy())
        flip_input = torch.as_tensor(flip_input, dtype=torch.float32,
                                     device=input.device)  # keep the flipped batch on the same device as the input
        flip_output = model(flip_input)
        flip_output = flip_output[-1].cpu()
        flip_output = flip_back(flip_output)
        heatmaps = (output[-1].cpu() + flip_output) / 2
    else:
        heatmaps = output[-1].cpu()

    return heatmaps, loss.item()
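In this revision `flip_back` is called without explicit joint indices; conceptually it has to mirror the flipped heatmaps back and swap the channels of each left/right joint pair. A minimal sketch of that idea, with the pair permutation made explicit as a hypothetical `hflip_indices` argument (the later revision passes it as `data_info.hflip_indices`):

import torch

def flip_back_sketch(flip_heatmaps, hflip_indices):
    """Undo a horizontal-flip inference pass.

    flip_heatmaps: (N, K, H, W) heatmaps predicted from the flipped input.
    hflip_indices: permutation mapping every joint index to its left/right
                   counterpart (e.g. left wrist <-> right wrist).
    """
    # Swap left/right joint channels, then mirror each heatmap along its width.
    return torch.flip(flip_heatmaps[:, hflip_indices], dims=[-1])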
def do_validation_step(model, input, target, data_info, target_weight=None, flip=False):
    assert not model.training, 'model must be in evaluation mode.'
    assert len(input) == len(target), 'input and target must contain the same number of examples.'

    # Forward pass and loss calculation.
    start = time.time()
    output = model(input)
    inference_time = (time.time() - start) * 1000
    loss = sum(joints_mse_loss(o, target, target_weight) for o in output)

    # Get the heatmaps.
    if flip:
        # If `flip` is true, perform horizontally flipped inference as well. This should
        # result in more robust predictions at the expense of additional compute.
        flip_input = fliplr(input)
        flip_output = model(flip_input)
        flip_output = flip_output[-1].cpu()
        flip_output = flip_back(flip_output.detach(), data_info.hflip_indices)
        heatmaps = (output[-1].cpu() + flip_output) / 2
    else:
        heatmaps = output[-1].cpu()

    return heatmaps, loss.item(), inference_time
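A possible driver for the updated signature, averaging loss and per-batch inference time (in milliseconds) over a validation set; `val_loader` and the function name are illustrative assumptions rather than part of the original code:

import torch

def run_validation_sketch(model, val_loader, data_info, flip=False):
    """Illustrative loop: mean loss and mean inference time over a validation set."""
    model.eval()  # do_validation_step asserts the model is in eval mode
    total_loss, total_ms, batches = 0.0, 0.0, 0
    with torch.no_grad():
        for input, target, meta in val_loader:
            heatmaps, loss, inference_time = do_validation_step(
                model, input, target, data_info,
                target_weight=meta.get('target_weight'), flip=flip)
            total_loss += loss
            total_ms += inference_time
            batches += 1
    return total_loss / batches, total_ms / batches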
def test_fliplr(device):
    tensor = torch.as_tensor([[
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]], dtype=torch.float32)
    expected = torch.as_tensor([[
        [3, 2, 1],
        [6, 5, 4],
        [9, 8, 7],
    ]], dtype=torch.float32)
    actual = fliplr(tensor)
    assert_allclose(actual, expected)
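The test only pins down how `fliplr` behaves along the last (width) dimension; one implementation consistent with it, assuming a torch tensor input, would be:

import torch

def fliplr_sketch(tensor):
    """Mirror an image-like tensor (..., H, W) along its width dimension."""
    return torch.flip(tensor, dims=[-1])

The older call sites that pass numpy arrays would need the equivalent numpy operation (e.g. np.flip(arr, axis=-1).copy()) instead.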
def estimate_heatmaps(self, images, flip=False):
    is_batched = _check_batched(images)
    raw_images = images if is_batched else images.unsqueeze(0)
    input_tensor = torch.empty((len(raw_images), 3, *self.input_shape),
                               device=self.device, dtype=torch.float32)
    for i, raw_image in enumerate(raw_images):
        input_tensor[i] = self.prepare_image(raw_image)
    heatmaps = self.do_forward(input_tensor)[-1].cpu()
    if flip:
        flip_input = fliplr(input_tensor)
        flip_heatmaps = self.do_forward(flip_input)[-1].cpu()
        heatmaps += flip_back(flip_heatmaps, self.data_info.hflip_indices)
        heatmaps /= 2
    if is_batched:
        return heatmaps
    else:
        return heatmaps[0]
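Downstream, the heatmaps returned here are typically reduced to per-joint image coordinates. A minimal argmax-based decoder (no sub-pixel refinement), with an illustrative name, might look like:

import torch

def heatmaps_to_coords_sketch(heatmaps):
    """Convert (N, K, H, W) heatmaps into (N, K, 2) (x, y) coordinates plus peak values."""
    n, k, h, w = heatmaps.shape
    flat = heatmaps.view(n, k, -1)
    max_vals, idx = flat.max(dim=-1)
    xs = (idx % w).float()
    ys = torch.div(idx, w, rounding_mode='floor').float()
    return torch.stack([xs, ys], dim=-1), max_vals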
def estimate_heatmaps(self, images, mean, stddev, flip=False):
    is_batched = _check_batched(images)
    raw_images = images if is_batched else images.unsqueeze(0)
    input_tensor = torch.empty((len(raw_images), 3, 256, 256),
                               device=self.device, dtype=torch.float32)
    for i, raw_image in enumerate(raw_images):
        input_tensor[i] = self.prepare_image(raw_image, mean, stddev)
    heatmaps = self.do_forward(input_tensor)[-1].cpu()
    if flip:
        flip_input = fliplr(input_tensor.cpu().clone().numpy())
        flip_input = torch.as_tensor(flip_input, device=self.device, dtype=torch.float32)
        flip_heatmaps = self.do_forward(flip_input)[-1].cpu()
        heatmaps += flip_back(flip_heatmaps)
        heatmaps /= 2
    if is_batched:
        return heatmaps
    else:
        return heatmaps[0]
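Both variants delegate per-image resizing and normalisation to `self.prepare_image`. A plausible standalone version of that step, assuming a CxHxW image already scaled to [0, 1] and per-channel mean/stddev values (the function name is hypothetical):

import torch
import torch.nn.functional as F

def prepare_image_sketch(image, mean, stddev, out_size=(256, 256)):
    """Resize a CxHxW image to out_size and normalise each channel."""
    resized = F.interpolate(image.unsqueeze(0), size=out_size,
                            mode='bilinear', align_corners=False)[0]
    mean = torch.as_tensor(mean, dtype=resized.dtype).view(-1, 1, 1)
    stddev = torch.as_tensor(stddev, dtype=resized.dtype).view(-1, 1, 1)
    return (resized - mean) / stddev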
def __getitem__(self, index):
    sf = self.scale_factor
    rf = self.rot_factor
    if self.is_train:
        a = self.anno[self.train_list[index]]
    else:
        a = self.anno[self.valid_list[index]]

    img_path = os.path.join(self.img_folder, a['img_paths'])
    pts = torch.Tensor(a['joint_self'])
    # pts[:, 0:2] -= 1  # Convert pts to zero based

    # c = torch.Tensor(a['objpos']) - 1
    c = torch.Tensor(a['objpos'])
    s = a['scale_provided']

    # Adjust center/scale slightly to avoid cropping limbs
    if c[0] != -1:
        c[1] = c[1] + 15 * s
        s = s * 1.25

    # For single-person pose estimation with a centered/scaled figure
    nparts = pts.size(0)
    img = load_image(img_path)  # CxHxW

    r = 0
    if self.is_train:
        s = s * torch.randn(1).mul_(sf).add_(1).clamp(1 - sf, 1 + sf)[0]
        r = torch.randn(1).mul_(rf).clamp(-2 * rf, 2 * rf)[0] if random.random() <= 0.6 else 0

        # Flip
        if random.random() <= 0.5:
            img = torch.from_numpy(fliplr(img.numpy())).float()
            pts = shufflelr(pts, width=img.size(2), dataset='mpii')
            c[0] = img.size(2) - c[0]

        # Color
        img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
        img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
        img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

    # Prepare image and groundtruth map
    inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
    inp = color_normalize(inp, self.mean, self.std)

    # Generate ground truth
    tpts = pts.clone()
    target = torch.zeros(nparts, self.out_res, self.out_res)
    target_weight = tpts[:, 2].clone().view(nparts, 1)

    for i in range(nparts):
        # if tpts[i, 2] > 0:  # This is evil!!
        if tpts[i, 1] > 0:
            tpts[i, 0:2] = to_torch(transform(tpts[i, 0:2] + 1, c, s,
                                              [self.out_res, self.out_res], rot=r))
            target[i], vis = draw_labelmap(target[i], tpts[i] - 1, self.sigma,
                                           type=self.label_type)
            target_weight[i, 0] *= vis

    # Meta info
    meta = {'index': index, 'center': c, 'scale': s,
            'pts': pts, 'tpts': tpts, 'target_weight': target_weight}

    return inp, target, meta
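The ground-truth `target` above is filled in by `draw_labelmap`, which stamps a small Gaussian at each transformed joint position and reports whether the joint landed inside the map. A simplified sketch of that idea, assuming the point is already in output-resolution coordinates (name and exact windowing are illustrative):

import torch

def draw_gaussian_sketch(heatmap, pt, sigma):
    """Render a 2D Gaussian centred at pt=(x, y) onto heatmap; return (heatmap, visibility)."""
    h, w = heatmap.shape
    x, y = int(pt[0]), int(pt[1])
    if x < 0 or y < 0 or x >= w or y >= h:
        return heatmap, 0  # joint falls outside the map: treat as not visible
    ys = torch.arange(h, dtype=torch.float32).view(-1, 1)
    xs = torch.arange(w, dtype=torch.float32).view(1, -1)
    gaussian = torch.exp(-((xs - x) ** 2 + (ys - y) ** 2) / (2 * sigma ** 2))
    return torch.max(heatmap, gaussian), 1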
def __getitem__(self, index):
    """Get an image referenced by index."""
    sf = self.scale_factor  # Generally from 0 to 0.25
    rf = self.rot_factor
    if self.is_train:
        a = self.train_list.iloc[index]
    else:
        a = self.valid_list.iloc[index]

    img_path = a['img_paths']

    # cv2 based image transformations
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)  # HxWxC
    rows, cols, colors = img.shape

    # Joint label positions
    pts = torch.Tensor(a['joint_self'])
    # pts[:, 0:2] -= 1  # Convert pts to zero based

    c = tuple(a['objpos'])
    s = a['scale_provided']  # In MPII, scale_provided is the size of the bounding box wrt 200 px

    # Depending on the flag "crop", we can decide to either:
    #   True:  Crop to crop_size around obj_pos
    #   False: Keep original res
    # Then we downsize to inp_res
    if s == -1:  # Yogi data scale_provided is initialized to -1
        if self.crop:
            # If crop, then crop crop_size x crop_size around obj_pos
            s = self.crop_size / 200
            # Move center away from the joint by a random distance < max_dist pixels
            max_dist = 64
            c = (int(torch.randn(1).clamp(-1, 1).mul(max_dist).add(c[0]).clamp(0, cols - 1)),
                 int(torch.randn(1).clamp(-1, 1).mul(max_dist).add(c[1]).clamp(0, rows - 1)))
        else:
            # If no crop, then use the entire image
            s = rows / 200
            # Use the center of the image to rotate
            c = (int(cols / 2), int(rows / 2))

    # # Adjust scale slightly to avoid cropping limbs
    # if c[0] != -1:
    #     c[1] = c[1] + 15 * s
    #     s = s * 1.25

    # For pose estimation with a centered/scaled figure
    nparts = pts.size(0)

    r = 0
    if self.is_train:
        # Given sf, choose scale from [1-sf, 1+sf]
        # For sf = 0.25, scale is chosen from [0.75, 1.25]
        s = torch.randn(1).mul_(sf).add_(1).clamp(1 - sf, 1 + sf)[0]
        # Given rf, choose rotation from [-rf, rf]
        # For rf = 30, rotation is chosen from [-30, 30]
        r = torch.randn(1).mul_(rf).clamp(-rf, rf)[0] if random.random() <= 0.6 else 0

    if self.mode == 'original':
        img = load_image(img_path)  # CxHxW
        c = torch.Tensor(c)

        if self.is_train:
            # Flip
            if self.fliplr and random.random() <= 0.5:
                img = torch.from_numpy(fliplr(img.numpy())).float()
                pts = shufflelr(pts, width=img.size(2), dataset='yogi')  # TODO
                c[0] = img.size(2) - c[0]

            # Color
            # img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            # img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            # img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
        inp = color_normalize(inp, self.mean, self.std)
        t = None
    else:
        if self.is_train:
            # Flip
            if self.fliplr and random.random() <= 0.5:
                img = cv2.flip(img, 1)
                pts = torch.Tensor([[cols - x[0] - 1, x[1], x[2]] for x in pts])  # keep the visibility flag
                # TODO: Shuffle left and right labels

        # Rotate, scale and crop image using inp_res
        # And get transformation matrix
        img, t_inp = cv2_crop(img, c, s, (self.inp_res, self.inp_res), rot=r,
                              crop=self.crop, crop_size=self.crop_size)

        # Get transformation matrix for resizing from inp_res to out_res
        # No other changes, i.e. new_center is center, no cropping, etc.
        # Please note scaling to out_res has to be done before
        _, t_resize = cv2_resize(img, (self.out_res, self.out_res))
        t = combine_transformations(t_resize, t_inp)

        # TODO Update color normalize
        inp = img_normalize(img, self.mean, self.std)

        # if self.is_train:
        #     # Color
        #     inp[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
        #     inp[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
        #     inp[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

    # Generate ground truth
    tpts = pts.clone()
    target = torch.zeros(nparts, self.out_res, self.out_res)
    target_weight = tpts[:, 2].clone().view(nparts, 1)

    for i in range(nparts):
        if tpts[i, 2] > 0:  # This is evil!!
            # if tpts[i, 1] > 0:
            # Hack: Change later -
            # The +1 and -1 wrt tpts is there in the original code
            # Using int(self.mode == 'original') to do the +1, -1
            tpts[i, 0:2] = to_torch(
                transform(tpts[i, 0:2] + int(self.mode == 'original'), c, s,
                          [self.out_res, self.out_res], rot=r, t=t))
            target[i], vis = draw_labelmap(target[i],
                                           tpts[i] - int(self.mode == 'original'),
                                           self.sigma, type=self.label_type)
            target_weight[i, 0] *= vis

    # Meta info
    meta = {
        'index': index,
        'center': c,
        'scale': s,
        'pts': pts,
        'tpts': tpts,
        'target_weight': target_weight,
        'inp_res': self.inp_res,
        'out_res': self.out_res,
        'rot': r,
        'img_paths': img_path
    }

    return inp, target, meta
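In the cv2-based branch above, `t_inp` and `t_resize` are affine transforms and `combine_transformations` simply composes them, so a label point can be mapped into the output resolution with a single matrix multiply in homogeneous coordinates. A sketch of both operations, assuming 3x3 matrices (the names are illustrative):

import numpy as np

def combine_transformations_sketch(t_second, t_first):
    """Compose two 3x3 transforms: apply t_first, then t_second."""
    return t_second @ t_first

def apply_transformation_sketch(pt, t):
    """Map a 2D point (x, y) through a 3x3 transform in homogeneous coordinates."""
    x, y, w = t @ np.array([pt[0], pt[1], 1.0])
    return np.array([x / w, y / w])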