def inference_model(self, x, flip_pairs=None):
    """Run the head on ``x`` and return the heatmaps as a numpy array.

    When ``flip_pairs`` is given, the 5-D output of ``self.forward`` is
    folded to 4-D, passed through ``flip_back``, unfolded to 5-D again and,
    if ``test_cfg['shift_heatmap']`` is truthy, shifted by one position
    along the last axis.

    Args:
        x: Input features, passed unchanged to ``self.forward``.
        flip_pairs (None | list[tuple]): Pairs of keypoints which are
            mirrored. ``None`` skips the flip handling entirely.

    Returns:
        np.ndarray: Output heatmaps.
    """
    heatmap = self.forward(x)

    # Plain (non-flipped) case: just move the result to numpy.
    if flip_pairs is None:
        return heatmap.detach().cpu().numpy()

    batch, joints, depth, height, width = heatmap.shape
    # flip_back is fed a 4-D array, so merge the keypoint and depth axes.
    folded = heatmap.reshape(batch, joints * depth, height, width)
    flipped = flip_back(
        folded.detach().cpu().numpy(),
        flip_pairs,
        target_type=self.target_type)
    # Restore the separate keypoint / depth axes.
    flipped = flipped.reshape(batch, joints, depth, height, width)

    # Feature is not aligned: shift the flipped heatmap for higher accuracy.
    if self.test_cfg.get('shift_heatmap', False):
        flipped[..., 1:] = flipped[..., :-1]
    return flipped
def forward_test(self, img, img_metas, **kwargs):
    """Run single-image inference and decode keypoints from the heatmap.

    The image is passed through ``self.backbone`` (and ``self.keypoint_head``
    when ``self.with_keypoint`` is set). With ``test_cfg['flip_test']`` the
    same is done for the horizontally flipped image and both heatmaps are
    averaged before decoding with ``keypoints_from_heatmaps``.

    Args:
        img (torch.Tensor): Input batch; must contain exactly one image.
        img_metas (list[dict]): One-element list. The dict must provide
            ``flip_pairs``, ``center``, ``scale`` and ``image_file``;
            ``bbox_score`` is optional and defaults to 1.0.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        tuple: ``(all_preds, all_boxes, image_path)`` where ``all_preds`` is
        a float32 array of shape (1, K, 3) holding the two coordinates and
        ``maxvals`` per keypoint, ``all_boxes`` is a (1, 6) array holding
        center, scale, ``prod(scale * 200)`` and score, and ``image_path``
        is a list built from ``img_metas['image_file']``.
    """
    assert img.size(0) == 1
    assert len(img_metas) == 1
    img_metas = img_metas[0]
    flip_pairs = img_metas['flip_pairs']

    # compute output
    output = self.backbone(img)
    if self.with_keypoint:
        output = self.keypoint_head(output)
    if isinstance(output, list):
        output = output[-1]

    if self.test_cfg['flip_test']:
        img_flipped = img.flip(3)
        output_flipped = self.backbone(img_flipped)
        if self.with_keypoint:
            output_flipped = self.keypoint_head(output_flipped)
        if isinstance(output_flipped, list):
            output_flipped = output_flipped[-1]
        # detach() so the numpy conversion also works when autograd is on.
        output_flipped = flip_back(
            output_flipped.detach().cpu().numpy(), flip_pairs)
        output_flipped = torch.from_numpy(output_flipped.copy()).to(
            output.device)

        # feature is not aligned, shift flipped heatmap for higher accuracy
        if self.test_cfg['shift_heatmap']:
            output_flipped[:, :, :, 1:] = \
                output_flipped.clone()[:, :, :, 0:-1]
        output = (output + output_flipped) * 0.5

    c = img_metas['center'].reshape(1, -1)
    s = img_metas['scale'].reshape(1, -1)

    # bbox_score is optional; fall back to a neutral score of 1.0.
    # .item() yields a true scalar (NumPy >= 1.25 deprecates assigning a
    # shape-(1,) array to a single element).
    score = 1.0
    if 'bbox_score' in img_metas:
        score = np.array(img_metas['bbox_score']).item()

    # clone() keeps `output` untouched in case the decoder edits its input.
    preds, maxvals = keypoints_from_heatmaps(
        output.detach().clone().cpu().numpy(),
        c,
        s,
        post_process=self.test_cfg['post_process'],
        unbiased=self.test_cfg['unbiased_decoding'],
        kernel=self.test_cfg['modulate_kernel'])

    all_preds = np.zeros((1, output.shape[1], 3), dtype=np.float32)
    all_boxes = np.zeros((1, 6))
    image_path = []

    all_preds[0, :, 0:2] = preds[:, :, 0:2]
    all_preds[0, :, 2:3] = maxvals
    all_boxes[0, 0:2] = c[:, 0:2]
    all_boxes[0, 2:4] = s[:, 0:2]
    # s has exactly one row, so take that row's product as a scalar.
    all_boxes[0, 4] = np.prod(s * 200.0, axis=1)[0]
    all_boxes[0, 5] = score
    # NOTE(review): extend() on a str adds one character per entry;
    # presumably callers join the list again - confirm before changing.
    image_path.extend(img_metas['image_file'])

    return all_preds, all_boxes, image_path
def inference_model(self, x, flip_pairs=None):
    """Run the head on ``x`` and return the last-stage heatmaps as numpy.

    ``self.forward`` must return a list; only its last element is used.
    When ``flip_pairs`` is given, that heatmap is passed through
    ``flip_back`` and, if ``test_cfg['shift_heatmap']`` is truthy, shifted
    by one position along the last axis.

    Args:
        x: Input features, passed unchanged to ``self.forward``.
        flip_pairs (None | list[tuple]): Pairs of keypoints which are
            mirrored. ``None`` skips the flip handling entirely.

    Returns:
        np.ndarray: Output heatmaps.
    """
    stage_outputs = self.forward(x)
    assert isinstance(stage_outputs, list)
    heatmap = stage_outputs[-1].detach().cpu().numpy()

    if flip_pairs is None:
        return heatmap

    # perform flip
    flipped = flip_back(heatmap, flip_pairs, target_type=self.target_type)
    # feature is not aligned, shift flipped heatmap for higher accuracy
    if self.test_cfg.get('shift_heatmap', False):
        flipped[:, :, :, 1:] = flipped[:, :, :, :-1]
    return flipped
def process_head(self, output, img, img_metas, return_heatmap=False):
    """Process heatmap and keypoints from backbone features.

    Applies ``self.keypoint_head`` (when ``self.with_keypoint`` is set),
    optionally averages in the heatmap of the horizontally flipped image
    (``test_cfg['flip_test']``), and decodes keypoints with
    ``keypoints_from_heatmaps``.

    Args:
        output: Backbone features for ``img``.
        img (torch.Tensor): Input image batch; re-run through the backbone
            in flipped form when flip testing is enabled.
        img_metas (dict): Must provide ``flip_pairs``, ``center``, ``scale``
            and ``image_file``; ``bbox_score`` is optional (default 1.0).
        return_heatmap (bool): When False, ``None`` is returned in place of
            the heatmap.

    Returns:
        tuple: ``(all_preds, all_boxes, image_path, output_heatmap)`` with
        ``all_preds`` of shape (1, K, 3), ``all_boxes`` of shape (1, 6),
        both float32.
    """
    flip_pairs = img_metas['flip_pairs']

    if self.with_keypoint:
        output = self.keypoint_head(output)
    if isinstance(output, list):
        output = output[-1]
    output_heatmap = output.detach().cpu().numpy()

    if self.test_cfg['flip_test']:
        img_flipped = img.flip(3)
        output_flipped = self.backbone(img_flipped)
        if self.with_keypoint:
            output_flipped = self.keypoint_head(output_flipped)
        if isinstance(output_flipped, list):
            output_flipped = output_flipped[-1]
        output_flipped = flip_back(output_flipped.detach().cpu().numpy(),
                                   flip_pairs)

        # feature is not aligned, shift flipped heatmap for higher accuracy
        if self.test_cfg['shift_heatmap']:
            output_flipped[:, :, :, 1:] = output_flipped[:, :, :, :-1]
        output_heatmap = (output_heatmap + output_flipped) * 0.5

    c = img_metas['center'].reshape(1, -1)
    s = img_metas['scale'].reshape(1, -1)

    # .item() yields a true scalar; assigning a shape-(1,) array to a single
    # element is deprecated since NumPy 1.25.
    score = 1.0
    if 'bbox_score' in img_metas:
        score = np.array(img_metas['bbox_score']).item()

    preds, maxvals = keypoints_from_heatmaps(
        output_heatmap,
        c,
        s,
        post_process=self.test_cfg['post_process'],
        unbiased=self.test_cfg['unbiased_decoding'],
        kernel=self.test_cfg['modulate_kernel'])

    all_preds = np.zeros((1, output_heatmap.shape[1], 3), dtype=np.float32)
    all_boxes = np.zeros((1, 6), dtype=np.float32)
    image_path = []

    all_preds[0, :, 0:2] = preds[:, :, 0:2]
    all_preds[0, :, 2:3] = maxvals
    all_boxes[0, 0:2] = c[:, 0:2]
    all_boxes[0, 2:4] = s[:, 0:2]
    # s has exactly one row, so take that row's product as a scalar.
    all_boxes[0, 4] = np.prod(s * 200.0, axis=1)[0]
    all_boxes[0, 5] = score
    # NOTE(review): extend() on a str adds one character per entry;
    # presumably callers join the list again - confirm before changing.
    image_path.extend(img_metas['image_file'])

    if not return_heatmap:
        output_heatmap = None

    return all_preds, all_boxes, image_path, output_heatmap
def inference_model(self, x, flip_pairs=None):
    """Run the head on ``x`` and return its three outputs as numpy arrays.

    Without ``flip_pairs`` every element of the ``self.forward`` result is
    converted to numpy and returned in a new list. With ``flip_pairs`` the
    list returned by ``self.forward`` is updated in place: element 0 (the
    5-D heatmap) is flipped back via ``flip_back``, element 1 is negated,
    and columns 0 and 1 of element 2 are swapped.

    Args:
        x: Input features, passed unchanged to ``self.forward``.
        flip_pairs (None | list[tuple]): Pairs of keypoints which are
            mirrored. ``None`` skips the flip handling entirely.

    Returns:
        list[np.ndarray]: list of output hand keypoint heatmaps, relative
        root depth and hand type.
    """
    output = self.forward(x)

    if flip_pairs is None:
        return [item.detach().cpu().numpy() for item in output]

    # --- 3D heatmap -------------------------------------------------------
    heatmap_3d = output[0]
    batch, joints, depth, height, width = heatmap_3d.shape
    # flip_back is fed a 4-D array, so merge the keypoint and depth axes.
    folded = heatmap_3d.reshape(batch, joints * depth, height, width)
    flipped = flip_back(
        folded.detach().cpu().numpy(),
        flip_pairs,
        target_type=self.target_type)
    # Restore the separate keypoint / depth axes.
    flipped = flipped.reshape(batch, joints, depth, height, width)
    # feature is not aligned, shift flipped heatmap for higher accuracy
    if self.test_cfg.get('shift_heatmap', False):
        flipped[..., 1:] = flipped[..., :-1]
    output[0] = flipped

    # --- relative hand root depth: sign is inverted -----------------------
    output[1] = -output[1].detach().cpu().numpy()

    # --- hand type: exchange the first two columns ------------------------
    hand_type = output[2].detach().cpu().numpy()
    swapped = hand_type.copy()
    swapped[:, [0, 1]] = hand_type[:, [1, 0]]
    output[2] = swapped

    return output
def process_head(self, output, img, img_metas, return_heatmap=False):
    """Process heatmap and keypoints from backbone features.

    Batched variant: applies ``self.keypoint_head`` (when
    ``self.with_keypoint`` is set), optionally averages in the heatmap of
    the horizontally flipped batch (``test_cfg['flip_test']``), and decodes
    keypoints for every image with ``keypoints_from_heatmaps``.

    Args:
        output: Backbone features for ``img``.
        img (torch.Tensor): Input batch; ``img.size(0)`` is the batch size.
        img_metas (list[dict]): One dict per image providing ``center``,
            ``scale`` and ``image_file``; ``bbox_score`` is optional
            (default 1.0). ``flip_pairs`` is read from the first entry.
            ``bbox_id`` must be present in the first entry when the batch
            holds more than one image.
        return_heatmap (bool): When False, ``None`` is returned in place of
            the heatmap.

    Returns:
        tuple: ``(all_preds, all_boxes, image_path, output_heatmap)``, with
        ``bbox_ids`` appended as a fifth element when the first meta dict
        contains ``bbox_id``. ``all_preds`` has shape (N, K, 3) and
        ``all_boxes`` shape (N, 6), both float32.
    """
    flip_pairs = img_metas[0]['flip_pairs']
    batch_size = img.size(0)

    if batch_size > 1:
        assert 'bbox_id' in img_metas[0]

    if self.with_keypoint:
        output = self.keypoint_head(output)
    if isinstance(output, list):
        output = output[-1]
    output_heatmap = output.detach().cpu().numpy()

    if self.test_cfg['flip_test']:
        img_flipped = img.flip(3)
        output_flipped = self.backbone(img_flipped)
        if self.with_keypoint:
            output_flipped = self.keypoint_head(output_flipped)
        if isinstance(output_flipped, list):
            output_flipped = output_flipped[-1]
        output_flipped = flip_back(
            output_flipped.detach().cpu().numpy(),
            flip_pairs,
            target_type=self.target_type)

        # feature is not aligned, shift flipped heatmap for higher accuracy
        if self.test_cfg['shift_heatmap']:
            output_flipped[:, :, :, 1:] = output_flipped[:, :, :, :-1]
        output_heatmap = (output_heatmap + output_flipped) * 0.5

    c = np.zeros((batch_size, 2))
    s = np.zeros((batch_size, 2))
    image_path = []
    score = np.ones(batch_size)
    # bbox ids are only collected when the first meta dict carries one.
    bbox_ids = [] if 'bbox_id' in img_metas[0] else None

    for i in range(batch_size):
        meta = img_metas[i]
        c[i, :] = meta['center']
        s[i, :] = meta['scale']
        image_path.append(meta['image_file'])

        if 'bbox_score' in meta:
            # .item() yields a true scalar; assigning a shape-(1,) array to
            # a single element is deprecated since NumPy 1.25.
            score[i] = np.array(meta['bbox_score']).item()
        if bbox_ids is not None:
            bbox_ids.append(meta['bbox_id'])

    preds, maxvals = keypoints_from_heatmaps(
        output_heatmap,
        c,
        s,
        post_process=self.test_cfg['post_process'],
        unbiased=self.test_cfg.get('unbiased_decoding', False),
        kernel=self.test_cfg['modulate_kernel'],
        use_udp=self.test_cfg.get('use_udp', False),
        valid_radius_factor=self.test_cfg.get('valid_radius_factor',
                                              0.0546875),
        target_type=self.test_cfg.get('target_type', 'GaussianHeatMap'))

    all_preds = np.zeros((batch_size, output.shape[1], 3), dtype=np.float32)
    all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
    all_preds[:, :, 0:2] = preds[:, :, 0:2]
    all_preds[:, :, 2:3] = maxvals
    all_boxes[:, 0:2] = c[:, 0:2]
    all_boxes[:, 2:4] = s[:, 0:2]
    all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
    all_boxes[:, 5] = score

    if not return_heatmap:
        output_heatmap = None

    if bbox_ids is not None:
        return all_preds, all_boxes, image_path, output_heatmap, bbox_ids
    return all_preds, all_boxes, image_path, output_heatmap
def process_head(self, output, img, img_metas, return_heatmap=False):
    """Process heatmap and keypoints from backbone features.

    Applies ``self.keypoint_head`` (when ``self.with_keypoint`` is set),
    optionally averages in the heatmap of the horizontally flipped batch
    (``test_cfg['flip_test']``), decodes keypoints for the whole batch with
    ``keypoints_from_heatmaps`` and packs one result entry per image.

    Args:
        output: Backbone features for ``img``.
        img (torch.Tensor): Input batch; re-run through the backbone in
            flipped form when flip testing is enabled.
        img_metas (list[dict]): One dict per image providing ``center``,
            ``scale`` and ``image_file``. ``flip_pairs`` is read from the
            first entry. Scores are read from ``bbox_score`` of every entry
            when the first entry has that key, otherwise all scores are 1.
        return_heatmap (bool): When False, ``None`` is stored in place of
            the heatmap.

    Returns:
        list[list]: One ``[all_preds, all_boxes, image_path,
        output_heatmap]`` entry per image, with ``all_preds`` of shape
        (1, K, 3) and ``all_boxes`` of shape (1, 6), both float32.
        NOTE(review): every entry references the same whole-batch heatmap
        array, not a per-image slice - confirm this is intended.
    """
    num_images = len(img_metas)
    flip_pairs = img_metas[0]['flip_pairs']

    if self.with_keypoint:
        output = self.keypoint_head(output)
    if isinstance(output, list):
        output = output[-1]
    output_heatmap = output.detach().cpu().numpy()

    if self.test_cfg['flip_test']:
        img_flipped = img.flip(3)
        output_flipped = self.backbone(img_flipped)
        if self.with_keypoint:
            output_flipped = self.keypoint_head(output_flipped)
        if isinstance(output_flipped, list):
            output_flipped = output_flipped[-1]
        output_flipped = flip_back(
            output_flipped.detach().cpu().numpy(),
            flip_pairs,
            target_type=self.target_type)

        # feature is not aligned, shift flipped heatmap for higher accuracy
        if self.test_cfg['shift_heatmap']:
            output_flipped[:, :, :, 1:] = output_flipped[:, :, :, :-1]
        output_heatmap = (output_heatmap + output_flipped) * 0.5

    c = np.concatenate([item['center'].reshape(1, -1) for item in img_metas])
    s = np.concatenate([item['scale'].reshape(1, -1) for item in img_metas])

    if 'bbox_score' in img_metas[0]:
        # .item() yields true scalars; assigning a shape-(1,) array to a
        # single element is deprecated since NumPy 1.25.
        score = [np.array(item['bbox_score']).item() for item in img_metas]
    else:
        score = np.ones(num_images)

    preds, maxvals = keypoints_from_heatmaps(
        output_heatmap,
        c,
        s,
        post_process=self.test_cfg['post_process'],
        unbiased=self.test_cfg.get('unbiased_decoding', False),
        kernel=self.test_cfg['modulate_kernel'],
        use_udp=self.test_cfg.get('use_udp', False),
        valid_radius_factor=self.test_cfg.get('valid_radius_factor',
                                              0.0546875),
        target_type=self.test_cfg.get('target_type', 'GaussianHeatMap'))

    # Loop-invariant: decide once whether the heatmap is handed back.
    if not return_heatmap:
        output_heatmap = None

    results = []
    for i in range(num_images):
        image_path = []
        all_preds = np.zeros((1, preds.shape[1], 3), dtype=np.float32)
        all_boxes = np.zeros((1, 6), dtype=np.float32)

        all_preds[0, :, 0:2] = preds[i, :, 0:2]
        all_preds[0, :, 2:3] = maxvals[i]
        all_boxes[0, 0:2] = c[i, 0:2]
        all_boxes[0, 2:4] = s[i, 0:2]
        # Product over this image's scale row, as a scalar.
        all_boxes[0, 4] = np.prod(s[i] * 200.0)
        all_boxes[0, 5] = score[i]
        # NOTE(review): extend() on a str adds one character per entry;
        # presumably callers join the list again - confirm before changing.
        image_path.extend(img_metas[i]['image_file'])

        results.append([all_preds, all_boxes, image_path, output_heatmap])

    return results