Example #1
    def inference_model(self, x, flip_pairs=None):
        """Inference function.

        Returns:
            output_heatmap (np.ndarray): Output heatmaps.

        Args:
            x (torch.Tensor[NxKxHxW]): Input features.
            flip_pairs (None | list[tuple()):
                Pairs of keypoints which are mirrored.
        """

        output = self.forward(x)

        if flip_pairs is not None:
            N, K, D, H, W = output.shape
            # reshape 3D heatmap to 2D heatmap
            output = output.reshape(N, K * D, H, W)
            # 2D heatmap flip
            output_heatmap = flip_back(
                output.detach().cpu().numpy(),
                flip_pairs,
                target_type=self.target_type)
            # reshape back to 3D heatmap
            output_heatmap = output_heatmap.reshape(N, K, D, H, W)
            # feature is not aligned, shift flipped heatmap for higher accuracy
            if self.test_cfg.get('shift_heatmap', False):
                output_heatmap[..., 1:] = output_heatmap[..., :-1]
        else:
            output_heatmap = output.detach().cpu().numpy()
        return output_heatmap
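The flip-test branch above relies on flip_back to undo the horizontal flip. As a point of reference, here is a minimal NumPy sketch of what flip_back is assumed to do for Gaussian-heatmap targets: swap the channels of each mirrored keypoint pair and mirror the heatmaps along the width axis, so predictions made on a flipped image line up with the original image again (flip_back_sketch is a hypothetical stand-in, not the library function).

import numpy as np

def flip_back_sketch(heatmaps, flip_pairs):
    """Sketch of flip_back: heatmaps is (N, K, H, W) predicted on the
    horizontally flipped image; flip_pairs lists (left, right) indices."""
    flipped_back = heatmaps.copy()
    for left, right in flip_pairs:
        flipped_back[:, left] = heatmaps[:, right]
        flipped_back[:, right] = heatmaps[:, left]
    # undo the horizontal flip along the width axis
    return flipped_back[..., ::-1]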
Example #2
    def forward_test(self, img, img_metas, **kwargs):
        """Run a single-sample forward pass at test time."""
        assert img.size(0) == 1
        assert len(img_metas) == 1

        img_metas = img_metas[0]

        flip_pairs = img_metas['flip_pairs']
        # compute output
        output = self.backbone(img)
        if self.with_keypoint:
            output = self.keypoint_head(output)

        if isinstance(output, list):
            output = output[-1]

        if self.test_cfg['flip_test']:
            img_flipped = img.flip(3)

            output_flipped = self.backbone(img_flipped)
            if self.with_keypoint:
                output_flipped = self.keypoint_head(output_flipped)
            if isinstance(output_flipped, list):
                output_flipped = output_flipped[-1]
            output_flipped = flip_back(output_flipped.cpu().numpy(),
                                       flip_pairs)

            output_flipped = torch.from_numpy(output_flipped.copy()).to(
                output.device)

            # feature is not aligned, shift flipped heatmap for higher accuracy
            if self.test_cfg['shift_heatmap']:
                output_flipped[:, :, :, 1:] = \
                    output_flipped.clone()[:, :, :, 0:-1]
            output = (output + output_flipped) * 0.5

        c = img_metas['center'].reshape(1, -1)
        s = img_metas['scale'].reshape(1, -1)
        score = np.array(img_metas['bbox_score']).reshape(-1)

        preds, maxvals = keypoints_from_heatmaps(
            output.clone().cpu().numpy(),
            c,
            s,
            post_process=self.test_cfg['post_process'],
            unbiased=self.test_cfg['unbiased_decoding'],
            kernel=self.test_cfg['modulate_kernel'])

        all_preds = np.zeros((1, output.shape[1], 3), dtype=np.float32)
        all_boxes = np.zeros((1, 6))
        image_path = []

        all_preds[0, :, 0:2] = preds[:, :, 0:2]
        all_preds[0, :, 2:3] = maxvals
        all_boxes[0, 0:2] = c[:, 0:2]
        all_boxes[0, 2:4] = s[:, 0:2]
        all_boxes[0, 4] = np.prod(s * 200.0, axis=1)
        all_boxes[0, 5] = score
        image_path.append(img_metas['image_file'])

        return all_preds, all_boxes, image_path
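The shift_heatmap step in forward_test compensates for the one-pixel misalignment between the original and flipped feature maps before the two predictions are averaged. A toy NumPy illustration (values made up purely for illustration):

import numpy as np

output = np.zeros((1, 1, 1, 5))
output[..., 2] = 1.0            # original prediction: peak at x = 2

output_flipped = np.zeros((1, 1, 1, 5))
output_flipped[..., 1] = 1.0    # flipped-back prediction: peak one pixel off

output_flipped[..., 1:] = output_flipped[..., :-1]  # shift right by one column
fused = (output + output_flipped) * 0.5             # peaks now coincide at x = 2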
Example #3
    def inference_model(self, x, flip_pairs=None):
        """Inference function.

        Args:
            x (List[torch.Tensor[NxKxHxW]]): Input features.
            flip_pairs (None | list[tuple()]):
                Pairs of keypoints which are mirrored.

        Returns:
            output_heatmap (np.ndarray): Output heatmaps.
        """
        output = self.forward(x)
        assert isinstance(output, list)
        output = output[-1]

        if flip_pairs is not None:
            # perform flip
            output_heatmap = flip_back(output.detach().cpu().numpy(),
                                       flip_pairs,
                                       target_type=self.target_type)
            # feature is not aligned, shift flipped heatmap for higher accuracy
            if self.test_cfg.get('shift_heatmap', False):
                output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
        else:
            output_heatmap = output.detach().cpu().numpy()

        return output_heatmap
Example #4
    def process_head(self, output, img, img_metas, return_heatmap=False):
        """Process heatmap and keypoints from backbone features."""
        flip_pairs = img_metas['flip_pairs']

        if self.with_keypoint:
            output = self.keypoint_head(output)

        if isinstance(output, list):
            output = output[-1]

        output_heatmap = output.detach().cpu().numpy()
        if self.test_cfg['flip_test']:
            img_flipped = img.flip(3)

            output_flipped = self.backbone(img_flipped)
            if self.with_keypoint:
                output_flipped = self.keypoint_head(output_flipped)
            if isinstance(output_flipped, list):
                output_flipped = output_flipped[-1]
            output_flipped = flip_back(output_flipped.detach().cpu().numpy(),
                                       flip_pairs)

            # feature is not aligned, shift flipped heatmap for higher accuracy
            if self.test_cfg['shift_heatmap']:
                output_flipped[:, :, :, 1:] = output_flipped[:, :, :, :-1]
            output_heatmap = (output_heatmap + output_flipped) * 0.5

        c = img_metas['center'].reshape(1, -1)
        s = img_metas['scale'].reshape(1, -1)

        score = 1.0
        if 'bbox_score' in img_metas:
            score = np.array(img_metas['bbox_score']).reshape(-1)

        preds, maxvals = keypoints_from_heatmaps(
            output_heatmap,
            c,
            s,
            post_process=self.test_cfg['post_process'],
            unbiased=self.test_cfg['unbiased_decoding'],
            kernel=self.test_cfg['modulate_kernel'])

        all_preds = np.zeros((1, output_heatmap.shape[1], 3), dtype=np.float32)
        all_boxes = np.zeros((1, 6), dtype=np.float32)
        image_path = []

        all_preds[0, :, 0:2] = preds[:, :, 0:2]
        all_preds[0, :, 2:3] = maxvals
        all_boxes[0, 0:2] = c[:, 0:2]
        all_boxes[0, 2:4] = s[:, 0:2]
        all_boxes[0, 4] = np.prod(s * 200.0, axis=1)
        all_boxes[0, 5] = score
        image_path.append(img_metas['image_file'])

        if not return_heatmap:
            output_heatmap = None

        return all_preds, all_boxes, image_path, output_heatmap
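keypoints_from_heatmaps turns the fused heatmaps back into image-space keypoints. The following is only a rough sketch of its core, under the assumption that it takes the per-channel argmax as the keypoint location and maps it back to the original image using the bbox center and scale (pixel_std = 200); the real function additionally handles post-processing, unbiased decoding and kernel modulation. decode_heatmaps_sketch is a hypothetical name.

import numpy as np

def decode_heatmaps_sketch(heatmaps, center, scale):
    """heatmaps: (N, K, H, W); center, scale: (N, 2) arrays."""
    N, K, H, W = heatmaps.shape
    flat = heatmaps.reshape(N, K, -1)
    idx = flat.argmax(axis=2)
    maxvals = flat.max(axis=2, keepdims=True)                # (N, K, 1) confidences
    coords = np.stack([idx % W, idx // W], axis=-1).astype(np.float32)  # (N, K, 2)
    # map heatmap coordinates back into the original image frame
    scale_px = scale * 200.0                                 # scale is in units of 200 px
    preds = (coords / np.array([W, H]) * scale_px[:, None]
             + center[:, None] - scale_px[:, None] * 0.5)
    return preds, maxvals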
Example #5
    def inference_model(self, x, flip_pairs=None):
        """Inference function.

        Returns:
            output (list[np.ndarray]): list of output hand keypoint
            heatmaps, relative root depth and hand type.

        Args:
            x (torch.Tensor[NxKxHxW]): Input features.
            flip_pairs (None | list[tuple()):
                Pairs of keypoints which are mirrored.
        """

        output = self.forward(x)

        if flip_pairs is not None:
            # flip 3D heatmap
            heatmap_3d = output[0]
            N, K, D, H, W = heatmap_3d.shape
            # reshape 3D heatmap to 2D heatmap
            heatmap_3d = heatmap_3d.reshape(N, K * D, H, W)
            # 2D heatmap flip
            heatmap_3d_flipped_back = flip_back(
                heatmap_3d.detach().cpu().numpy(),
                flip_pairs,
                target_type=self.target_type)
            # reshape back to 3D heatmap
            heatmap_3d_flipped_back = heatmap_3d_flipped_back.reshape(
                N, K, D, H, W)
            # feature is not aligned, shift flipped heatmap for higher accuracy
            if self.test_cfg.get('shift_heatmap', False):
                heatmap_3d_flipped_back[..., 1:] = \
                    heatmap_3d_flipped_back[..., :-1]
            output[0] = heatmap_3d_flipped_back

            # flip relative hand root depth
            output[1] = -output[1].detach().cpu().numpy()

            # flip hand type
            hand_type = output[2].detach().cpu().numpy()
            hand_type_flipped_back = hand_type.copy()
            hand_type_flipped_back[:, 0] = hand_type[:, 1]
            hand_type_flipped_back[:, 1] = hand_type[:, 0]
            output[2] = hand_type_flipped_back
        else:
            output = [out.detach().cpu().numpy() for out in output]

        return output
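Because flip_back only operates on 4D (N, K, H, W) arrays, the 3D heatmap above is folded into the channel axis before flipping and unfolded afterwards. A small NumPy check of why this is safe for the width flip (random data, purely illustrative):

import numpy as np

heatmap_3d = np.random.rand(1, 2, 4, 8, 8)          # (N, K, D, H, W)
N, K, D, H, W = heatmap_3d.shape
flipped = heatmap_3d.reshape(N, K * D, H, W)[..., ::-1].reshape(N, K, D, H, W)
assert np.allclose(flipped, heatmap_3d[..., ::-1])   # same as flipping the 3D volume directly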
Example #6
    def process_head(self, output, img, img_metas, return_heatmap=False):
        """Process heatmap and keypoints from backbone features."""
        flip_pairs = img_metas[0]['flip_pairs']
        batch_size = img.size(0)
        if batch_size > 1:
            assert 'bbox_id' in img_metas[0]

        if self.with_keypoint:
            output = self.keypoint_head(output)

        if isinstance(output, list):
            output = output[-1]

        output_heatmap = output.detach().cpu().numpy()
        if self.test_cfg['flip_test']:
            img_flipped = img.flip(3)

            output_flipped = self.backbone(img_flipped)
            if self.with_keypoint:
                output_flipped = self.keypoint_head(output_flipped)
            if isinstance(output_flipped, list):
                output_flipped = output_flipped[-1]
            output_flipped = flip_back(output_flipped.detach().cpu().numpy(),
                                       flip_pairs,
                                       target_type=self.target_type)

            # feature is not aligned, shift flipped heatmap for higher accuracy
            if self.test_cfg['shift_heatmap']:
                output_flipped[:, :, :, 1:] = output_flipped[:, :, :, :-1]
            output_heatmap = (output_heatmap + output_flipped) * 0.5

        c = np.zeros((batch_size, 2))
        s = np.zeros((batch_size, 2))
        image_path = []
        score = np.ones(batch_size)
        bbox_ids = None
        if 'bbox_id' in img_metas[0]:
            bbox_ids = []
        for i in range(batch_size):
            c[i, :] = img_metas[i]['center']
            s[i, :] = img_metas[i]['scale']
            image_path.append(img_metas[i]['image_file'])

            if 'bbox_score' in img_metas[i]:
                score[i] = np.array(img_metas[i]['bbox_score']).reshape(-1)
            if bbox_ids is not None:
                bbox_ids.append(img_metas[i]['bbox_id'])

        preds, maxvals = keypoints_from_heatmaps(
            output_heatmap,
            c,
            s,
            post_process=self.test_cfg['post_process'],
            unbiased=self.test_cfg.get('unbiased_decoding', False),
            kernel=self.test_cfg['modulate_kernel'],
            use_udp=self.test_cfg.get('use_udp', False),
            valid_radius_factor=self.test_cfg.get('valid_radius_factor',
                                                  0.0546875),
            target_type=self.test_cfg.get('target_type', 'GaussianHeatMap'))

        all_preds = np.zeros((batch_size, output.shape[1], 3),
                             dtype=np.float32)
        all_boxes = np.zeros((batch_size, 6), dtype=np.float32)

        all_preds[:, :, 0:2] = preds[:, :, 0:2]
        all_preds[:, :, 2:3] = maxvals
        all_boxes[:, 0:2] = c[:, 0:2]
        all_boxes[:, 2:4] = s[:, 0:2]
        all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
        all_boxes[:, 5] = score
        if not return_heatmap:
            output_heatmap = None
        if bbox_ids is not None:
            return all_preds, all_boxes, image_path, output_heatmap, bbox_ids
        else:
            return all_preds, all_boxes, image_path, output_heatmap
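The center/scale pair read from img_metas follows the usual top-down convention in which a bbox is stored as its center plus a scale in units of 200 px, which is why np.prod(s * 200.0, axis=1) recovers the (padded) box area stored in all_boxes[:, 4]. A hedged sketch of that convention (xywh_to_center_scale, its aspect_ratio and padding values are illustrative assumptions, not taken from this code):

import numpy as np

def xywh_to_center_scale(x, y, w, h, aspect_ratio=192 / 256, padding=1.25):
    """Hypothetical helper: convert an (x, y, w, h) bbox to center/scale."""
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    # keep the crop aspect ratio by enlarging the shorter side
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    else:
        w = h * aspect_ratio
    scale = np.array([w, h], dtype=np.float32) / 200.0 * padding
    return center, scale

c, s = xywh_to_center_scale(50, 40, 100, 220)
area = np.prod(s * 200.0)   # matches the area entry written into all_boxes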
Example #7
    def process_head(self, output, img, img_metas, return_heatmap=False):
        """Process heatmap and keypoints from backbone features."""

        num_images = len(img_metas)
        flip_pairs = img_metas[0]['flip_pairs']

        if self.with_keypoint:
            output = self.keypoint_head(output)

        if isinstance(output, list):
            output = output[-1]

        output_heatmap = output.detach().cpu().numpy()
        if self.test_cfg['flip_test']:
            img_flipped = img.flip(3)

            output_flipped = self.backbone(img_flipped)
            if self.with_keypoint:
                output_flipped = self.keypoint_head(output_flipped)
            if isinstance(output_flipped, list):
                output_flipped = output_flipped[-1]
            output_flipped = flip_back(output_flipped.detach().cpu().numpy(),
                                       flip_pairs,
                                       target_type=self.target_type)

            # feature is not aligned, shift flipped heatmap for higher accuracy
            if self.test_cfg['shift_heatmap']:
                output_flipped[:, :, :, 1:] = output_flipped[:, :, :, :-1]
            output_heatmap = (output_heatmap + output_flipped) * 0.5

        c_list = [item['center'].reshape(1, -1) for item in img_metas]
        s_list = [item['scale'].reshape(1, -1) for item in img_metas]
        c = np.concatenate(c_list)
        s = np.concatenate(s_list)

        if 'bbox_score' in img_metas[0]:
            score = [
                np.array(item['bbox_score']).reshape(-1) for item in img_metas
            ]
        else:
            score = np.ones(num_images)

        preds, maxvals = keypoints_from_heatmaps(
            output_heatmap,
            c,
            s,
            post_process=self.test_cfg['post_process'],
            unbiased=self.test_cfg.get('unbiased_decoding', False),
            kernel=self.test_cfg['modulate_kernel'],
            use_udp=self.test_cfg.get('use_udp', False),
            valid_radius_factor=self.test_cfg.get('valid_radius_factor',
                                                  0.0546875),
            target_type=self.test_cfg.get('target_type', 'GaussianHeatMap'))

        results = []
        for i in range(num_images):
            image_path = []
            all_preds = np.zeros((1, preds.shape[1], 3), dtype=np.float32)
            all_boxes = np.zeros((1, 6), dtype=np.float32)
            all_preds[0, :, 0:2] = preds[i, :, 0:2]
            all_preds[0, :, 2:3] = maxvals[i]
            all_boxes[0, 0:2] = c[i, 0:2]
            all_boxes[0, 2:4] = s[i, 0:2]
            all_boxes[0, 4] = np.prod(s[i][np.newaxis, ...] * 200.0, axis=1)
            all_boxes[0, 5] = score[i]
            image_path.append(img_metas[i]['image_file'])

            if not return_heatmap:
                output_heatmap = None

            results.append([all_preds, all_boxes, image_path, output_heatmap])

        return results