Example #1
def test_outs2results():
    import numpy as np
    import torch
    from mmdet.core.bbox.demodata import random_boxes

    from mmtrack.core import outs2results

    # pseudo data
    num_objects, num_classes, image_size = 8, 4, 100
    bboxes = random_boxes(num_objects, image_size)
    scores = torch.FloatTensor(num_objects, 1).uniform_(0, 1)
    bboxes = torch.cat([bboxes, scores], dim=1)
    # leave the results of the last class empty
    labels = torch.randint(0, num_classes - 1, (num_objects, ))
    ids = torch.arange(num_objects)
    masks = torch.randint(0, 2, (num_objects, image_size, image_size)).bool()

    # test outs2results without ids
    results = outs2results(
        bboxes=bboxes, labels=labels, masks=masks, num_classes=num_classes)

    for key in ['bbox_results', 'mask_results']:
        assert key in results
    assert len(results['bbox_results']) == num_classes
    assert isinstance(results['bbox_results'][0], np.ndarray)
    assert results['bbox_results'][-1].shape == (0, 5)
    assert len(results['mask_results']) == num_classes
    assert isinstance(results['mask_results'][-1], list)
    assert len(results['mask_results'][-1]) == 0
    for i in range(num_classes):
        assert results['bbox_results'][i].shape[0] == (labels == i).sum()
        assert results['bbox_results'][i].shape[1] == 5
        assert len(results['mask_results'][i]) == (labels == i).sum()
        if len(results['mask_results'][i]) > 0:
            assert results['mask_results'][i][0].shape == (image_size,
                                                           image_size)

    # test outs2results with ids
    results = outs2results(
        bboxes=bboxes,
        labels=labels,
        masks=masks,
        ids=ids,
        num_classes=num_classes)

    for key in ['bbox_results', 'mask_results']:
        assert key in results
    assert len(results['bbox_results']) == num_classes
    assert isinstance(results['bbox_results'][0], np.ndarray)
    assert results['bbox_results'][-1].shape == (0, 6)
    assert len(results['mask_results']) == num_classes
    assert isinstance(results['mask_results'][-1], list)
    assert len(results['mask_results'][-1]) == 0
    for i in range(num_classes):
        assert results['bbox_results'][i].shape[0] == (labels == i).sum()
        assert results['bbox_results'][i].shape[1] == 6
        assert len(results['mask_results'][i]) == (labels == i).sum()
        if len(results['mask_results'][i]) > 0:
            assert results['mask_results'][i][0].shape == (image_size,
                                                           image_size)
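
A minimal sketch of the per-class format that outs2results produces, using made-up boxes (and assuming mmtrack is installed): without ids each class gets an (n, 5) ndarray, and with ids an (n, 6) ndarray.

# Toy illustration of the outs2results output format; the boxes,
# labels, and ids here are invented for demonstration only.
import torch

from mmtrack.core import outs2results

bboxes = torch.tensor([[0., 0., 10., 10., 0.9],
                       [5., 5., 20., 20., 0.8]])
labels = torch.tensor([0, 1])
ids = torch.tensor([3, 7])

results = outs2results(bboxes=bboxes, labels=labels, ids=ids, num_classes=3)
for cls_id, arr in enumerate(results['bbox_results']):
    # one ndarray per class; with ids each row has 6 columns
    print(cls_id, arr.shape)  # (1, 6), (1, 6), (0, 6)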
Example #2
    def simple_test(self, img, img_metas, rescale=False, **kwargs):
        """Test without augmentations.

        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
            rescale (bool, optional): If False, then returned bboxes and masks
                will fit the scale of img, otherwise, returned bboxes and masks
                will fit the scale of original image shape. Defaults to False.

        Returns:
            dict[str : list(ndarray)]: The tracking results.
        """
        frame_id = img_metas[0].get('frame_id', -1)
        if frame_id == 0:
            self.tracker.reset()

        det_results = self.detector.simple_test(
            img, img_metas, rescale=rescale)
        assert len(det_results) == 1, 'Batch inference is not supported.'
        bbox_results = det_results[0]
        num_classes = len(bbox_results)

        outs_det = results2outs(bbox_results=bbox_results)
        det_bboxes = torch.from_numpy(outs_det['bboxes']).to(img)
        det_labels = torch.from_numpy(outs_det['labels']).to(img).long()

        track_bboxes, track_labels, track_ids = self.tracker.track(
            img=img,
            img_metas=img_metas,
            model=self,
            bboxes=det_bboxes,
            labels=det_labels,
            frame_id=frame_id,
            rescale=rescale,
            **kwargs)

        track_results = outs2results(
            bboxes=track_bboxes,
            labels=track_labels,
            ids=track_ids,
            num_classes=num_classes)
        det_results = outs2results(
            bboxes=det_bboxes, labels=det_labels, num_classes=num_classes)

        return dict(
            det_bboxes=det_results['bbox_results'],
            track_bboxes=track_results['bbox_results'])
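
results2outs, used above to flatten the detector's per-class bbox_results back into plain arrays, is the inverse of outs2results; a minimal sketch with dummy values (assuming mmtrack is installed):

import numpy as np

from mmtrack.core import results2outs

bbox_results = [
    np.array([[0., 0., 10., 10., 0.9]], dtype=np.float32),  # class 0: one box
    np.zeros((0, 5), dtype=np.float32),                     # class 1: empty
]
outs = results2outs(bbox_results=bbox_results)
print(outs['bboxes'].shape)  # (1, 5): x1, y1, x2, y2, score
print(outs['labels'])        # [0]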
Example #3
def _create_coco_gt_results(dataset):
    from collections import defaultdict

    import numpy as np

    from mmtrack.core import outs2results

    results = defaultdict(list)
    for img_info in dataset.data_infos:
        ann = dataset.get_ann_info(img_info)
        scores = np.ones((ann['bboxes'].shape[0], 1), dtype=np.float64)
        bboxes = np.concatenate((ann['bboxes'], scores), axis=1)
        det_results = outs2results(bboxes=bboxes,
                                   labels=ann['labels'],
                                   num_classes=len(dataset.CLASSES))
        track_results = outs2results(bboxes=bboxes,
                                     labels=ann['labels'],
                                     ids=ann['instance_ids'].astype(np.int64),
                                     num_classes=len(dataset.CLASSES))
        results['det_bboxes'].append(det_results['bbox_results'])
        results['track_bboxes'].append(track_results['bbox_results'])
    return results
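
This helper is typically used to sanity-check evaluation: feeding ground-truth boxes back in should yield (near-)perfect metrics. A hedged usage sketch, assuming `dataset` is an already-constructed CocoVideoDataset-style dataset whose evaluate() supports the 'bbox' and 'track' metrics:

# `dataset` is assumed to exist and to support these metrics.
results = _create_coco_gt_results(dataset)
eval_results = dataset.evaluate(results, metric=['bbox', 'track'])
print(eval_results)  # bbox mAP and MOTA/IDF1 should be close to 1.0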
Example #4
    def simple_test(self, img, img_metas, rescale=False):
        """Test forward.

        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
            rescale (bool): whether to rescale the bboxes.

        Returns:
            dict[str : list(ndarray)]: The tracking results.
        """
        # TODO inherit from a base tracker
        assert self.with_track_head, 'track head must be implemented.'  # noqa
        frame_id = img_metas[0].get('frame_id', -1)
        if frame_id == 0:
            self.tracker.reset()

        x = self.detector.extract_feat(img)
        proposal_list = self.detector.rpn_head.simple_test_rpn(x, img_metas)

        det_results = self.detector.roi_head.simple_test(
            x, proposal_list, img_metas, rescale=rescale)

        bbox_results = det_results[0]
        num_classes = len(bbox_results)
        outs_det = results2outs(bbox_results=bbox_results)

        det_bboxes = torch.tensor(outs_det['bboxes']).to(img)
        det_labels = torch.tensor(outs_det['labels']).to(img).long()

        track_bboxes, track_labels, track_ids = self.tracker.track(
            img_metas=img_metas,
            feats=x,
            model=self,
            bboxes=det_bboxes,
            labels=det_labels,
            frame_id=frame_id)

        track_bboxes = outs2results(
            bboxes=track_bboxes,
            labels=track_labels,
            ids=track_ids,
            num_classes=num_classes)['bbox_results']

        return dict(det_bboxes=bbox_results, track_bboxes=track_bboxes)
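
Note that this variant returns the detector's per-class bbox_results directly as det_bboxes, unlike examples #2 and #5. Since results2outs and outs2results are inverses, round-tripping the flattened detections would reproduce essentially the same lists; a hedged equivalence sketch:

# Round-trip sketch: each det_results[i] should match bbox_results[i]
# (up to dtype changes from the numpy/torch conversion).
det_results = outs2results(
    bboxes=det_bboxes, labels=det_labels,
    num_classes=num_classes)['bbox_results']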
Example #5
    def simple_test(self,
                    img,
                    img_metas,
                    rescale=False,
                    public_bboxes=None,
                    **kwargs):
        """Test without augmentations.

        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
            rescale (bool, optional): If False, then returned bboxes and masks
                will fit the scale of img, otherwise, returned bboxes and masks
                will fit the scale of original image shape. Defaults to False.
            public_bboxes (list[Tensor], optional): Public bounding boxes from
                the benchmark. Defaults to None.

        Returns:
            dict[str : list(ndarray)]: The tracking results.
        """
        frame_id = img_metas[0].get('frame_id', -1)
        if frame_id == 0:
            self.tracker.reset()

        x = self.detector.extract_feat(img)
        if hasattr(self.detector, 'roi_head'):
            # TODO: check whether this is the case
            if public_bboxes is not None:
                public_bboxes = [_[0] for _ in public_bboxes]
                proposals = public_bboxes
            else:
                proposals = self.detector.rpn_head.simple_test_rpn(
                    x, img_metas)
            det_bboxes, det_labels = self.detector.roi_head.simple_test_bboxes(
                x,
                img_metas,
                proposals,
                self.detector.roi_head.test_cfg,
                rescale=rescale)
            # TODO: support batch inference
            det_bboxes = det_bboxes[0]
            det_labels = det_labels[0]
            num_classes = self.detector.roi_head.bbox_head.num_classes
        elif hasattr(self.detector, 'bbox_head'):
            num_classes = self.detector.bbox_head.num_classes
            raise NotImplementedError(
                'Tracktor needs "roi_head" to refine proposals.')
        else:
            raise TypeError('detector must have roi_head or bbox_head.')

        track_bboxes, track_labels, track_ids = self.tracker.track(
            img=img,
            img_metas=img_metas,
            model=self,
            feats=x,
            bboxes=det_bboxes,
            labels=det_labels,
            frame_id=frame_id,
            rescale=rescale,
            **kwargs)

        track_results = outs2results(
            bboxes=track_bboxes,
            labels=track_labels,
            ids=track_ids,
            num_classes=num_classes)
        det_results = outs2results(
            bboxes=det_bboxes, labels=det_labels, num_classes=num_classes)

        return dict(
            det_bboxes=det_results['bbox_results'],
            track_bboxes=track_results['bbox_results'])
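
Across all of the simple_test variants above, the returned values are per-class lists of ndarrays, so a caller iterates over classes and then rows. A minimal consumption sketch; `model`, `img`, and `img_metas` are assumed to exist, and the row layout with ids is assumed to be [id, x1, y1, x2, y2, score]:

# Hedged sketch of consuming the tracking results dict.
result = model.simple_test(img, img_metas)
for cls_id, dets in enumerate(result['track_bboxes']):
    for row in dets:
        track_id, x1, y1, x2, y2, score = row  # assumed column order
        print(cls_id, int(track_id), score)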