Esempio n. 1
0
    def format_track_results(self, results, infos, resfile):
        """Format tracking results and write them to ``resfile``.

        Args:
            results (list): Per-frame tracking results. Each element is
                passed to ``results2outs`` as ``bbox_results``.
            infos (list[dict]): Per-frame info dicts. ``mot_frame_id`` is
                written as the frame id when present, otherwise
                ``frame_id + 1`` is used.
            resfile (str): Path of the text file to write.
        """
        results_per_video = []
        for frame_id, result in enumerate(results):
            outs_track = results2outs(bbox_results=result)
            track_ids, bboxes = outs_track['ids'], outs_track['bboxes']
            frame_ids = np.full_like(track_ids, frame_id)
            results_per_frame = np.concatenate(
                (frame_ids[:, None], track_ids[:, None], bboxes), axis=1)
            results_per_video.append(results_per_frame)
        # `results_per_video` is a ndarray with shape (N, 7). Each row denotes
        # (frame_id, track_id, x1, y1, x2, y2, score)
        results_per_video = np.concatenate(results_per_video)

        if self.interpolate_tracks_cfg is not None:
            results_per_video = interpolate_tracks(
                results_per_video, **self.interpolate_tracks_cfg)

        with open(resfile, 'wt') as f:
            for frame_id, info in enumerate(infos):
                # `mot_frame_id` is the actual frame id used for evaluation.
                # It may not start from 0.
                if 'mot_frame_id' in info:
                    mot_frame_id = info['mot_frame_id']
                else:
                    mot_frame_id = info['frame_id'] + 1

                # Select the rows whose first column matches this frame.
                frame_mask = results_per_video[:, 0] == frame_id
                for row in results_per_video[frame_mask]:
                    _, track_id, x1, y1, x2, y2, conf = row
                    # One record per line: boxes are written as
                    # (x, y, width, height) followed by the score.
                    f.write(
                        f'{mot_frame_id},{track_id},{x1:.3f},{y1:.3f},' +
                        f'{(x2-x1):.3f},{(y2-y1):.3f},{conf:.3f},-1,-1,-1\n')
Esempio n. 2
0
def test_results2outs():
    """Check the keys and shapes returned by ``results2outs``."""
    from mmtrack.core import results2outs

    num_classes = 3
    num_objects = [2, 0, 2]
    image_size = 100
    total_objects = sum(num_objects)
    mask_hw = (image_size, image_size)

    # One label per object, repeated according to the per-class counts.
    gt_labels = [
        cls_id for cls_id, count in enumerate(num_objects)
        for _ in range(count)
    ]

    # Per-class random boxes: 5 columns without ids, 6 columns with ids.
    bbox_results = [
        np.random.randint(low=0, high=image_size, size=(count, 5))
        for count in num_objects[:num_classes]
    ]
    bbox_results_with_ids = [
        np.random.randint(low=0, high=image_size, size=(count, 6))
        for count in num_objects[:num_classes]
    ]
    # Per-class list holding one random binary mask per object.
    mask_results = [[
        np.random.randint(0, 2, (image_size, image_size))
        for _ in range(num_objects[cls_id])
    ] for cls_id in range(num_classes)]

    # test results2outs without ids
    outs = results2outs(
        bbox_results=bbox_results,
        mask_results=mask_results,
        mask_shape=mask_hw)

    for key in ('bboxes', 'labels', 'masks'):
        assert key in outs
    assert outs['bboxes'].shape == (total_objects, 5)
    assert (outs['labels'] == np.array(gt_labels)).all()
    assert outs['masks'].shape == (total_objects, image_size, image_size)

    # test results2outs with ids
    outs = results2outs(
        bbox_results=bbox_results_with_ids,
        mask_results=mask_results,
        mask_shape=mask_hw)

    for key in ('bboxes', 'labels', 'ids', 'masks'):
        assert key in outs
    assert outs['bboxes'].shape == (total_objects, 5)
    assert (outs['labels'] == np.array(gt_labels)).all()
    assert outs['ids'].shape == (total_objects, )
    assert outs['masks'].shape == (total_objects, image_size, image_size)
Esempio n. 3
0
    def show_result(self,
                    img,
                    result,
                    score_thr=0.0,
                    thickness=1,
                    font_scale=0.5,
                    show=False,
                    out_file=None,
                    wait_time=0,
                    backend='cv2',
                    **kwargs):
        """Draw the tracking results of one frame onto an image.

        Args:
            img (str | ndarray): Image file name or an already loaded image.
                A string is read with ``mmcv.imread``.
            result (dict): Tracking result.
                - The value of key 'track_bboxes' is list with length
                num_classes, and each element in list is ndarray with
                shape(n, 6) in [id, tl_x, tl_y, br_x, br_y, score] format.
                - The value of key 'det_bboxes' is list with length
                num_classes, and each element in list is ndarray with
                shape(n, 5) in [tl_x, tl_y, br_x, br_y, score] format.
                - The optional key 'track_masks' is forwarded to
                ``results2outs`` as ``mask_results``.
            score_thr (float, optional): Score threshold forwarded to
                ``imshow_tracks``. Defaults to 0.0.
            thickness (int, optional): Thickness of lines. Defaults to 1.
            font_scale (float, optional): Font scales of texts. Defaults
                to 0.5.
            show (bool, optional): Whether show the visualizations on the
                fly. Defaults to False.
            out_file (str | None, optional): Output filename. Defaults to None.
            wait_time (int, optional): Forwarded to ``imshow_tracks``.
                Defaults to 0.
            backend (str, optional): Backend to draw the bounding boxes,
                options are `cv2` and `plt`. Defaults to 'cv2'.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ndarray: Visualized image.
        """
        assert isinstance(result, dict)
        if isinstance(img, str):
            img = mmcv.imread(img)

        # Flatten the per-class results; masks use the image height/width.
        outs_track = results2outs(
            bbox_results=result.get('track_bboxes'),
            mask_results=result.get('track_masks'),
            mask_shape=img.shape[:2])

        # Any entry missing from `outs_track` is passed on as None.
        bboxes = outs_track.get('bboxes')
        labels = outs_track.get('labels')
        ids = outs_track.get('ids')
        masks = outs_track.get('masks')

        return imshow_tracks(
            img,
            bboxes,
            labels,
            ids,
            masks,
            classes=self.CLASSES,
            score_thr=score_thr,
            thickness=thickness,
            font_scale=font_scale,
            show=show,
            out_file=out_file,
            wait_time=wait_time,
            backend=backend)
Esempio n. 4
0
    def __call__(self, results):
        """Expose ``results['detections']`` as public detection fields.

        Args:
            results (dict): Must contain 'detections' (passed to
                ``results2outs``) and a 'bbox_fields' list.

        Returns:
            dict: The same ``results`` dict with 'public_bboxes' and
            'public_labels' added, plus 'public_scores' when the boxes
            have more than four columns.
        """
        detections = results2outs(bbox_results=results['detections'])
        boxes, classes = detections['bboxes'], detections['labels']
        has_scores = boxes.shape[1] > 4

        # The first four columns are the box coordinates.
        results['public_bboxes'] = boxes[:, :4]
        if has_scores:
            # The last column is taken as the score.
            results['public_scores'] = boxes[:, -1]
        results['public_labels'] = classes
        results['bbox_fields'].append('public_bboxes')
        return results
Esempio n. 5
0
    def simple_test(self, img, img_metas, rescale=False, **kwargs):
        """Run detection followed by tracking on a single frame.

        The tracker is reset whenever the frame id in ``img_metas[0]`` is 0.

        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
            rescale (bool, optional): If False, then returned bboxes and masks
                will fit the scale of img, otherwise, returned bboxes and masks
                will fit the scale of original image shape. Defaults to False.
            **kwargs: Extra keyword arguments forwarded to
                ``self.tracker.track``.

        Returns:
            dict[str : list(ndarray)]: The tracking results, with keys
            'det_bboxes' and 'track_bboxes'.
        """
        frame_id = img_metas[0].get('frame_id', -1)
        if frame_id == 0:
            self.tracker.reset()

        batch_outputs = self.detector.simple_test(
            img, img_metas, rescale=rescale)
        assert len(batch_outputs) == 1, 'Batch inference is not supported.'
        bbox_results = batch_outputs[0]
        num_classes = len(bbox_results)

        # Flatten the per-class detections and move them next to `img`.
        outs_det = results2outs(bbox_results=bbox_results)
        det_bboxes = torch.from_numpy(outs_det['bboxes']).to(img)
        det_labels = torch.from_numpy(outs_det['labels']).to(img).long()

        tracked = self.tracker.track(
            img=img,
            img_metas=img_metas,
            model=self,
            bboxes=det_bboxes,
            labels=det_labels,
            frame_id=frame_id,
            rescale=rescale,
            **kwargs)
        track_bboxes, track_labels, track_ids = tracked

        # Convert both tracks and detections back to per-class results.
        track_results = outs2results(
            bboxes=track_bboxes,
            labels=track_labels,
            ids=track_ids,
            num_classes=num_classes)
        det_results = outs2results(
            bboxes=det_bboxes, labels=det_labels, num_classes=num_classes)

        return {
            'det_bboxes': det_results['bbox_results'],
            'track_bboxes': track_results['bbox_results'],
        }
Esempio n. 6
0
    def simple_test(self, img, img_metas, rescale=False):
        """Run RPN/RoI detection and tracking on a single frame.

        The tracker is reset whenever the frame id in ``img_metas[0]`` is 0.

        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
            rescale (bool): whether to rescale the bboxes.

        Returns:
            dict: 'det_bboxes' holds the first element of the RoI head
            output and 'track_bboxes' holds the 'bbox_results' entry
            returned by ``outs2results``.
        """
        # TODO inherit from a base tracker
        assert self.with_track_head, 'track head must be implemented.'  # noqa
        frame_id = img_metas[0].get('frame_id', -1)
        if frame_id == 0:
            self.tracker.reset()

        feats = self.detector.extract_feat(img)
        proposals = self.detector.rpn_head.simple_test_rpn(feats, img_metas)
        roi_outputs = self.detector.roi_head.simple_test(
            feats, proposals, img_metas, rescale=rescale)

        # Only the first element of the RoI head output is used.
        bbox_results = roi_outputs[0]
        num_classes = len(bbox_results)

        # Flatten the per-class detections and move them next to `img`.
        outs_det = results2outs(bbox_results=bbox_results)
        det_bboxes = torch.tensor(outs_det['bboxes']).to(img)
        det_labels = torch.tensor(outs_det['labels']).to(img).long()

        tracked = self.tracker.track(
            img_metas=img_metas,
            feats=feats,
            model=self,
            bboxes=det_bboxes,
            labels=det_labels,
            frame_id=frame_id)
        track_bboxes, track_labels, track_ids = tracked

        # Convert the tracks back to per-class results.
        track_outputs = outs2results(
            bboxes=track_bboxes,
            labels=track_labels,
            ids=track_ids,
            num_classes=num_classes)

        return {
            'det_bboxes': bbox_results,
            'track_bboxes': track_outputs['bbox_results'],
        }
    def format_bbox_results(self, results, infos, resfile):
        """Format detection results and write them to ``resfile``.

        Each detection is written on its own line as
        ``frame,-1,x,y,width,height,score``.

        Args:
            results (list): Per-frame detection results. Each element is
                passed to ``results2outs`` as ``bbox_results``.
            infos (list[dict]): Per-frame info dicts. ``mot_frame_id`` is
                written as the frame id when present, otherwise
                ``frame_id + 1`` is used.
            resfile (str): Path of the text file to write.
        """
        # The `with` block closes the file; no explicit close is needed.
        with open(resfile, 'wt') as f:
            for res, info in zip(results, infos):
                if 'mot_frame_id' in info:
                    frame = info['mot_frame_id']
                else:
                    frame = info['frame_id'] + 1

                outs_det = results2outs(bbox_results=res)
                # Labels are iterated in lockstep but not written out.
                for bbox, _label in zip(outs_det['bboxes'],
                                        outs_det['labels']):
                    x1, y1, x2, y2, conf = bbox
                    f.write(
                        f'{frame},-1,{x1:.3f},{y1:.3f},{(x2-x1):.3f},' +
                        f'{(y2-y1):.3f},{conf:.3f}\n')
    def format_track_results(self, results, infos, resfile):
        """Format tracking results and write them to ``resfile``.

        Each track box is written on its own line as
        ``frame,id,x,y,width,height,score,-1,-1,-1``.

        Args:
            results (list): Per-frame tracking results. Each element is
                passed to ``results2outs`` as ``bbox_results``.
            infos (list[dict]): Per-frame info dicts. ``mot_frame_id`` is
                written as the frame id when present, otherwise
                ``frame_id + 1`` is used.
            resfile (str): Path of the text file to write.
        """
        with open(resfile, 'wt') as f:
            for res, info in zip(results, infos):
                if 'mot_frame_id' in info:
                    frame = info['mot_frame_id']
                else:
                    frame = info['frame_id'] + 1

                outs_track = results2outs(bbox_results=res)
                # Labels are iterated in lockstep but not written out.
                for bbox, _label, track_id in zip(outs_track['bboxes'],
                                                  outs_track['labels'],
                                                  outs_track['ids']):
                    x1, y1, x2, y2, conf = bbox
                    f.write(
                        f'{frame},{track_id},{x1:.3f},{y1:.3f},'
                        f'{(x2-x1):.3f},' +
                        f'{(y2-y1):.3f},{conf:.3f},-1,-1,-1\n')
    def format_results(self,
                       results,
                       resfile_path=None,
                       metrics=['track_segm']):
        """Format the results to a zip file (standard format for YouTube-VIS
        Challenge).

        Args:
            results (dict(list[ndarray])): Testing results of the dataset.
                The keys 'track_bboxes' and 'track_masks' are read.
            resfile_path (str, optional): Path to save the formatted results.
                When None, a temporary directory is created. Defaults to
                None.
            metrics (list[str], optional): The results of the specific metrics
                will be formatted. Must contain 'track_segm'. Defaults to
                ['track_segm'].

        Returns:
            tuple: (resfiles, tmp_dir), resfiles is the path of the result
            json file, tmp_dir is the temporary directory created for saving
            files (None when ``resfile_path`` was given).
        """
        assert isinstance(results, dict), 'results must be a dict.'
        if isinstance(metrics, str):
            metrics = [metrics]
        assert 'track_segm' in metrics
        if resfile_path is None:
            tmp_dir = tempfile.TemporaryDirectory()
            resfile_path = tmp_dir.name
        else:
            tmp_dir = None
        resfiles = osp.join(resfile_path, 'results.json')

        # Index of the first frame of every video, followed by a sentinel so
        # that `inds[k]:inds[k + 1]` spans the frames of video `k`.
        inds = [
            idx for idx, info in enumerate(self.data_infos)
            if info['frame_id'] == 0
        ]
        num_vids = len(inds)
        assert num_vids == len(self.vid_ids)
        inds.append(len(self.data_infos))
        vid_infos = self.coco.load_vids(self.vid_ids)

        json_results = []
        for vid_idx in range(num_vids):
            video_id = vid_infos[vid_idx]['id']
            start, end = inds[vid_idx], inds[vid_idx + 1]
            num_frames = end - start
            # collect data for each instance in a video.
            collect_data = dict()
            for frame_id, (bbox_res, mask_res) in enumerate(
                    zip(results['track_bboxes'][start:end],
                        results['track_masks'][start:end])):
                outs_track = results2outs(bbox_results=bbox_res)
                bboxes = outs_track['bboxes']
                labels = outs_track['labels']
                ids = outs_track['ids']
                masks = mmcv.concat_list(mask_res)
                assert len(masks) == len(bboxes)
                # Use a dedicated index here: reusing the video index would
                # corrupt the per-video frame count computed above.
                for obj_idx, track_id in enumerate(ids):
                    if track_id not in collect_data:
                        collect_data[track_id] = dict(
                            category_ids=[], scores=[], segmentations=dict())
                    instance = collect_data[track_id]
                    instance['category_ids'].append(labels[obj_idx])
                    instance['scores'].append(bboxes[obj_idx][4])
                    mask = masks[obj_idx]
                    # Byte counts are decoded to str before being dumped.
                    if isinstance(mask['counts'], bytes):
                        mask['counts'] = mask['counts'].decode()
                    instance['segmentations'][frame_id] = mask

            # transform the collected data into official format
            for id_data in collect_data.values():
                output = dict()
                output['video_id'] = video_id
                output['score'] = np.array(id_data['scores']).mean().item()
                # majority voting for sequence category
                # NOTE(review): the `+ 1` presumably maps 0-based labels to
                # 1-based category ids; confirm against the dataset.
                output['category_id'] = np.bincount(
                    np.array(id_data['category_ids'])).argmax().item() + 1
                # One entry per frame of the video; None where the instance
                # has no mask in that frame.
                output['segmentations'] = [
                    id_data['segmentations'].get(frame_id)
                    for frame_id in range(num_frames)
                ]
                json_results.append(output)
        mmcv.dump(json_results, resfiles)

        # zip the json file in order to submit to the test server.
        zip_file_name = osp.join(resfile_path, 'submission_file.zip')
        # The context manager closes the archive even if writing fails.
        with zipfile.ZipFile(zip_file_name, 'w', zipfile.ZIP_DEFLATED) as zf:
            print_log(f"zip the 'results.json' into '{zip_file_name}', "
                      'please submmit the zip file to the test server')
            zf.write(resfiles, 'results.json')

        return resfiles, tmp_dir