Example #1
import numpy as np
import torch


def test_imrenormalize():
    from mmtrack.core import imrenormalize
    img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True)
    new_img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
                            std=[58.395, 57.12, 57.375],
                            to_rgb=True)

    # Identical old/new configs, so renormalization should be a no-op.
    img = np.random.randn(128, 256, 3).astype(np.float32)
    new_img = imrenormalize(img, img_norm_cfg, new_img_norm_cfg)
    assert isinstance(new_img, np.ndarray)
    assert new_img.shape == (128, 256, 3)
    assert np.allclose(img, new_img, atol=1e-6)

    # Same no-op check for the torch.Tensor path (NCHW layout).
    img = torch.randn(1, 3, 128, 256, dtype=torch.float)
    new_img = imrenormalize(img, img_norm_cfg, new_img_norm_cfg)
    assert isinstance(new_img, torch.Tensor)
    assert new_img.shape == (1, 3, 128, 256)
    assert torch.allclose(img, new_img, atol=1e-6)
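
For reference, both configs in the test above are identical, so the renormalization is expected to be a no-op. Conceptually, imrenormalize de-normalizes the image with the old config and normalizes it again with the new one. A minimal sketch of that arithmetic (ignoring the to_rgb channel swap; the helper name is illustrative, not part of mmtrack):

import numpy as np

def renormalize_sketch(img, old_cfg, new_cfg):
    """De-normalize with old_cfg, then normalize with new_cfg."""
    old_mean = np.asarray(old_cfg['mean'], dtype=np.float32)
    old_std = np.asarray(old_cfg['std'], dtype=np.float32)
    new_mean = np.asarray(new_cfg['mean'], dtype=np.float32)
    new_std = np.asarray(new_cfg['std'], dtype=np.float32)
    raw = img * old_std + old_mean     # undo the old normalization
    return (raw - new_mean) / new_std  # apply the new one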
Example #2
    def track(self,
              img,
              img_metas,
              model,
              bboxes,
              labels,
              frame_id,
              rescale=False,
              **kwargs):
        """Tracking forward function.

        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
            model (nn.Module): MOT model.
            bboxes (Tensor): of shape (N, 5).
            labels (Tensor): of shape (N, ).
            frame_id (int): The id of the current frame (0-indexed).
            rescale (bool, optional): If True, the bounding boxes should be
                rescaled to fit the original scale of the image. Defaults to
                False.

        Returns:
            tuple: Tracking results.
        """
        # Lazily bind the model's motion module (a Kalman filter) on the
        # first call.
        if not hasattr(self, 'kf'):
            self.kf = model.motion

        if self.with_reid:
            if self.reid.get('img_norm_cfg', False):
                reid_img = imrenormalize(img, img_metas[0]['img_norm_cfg'],
                                         self.reid['img_norm_cfg'])
            else:
                reid_img = img.clone()

        # Drop detections below the object score threshold.
        valid_inds = bboxes[:, -1] > self.obj_score_thr
        bboxes = bboxes[valid_inds]
        labels = labels[valid_inds]

        # No existing tracks (or no detections): every detection starts a
        # new track.
        if self.empty or bboxes.size(0) == 0:
            num_new_tracks = bboxes.size(0)
            ids = torch.arange(self.num_tracks,
                               self.num_tracks + num_new_tracks,
                               dtype=torch.long)
            self.num_tracks += num_new_tracks
            if self.with_reid:
                embeds = model.reid.simple_test(
                    self.crop_imgs(reid_img, img_metas, bboxes[:, :4].clone(),
                                   rescale))
        else:
            ids = torch.full((bboxes.size(0), ), -1, dtype=torch.long)

            # motion
            if model.with_motion:
                self.tracks, costs = model.motion.track(
                    self.tracks, bbox_xyxy_to_cxcyah(bboxes))

            active_ids = self.confirmed_ids
            if self.with_reid:
                embeds = model.reid.simple_test(
                    self.crop_imgs(reid_img, img_metas, bboxes[:, :4].clone(),
                                   rescale))
                # reid
                if len(active_ids) > 0:
                    track_embeds = self.get('embeds',
                                            active_ids,
                                            self.reid.get('num_samples', None),
                                            behavior='mean')
                    reid_dists = torch.cdist(track_embeds,
                                             embeds).cpu().numpy()

                    valid_inds = [list(self.ids).index(_) for _ in active_ids]
                    reid_dists[~np.isfinite(costs[valid_inds, :])] = np.nan

                    # NaN marks gating-forbidden pairs; this assumes a
                    # NaN-aware solver (mmtrack imports it from
                    # motmetrics.lap), since scipy's version rejects NaN.
                    row, col = linear_sum_assignment(reid_dists)
                    for r, c in zip(row, col):
                        dist = reid_dists[r, c]
                        if not np.isfinite(dist):
                            continue
                        if dist <= self.reid['match_score_thr']:
                            ids[c] = active_ids[r]

            # Fall back to IoU association for tracks seen in the previous
            # frame that did not get a ReID match.
            active_ids = [
                id for id in self.ids if id not in ids
                and self.tracks[id].frame_ids[-1] == frame_id - 1
            ]
            if len(active_ids) > 0:
                active_dets = torch.nonzero(ids == -1).squeeze(1)
                track_bboxes = self.get('bboxes', active_ids)
                ious = bbox_overlaps(
                    track_bboxes, bboxes[active_dets][:, :-1]).cpu().numpy()
                dists = 1 - ious
                row, col = linear_sum_assignment(dists)
                for r, c in zip(row, col):
                    dist = dists[r, c]
                    if dist < 1 - self.match_iou_thr:
                        ids[active_dets[c]] = active_ids[r]

            # Remaining unmatched detections start new tracks.
            new_track_inds = ids == -1
            ids[new_track_inds] = torch.arange(self.num_tracks,
                                               self.num_tracks +
                                               new_track_inds.sum(),
                                               dtype=torch.long)
            self.num_tracks += new_track_inds.sum()

        self.update(ids=ids,
                    bboxes=bboxes[:, :4],
                    scores=bboxes[:, -1],
                    labels=labels,
                    embeds=embeds if self.with_reid else None,
                    frame_ids=frame_id)
        return bboxes, labels, ids
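
A self-contained illustration of the assignment step in the ReID branch above, with made-up cost values. Note that the snippet masks forbidden pairs with NaN, which requires a NaN-aware solver such as the one in motmetrics.lap; plain scipy.optimize.linear_sum_assignment rejects NaN costs, so this sketch uses a large finite cost instead:

import numpy as np
from scipy.optimize import linear_sum_assignment

# rows: existing tracks, cols: new detections
reid_dists = np.array([[0.2, 1e6],
                       [1e6, 0.9]])
row, col = linear_sum_assignment(reid_dists)
for r, c in zip(row, col):
    if reid_dists[r, c] <= 2.0:  # stand-in for reid['match_score_thr']
        print(f'track {r} matched to detection {c}')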
Example #3
    def track(self,
              img,
              img_metas,
              model,
              feats,
              bboxes,
              labels,
              frame_id,
              rescale=False,
              **kwargs):
        """Tracking forward function.

        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
            model (nn.Module): MOT model.
            feats (tuple): Backbone features of the input image.
            bboxes (Tensor): of shape (N, 5).
            labels (Tensor): of shape (N, ).
            frame_id (int): The id of the current frame (0-indexed).
            rescale (bool, optional): If True, the bounding boxes should be
                rescaled to fit the original scale of the image. Defaults to
                False.

        Returns:
            tuple: Tracking results.
        """
        if self.with_reid:
            if self.reid.get('img_norm_cfg', False):
                reid_img = imrenormalize(img, img_metas[0]['img_norm_cfg'],
                                         self.reid['img_norm_cfg'])
            else:
                reid_img = img.clone()

        valid_inds = bboxes[:, -1] > self.obj_score_thr
        bboxes = bboxes[valid_inds]
        labels = labels[valid_inds]

        if self.empty:
            num_new_tracks = bboxes.size(0)
            ids = torch.arange(self.num_tracks,
                               self.num_tracks + num_new_tracks,
                               dtype=torch.long)
            self.num_tracks += num_new_tracks
            if self.with_reid:
                embeds = model.reid.simple_test(
                    self.crop_imgs(reid_img, img_metas, bboxes[:, :4].clone(),
                                   rescale))
        else:
            # motion
            if model.with_cmc:
                if model.with_linear_motion:
                    num_samples = model.linear_motion.num_samples
                else:
                    num_samples = 1
                self.tracks = model.cmc.track(self.last_img, img, self.tracks,
                                              num_samples, frame_id)

            if model.with_linear_motion:
                self.tracks = model.linear_motion.track(self.tracks, frame_id)

            # propagate tracks
            prop_bboxes, prop_labels, prop_ids = self.regress_tracks(
                feats, img_metas, model.detector, frame_id, rescale)

            # filter bboxes with propagated tracks
            ious = bbox_overlaps(bboxes[:, :4], prop_bboxes[:, :4])
            valid_inds = (ious < self.regression['match_iou_thr']).all(dim=1)
            bboxes = bboxes[valid_inds]
            labels = labels[valid_inds]
            ids = torch.full((bboxes.size(0), ), -1, dtype=torch.long)

            if self.with_reid:
                prop_embeds = model.reid.simple_test(
                    self.crop_imgs(reid_img, img_metas,
                                   prop_bboxes[:, :4].clone(), rescale))
                if bboxes.size(0) > 0:
                    embeds = model.reid.simple_test(
                        self.crop_imgs(reid_img, img_metas,
                                       bboxes[:, :4].clone(), rescale))
                else:
                    embeds = prop_embeds.new_zeros((0, prop_embeds.size(1)))
                # reid
                active_ids = [int(_) for _ in self.ids if _ not in prop_ids]
                if len(active_ids) > 0 and bboxes.size(0) > 0:
                    track_embeds = self.get('embeds',
                                            active_ids,
                                            self.reid.get('num_samples', None),
                                            behavior='mean')
                    reid_dists = torch.cdist(track_embeds,
                                             embeds).cpu().numpy()

                    track_bboxes = self.get('bboxes', active_ids)
                    ious = bbox_overlaps(track_bboxes,
                                         bboxes[:, :4]).cpu().numpy()
                    iou_masks = ious < self.reid['match_iou_thr']
                    reid_dists[iou_masks] = 1e6

                    row, col = linear_sum_assignment(reid_dists)
                    for r, c in zip(row, col):
                        dist = reid_dists[r, c]
                        if dist <= self.reid['match_score_thr']:
                            ids[c] = active_ids[r]

            # Remaining unmatched detections start new tracks.
            new_track_inds = ids == -1
            ids[new_track_inds] = torch.arange(self.num_tracks,
                                               self.num_tracks +
                                               new_track_inds.sum(),
                                               dtype=torch.long)
            self.num_tracks += new_track_inds.sum()

            # Guard: empty results may come back as (0, 4) tensors; rebuild
            # them as (0, 5) so the concatenations below keep the score
            # column.
            if bboxes.shape[1] == 4:
                bboxes = bboxes.new_zeros((0, 5))
            if prop_bboxes.shape[1] == 4:
                prop_bboxes = prop_bboxes.new_zeros((0, 5))

            bboxes = torch.cat((prop_bboxes, bboxes), dim=0)
            labels = torch.cat((prop_labels, labels), dim=0)
            ids = torch.cat((prop_ids, ids), dim=0)
            if self.with_reid:
                embeds = torch.cat((prop_embeds, embeds), dim=0)

        self.update(ids=ids,
                    bboxes=bboxes[:, :4],
                    scores=bboxes[:, -1],
                    labels=labels,
                    embeds=embeds if self.with_reid else None,
                    frame_ids=frame_id)
        # Keep this frame for camera-motion compensation on the next call.
        self.last_img = img
        return bboxes, labels, ids
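
A minimal sketch of the "filter bboxes with propagated tracks" step above, using torchvision.ops.box_iou in place of mmdet's bbox_overlaps (both return an (N, M) IoU matrix for xyxy boxes); the boxes and threshold here are made up for illustration:

import torch
from torchvision.ops import box_iou

dets = torch.tensor([[0., 0., 10., 10.],
                     [50., 50., 60., 60.]])
props = torch.tensor([[1., 1., 11., 11.]])    # propagated track boxes

match_iou_thr = 0.5
ious = box_iou(dets, props)                   # (num_dets, num_props)
keep = (ious < match_iou_thr).all(dim=1)      # drop dets duplicating a track
print(dets[keep])                             # only the far box survives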