import numpy as np
import torch


def test_imrenormalize():
    from mmtrack.core import imrenormalize
    img_norm_cfg = dict(
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True)
    new_img_norm_cfg = dict(
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True)

    # numpy input: identical configs must round-trip to the same image
    img = np.random.randn(128, 256, 3).astype(np.float32)
    new_img = imrenormalize(img, img_norm_cfg, new_img_norm_cfg)
    assert isinstance(new_img, np.ndarray)
    assert new_img.shape == (128, 256, 3)
    assert np.allclose(img, new_img, atol=1e-6)

    # tensor input: same round-trip property for (N, C, H, W) tensors
    img = torch.randn(1, 3, 128, 256, dtype=torch.float)
    new_img = imrenormalize(img, img_norm_cfg, new_img_norm_cfg)
    assert isinstance(new_img, torch.Tensor)
    assert new_img.shape == (1, 3, 128, 256)
    assert np.allclose(img, new_img, atol=1e-6)
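The test above relies on re-normalization being an exact round trip when the two configs are identical. Below is a minimal stand-alone sketch of that behavior for HWC numpy images; imrenormalize_sketch is a hypothetical helper for illustration, not the mmtrack.core.imrenormalize implementation:

# Minimal sketch of re-normalization for HWC numpy images. Illustrative
# only; NOT the mmtrack.core.imrenormalize implementation.
import numpy as np


def imrenormalize_sketch(img, img_norm_cfg, new_img_norm_cfg):
    # Undo the old normalization: x_raw = x * std + mean.
    mean = np.asarray(img_norm_cfg['mean'], dtype=np.float32)
    std = np.asarray(img_norm_cfg['std'], dtype=np.float32)
    raw = img * std + mean
    if img_norm_cfg.get('to_rgb', False):
        raw = raw[..., ::-1]  # back to the original channel order
    # Apply the new normalization: x_new = (x_raw - mean') / std'.
    new_mean = np.asarray(new_img_norm_cfg['mean'], dtype=np.float32)
    new_std = np.asarray(new_img_norm_cfg['std'], dtype=np.float32)
    if new_img_norm_cfg.get('to_rgb', False):
        raw = raw[..., ::-1]
    return ((raw - new_mean) / new_std).astype(np.float32)

With identical configs the two channel flips cancel and the denormalize/normalize pair is the identity, which is exactly what the allclose assertions check.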
def track(self,
          img,
          img_metas,
          model,
          bboxes,
          labels,
          frame_id,
          rescale=False,
          **kwargs):
    """Tracking forward function.

    Args:
        img (Tensor): of shape (N, C, H, W) encoding input images.
            Typically these should be mean centered and std scaled.
        img_metas (list[dict]): list of image info dict where each dict
            has: 'img_shape', 'scale_factor', 'flip', and may also contain
            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
        model (nn.Module): MOT model.
        bboxes (Tensor): of shape (N, 5).
        labels (Tensor): of shape (N, ).
        frame_id (int): The id of current frame, 0-index.
        rescale (bool, optional): If True, the bounding boxes should be
            rescaled to fit the original scale of the image. Defaults to
            False.

    Returns:
        tuple: Tracking results.
    """
    if not hasattr(self, 'kf'):
        self.kf = model.motion

    if self.with_reid:
        if self.reid.get('img_norm_cfg', False):
            reid_img = imrenormalize(img, img_metas[0]['img_norm_cfg'],
                                     self.reid['img_norm_cfg'])
        else:
            reid_img = img.clone()

    valid_inds = bboxes[:, -1] > self.obj_score_thr
    bboxes = bboxes[valid_inds]
    labels = labels[valid_inds]

    if self.empty or bboxes.size(0) == 0:
        num_new_tracks = bboxes.size(0)
        ids = torch.arange(
            self.num_tracks,
            self.num_tracks + num_new_tracks,
            dtype=torch.long)
        self.num_tracks += num_new_tracks
        if self.with_reid:
            embeds = model.reid.simple_test(
                self.crop_imgs(reid_img, img_metas, bboxes[:, :4].clone(),
                               rescale))
    else:
        ids = torch.full((bboxes.size(0), ), -1, dtype=torch.long)

        # motion
        if model.with_motion:
            self.tracks, costs = model.motion.track(
                self.tracks, bbox_xyxy_to_cxcyah(bboxes))

        active_ids = self.confirmed_ids
        if self.with_reid:
            embeds = model.reid.simple_test(
                self.crop_imgs(reid_img, img_metas, bboxes[:, :4].clone(),
                               rescale))
            # reid
            if len(active_ids) > 0:
                track_embeds = self.get(
                    'embeds',
                    active_ids,
                    self.reid.get('num_samples', None),
                    behavior='mean')
                reid_dists = torch.cdist(track_embeds, embeds).cpu().numpy()

                valid_inds = [list(self.ids).index(_) for _ in active_ids]
                reid_dists[~np.isfinite(costs[valid_inds, :])] = np.nan

                row, col = linear_sum_assignment(reid_dists)
                for r, c in zip(row, col):
                    dist = reid_dists[r, c]
                    if not np.isfinite(dist):
                        continue
                    if dist <= self.reid['match_score_thr']:
                        ids[c] = active_ids[r]

        active_ids = [
            id for id in self.ids if id not in ids
            and self.tracks[id].frame_ids[-1] == frame_id - 1
        ]
        if len(active_ids) > 0:
            active_dets = torch.nonzero(ids == -1).squeeze(1)
            track_bboxes = self.get('bboxes', active_ids)
            ious = bbox_overlaps(track_bboxes,
                                 bboxes[active_dets][:, :-1]).cpu().numpy()
            dists = 1 - ious
            row, col = linear_sum_assignment(dists)
            for r, c in zip(row, col):
                dist = dists[r, c]
                if dist < 1 - self.match_iou_thr:
                    ids[active_dets[c]] = active_ids[r]

        new_track_inds = ids == -1
        ids[new_track_inds] = torch.arange(
            self.num_tracks,
            self.num_tracks + new_track_inds.sum(),
            dtype=torch.long)
        self.num_tracks += new_track_inds.sum()

    self.update(
        ids=ids,
        bboxes=bboxes[:, :4],
        scores=bboxes[:, -1],
        labels=labels,
        embeds=embeds if self.with_reid else None,
        frame_ids=frame_id)
    return bboxes, labels, ids
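Both association stages above follow the same pattern: build a cost matrix, hand it to the Hungarian solver, then gate the resulting pairs with a threshold. The toy example below isolates that pattern for the IoU stage; the numbers are invented, and match_iou_thr merely mirrors the role of the tracker's self.match_iou_thr:

# Toy illustration of Hungarian matching on an IoU cost matrix.
# Standalone sketch with made-up numbers, not part of the tracker.
import numpy as np
from scipy.optimize import linear_sum_assignment

match_iou_thr = 0.7
ious = np.array([[0.9, 0.1],
                 [0.2, 0.8],
                 [0.0, 0.3]])  # rows: tracks, cols: detections
dists = 1 - ious
row, col = linear_sum_assignment(dists)  # globally optimal pairing
matches = [(r, c) for r, c in zip(row, col)
           if dists[r, c] < 1 - match_iou_thr]  # keep confident pairs only
print(matches)  # [(0, 0), (1, 1)]; track 2 stays unmatched

The thresholding after the solver matters: linear_sum_assignment always pairs as many rows and columns as it can, so low-overlap pairs it is forced into must be rejected explicitly.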
def track(self,
          img,
          img_metas,
          model,
          feats,
          bboxes,
          labels,
          frame_id,
          rescale=False,
          **kwargs):
    """Tracking forward function.

    Args:
        img (Tensor): of shape (N, C, H, W) encoding input images.
            Typically these should be mean centered and std scaled.
        img_metas (list[dict]): list of image info dict where each dict
            has: 'img_shape', 'scale_factor', 'flip', and may also contain
            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
        model (nn.Module): MOT model.
        feats (tuple): Backbone features of the input image.
        bboxes (Tensor): of shape (N, 5).
        labels (Tensor): of shape (N, ).
        frame_id (int): The id of current frame, 0-index.
        rescale (bool, optional): If True, the bounding boxes should be
            rescaled to fit the original scale of the image. Defaults to
            False.

    Returns:
        tuple: Tracking results.
    """
    if self.with_reid:
        if self.reid.get('img_norm_cfg', False):
            reid_img = imrenormalize(img, img_metas[0]['img_norm_cfg'],
                                     self.reid['img_norm_cfg'])
        else:
            reid_img = img.clone()

    valid_inds = bboxes[:, -1] > self.obj_score_thr
    bboxes = bboxes[valid_inds]
    labels = labels[valid_inds]

    if self.empty:
        num_new_tracks = bboxes.size(0)
        ids = torch.arange(
            self.num_tracks,
            self.num_tracks + num_new_tracks,
            dtype=torch.long)
        self.num_tracks += num_new_tracks
        if self.with_reid:
            embeds = model.reid.simple_test(
                self.crop_imgs(reid_img, img_metas, bboxes[:, :4].clone(),
                               rescale))
    else:
        # motion
        if model.with_cmc:
            if model.with_linear_motion:
                num_samples = model.linear_motion.num_samples
            else:
                num_samples = 1
            self.tracks = model.cmc.track(self.last_img, img, self.tracks,
                                          num_samples, frame_id)

        if model.with_linear_motion:
            self.tracks = model.linear_motion.track(self.tracks, frame_id)

        # propagate tracks
        prop_bboxes, prop_labels, prop_ids = self.regress_tracks(
            feats, img_metas, model.detector, frame_id, rescale)

        # filter bboxes with propagated tracks
        ious = bbox_overlaps(bboxes[:, :4], prop_bboxes[:, :4])
        valid_inds = (ious < self.regression['match_iou_thr']).all(dim=1)
        bboxes = bboxes[valid_inds]
        labels = labels[valid_inds]
        ids = torch.full((bboxes.size(0), ), -1, dtype=torch.long)

        if self.with_reid:
            prop_embeds = model.reid.simple_test(
                self.crop_imgs(reid_img, img_metas,
                               prop_bboxes[:, :4].clone(), rescale))
            if bboxes.size(0) > 0:
                embeds = model.reid.simple_test(
                    self.crop_imgs(reid_img, img_metas,
                                   bboxes[:, :4].clone(), rescale))
            else:
                embeds = prop_embeds.new_zeros((0, prop_embeds.size(1)))
            # reid
            active_ids = [int(_) for _ in self.ids if _ not in prop_ids]
            if len(active_ids) > 0 and bboxes.size(0) > 0:
                track_embeds = self.get(
                    'embeds',
                    active_ids,
                    self.reid.get('num_samples', None),
                    behavior='mean')
                reid_dists = torch.cdist(track_embeds, embeds).cpu().numpy()

                track_bboxes = self.get('bboxes', active_ids)
                ious = bbox_overlaps(track_bboxes,
                                     bboxes[:, :4]).cpu().numpy()
                iou_masks = ious < self.reid['match_iou_thr']
                reid_dists[iou_masks] = 1e6

                row, col = linear_sum_assignment(reid_dists)
                for r, c in zip(row, col):
                    dist = reid_dists[r, c]
                    if dist <= self.reid['match_score_thr']:
                        ids[c] = active_ids[r]

        new_track_inds = ids == -1
        ids[new_track_inds] = torch.arange(
            self.num_tracks,
            self.num_tracks + new_track_inds.sum(),
            dtype=torch.long)
        self.num_tracks += new_track_inds.sum()

        if bboxes.shape[1] == 4:
            bboxes = bboxes.new_zeros((0, 5))
        if prop_bboxes.shape[1] == 4:
            prop_bboxes = prop_bboxes.new_zeros((0, 5))

        bboxes = torch.cat((prop_bboxes, bboxes), dim=0)
        labels = torch.cat((prop_labels, labels), dim=0)
        ids = torch.cat((prop_ids, ids), dim=0)
        if self.with_reid:
            embeds = torch.cat((prop_embeds, embeds), dim=0)
    self.update(
        ids=ids,
        bboxes=bboxes[:, :4],
        scores=bboxes[:, -1],
        labels=labels,
        embeds=embeds if self.with_reid else None,
        frame_ids=frame_id)
    self.last_img = img
    return bboxes, labels, ids
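A step worth isolating in the Tracktor flow above is the suppression of raw detections that duplicate propagated track boxes: a detection survives only if its IoU with every propagated box stays below self.regression['match_iou_thr']. Below is a stand-alone sketch with invented boxes, using torchvision.ops.box_iou as a stand-in for the bbox_overlaps helper the tracker calls:

# Toy illustration of detection suppression against propagated tracks.
# Standalone sketch with made-up boxes, not part of the tracker.
import torch
from torchvision.ops import box_iou  # stand-in for bbox_overlaps

match_iou_thr = 0.6  # plays the role of self.regression['match_iou_thr']
prop_bboxes = torch.tensor([[0., 0., 10., 10.]])     # propagated track box
det_bboxes = torch.tensor([[1., 1., 10., 10.],       # duplicates the track
                           [50., 50., 60., 60.]])    # genuinely new object
ious = box_iou(det_bboxes, prop_bboxes)              # (num_dets, num_props)
keep = (ious < match_iou_thr).all(dim=1)             # below thr vs. ALL props
print(det_bboxes[keep])  # only the far-away detection survives

The .all(dim=1) is the key design choice: a detection is discarded as soon as it overlaps any propagated box, since the regressed track is trusted over the fresh detection for objects already being tracked.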