import numpy as np
import torch
# random_boxes is mmdet's test-data helper for generating random bboxes
from mmdet.core.bbox.demodata import random_boxes


def test_outs2results():
    from mmtrack.core import outs2results

    # pseudo data
    num_objects, num_classes, image_size = 8, 4, 100
    bboxes = random_boxes(num_objects, image_size)
    scores = torch.FloatTensor(num_objects, 1).uniform_(0, 1)
    bboxes = torch.cat([bboxes, scores], dim=1)
    # leave the results of the last class empty
    labels = torch.randint(0, num_classes - 1, (num_objects, ))
    ids = torch.arange(num_objects)
    masks = torch.randint(0, 2, (num_objects, image_size, image_size)).bool()

    # test outs2results without ids
    results = outs2results(
        bboxes=bboxes, labels=labels, masks=masks, num_classes=num_classes)
    for key in ['bbox_results', 'mask_results']:
        assert key in results
    assert len(results['bbox_results']) == num_classes
    assert isinstance(results['bbox_results'][0], np.ndarray)
    assert results['bbox_results'][-1].shape == (0, 5)
    assert len(results['mask_results']) == num_classes
    assert isinstance(results['mask_results'][-1], list)
    assert len(results['mask_results'][-1]) == 0
    for i in range(num_classes):
        assert results['bbox_results'][i].shape[0] == (labels == i).sum()
        assert results['bbox_results'][i].shape[1] == 5
        assert len(results['mask_results'][i]) == (labels == i).sum()
        if len(results['mask_results'][i]) > 0:
            assert results['mask_results'][i][0].shape == (image_size,
                                                           image_size)

    # test outs2results with ids
    results = outs2results(
        bboxes=bboxes,
        labels=labels,
        masks=masks,
        ids=ids,
        num_classes=num_classes)
    for key in ['bbox_results', 'mask_results']:
        assert key in results
    assert len(results['bbox_results']) == num_classes
    assert isinstance(results['bbox_results'][0], np.ndarray)
    assert results['bbox_results'][-1].shape == (0, 6)
    assert len(results['mask_results']) == num_classes
    assert isinstance(results['mask_results'][-1], list)
    assert len(results['mask_results'][-1]) == 0
    for i in range(num_classes):
        assert results['bbox_results'][i].shape[0] == (labels == i).sum()
        assert results['bbox_results'][i].shape[1] == 6
        assert len(results['mask_results'][i]) == (labels == i).sum()
        if len(results['mask_results'][i]) > 0:
            assert results['mask_results'][i][0].shape == (image_size,
                                                           image_size)
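# The test above pins down the contract of outs2results: one array per class
# in mmdet style, with a leading id column when ids are given. A minimal
# round-trip sketch, assuming mmtrack is installed and that results2outs is
# the inverse transform (it is used that way in simple_test below). The
# concrete boxes and ids here are illustrative values only.
import numpy as np
from mmtrack.core import outs2results, results2outs

bboxes = np.array([[10., 10., 50., 50., 0.9],
                   [20., 20., 60., 60., 0.8]])  # (x1, y1, x2, y2, score)
labels = np.array([0, 2])
ids = np.array([5, 7])

results = outs2results(bboxes=bboxes, labels=labels, ids=ids, num_classes=3)
# one ndarray per class; with ids, the id is prepended, giving shape (n, 6)
assert [r.shape for r in results['bbox_results']] == [(1, 6), (0, 6), (1, 6)]

outs = results2outs(bbox_results=results['bbox_results'])
# the flat per-image arrays are recovered, ordered class by class
assert outs['labels'].tolist() == [0, 2]
assert outs['ids'].tolist() == [5, 7]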
def simple_test(self, img, img_metas, rescale=False, **kwargs):
    """Test without augmentations.

    Args:
        img (Tensor): of shape (N, C, H, W) encoding input images.
            Typically these should be mean centered and std scaled.
        img_metas (list[dict]): list of image info dict where each dict
            has: 'img_shape', 'scale_factor', 'flip', and may also contain
            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
        rescale (bool, optional): If False, the returned bboxes and masks
            fit the scale of img; otherwise they fit the scale of the
            original image shape. Defaults to False.

    Returns:
        dict[str : list(ndarray)]: The tracking results.
    """
    frame_id = img_metas[0].get('frame_id', -1)
    if frame_id == 0:
        self.tracker.reset()

    det_results = self.detector.simple_test(
        img, img_metas, rescale=rescale)
    assert len(det_results) == 1, 'Batch inference is not supported.'
    bbox_results = det_results[0]
    num_classes = len(bbox_results)

    outs_det = results2outs(bbox_results=bbox_results)
    det_bboxes = torch.from_numpy(outs_det['bboxes']).to(img)
    det_labels = torch.from_numpy(outs_det['labels']).to(img).long()

    track_bboxes, track_labels, track_ids = self.tracker.track(
        img=img,
        img_metas=img_metas,
        model=self,
        bboxes=det_bboxes,
        labels=det_labels,
        frame_id=frame_id,
        rescale=rescale,
        **kwargs)

    track_results = outs2results(
        bboxes=track_bboxes,
        labels=track_labels,
        ids=track_ids,
        num_classes=num_classes)
    det_results = outs2results(
        bboxes=det_bboxes, labels=det_labels, num_classes=num_classes)

    return dict(
        det_bboxes=det_results['bbox_results'],
        track_bboxes=track_results['bbox_results'])
from collections import defaultdict

import numpy as np


def _create_coco_gt_results(dataset):
    """Build pseudo detection/tracking results from the GT annotations."""
    from mmtrack.core import outs2results

    results = defaultdict(list)
    for img_info in dataset.data_infos:
        ann = dataset.get_ann_info(img_info)
        # assign a perfect score of 1.0 to every ground-truth box
        scores = np.ones((ann['bboxes'].shape[0], 1), dtype=np.float64)
        bboxes = np.concatenate((ann['bboxes'], scores), axis=1)
        det_results = outs2results(
            bboxes=bboxes,
            labels=ann['labels'],
            num_classes=len(dataset.CLASSES))
        track_results = outs2results(
            bboxes=bboxes,
            labels=ann['labels'],
            ids=ann['instance_ids'].astype(np.int64),
            num_classes=len(dataset.CLASSES))
        results['det_bboxes'].append(det_results['bbox_results'])
        results['track_bboxes'].append(track_results['bbox_results'])
    return results
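# A typical use of the helper above: feed the ground truth back through the
# evaluator as "perfect" predictions and expect (near-)perfect scores. The
# dataset construction below is a sketch; the annotation file, class names,
# and pipeline are placeholders, and the exact metric names depend on the
# evaluator configuration.
from mmtrack.datasets import CocoVideoDataset

dataset = CocoVideoDataset(
    ann_file='path/to/ann.json',  # placeholder annotation file
    classes=('car', 'pedestrian'),  # placeholder class names
    pipeline=[])
results = _create_coco_gt_results(dataset)
# GT-as-predictions should score (near-)perfectly on both metrics
eval_results = dataset.evaluate(results, metric=['bbox', 'track'])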
def simple_test(self, img, img_metas, rescale=False):
    """Test forward.

    Args:
        img (Tensor): of shape (N, C, H, W) encoding input images.
            Typically these should be mean centered and std scaled.
        img_metas (list[dict]): list of image info dict where each dict
            has: 'img_shape', 'scale_factor', 'flip', and may also contain
            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
        rescale (bool): whether to rescale the bboxes.

    Returns:
        dict[str : list(ndarray)]: The tracking results.
    """
    # TODO: inherit from a base tracker
    assert self.with_track_head, 'track head must be implemented.'  # noqa
    frame_id = img_metas[0].get('frame_id', -1)
    if frame_id == 0:
        self.tracker.reset()

    x = self.detector.extract_feat(img)
    proposal_list = self.detector.rpn_head.simple_test_rpn(x, img_metas)
    det_results = self.detector.roi_head.simple_test(
        x, proposal_list, img_metas, rescale=rescale)
    bbox_results = det_results[0]
    num_classes = len(bbox_results)

    outs_det = results2outs(bbox_results=bbox_results)
    det_bboxes = torch.tensor(outs_det['bboxes']).to(img)
    det_labels = torch.tensor(outs_det['labels']).to(img).long()

    track_bboxes, track_labels, track_ids = self.tracker.track(
        img_metas=img_metas,
        feats=x,
        model=self,
        bboxes=det_bboxes,
        labels=det_labels,
        frame_id=frame_id)

    track_bboxes = outs2results(
        bboxes=track_bboxes,
        labels=track_labels,
        ids=track_ids,
        num_classes=num_classes)['bbox_results']

    return dict(det_bboxes=bbox_results, track_bboxes=track_bboxes)
def simple_test(self,
                img,
                img_metas,
                rescale=False,
                public_bboxes=None,
                **kwargs):
    """Test without augmentations.

    Args:
        img (Tensor): of shape (N, C, H, W) encoding input images.
            Typically these should be mean centered and std scaled.
        img_metas (list[dict]): list of image info dict where each dict
            has: 'img_shape', 'scale_factor', 'flip', and may also contain
            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
        rescale (bool, optional): If False, the returned bboxes and masks
            fit the scale of img; otherwise they fit the scale of the
            original image shape. Defaults to False.
        public_bboxes (list[Tensor], optional): Public bounding boxes from
            the benchmark. Defaults to None.

    Returns:
        dict[str : list(ndarray)]: The tracking results.
    """
    frame_id = img_metas[0].get('frame_id', -1)
    if frame_id == 0:
        self.tracker.reset()

    x = self.detector.extract_feat(img)
    if hasattr(self.detector, 'roi_head'):
        # TODO: check whether this is the case
        if public_bboxes is not None:
            public_bboxes = [_[0] for _ in public_bboxes]
            proposals = public_bboxes
        else:
            proposals = self.detector.rpn_head.simple_test_rpn(
                x, img_metas)
        det_bboxes, det_labels = self.detector.roi_head.simple_test_bboxes(
            x,
            img_metas,
            proposals,
            self.detector.roi_head.test_cfg,
            rescale=rescale)
        # TODO: support batch inference
        det_bboxes = det_bboxes[0]
        det_labels = det_labels[0]
        num_classes = self.detector.roi_head.bbox_head.num_classes
    elif hasattr(self.detector, 'bbox_head'):
        raise NotImplementedError(
            'Tracktor requires a "roi_head" to refine proposals.')
    else:
        raise TypeError('detector must have a roi_head or a bbox_head.')

    track_bboxes, track_labels, track_ids = self.tracker.track(
        img=img,
        img_metas=img_metas,
        model=self,
        feats=x,
        bboxes=det_bboxes,
        labels=det_labels,
        frame_id=frame_id,
        rescale=rescale,
        **kwargs)

    track_results = outs2results(
        bboxes=track_bboxes,
        labels=track_labels,
        ids=track_ids,
        num_classes=num_classes)
    det_results = outs2results(
        bboxes=det_bboxes, labels=det_labels, num_classes=num_classes)

    return dict(
        det_bboxes=det_results['bbox_results'],
        track_bboxes=track_results['bbox_results'])
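# Downstream consumers typically flatten the per-class track results back
# into per-image arrays. A minimal sketch, assuming `result` is the dict
# returned by one of the simple_test methods above for a single frame:
from mmtrack.core import results2outs

outs = results2outs(bbox_results=result['track_bboxes'])
for track_id, bbox, label in zip(outs['ids'], outs['bboxes'], outs['labels']):
    x1, y1, x2, y2, score = bbox
    print(f'id={track_id} label={label} score={score:.2f} '
          f'box=({x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f})')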