def simple_test_ope(self, img, frame_id, gt_bboxes):
    """Test using OPE test mode.

    Args:
        img (Tensor): of shape (1, C, H, W) encoding input image.
        frame_id (int): the id of current frame in the video.
        gt_bboxes (list[Tensor]): list of ground truth bboxes for each
            image with shape (1, 4) in [tl_x, tl_y, br_x, br_y] format or
            shape (1, 8) in [x1, y1, x2, y2, x3, y3, x4, y4] format.

    Returns:
        bbox_pred (Tensor): in [tl_x, tl_y, br_x, br_y] format.
        best_score (Tensor): the tracking bbox confidence in range [0, 1];
            the score of the initial frame is -1.
    """
    if frame_id == 0:
        # first frame: build the template from the ground truth bbox
        gt_bboxes = gt_bboxes[0][0]
        self.memo = Dict()
        self.memo.bbox = quad2bbox(gt_bboxes)
        self.memo.z_feat, self.memo.avg_channel = self.init(
            img, self.memo.bbox)
        best_score = -1.
    else:
        # later frames: track from the memorized template and bbox
        best_score, self.memo.bbox = self.track(img, self.memo.bbox,
                                                self.memo.z_feat,
                                                self.memo.avg_channel)
    bbox_pred = bbox_cxcywh_to_xyxy(self.memo.bbox)
    return bbox_pred, best_score
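# A minimal OPE usage sketch (not part of the original module). Assumptions:
# `tracker` is an instance of the model that owns `simple_test_ope`, `frames`
# is an iterable of (1, C, H, W) tensors, and `first_frame_gt_bboxes` has the
# list[Tensor] layout documented above; all three names are hypothetical. It
# illustrates the OPE contract: the ground truth bbox is only consumed on
# frame 0 (template initialization, score -1), and every later frame is
# tracked from the memorized state.
def run_ope_demo(tracker, frames, first_frame_gt_bboxes):
    results = []
    for frame_id, img in enumerate(frames):
        bbox_pred, best_score = tracker.simple_test_ope(
            img, frame_id, first_frame_gt_bboxes)
        # bbox_pred is [tl_x, tl_y, br_x, br_y]; best_score is -1 on the
        # initial frame and a confidence in [0, 1] afterwards.
        results.append((bbox_pred.cpu().numpy(), float(best_score)))
    return results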
def test_quad2bbox():
    """Test converting random quadrilaterals to [cx, cy, w, h] bboxes."""
    quad = torch.zeros((5, 8), dtype=torch.float)
    # Fill one subset of the 8 coordinates with lower positive values and the
    # rest with higher ones, so every converted bbox has positive entries.
    low_coord_index = torch.tensor([0, 1, 3, 6], dtype=torch.long)
    high_coord_index = torch.tensor([2, 4, 5, 7], dtype=torch.long)
    quad[:, low_coord_index] = torch.randint(1, 10, (5, 4), dtype=torch.float)
    quad[:, high_coord_index] = torch.randint(
        10, 20, (5, 4), dtype=torch.float)
    bbox = quad2bbox(quad)
    assert (bbox > 0).all()
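# A companion sketch (assumed behavior, not from the original test file).
# The docstrings of `simple_test_ope`/`simple_test_vot` state that the ground
# truth may also be a 4-element [tl_x, tl_y, br_x, br_y] box, and both
# methods pass it through `quad2bbox` before converting the memorized bbox
# back with `bbox_cxcywh_to_xyxy`, so 4-element input is assumed to be
# converted to [cx, cy, w, h] as well.
def test_quad2bbox_xyxy_input():
    # matches how the trackers call it: gt_bboxes[0][0] has shape (4, )
    bbox_xyxy = torch.tensor([2., 3., 12., 23.])
    bbox_cxcywh = quad2bbox(bbox_xyxy)
    assert (bbox_cxcywh > 0).all()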
def simple_test_vot(self, img, frame_id, gt_bboxes, img_metas=None):
    """Test using VOT test mode.

    Args:
        img (Tensor): of shape (1, C, H, W) encoding input image.
        frame_id (int): the id of current frame in the video.
        gt_bboxes (list[Tensor]): list of ground truth bboxes for each
            image with shape (1, 4) in [tl_x, tl_y, br_x, br_y] format or
            shape (1, 8) in [x1, y1, x2, y2, x3, y3, x4, y4] format.
        img_metas (list[dict]): list of image information dict where each
            dict has: 'img_shape', 'scale_factor', 'flip', and may also
            contain 'filename', 'ori_shape', 'pad_shape', and
            'img_norm_cfg'. For details on the values of these keys see
            `mmtrack/datasets/pipelines/formatting.py:VideoCollect`.

    Returns:
        bbox_pred (Tensor): in [tl_x, tl_y, br_x, br_y] format.
        best_score (Tensor): the tracking bbox confidence in range [0, 1];
            the score of the initial frame is -1.
    """
    if frame_id == 0:
        self.init_frame_id = 0
    if self.init_frame_id == frame_id:
        # initialization state
        gt_bboxes = gt_bboxes[0][0]
        self.memo = Dict()
        self.memo.bbox = quad2bbox(gt_bboxes)
        self.memo.z_feat, self.memo.avg_channel = self.init(
            img, self.memo.bbox)
        # 1 denotes the initialization state
        bbox_pred = img.new_tensor([1.])
        best_score = -1.
    elif self.init_frame_id > frame_id:
        # 0 denotes the unknown state, namely the frames skipped after a
        # tracking failure
        bbox_pred = img.new_tensor([0.])
        best_score = -1.
    else:
        # normal tracking state
        best_score, self.memo.bbox = self.track(img, self.memo.bbox,
                                                self.memo.z_feat,
                                                self.memo.avg_channel)
        # convert the tracked bbox and the ground truth to regions
        track_bbox = bbox_cxcywh_to_x1y1wh(self.memo.bbox).cpu().numpy()
        track_region = bbox2region(track_bbox)
        gt_bbox = gt_bboxes[0][0]
        if len(gt_bbox) == 4:
            gt_bbox = bbox_xyxy_to_x1y1wh(gt_bbox)
        gt_region = bbox2region(gt_bbox.cpu().numpy())

        if img_metas is not None and 'img_shape' in img_metas[0]:
            image_shape = img_metas[0]['img_shape']
            image_wh = (image_shape[1], image_shape[0])
        else:
            image_wh = None
            warnings.warn('image shape is needed when calculating bbox '
                          'overlap')
        overlap = calculate_region_overlap(
            track_region, gt_region, bounds=image_wh)
        if overlap <= 0:
            # tracking failure: re-initialize 5 frames later
            self.init_frame_id = frame_id + 5
            # 2 denotes the failure state
            bbox_pred = img.new_tensor([2.])
        else:
            bbox_pred = bbox_cxcywh_to_xyxy(self.memo.bbox)
    return bbox_pred, best_score
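# A minimal VOT-loop sketch (not part of the original module). Assumptions:
# `tracker` owns `simple_test_vot`, and `frames`, `gt_bboxes_per_frame` and
# `img_metas_per_frame` come from a hypothetical VOT-style dataloader. It
# shows how a caller can decode the 1-element state codes returned above:
# 1. marks an initialization (or re-initialization) frame, 0. a frame skipped
# after a failure, 2. the failure frame itself; a 4-element tensor is a
# normal [tl_x, tl_y, br_x, br_y] prediction.
def run_vot_demo(tracker, frames, gt_bboxes_per_frame, img_metas_per_frame):
    states = []
    for frame_id, img in enumerate(frames):
        bbox_pred, best_score = tracker.simple_test_vot(
            img, frame_id, gt_bboxes_per_frame[frame_id],
            img_metas_per_frame[frame_id])
        if bbox_pred.numel() == 1:
            # state code: 1. -> init, 0. -> skipped, 2. -> failure
            states.append(('state', float(bbox_pred)))
        else:
            states.append(
                ('bbox', bbox_pred.cpu().numpy(), float(best_score)))
    return states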