class TrackerSiamFC(Tracker):
    """Siamese single-object tracker driven by a ``SiamRPNModel``.

    Call :meth:`init` once with the first frame and the target bounding box,
    then :meth:`update` for every following frame to obtain the new box.

    The class name and the default ``name`` still say "SiamFC", but the
    network instantiated here is ``SiamRPNModel``.

    Optional callbacks (``None`` disables them):

    * ``on_exemplar_img_extract`` - called with the exemplar crop in
      :meth:`init`.
    * ``on_instance_img_extract`` - called with the instance crop in
      :meth:`update`.
    * ``on_response_map_calc`` - stored only; nothing in this class calls it.
    """

    def __init__(
            self, cfg: TrackerConfig, device: Union[torch.device, str],
            model_path: Optional[str] = None,
            name: str = 'SiamFC') -> None:
        """Build the model, optionally load weights, and precompute anchors.

        Args:
            cfg: Tracker hyper-parameters (sizes, strides, anchor settings).
            device: Device, or device string, the model is moved to.
            model_path: Optional path of a saved ``state_dict``. When ``None``
                the model keeps its freshly constructed weights.
            name: Tracker name forwarded to the base ``Tracker``.
        """
        super().__init__(name=name, is_deterministic=True)

        self.cfg: TrackerConfig = cfg
        self.device: torch.device = (
            device if isinstance(device, torch.device)
            else torch.device(device))

        self.model: SiamRPNModel = SiamRPNModel()
        if model_path is not None:
            # The identity ``map_location`` keeps every storage on the CPU
            # while loading; the model is moved to ``self.device`` afterwards.
            # NOTE: ``torch.load`` unpickles the file - only load checkpoints
            # that come from a trusted source.
            self.model.load_state_dict(
                torch.load(
                    model_path,
                    map_location=lambda storage, location: storage))
        self.model = self.model.to(self.device)

        # Kept for the cosine-window penalty; see the TODO in ``update``.
        self.cosine_win: np.ndarray = _create_square_cosine_window(
            self.cfg.response_size)

        # Side (in frame pixels) of the square region cropped around the
        # target for the next ``update`` call.
        self.curr_instance_side_size: int = self.cfg.instance_size

        # Tracking state, populated by ``init``.
        self.target_bbox: Optional[BBox] = None
        self.kernel_cls = None
        self.kernel_reg = None

        self.anchors = generate_anchors(
            self.cfg.scales, self.cfg.aspect_ratios, self.cfg.response_size,
            self.cfg.total_stride)

        self.on_exemplar_img_extract: Optional[TrackImgCb] = None
        self.on_instance_img_extract: Optional[TrackImgCb] = None
        self.on_response_map_calc: Optional[TrackImgCb] = None

    @torch.no_grad()
    def init(self, img: ImageT, bbox: np.ndarray) -> None:
        """Start tracking the object enclosed by ``bbox`` in ``img``.

        Crops the exemplar around the target, resizes it to
        ``cfg.exemplar_size`` and lets the model derive the regression and
        classification kernels that ``update`` reuses.

        Args:
            img: First frame of the sequence.
            bbox: Initial target box; unpacked into ``BBox(*bbox)`` after
                being passed through ``assure_int_bbox``.
        """
        self.model.eval()

        bbox = assure_int_bbox(bbox)
        self.target_bbox = BBox(*bbox)
        self.curr_instance_side_size = calc_bbox_side_size_with_context(
            self.target_bbox)

        # The exemplar region is the instance region shrunk by the ratio of
        # the two network input sizes.
        size_ratio = self.cfg.exemplar_size / self.cfg.instance_size
        exemplar_side_size = int(
            round(self.curr_instance_side_size * size_ratio))

        exemplar_bbox = BBox.build_from_center_and_size(
            self.target_bbox.center,
            np.asarray((exemplar_side_size, exemplar_side_size)))
        exemplar_img = center_crop_and_resize(
            img, exemplar_bbox,
            (self.cfg.exemplar_size, self.cfg.exemplar_size))

        if self.on_exemplar_img_extract:
            self.on_exemplar_img_extract(exemplar_img)

        # Add the batch dimension before handing the image to the model.
        exemplar_img_tensor = torch.unsqueeze(pil_to_tensor(exemplar_img), 0)
        exemplar_img_tensor = exemplar_img_tensor.to(self.device)
        self.kernel_reg, self.kernel_cls = self.model.learn_kernels(
            exemplar_img_tensor)

    @torch.no_grad()
    def update(self, img: ImageT) -> np.ndarray:
        """Locate the target in ``img`` and return its updated box.

        The instance region around the previous target centre is cropped and
        resized to ``cfg.instance_size``. The model's anchor-relative
        predictions are decoded into boxes in instance-crop pixels, and the
        best-scoring foreground box is mapped back to frame pixels. If every
        anchor is classified as background, the previous box is kept.

        Args:
            img: Current frame.

        Returns:
            The target box as produced by ``BBox.as_xywh()``.

        Raises:
            RuntimeError: If called before :meth:`init`.
        """
        if (self.target_bbox is None or self.kernel_reg is None
                or self.kernel_cls is None):
            raise RuntimeError(
                "TrackerSiamFC.init() must be called before update().")

        self.model.eval()

        side_size = int(round(self.curr_instance_side_size))
        bbox = BBox.build_from_center_and_size(
            self.target_bbox.center, np.asarray((side_size, side_size)))
        instance_img = center_crop_and_resize(
            img, bbox, (self.cfg.instance_size, self.cfg.instance_size))

        if self.on_instance_img_extract:
            self.on_instance_img_extract(instance_img)

        # NOTE(review): unlike ``init`` no batch dimension is added here -
        # confirm that ``SiamRPNModel.inference`` accepts an unbatched image.
        instance_img = pil_to_tensor(instance_img).to(self.device)
        pred_reg, pred_cls = self.model.inference(
            instance_img, self.kernel_reg, self.kernel_cls)

        pred_reg = pred_reg.squeeze()
        pred_cls = pred_cls.squeeze()

        # Assumes one row per anchor and one column per class after the
        # squeeze - TODO confirm against the model output layout.
        pred_cls = F.softmax(pred_cls, dim=1)
        # ``max`` yields the winning probability and its class index at once.
        scores, pred_cls_max = pred_cls.max(dim=1)
        scores[pred_cls_max == 0] = 0  # The 0-th position is the background.

        # Accept anchors given either as an array or as a tensor, and make
        # them match the predictions' dtype and device.
        anchors = torch.as_tensor(
            self.anchors, dtype=pred_reg.dtype, device=pred_reg.device)

        # Decode the anchor-relative regression into (cx, cy, w, h) boxes.
        xy_vals = pred_reg[:, :2] * anchors[:, 2:] + anchors[:, :2]
        wh_vals = torch.exp(pred_reg[:, 2:]) * anchors[:, 2:]

        boxes = torch.hstack((xy_vals, wh_vals))
        boxes = ops.box_convert(boxes, 'cxcywh', 'xyxy')
        boxes = ops.clip_boxes_to_image(
            boxes, (self.cfg.instance_size, self.cfg.instance_size))

        # TODO: re-introduce the cosine-window penalty (``self.cosine_win``
        # weighted by ``cfg.cosine_win_influence``) once the anchor ordering
        # of ``generate_anchors`` is known; it cannot be aligned with
        # ``scores`` from here.
        best_idx = int(torch.argmax(scores))
        if not float(scores[best_idx]) > 0:
            # No foreground anchor (or a NaN score): keep the last estimate.
            return self.target_bbox.as_xywh()

        x1, y1, x2, y2 = boxes[best_idx].tolist()

        # The crop of side ``side_size`` was resized to ``instance_size``, so
        # this factor converts instance-crop pixels into frame pixels.
        pixel_scale = side_size / self.cfg.instance_size
        crop_center = self.cfg.instance_size / 2

        # Displacement of the predicted centre from the crop centre, already
        # in (x, y) order. Rounded to integers because the box is shifted by
        # whole pixels.
        box_center = np.asarray(((x1 + x2) / 2, (y1 + y2) / 2))
        disp_in_image = np.rint(
            (box_center - crop_center) * pixel_scale).astype(int)
        self.target_bbox.shift(disp_in_image)

        # Update the target scale; a degenerate (clipped-away) prediction or
        # an empty current box would give a non-positive or undefined factor.
        target_w, target_h = self.target_bbox.size
        pred_w = (x2 - x1) * pixel_scale
        pred_h = (y2 - y1) * pixel_scale
        if min(pred_w, pred_h, target_w, target_h) > 0:
            self.target_bbox.rescale(
                float(pred_w / target_w), float(pred_h / target_h))
            # Same context computation as in ``init``.
            self.curr_instance_side_size = calc_bbox_side_size_with_context(
                self.target_bbox)

        return self.target_bbox.as_xywh()
class TestBBox(unittest.TestCase):
    """Unit tests for ``BBox``.

    ``setUp`` gives every test a fresh box built as ``BBox(10, 20, 300, 400)``.
    """

    def setUp(self) -> None:
        self.bbox = BBox(10, 20, 300, 400)

    # ------------------------------------------------------------------
    # Shared checks (not collected as tests: no ``test`` prefix).
    # ------------------------------------------------------------------
    def _check_copy_rescale(self, factor, expected_size) -> None:
        """Rescale both sides by ``factor`` out of place and check the size."""
        rescaled = self.bbox.rescale(factor, factor, in_place=False)
        self.assertEqual(rescaled.size.tolist(), expected_size)

    def _check_inplace_rescale(self, factor, expected_size) -> None:
        """Rescale in place: nothing is returned and the fixture changes."""
        outcome = self.bbox.rescale(factor, factor, in_place=True)
        self.assertIsNone(outcome)
        self.assertEqual(self.bbox.size.tolist(), expected_size)

    # ------------------------------------------------------------------
    # Construction.
    # ------------------------------------------------------------------
    def test_negative_width(self):
        self.assertRaises(AssertionError, BBox, 100, 100, -10, 200)

    def test_negative_height(self):
        self.assertRaises(AssertionError, BBox, 100, 100, 10, -200)

    # ------------------------------------------------------------------
    # Properties.
    # ------------------------------------------------------------------
    def test_center(self):
        center = self.bbox.center.tolist()
        self.assertEqual(center, [160, 220])

    def test_size(self):
        size = self.bbox.size.tolist()
        self.assertEqual(size, [300, 400])

    def test_set_size_negative(self):
        negative_size = np.asarray((-10, 100))
        with self.assertRaises(AssertionError):
            self.bbox.size = negative_size

    def test_set_size_float(self):
        float_size = np.asarray((10.0, 100.0))
        with self.assertRaises(AssertionError):
            self.bbox.size = float_size

    # ------------------------------------------------------------------
    # Conversions.
    # ------------------------------------------------------------------
    def test_corners_calculation(self):
        corners = self.bbox.as_corners().tolist()
        self.assertEqual(corners, [10, 20, 310, 420])

    def test_top_left_bottom_right_calculation(self):
        top_left, bottom_right = self.bbox.as_tl_br()
        self.assertEqual(top_left.tolist(), [10, 20])
        self.assertEqual(bottom_right.tolist(), [310, 420])

    def test_x_y_width_height_calculation_zero_based(self):
        xywh = self.bbox.as_xywh().tolist()
        self.assertEqual(xywh, [10, 20, 300, 400])

    def test_x_y_width_height_calculation_one_based(self):
        xywh = self.bbox.as_xywh(False).tolist()
        self.assertEqual(xywh, [11, 21, 300, 400])

    # ------------------------------------------------------------------
    # Invalid scale factors.
    # ------------------------------------------------------------------
    def test_negative_width_scale_factor(self):
        self.assertRaises(AssertionError, self.bbox.rescale, -0.5, 2)

    def test_negative_height_scale_factor(self):
        self.assertRaises(AssertionError, self.bbox.rescale, 0.5, -2)

    # ------------------------------------------------------------------
    # Shifting.
    # ------------------------------------------------------------------
    def test_center_shift(self):
        offset = np.asarray((100, -100))
        moved = self.bbox.shift(offset, in_place=False)
        self.assertEqual(moved.center.tolist(), [260, 120])

    def test_center_shift_inplace(self):
        offset = np.asarray((100, -100))
        outcome = self.bbox.shift(offset, in_place=True)
        self.assertIsNone(outcome)
        self.assertEqual(self.bbox.center.tolist(), [260, 120])

    def test_float_center_shift(self):
        float_offset = np.asarray((2.0, -5.5))
        with self.assertRaises(AssertionError):
            self.bbox.shift(float_offset)

    # ------------------------------------------------------------------
    # Rescaling, out of place.
    # ------------------------------------------------------------------
    def test_upscale_twice(self):
        self._check_copy_rescale(2, [600, 800])

    def test_downscale_twice(self):
        self._check_copy_rescale(0.5, [150, 200])

    def test_no_scale_change(self):
        self._check_copy_rescale(1, [300, 400])

    # ------------------------------------------------------------------
    # Rescaling, in place.
    # ------------------------------------------------------------------
    def test_upscale_twice_inplace(self):
        self._check_inplace_rescale(2, [600, 800])

    def test_downscale_twice_inplace(self):
        self._check_inplace_rescale(0.5, [150, 200])

    def test_no_scale_change_inplace(self):
        self._check_inplace_rescale(1, [300, 400])

    # ------------------------------------------------------------------
    # Representation.
    # ------------------------------------------------------------------
    def test_repr_str(self):
        self.assertEqual(repr(self.bbox), 'BBox(10,20,300,400)')

    def test_build_from_repr(self):
        # ``eval`` only ever sees the repr string produced on the line below,
        # never external input.
        rebuilt = eval(repr(self.bbox))
        self.assertEqual(self.bbox.center.tolist(), rebuilt.center.tolist())
        self.assertEqual(self.bbox.size.tolist(), rebuilt.size.tolist())