Repo.clone_from(git_url, goturn_path) sys.path.insert(0, os.path.join(working_dir, "goturn_amoudgl")) sys.path.insert(0, os.path.join(working_dir, "goturn_amoudgl", "src")) from model import GoNet from pathlib import Path model_dir = Path(os.path.join(goturn_path, "checkpoints")) ckpt_path = next(model_dir.glob("*.pth")) # setup model model = GoNet() if ckpt_path is not None: checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage) model.load_state_dict(checkpoint["state_dict"]) model.eval() model = model.to(_device) pgt = PyTorchGoturn( model=model, input_shape=input_shape, clip_values=clip_values, preprocessing=preprocessing, device_type=_device, ) if benchmark_not_attack: from got10k.experiments import ExperimentGOT10k
class GOTURN: """Tester for OTB formatted sequences""" def __init__(self, model_path, device): # model_path = os.path.join(model_path, "/checkpoints/pytroch_goturn.pth.tar") if (not os.path.isfile(model_path)) or ( not os.path.exists(model_path)): fileName = input( "Whhoops! No such file! Please enter the name of the file you'd like to use." ) # Ros params self.init_variables_hard() self.init_publisher() self.init_subscribers() # GOTURN Model params self.device = device self.transform = NormalizeToTensor() self.scale = Rescale((224, 224)) self.model_path = model_path self.model = GoNet() self.opts = None self.prev_img = None self.curr_img = None checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) self.model.load_state_dict(checkpoint['state_dict']) self.model.to(device) def init_variables_hard(self): self.bbox_in_topic = rospy.get_param("~bbox_in_topic", "/mbz2020/perception/roi/rect") self.color_image_topic = rospy.get_param( "~color_img_topic", "/front_track_camera/fisheye1/camera_raw") self.publish_result_img = rospy.get_param("~publish_tracked", "False") self.oob_threshold = rospy.get_param("~oob_threshold", 10) self.max_bbox_ratio = rospy.get_param("~max_bbox_ratio", 1.0) self._bridge = CvBridge() self._current_color_msg = None self._inital_bbox = None self._last_bbox = None self._is_first_frame = True self._has_scale_changed = False self._color_timestamp = -1 self._original_distance = -1 self._current_distance = -1 self._previous_distance = -1 self._scale = 1.0 self._fallback_scale = 0.4 self._current_status = 1 def init_publisher(self): # regressed bbox self._pub_bbox = rospy.Publisher("/perception/tracker/bboxOut", BoundingBox2D, queue_size=10) # Image wiht overlayed bbox self._pub_result_img = rospy.Publisher("/perception/tracker/bboxImage", Image, queue_size=10) # Tracker status self._pub_status = rospy.Publisher("/perception/tracker/status", Int8, queue_size=10) def init_subscribers(self): # Input Image frame sub_color = message_filters.Subscriber(self.color_image_topic, Image) # Input color frame with callback sub_image = rospy.Subscriber(self.color_image_topic, Image, self.got_image) # Selected ROI from first frame sub_bbox = rospy.Subscriber(self.bbox_in_topic, BoundingBox2D, self.got_bounding_box) def got_bounding_box(self, boundingBox): self.init_variables_hard() center = (boundingBox.center.x, boundingBox.center.y) width = boundingBox.size_x height = boundingBox.size_y if self._inital_bbox is None: rospy.loginfo("BBox Received") self._inital_bbox = ( int(center[0] - width / 2), int(center[1] - height / 2), width, height, ) def calculate_bbox_center(self, bbox): center = (bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2) center = tuple([int(x) for x in center]) return center def got_image(self, rgb_msg): color_image = self._bridge.imgmsg_to_cv2(rgb_msg) if self._is_first_frame and self._inital_bbox is not None: rospy.loginfo("Initializing tracker") self._last_bbox = self._inital_bbox self._is_first_frame = False self.prev_img = color_image elif not self._is_first_frame: self.model.eval() self.curr_img = color_image sample = self.transform(self._get_sample()) self._last_bbox = self.get_rect(sample) self.prev_img = self.curr_img if self._last_bbox is not None: bbox_message = BoundingBox2D() bbox_message.size_x = self._last_bbox[2] bbox_message.size_y = self._last_bbox[3] bbox_message.center.theta = 0 bbox_message.center.x = self._last_bbox[0] + self._last_bbox[2] / 2 bbox_message.center.y = self._last_bbox[1] + self._last_bbox[3] / 2 self._pub_bbox.publish(bbox_message) status_message = Int8() status_message.data = self._current_status self._pub_status.publish(status_message) if self.publish_result_img: self._last_bbox = tuple([int(i) for i in self._last_bbox]) if self._current_status == 1: cv2.rectangle(color_image, (self._last_bbox[0], self._last_bbox[1]), (self._last_bbox[0] + self._last_bbox[2], self._last_bbox[1] + self._last_bbox[3]), (0, 0, 255), 2) else: cv2.rectangle(color_image, (self._last_bbox[0], self._last_bbox[1]), (self._last_bbox[0] + self._last_bbox[2], self._last_bbox[1] + self._last_bbox[3]), (255, 0, 0), 2) imgmsg = self._bridge.cv2_to_imgmsg(color_image, encoding="mono8") self._pub_result_img.publish(imgmsg) def _get_sample(self): """ Returns cropped previous and current frame at the previous predicted location. Note that the images are scaled to (224,224,3). """ prev = self.prev_img curr = self.curr_img prevbb = self._last_bbox prev_sample, opts_prev = crop_sample({'image': prev, 'bb': prevbb}) curr_sample, opts_curr = crop_sample({'image': curr, 'bb': prevbb}) prev_img = bgr2rgb(self.scale(prev_sample, opts_prev)['image']) curr_img = bgr2rgb(self.scale(curr_sample, opts_curr)['image']) sample = {'previmg': prev_img, 'currimg': curr_img} self.curr_img = curr self.opts = opts_curr return sample def get_rect(self, sample): """ Regresses the bounding box coordinates in the original image dimensions for an input sample. """ x1, x2 = sample['previmg'], sample['currimg'] x1 = x1.unsqueeze(0).to(self.device) x2 = x2.unsqueeze(0).to(self.device) y = self.model(x1, x2) bb = y.data.cpu().numpy().transpose((1, 0)) bb = bb[:, 0] bbox = BoundingBox(bb[0], bb[1], bb[2], bb[3]) # inplace conversion bbox.unscale(self.opts['search_region']) bbox.uncenter(self.curr_img, self.opts['search_location'], self.opts['edge_spacing_x'], self.opts['edge_spacing_y']) return bbox.get_bb_list() def spin(self): rate = rospy.Rate(30) while not rospy.is_shutdown(): rate.sleep()
class GOTURN: """Tester for OTB formatted sequences""" def __init__(self, root_dir, model_path, device, live=False): self.root_dir = root_dir self.device = device self.transform = NormalizeToTensor() self.scale = Rescale((224, 224)) self.model_path = model_path self.model = GoNet() self.gt = [] self.opts = None self.curr_img = None checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) self.model.load_state_dict(checkpoint['state_dict']) self.model.to(device) if live: # contains only img. frames = glob.glob(os.path.join(root_dir, '*.jpg')) frames = sorted(frames) self.len = len(frames) - 1 frames = np.array(frames) frames.sort() self.x = [] self.img = [] for i in range(self.len): self.x.append([frames[i], frames[i + 1]]) img_prev = cv2.imread(frames[i]) # img_prev = bgr2rgb(img_prev) img_curr = cv2.imread(frames[i + 1]) # img_curr = bgr2rgb(img_curr) self.img.append([img_prev, img_curr]) self.x = np.array(self.x) else: frames = os.listdir(root_dir + '/img') frames = [root_dir + "/img/" + frame for frame in frames] self.len = len(frames) - 1 frames = np.array(frames) frames.sort() self.x = [] f = open(root_dir + '/groundtruth_rect.txt') lines = f.readlines() lines[0] = re.sub('\t', ',', lines[0]) lines[0] = re.sub(' +', ',', lines[0]) init_bbox = lines[0].strip().split(',') init_bbox = [float(x) for x in init_bbox] init_bbox = [ init_bbox[0], init_bbox[1], init_bbox[0] + init_bbox[2], init_bbox[1] + init_bbox[3] ] init_bbox = np.array(init_bbox) self.prev_rect = init_bbox self.img = [] for i in range(self.len): self.x.append([frames[i], frames[i + 1]]) img_prev = cv2.imread(frames[i]) img_prev = bgr2rgb(img_prev) img_curr = cv2.imread(frames[i + 1]) img_curr = bgr2rgb(img_curr) self.img.append([img_prev, img_curr]) lines[i + 1] = re.sub('\t', ',', lines[i + 1]) lines[i + 1] = re.sub(' +', ',', lines[i + 1]) bb = lines[i + 1].strip().split(',') bb = [float(x) for x in bb] bb = [bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3]] self.gt.append(bb) self.x = np.array(self.x) print(init_bbox) def set_init_box(self, initBox): self.prev_rect = np.array(initBox) def __getitem__(self, idx): """ Returns transformed torch tensor which is passed to the network. """ sample = self._get_sample(idx) return self.transform(sample) def _get_sample(self, idx): """ Returns cropped previous and current frame at the previous predicted location. Note that the images are scaled to (224,224,3). """ prev = self.img[idx][0] curr = self.img[idx][1] prevbb = self.prev_rect prev_sample, opts_prev = crop_sample({'image': prev, 'bb': prevbb}) curr_sample, opts_curr = crop_sample({'image': curr, 'bb': prevbb}) curr_img = self.scale(curr_sample, opts_curr)['image'] prev_img = self.scale(prev_sample, opts_prev)['image'] sample = {'previmg': prev_img, 'currimg': curr_img} self.curr_img = curr self.opts = opts_curr return sample def get_rect(self, sample): """ Regresses the bounding box coordinates in the original image dimensions for an input sample. """ x1, x2 = sample['previmg'], sample['currimg'] x1 = x1.unsqueeze(0).to(self.device) x2 = x2.unsqueeze(0).to(self.device) y = self.model(x1, x2) bb = y.data.cpu().numpy().transpose((1, 0)) bb = bb[:, 0] bbox = BoundingBox(bb[0], bb[1], bb[2], bb[3]) # inplace conversion bbox.unscale(self.opts['search_region']) bbox.uncenter(self.curr_img, self.opts['search_location'], self.opts['edge_spacing_x'], self.opts['edge_spacing_y']) return bbox.get_bb_list() def test(self): """ Loops through all the frames of test sequence and tracks the target. Prints predicted box location on console with frame ID. """ self.model.eval() st = time.time() for i in range(self.len): sample = self[i] bb = self.get_rect(sample) self.prev_rect = bb print("frame: {}".format(i + 1), bb) end = time.time() print("fps: {:.3f}".format(self.len / (end - st)))
class TrackerGOTURN(Tracker): """GOTURN got10k class for benchmark and evaluation on tracking datasets. This class overrides the default got10k 'Tracker' class methods namely, 'init' and 'update'. Attributes: cuda: flag if gpu device is available device: device on which GOTURN is evaluated ('cuda:0', 'cpu') net: GOTURN pytorch model prev_box: previous bounding box prev_img: previous tracking image transform_tensor: normalizes images and returns torch tensor. otps: bounding box config to unscale and uncenter network output. """ def __init__(self, net_path=None, **kargs): super(TrackerGOTURN, self).__init__(name='PyTorchGOTURN', is_deterministic=True) # setup GPU device if available self.cuda = torch.cuda.is_available() self.device = torch.device('cuda:0' if self.cuda else 'cpu') # setup model self.net = GoNet() if net_path is not None: checkpoint = torch.load(net_path, map_location=lambda storage, loc: storage) self.net.load_state_dict(checkpoint['state_dict']) self.net.eval() self.net = self.net.to(self.device) # setup transforms self.prev_img = None # previous image in numpy format self.prev_box = None # follows format: [xmin, ymin, xmax, ymax] self.scale = Rescale((224, 224)) self.transform_tensor = transforms.Compose([NormalizeToTensor()]) self.opts = None def init(self, image, box): """ Initiates the tracker at given box location. Aassumes that the initial box has format: [xmin, ymin, width, height] """ image = np.array(image) if image.ndim == 2: image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) # goturn helper functions expect box in [xmin, ymin, xmax, ymax] format box[2] = box[0] + box[2] box[3] = box[1] + box[3] self.prev_box = box self.prev_img = image def update(self, image): """ Given current image, returns target box. """ image = np.array(image) if image.ndim == 2: image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) # crop current and previous image at previous box location prev_sample, opts_prev = crop_sample({ 'image': self.prev_img, 'bb': self.prev_box }) curr_sample, opts_curr = crop_sample({ 'image': image, 'bb': self.prev_box }) self.opts = opts_curr self.curr_img = image curr_img = self.scale(curr_sample, opts_curr)['image'] prev_img = self.scale(prev_sample, opts_prev)['image'] sample = {'previmg': prev_img, 'currimg': curr_img} sample = self.transform_tensor(sample) # do forward pass to get box box = np.array(self._get_rect(sample)) # update previous box and image self.prev_img = image self.prev_box = np.copy(box) # convert [xmin, ymin, xmax, ymax] box to [xmin, ymin, width, height] # for correct evaluation by got10k toolkit box[2] = box[2] - box[0] box[3] = box[3] - box[1] return box def _get_rect(self, sample): """ Performs forward pass through the GOTURN network to regress bounding box coordinates in the original image dimensions. """ x1, x2 = sample['previmg'], sample['currimg'] x1 = x1.unsqueeze(0).to(self.device) x2 = x2.unsqueeze(0).to(self.device) y = self.net(x1, x2) bb = y.data.cpu().numpy().transpose((1, 0)) bb = bb[:, 0] bbox = BoundingBox(bb[0], bb[1], bb[2], bb[3]) # inplace conversion bbox.unscale(self.opts['search_region']) bbox.uncenter(self.curr_img, self.opts['search_location'], self.opts['edge_spacing_x'], self.opts['edge_spacing_y']) return bbox.get_bb_list()