def __init__(self, model_path, device):
        # model_path = os.path.join(model_path, "checkpoints/pytorch_goturn.pth.tar")
        if not os.path.isfile(model_path):
            # isfile() already implies the path exists; prompt for a usable path.
            model_path = input(
                "Whoops! No such file! Please enter the path of the checkpoint "
                "you'd like to use: ")
        # ROS params
        self.init_variables_hard()
        self.init_publisher()
        self.init_subscribers()

        # GOTURN Model params
        self.device = device
        self.transform = NormalizeToTensor()
        self.scale = Rescale((224, 224))
        self.model_path = model_path
        self.model = GoNet()
        self.opts = None
        self.prev_img = None
        self.curr_img = None
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.to(device)
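Note that map_location=lambda storage, loc: storage deserializes every checkpoint tensor onto the CPU regardless of the device it was saved from (equivalent to map_location='cpu'), after which .to(device) moves the model to the target device. A minimal standalone sketch of the same pattern; the checkpoint path is illustrative:

import torch

# Load on CPU first; .to(device) can move the model afterwards.
checkpoint = torch.load('checkpoints/pytorch_goturn.pth.tar', map_location='cpu')
print(sorted(checkpoint.keys()))  # expect at least 'state_dict'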
Example #2
 def __init__(self, root_dir, model_path, device):
     self.root_dir = root_dir
     self.device = device
     self.transform = NormalizeToTensor()
     self.scale = Rescale((224, 224))
     self.model_path = model_path
     self.model = GoNet()
     self.gt = []
     self.opts = None
     self.curr_img = None
     checkpoint = torch.load(
         model_path, map_location=lambda storage, loc: storage)
     self.model.load_state_dict(checkpoint['state_dict'])
     self.model.to(device)
     frames = [file for file in os.listdir(root_dir + '/img')
               if 'jpeg' in file or 'jpg' in file]
     frames = [root_dir + "/img/" + frame for frame in frames]
     self.len = len(frames)-1
     frames = np.array(frames)
     frames.sort()
     self.x = []
     try:
         f = open(root_dir + '/groundtruth_rect.txt')
     except IOError:
         # Some sequences name the annotation file 'groundtruth.txt' instead.
         f = open(root_dir + '/groundtruth.txt')
     lines = f.readlines()
     f.close()
     # Evaluate over min(#frames, #ground-truth lines) in case the counts differ.
     self.len = min(self.len, len(lines)-1)
     lines[0] = re.sub('\t', ',', lines[0])
     lines[0] = re.sub(' +', ',', lines[0])
     init_bbox = lines[0].strip().split(',')
     init_bbox = [float(x) for x in init_bbox]
     init_bbox = [init_bbox[0], init_bbox[1], init_bbox[0]+init_bbox[2],
                  init_bbox[1] + init_bbox[3]]
     init_bbox = np.array(init_bbox)
     self.prev_rect = init_bbox
     self.img = []
     for i in range(self.len):
         self.x.append([frames[i], frames[i+1]])
         img_prev = cv2.imread(frames[i])
         img_prev = bgr2rgb(img_prev)
         img_curr = cv2.imread(frames[i+1])
         img_curr = bgr2rgb(img_curr)
         self.img.append([img_prev, img_curr])
         lines[i+1] = re.sub('\t', ',', lines[i+1])
         lines[i+1] = re.sub(' +', ',', lines[i+1])
         bb = lines[i+1].strip().split(',')
         bb = [float(x) for x in bb]
         bb = [bb[0], bb[1], bb[0]+bb[2], bb[1]+bb[3]]
         self.gt.append(bb)
     self.x = np.array(self.x)
     print(init_bbox)
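The constructor above converts OTB-style [x, y, width, height] annotations into corner format [xmin, ymin, xmax, ymax]. A standalone sketch of that conversion on a hypothetical ground-truth line:

line = "198,214,34,81"  # hypothetical OTB annotation: x, y, width, height
x, y, w, h = (float(v) for v in line.split(','))
print([x, y, x + w, y + h])  # -> [198.0, 214.0, 232.0, 295.0]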
Example #3
    def __init__(self, net_path=None, **kwargs):
        super(TrackerGOTURN, self).__init__(name='PyTorchGOTURN',
                                            is_deterministic=True)

        # setup GPU device if available
        self.cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.cuda else 'cpu')

        # setup model
        self.net = GoNet()
        if net_path is not None:
            checkpoint = torch.load(net_path,
                                    map_location=lambda storage, loc: storage)
            self.net.load_state_dict(checkpoint['state_dict'])
            self.net.eval()
        self.net = self.net.to(self.device)

        # setup transforms
        self.prev_img = None  # previous image in numpy format
        self.prev_box = None  # follows format: [xmin, ymin, xmax, ymax]
        self.scale = Rescale((224, 224))
        self.transform_tensor = transforms.Compose([NormalizeToTensor()])
        self.opts = None
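A minimal sketch of benchmarking this tracker with the got10k toolkit, assuming got10k is installed and the full TrackerGOTURN class is available (as in Example #8 below); the dataset and checkpoint paths are illustrative:

from got10k.experiments import ExperimentOTB

tracker = TrackerGOTURN(net_path='checkpoints/pytorch_goturn.pth.tar')
experiment = ExperimentOTB('data/OTB', version=2015)
experiment.run(tracker)            # drives tracker.init()/update() per sequence
experiment.report([tracker.name])  # writes precision/success results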
Example #4
        from git import Repo

        git_url = "[email protected]:amoudgl/pygoturn.git"
        Repo.clone_from(git_url, goturn_path)

    sys.path.insert(0, os.path.join(working_dir, "goturn_amoudgl"))
    sys.path.insert(0, os.path.join(working_dir, "goturn_amoudgl", "src"))

    from model import GoNet
    from pathlib import Path

    model_dir = Path(os.path.join(goturn_path, "checkpoints"))
    # A default of None avoids StopIteration when no checkpoint is present.
    ckpt_path = next(model_dir.glob("*.pth"), None)

    # setup model
    model = GoNet()
    if ckpt_path is not None:
        checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint["state_dict"])
        model.eval()
    model = model.to(_device)

pgt = PyTorchGoturn(
    model=model,
    input_shape=input_shape,
    clip_values=clip_values,
    preprocessing=preprocessing,
    device_type=_device,
)

if benchmark_not_attack:
Example #5
 def __init__(self, root_dir, model_path, device, live=False):
     self.root_dir = root_dir
     self.device = device
     self.transform = NormalizeToTensor()
     self.scale = Rescale((224, 224))
     self.model_path = model_path
     self.model = GoNet()
     self.gt = []
     self.opts = None
     self.curr_img = None
     checkpoint = torch.load(model_path,
                             map_location=lambda storage, loc: storage)
     self.model.load_state_dict(checkpoint['state_dict'])
     self.model.to(device)
     if live:
         # contains only img.
         frames = glob.glob(os.path.join(root_dir, '*.jpg'))
         frames = sorted(frames)
         self.len = len(frames) - 1
         frames = np.array(frames)
         frames.sort()
         self.x = []
         self.img = []
         for i in range(self.len):
             self.x.append([frames[i], frames[i + 1]])
             img_prev = cv2.imread(frames[i])
             # img_prev = bgr2rgb(img_prev)
             img_curr = cv2.imread(frames[i + 1])
             # img_curr = bgr2rgb(img_curr)
             self.img.append([img_prev, img_curr])
         self.x = np.array(self.x)
     else:
         frames = os.listdir(root_dir + '/img')
         frames = [root_dir + "/img/" + frame for frame in frames]
         self.len = len(frames) - 1
         frames = np.array(frames)
         frames.sort()
         self.x = []
         with open(root_dir + '/groundtruth_rect.txt') as f:
             lines = f.readlines()
         lines[0] = re.sub('\t', ',', lines[0])
         lines[0] = re.sub(' +', ',', lines[0])
         init_bbox = lines[0].strip().split(',')
         init_bbox = [float(x) for x in init_bbox]
         init_bbox = [
             init_bbox[0], init_bbox[1], init_bbox[0] + init_bbox[2],
             init_bbox[1] + init_bbox[3]
         ]
         init_bbox = np.array(init_bbox)
         self.prev_rect = init_bbox
         self.img = []
         for i in range(self.len):
             self.x.append([frames[i], frames[i + 1]])
             img_prev = cv2.imread(frames[i])
             img_prev = bgr2rgb(img_prev)
             img_curr = cv2.imread(frames[i + 1])
             img_curr = bgr2rgb(img_curr)
             self.img.append([img_prev, img_curr])
             lines[i + 1] = re.sub('\t', ',', lines[i + 1])
             lines[i + 1] = re.sub(' +', ',', lines[i + 1])
             bb = lines[i + 1].strip().split(',')
             bb = [float(x) for x in bb]
             bb = [bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3]]
             self.gt.append(bb)
         self.x = np.array(self.x)
         print(init_bbox)
Example #6
class GOTURN:
    """Tester for OTB formatted sequences"""
    def __init__(self, root_dir, model_path, device, live=False):
        self.root_dir = root_dir
        self.device = device
        self.transform = NormalizeToTensor()
        self.scale = Rescale((224, 224))
        self.model_path = model_path
        self.model = GoNet()
        self.gt = []
        self.opts = None
        self.curr_img = None
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.to(device)
        if live:
            # contains only img.
            frames = glob.glob(os.path.join(root_dir, '*.jpg'))
            frames = sorted(frames)
            self.len = len(frames) - 1
            frames = np.array(frames)
            frames.sort()
            self.x = []
            self.img = []
            for i in range(self.len):
                self.x.append([frames[i], frames[i + 1]])
                img_prev = cv2.imread(frames[i])
                # img_prev = bgr2rgb(img_prev)
                img_curr = cv2.imread(frames[i + 1])
                # img_curr = bgr2rgb(img_curr)
                self.img.append([img_prev, img_curr])
            self.x = np.array(self.x)
        else:
            frames = os.listdir(root_dir + '/img')
            frames = [root_dir + "/img/" + frame for frame in frames]
            self.len = len(frames) - 1
            frames = np.array(frames)
            frames.sort()
            self.x = []
            with open(root_dir + '/groundtruth_rect.txt') as f:
                lines = f.readlines()
            lines[0] = re.sub('\t', ',', lines[0])
            lines[0] = re.sub(' +', ',', lines[0])
            init_bbox = lines[0].strip().split(',')
            init_bbox = [float(x) for x in init_bbox]
            init_bbox = [
                init_bbox[0], init_bbox[1], init_bbox[0] + init_bbox[2],
                init_bbox[1] + init_bbox[3]
            ]
            init_bbox = np.array(init_bbox)
            self.prev_rect = init_bbox
            self.img = []
            for i in range(self.len):
                self.x.append([frames[i], frames[i + 1]])
                img_prev = cv2.imread(frames[i])
                img_prev = bgr2rgb(img_prev)
                img_curr = cv2.imread(frames[i + 1])
                img_curr = bgr2rgb(img_curr)
                self.img.append([img_prev, img_curr])
                lines[i + 1] = re.sub('\t', ',', lines[i + 1])
                lines[i + 1] = re.sub(' +', ',', lines[i + 1])
                bb = lines[i + 1].strip().split(',')
                bb = [float(x) for x in bb]
                bb = [bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3]]
                self.gt.append(bb)
            self.x = np.array(self.x)
            print(init_bbox)

    def set_init_box(self, initBox):
        self.prev_rect = np.array(initBox)

    def __getitem__(self, idx):
        """
        Returns transformed torch tensor which is passed to the network.
        """
        sample = self._get_sample(idx)
        return self.transform(sample)

    def _get_sample(self, idx):
        """
        Returns cropped previous and current frame at the previous predicted
        location. Note that the images are scaled to (224,224,3).
        """
        prev = self.img[idx][0]
        curr = self.img[idx][1]
        prevbb = self.prev_rect
        prev_sample, opts_prev = crop_sample({'image': prev, 'bb': prevbb})
        curr_sample, opts_curr = crop_sample({'image': curr, 'bb': prevbb})
        curr_img = self.scale(curr_sample, opts_curr)['image']
        prev_img = self.scale(prev_sample, opts_prev)['image']
        sample = {'previmg': prev_img, 'currimg': curr_img}
        self.curr_img = curr
        self.opts = opts_curr
        return sample

    def get_rect(self, sample):
        """
        Regresses the bounding box coordinates in the original image dimensions
        for an input sample.
        """
        x1, x2 = sample['previmg'], sample['currimg']
        x1 = x1.unsqueeze(0).to(self.device)
        x2 = x2.unsqueeze(0).to(self.device)
        y = self.model(x1, x2)
        bb = y.data.cpu().numpy().transpose((1, 0))
        bb = bb[:, 0]
        bbox = BoundingBox(bb[0], bb[1], bb[2], bb[3])

        # inplace conversion
        bbox.unscale(self.opts['search_region'])
        bbox.uncenter(self.curr_img, self.opts['search_location'],
                      self.opts['edge_spacing_x'], self.opts['edge_spacing_y'])
        return bbox.get_bb_list()

    def test(self):
        """
        Loops through all the frames of test sequence and tracks the target.
        Prints predicted box location on console with frame ID.
        """
        self.model.eval()
        st = time.time()
        for i in range(self.len):
            sample = self[i]
            bb = self.get_rect(sample)
            self.prev_rect = bb
            print("frame: {}".format(i + 1), bb)
        end = time.time()
        print("fps: {:.3f}".format(self.len / (end - st)))
Example #7
class GOTURN:
    """ROS node that runs GOTURN tracking on a live camera stream."""
    def __init__(self, model_path, device):
        # model_path = os.path.join(model_path, "checkpoints/pytorch_goturn.pth.tar")
        if not os.path.isfile(model_path):
            # isfile() already implies the path exists; prompt for a usable path.
            model_path = input(
                "Whoops! No such file! Please enter the path of the checkpoint "
                "you'd like to use: ")
        # ROS params
        self.init_variables_hard()
        self.init_publisher()
        self.init_subscribers()

        # GOTURN Model params
        self.device = device
        self.transform = NormalizeToTensor()
        self.scale = Rescale((224, 224))
        self.model_path = model_path
        self.model = GoNet()
        self.opts = None
        self.prev_img = None
        self.curr_img = None
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.to(device)

    def init_variables_hard(self):

        self.bbox_in_topic = rospy.get_param("~bbox_in_topic",
                                             "/mbz2020/perception/roi/rect")
        self.color_image_topic = rospy.get_param(
            "~color_img_topic", "/front_track_camera/fisheye1/camera_raw")
        # The default must be a bool: the string "False" is truthy in Python.
        self.publish_result_img = rospy.get_param("~publish_tracked", False)
        self.oob_threshold = rospy.get_param("~oob_threshold", 10)
        self.max_bbox_ratio = rospy.get_param("~max_bbox_ratio", 1.0)

        self._bridge = CvBridge()

        self._current_color_msg = None
        self._inital_bbox = None
        self._last_bbox = None
        self._is_first_frame = True
        self._has_scale_changed = False

        self._color_timestamp = -1
        self._original_distance = -1
        self._current_distance = -1
        self._previous_distance = -1

        self._scale = 1.0
        self._fallback_scale = 0.4
        self._current_status = 1

    def init_publisher(self):
        # regressed bbox
        self._pub_bbox = rospy.Publisher("/perception/tracker/bboxOut",
                                         BoundingBox2D,
                                         queue_size=10)
        # Image with overlaid bbox
        self._pub_result_img = rospy.Publisher("/perception/tracker/bboxImage",
                                               Image,
                                               queue_size=10)
        # Tracker status
        self._pub_status = rospy.Publisher("/perception/tracker/status",
                                           Int8,
                                           queue_size=10)

    def init_subscribers(self):

        # Input color frame with callback
        sub_image = rospy.Subscriber(self.color_image_topic, Image,
                                     self.got_image)

        # Selected ROI from first frame
        sub_bbox = rospy.Subscriber(self.bbox_in_topic, BoundingBox2D,
                                    self.got_bounding_box)

    def got_bounding_box(self, boundingBox):
        self.init_variables_hard()
        center = (boundingBox.center.x, boundingBox.center.y)
        width = boundingBox.size_x
        height = boundingBox.size_y

        if self._inital_bbox is None:
            rospy.loginfo("BBox Received")
            self._inital_bbox = (
                int(center[0] - width / 2),
                int(center[1] - height / 2),
                width,
                height,
            )

    def calculate_bbox_center(self, bbox):
        center = (bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2)
        center = tuple([int(x) for x in center])

        return center

    def got_image(self, rgb_msg):
        color_image = self._bridge.imgmsg_to_cv2(rgb_msg)

        if self._is_first_frame and self._inital_bbox is not None:
            rospy.loginfo("Initializing tracker")
            self._last_bbox = self._inital_bbox
            self._is_first_frame = False
            self.prev_img = color_image

        elif not self._is_first_frame:
            self.model.eval()
            self.curr_img = color_image
            sample = self.transform(self._get_sample())

            self._last_bbox = self.get_rect(sample)
            self.prev_img = self.curr_img

        if self._last_bbox is not None:

            bbox_message = BoundingBox2D()
            bbox_message.size_x = self._last_bbox[2]
            bbox_message.size_y = self._last_bbox[3]
            bbox_message.center.theta = 0
            bbox_message.center.x = self._last_bbox[0] + self._last_bbox[2] / 2
            bbox_message.center.y = self._last_bbox[1] + self._last_bbox[3] / 2

            self._pub_bbox.publish(bbox_message)

            status_message = Int8()
            status_message.data = self._current_status
            self._pub_status.publish(status_message)

            if self.publish_result_img:
                self._last_bbox = tuple([int(i) for i in self._last_bbox])

                if self._current_status == 1:
                    cv2.rectangle(color_image,
                                  (self._last_bbox[0], self._last_bbox[1]),
                                  (self._last_bbox[0] + self._last_bbox[2],
                                   self._last_bbox[1] + self._last_bbox[3]),
                                  (0, 0, 255), 2)
                else:
                    cv2.rectangle(color_image,
                                  (self._last_bbox[0], self._last_bbox[1]),
                                  (self._last_bbox[0] + self._last_bbox[2],
                                   self._last_bbox[1] + self._last_bbox[3]),
                                  (255, 0, 0), 2)

                imgmsg = self._bridge.cv2_to_imgmsg(color_image,
                                                    encoding="mono8")

                self._pub_result_img.publish(imgmsg)

    def _get_sample(self):
        """
        Returns cropped previous and current frame at the previous predicted
        location. Note that the images are scaled to (224,224,3).
        """
        prev = self.prev_img
        curr = self.curr_img
        prevbb = self._last_bbox
        prev_sample, opts_prev = crop_sample({'image': prev, 'bb': prevbb})
        curr_sample, opts_curr = crop_sample({'image': curr, 'bb': prevbb})
        prev_img = bgr2rgb(self.scale(prev_sample, opts_prev)['image'])
        curr_img = bgr2rgb(self.scale(curr_sample, opts_curr)['image'])
        sample = {'previmg': prev_img, 'currimg': curr_img}
        self.curr_img = curr
        self.opts = opts_curr
        return sample

    def get_rect(self, sample):
        """
        Regresses the bounding box coordinates in the original image dimensions
        for an input sample.
        """
        x1, x2 = sample['previmg'], sample['currimg']
        x1 = x1.unsqueeze(0).to(self.device)
        x2 = x2.unsqueeze(0).to(self.device)
        y = self.model(x1, x2)
        bb = y.data.cpu().numpy().transpose((1, 0))
        bb = bb[:, 0]
        bbox = BoundingBox(bb[0], bb[1], bb[2], bb[3])

        # inplace conversion
        bbox.unscale(self.opts['search_region'])
        bbox.uncenter(self.curr_img, self.opts['search_location'],
                      self.opts['edge_spacing_x'], self.opts['edge_spacing_y'])
        return bbox.get_bb_list()

    def spin(self):
        rate = rospy.Rate(30)

        while not rospy.is_shutdown():
            rate.sleep()
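A minimal launch sketch for this node, assuming a ROS master is running; the node name and checkpoint path are illustrative:

if __name__ == '__main__':
    rospy.init_node('goturn_tracker')
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    tracker = GOTURN('checkpoints/pytorch_goturn.pth.tar', device)
    tracker.spin()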
Example #8
class TrackerGOTURN(Tracker):
    """GOTURN got10k class for benchmark and evaluation on tracking datasets.

    This class overrides the default got10k 'Tracker' class methods,
    namely 'init' and 'update'.

    Attributes:
        cuda: whether a CUDA-capable GPU is available
        device: device on which GOTURN is evaluated ('cuda:0', 'cpu')
        net: GOTURN pytorch model
        prev_box: previous bounding box
        prev_img: previous tracking image
        transform_tensor: normalizes images and returns torch tensor.
        opts: bounding box config to unscale and uncenter network output.
    """
    def __init__(self, net_path=None, **kwargs):
        super(TrackerGOTURN, self).__init__(name='PyTorchGOTURN',
                                            is_deterministic=True)

        # setup GPU device if available
        self.cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.cuda else 'cpu')

        # setup model
        self.net = GoNet()
        if net_path is not None:
            checkpoint = torch.load(net_path,
                                    map_location=lambda storage, loc: storage)
            self.net.load_state_dict(checkpoint['state_dict'])
            self.net.eval()
        self.net = self.net.to(self.device)

        # setup transforms
        self.prev_img = None  # previous image in numpy format
        self.prev_box = None  # follows format: [xmin, ymin, xmax, ymax]
        self.scale = Rescale((224, 224))
        self.transform_tensor = transforms.Compose([NormalizeToTensor()])
        self.opts = None

    def init(self, image, box):
        """
        Initializes the tracker at the given box location.
        Assumes that the initial box has format: [xmin, ymin, width, height].
        """
        image = np.array(image)
        if image.ndim == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        # goturn helper functions expect box in [xmin, ymin, xmax, ymax] format
        box[2] = box[0] + box[2]
        box[3] = box[1] + box[3]
        self.prev_box = box
        self.prev_img = image

    def update(self, image):
        """
        Given current image, returns target box.
        """
        image = np.array(image)
        if image.ndim == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        # crop current and previous image at previous box location
        prev_sample, opts_prev = crop_sample({
            'image': self.prev_img,
            'bb': self.prev_box
        })
        curr_sample, opts_curr = crop_sample({
            'image': image,
            'bb': self.prev_box
        })
        self.opts = opts_curr
        self.curr_img = image
        curr_img = self.scale(curr_sample, opts_curr)['image']
        prev_img = self.scale(prev_sample, opts_prev)['image']
        sample = {'previmg': prev_img, 'currimg': curr_img}
        sample = self.transform_tensor(sample)

        # do forward pass to get box
        box = np.array(self._get_rect(sample))

        # update previous box and image
        self.prev_img = image
        self.prev_box = np.copy(box)

        # convert [xmin, ymin, xmax, ymax] box to [xmin, ymin, width, height]
        # for correct evaluation by got10k toolkit
        box[2] = box[2] - box[0]
        box[3] = box[3] - box[1]
        return box

    def _get_rect(self, sample):
        """
        Performs forward pass through the GOTURN network to regress
        bounding box coordinates in the original image dimensions.
        """
        x1, x2 = sample['previmg'], sample['currimg']
        x1 = x1.unsqueeze(0).to(self.device)
        x2 = x2.unsqueeze(0).to(self.device)
        y = self.net(x1, x2)
        bb = y.data.cpu().numpy().transpose((1, 0))
        bb = bb[:, 0]
        bbox = BoundingBox(bb[0], bb[1], bb[2], bb[3])

        # inplace conversion
        bbox.unscale(self.opts['search_region'])
        bbox.uncenter(self.curr_img, self.opts['search_location'],
                      self.opts['edge_spacing_x'], self.opts['edge_spacing_y'])
        return bbox.get_bb_list()
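A minimal sketch of driving this tracker by hand over a short sequence; the frame paths, checkpoint path, and initial box are illustrative:

from PIL import Image

tracker = TrackerGOTURN(net_path='checkpoints/pytorch_goturn.pth.tar')
frames = ['img/0001.jpg', 'img/0002.jpg', 'img/0003.jpg']
tracker.init(Image.open(frames[0]), [198.0, 214.0, 34.0, 81.0])  # [x, y, w, h]
for path in frames[1:]:
    box = tracker.update(Image.open(path))  # predicted [x, y, w, h]
    print(path, box)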