Exemple #1
0
    def __init__(self, params, model_path=None, name='SiamRPN', **kargs):
        """Build the late-fusion SiamRPN tracker and load its weights.

        Args:
            params: Not used by this constructor.
            model_path: Checkpoint path handed to ``torch.load``. The
                checkpoint may be a bare state dict or a dict holding the
                state dict under the ``'model'`` key.
            name: Tracker name forwarded to the base class.
            **kargs: Accepted and ignored.
        """
        super(TrackerSiamRPNLate, self).__init__(name=name, is_deterministic=True)

        self.model = SiameseAlexNetLate()

        self.cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.cuda else 'cpu')

        # Deserialize the checkpoint once and reuse it; the previous code
        # called torch.load a second time just to read the same file again.
        checkpoint = torch.load(model_path, map_location=self.device)
        state_dict = checkpoint['model'] if 'model' in checkpoint else checkpoint
        self.model.load_state_dict(state_dict)

        if self.cuda:
            self.model = self.model.cuda()
        self.model.eval()

        self.transforms = transforms.Compose([
            ToTensor()
        ])

        # Anchors are generated once from the static configuration.
        valid_scope = 2 * config.valid_scope + 1
        self.anchors = util.generate_anchors(config.total_stride,
                                             config.anchor_base_size,
                                             config.anchor_scales,
                                             config.anchor_ratios,
                                             valid_scope)

        # 2-D Hanning window (outer product of two 1-D windows), repeated
        # config.anchor_num times and flattened to one value per anchor.
        hanning_1d = np.hanning(config.score_size)
        hanning_2d = np.outer(hanning_1d, hanning_1d)
        self.window = np.tile(hanning_2d[None, :],
                              [config.anchor_num, 1, 1]).flatten()

        self.data_loader = TrackerRGBTDataLoader()
        self.old_loader = TrackerDataLoader()
Exemple #2
0
class TrackerSiamRPNLate(Tracker):
    """SiamRPN tracker built on ``SiameseAlexNetLate`` that is fed an RGB and an IR frame.

    NOTE(review): both ``init`` and ``update`` replace the IR network input
    with an all-zero tensor, so only the RGB crop carries image content.
    This looks like a deliberate ablation -- confirm before changing it.
    """

    def __init__(self, params, model_path=None, name='SiamRPN', **kargs):
        """Build the network, load its weights and precompute anchors/window.

        Args:
            params: Not used by this constructor.
            model_path: Checkpoint path handed to ``torch.load``. The
                checkpoint may be a bare state dict or a dict holding the
                state dict under the ``'model'`` key.
            name: Tracker name forwarded to the base class.
            **kargs: Accepted and ignored.
        """
        super(TrackerSiamRPNLate, self).__init__(name=name, is_deterministic=True)

        self.model = SiameseAlexNetLate()

        self.cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.cuda else 'cpu')

        # Deserialize the checkpoint once and reuse it; the previous code
        # called torch.load a second time just to read the same file again.
        checkpoint = torch.load(model_path, map_location=self.device)
        state_dict = checkpoint['model'] if 'model' in checkpoint else checkpoint
        self.model.load_state_dict(state_dict)

        if self.cuda:
            self.model = self.model.cuda()
        self.model.eval()

        self.transforms = transforms.Compose([
            ToTensor()
        ])

        # Anchors are generated once from the static configuration.
        valid_scope = 2 * config.valid_scope + 1
        self.anchors = util.generate_anchors(config.total_stride,
                                             config.anchor_base_size,
                                             config.anchor_scales,
                                             config.anchor_ratios,
                                             valid_scope)

        # 2-D Hanning window (outer product of two 1-D windows), repeated
        # config.anchor_num times and flattened; update() blends it into
        # the penalised scores.
        hanning_1d = np.hanning(config.score_size)
        hanning_2d = np.outer(hanning_1d, hanning_1d)
        self.window = np.tile(hanning_2d[None, :],
                              [config.anchor_num, 1, 1]).flatten()

        self.data_loader = TrackerRGBTDataLoader()
        self.old_loader = TrackerDataLoader()

    def _cosine_window(self, size):
        """Return a float32 2-D Hanning window of shape ``size`` normalised to sum to 1.

        Args:
            size: Two-element sequence ``(rows, cols)``; entries are cast to int.
        """
        rows = np.hanning(int(size[0]))[:, np.newaxis]
        cols = np.hanning(int(size[1]))[np.newaxis, :]
        cos_window = rows.dot(cols).astype(np.float32)
        cos_window /= np.sum(cos_window)
        return cos_window

    def init(self, exemplar_rgb_img, exemplar_ir_img, bbox):
        """Initialize the tracker from the first RGB/IR frame pair.

        Args:
            exemplar_rgb_img: First RGB frame (converted with ``np.asarray``).
            exemplar_ir_img: First IR frame.
            bbox: One-based bounding box ``[x, y, width, height]``.
        """
        # Box centre, zero based.
        center_x = bbox[0] + bbox[2] / 2 - 1 / 2
        center_y = bbox[1] + bbox[3] / 2 - 1 / 2

        self.pos = np.array([center_x, center_y])
        self.target_sz = np.array([bbox[2], bbox[3]])  # width, height
        self.bbox = np.array([center_x, center_y, bbox[2], bbox[3]])
        # Reference size used by update() to bound how far the box may scale.
        self.origin_target_sz = np.array([bbox[2], bbox[3]])

        # The per-channel mean is passed to the loader together with the crop request.
        exemplar_rgb_img = np.asarray(exemplar_rgb_img)
        self.img_mean = np.mean(exemplar_rgb_img, axis=(0, 1))
        exemplar_rgb_img, _, _ = self.old_loader.get_exemplar_image(exemplar_rgb_img,
                                                                    self.bbox,
                                                                    config.template_img_size,
                                                                    config.context_amount,
                                                                    self.img_mean)

        self.img_mean_ir = np.mean(exemplar_ir_img, axis=(0, 1))
        exemplar_ir_img, _, _ = self.data_loader.get_exemplar_image(exemplar_ir_img,
                                                                    self.bbox,
                                                                    config.template_img_size,
                                                                    config.context_amount,
                                                                    self.img_mean_ir)

        # Add the batch dimension expected by the network.
        exemplar_rgb_img = self.transforms(exemplar_rgb_img)[None, :, :, :]
        exemplar_ir_img = self.transforms(exemplar_ir_img)[None, :, :, :]
        # NOTE(review): the IR template is replaced by zeros of the same
        # shape, so the IR crop above only determines the tensor size.
        exemplar_ir_img = torch.zeros_like(exemplar_ir_img, dtype=torch.float32)

        if self.cuda:
            exemplar_rgb_img = exemplar_rgb_img.cuda()
            exemplar_ir_img = exemplar_ir_img.cuda()

        # The CPU path used to pass the undefined name `exemplar_img` and
        # raised NameError; both paths now share this single call.
        self.model.track_init(exemplar_rgb_img, exemplar_ir_img)
        print('bbox', self.bbox)

    def update(self, instance_rgb_img, instance_ir_img):
        """Track the target in a new RGB/IR frame pair.

        Args:
            instance_rgb_img: Current RGB frame (converted with ``np.asarray``).
            instance_ir_img: Current IR frame.

        Returns:
            np.ndarray ``[x1, y1, width, height]`` where ``(x1, y1)`` is the
            estimated centre shifted back by half the estimated size.
        """
        instance_rgb_img = np.asarray(instance_rgb_img)
        frame = instance_rgb_img  # keep the full frame for its shape
        frame_h, frame_w = frame.shape[0], frame.shape[1]

        self.img_mean = np.mean(instance_rgb_img, axis=(0, 1))
        instance_rgb_img, _, _, scale_x = self.old_loader.get_instance_image(instance_rgb_img,
                                                                             self.bbox,
                                                                             config.template_img_size,
                                                                             config.detection_img_size,
                                                                             config.context_amount,
                                                                             self.img_mean)

        self.img_mean_ir = np.mean(instance_ir_img, axis=(0, 1))
        instance_ir_img, _, _, _ = self.data_loader.get_instance_image(instance_ir_img,
                                                                       self.bbox,
                                                                       config.template_img_size,
                                                                       config.detection_img_size,
                                                                       config.context_amount,
                                                                       self.img_mean_ir)

        instance_rgb_img = self.transforms(instance_rgb_img)[None, :, :, :]
        instance_ir_img = self.transforms(instance_ir_img)[None, :, :, :]
        # NOTE(review): as in init(), the IR input is replaced by zeros.
        instance_ir_img = torch.zeros_like(instance_ir_img, dtype=torch.float32)

        if self.cuda:
            instance_rgb_img = instance_rgb_img.cuda()
            instance_ir_img = instance_ir_img.cuda()
        pred_score, pred_regression = self.model.track(instance_rgb_img, instance_ir_img)

        # Rearrange to (batch, config.size, 2) scores and (batch, config.size, 4) offsets.
        pred_conf = pred_score.reshape(-1, 2, config.size).permute(0, 2, 1)
        pred_offset = pred_regression.reshape(-1, 4, config.size).permute(0, 2, 1)

        delta = pred_offset[0].cpu().detach().numpy()
        box_pred = util.box_transform_inv(self.anchors, delta)
        # Softmax over the two classes; index 1 is taken as the score.
        score_pred = F.softmax(pred_conf, dim=2)[0, :, 1].cpu().detach().numpy()

        s_c = util.change(util.sz(box_pred[:, 2], box_pred[:, 3]) / (util.sz_wh(self.target_sz * scale_x)))  # scale penalty
        r_c = util.change((self.target_sz[0] / self.target_sz[1]) / (box_pred[:, 2] / box_pred[:, 3]))  # ratio penalty
        penalty = np.exp(-(r_c * s_c - 1.) * config.penalty_k)

        # Blend the penalised score with the precomputed Hanning window.
        pscore = penalty * score_pred
        pscore = pscore * (1 - config.window_influence) + self.window * config.window_influence
        best_pscore_id = np.argmax(pscore)

        # Convert the winning box from crop scale back to frame scale.
        target = box_pred[best_pscore_id, :] / scale_x

        # Size update rate, scaled by penalty and score of the winning anchor.
        lr = penalty[best_pscore_id] * score_pred[best_pscore_id] * config.lr_box

        # target[0:2] is added to the previous centre; clamp to the frame.
        res_x = np.clip(target[0] + self.pos[0], 0, frame_w)
        res_y = np.clip(target[1] + self.pos[1], 0, frame_h)

        # Interpolate the size with rate lr and bound it relative to the initial size.
        res_w = np.clip(self.target_sz[0] * (1 - lr) + target[2] * lr,
                        config.min_scale * self.origin_target_sz[0],
                        config.max_scale * self.origin_target_sz[0])
        res_h = np.clip(self.target_sz[1] * (1 - lr) + target[3] * lr,
                        config.min_scale * self.origin_target_sz[1],
                        config.max_scale * self.origin_target_sz[1])

        self.pos = np.array([res_x, res_y])
        self.target_sz = np.array([res_w, res_h])

        # Centre-format state for the next crop; width/height are at least 10.
        center_box = np.array([res_x, res_y, res_w, res_h])
        self.bbox = (
            np.clip(center_box[0], 0, frame_w).astype(np.float64),
            np.clip(center_box[1], 0, frame_h).astype(np.float64),
            np.clip(center_box[2], 10, frame_w).astype(np.float64),
            np.clip(center_box[3], 10, frame_h).astype(np.float64))

        # Shift the centre by half the size for the returned box.
        return np.array([res_x - res_w / 2, res_y - res_h / 2, res_w, res_h])
Exemple #3
0
class TrackerSiamRPNEval(Tracker):
    """SiamRPN tracker for evaluation on sequences of paired RGB and IR frames.

    With ``modality == 1`` the network is ``SiameseAlexNet`` and receives only
    the RGB crop; otherwise it is ``SiameseAlexNetMultimodal`` and receives
    both the RGB and the IR crop.
    """

    def __init__(self, modality=1, model_path=None, **kargs):
        """Build the network, load its weights and precompute anchors/window.

        Args:
            modality: ``1`` selects the single-input model; any other value
                selects the two-input (RGB + IR) model.
            model_path: Checkpoint path handed to ``torch.load``. The
                checkpoint may be a bare state dict or a dict holding the
                state dict under the ``'model'`` key.
            **kargs: Accepted and ignored.
        """
        super(TrackerSiamRPNEval, self).__init__(name='SiamRPN', is_deterministic=True)

        self.modality = modality

        if modality == 1:
            self.model = SiameseAlexNet()
        else:
            self.model = SiameseAlexNetMultimodal()

        self.cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.cuda else 'cpu')

        # Deserialize the checkpoint once and reuse it; the previous code
        # called torch.load a second time just to read the same file again.
        checkpoint = torch.load(model_path, map_location=self.device)
        state_dict = checkpoint['model'] if 'model' in checkpoint else checkpoint
        self.model.load_state_dict(state_dict)

        if self.cuda:
            self.model = self.model.cuda()

        self.model.eval()

        self.transforms = transforms.Compose([
            ToTensor()
        ])

        # Anchors are generated once from the static configuration.
        valid_scope = 2 * config.valid_scope + 1
        self.anchors = util.generate_anchors(config.total_stride,
                                             config.anchor_base_size,
                                             config.anchor_scales,
                                             config.anchor_ratios,
                                             valid_scope)

        # 2-D Hanning window (outer product of two 1-D windows), repeated
        # config.anchor_num times and flattened; update() blends it into
        # the penalised scores.
        hanning_1d = np.hanning(config.score_size)
        hanning_2d = np.outer(hanning_1d, hanning_1d)
        self.window = np.tile(hanning_2d[None, :],
                              [config.anchor_num, 1, 1]).flatten()

        self.data_loader = TrackerRGBTDataLoader()

    def _cosine_window(self, size):
        """Return a float32 2-D Hanning window of shape ``size`` normalised to sum to 1.

        Args:
            size: Two-element sequence ``(rows, cols)``; entries are cast to int.
        """
        rows = np.hanning(int(size[0]))[:, np.newaxis]
        cols = np.hanning(int(size[1]))[np.newaxis, :]
        cos_window = rows.dot(cols).astype(np.float32)
        cos_window /= np.sum(cos_window)
        return cos_window

    def init(self, frame_rgb, frame_ir, bbox):
        """Initialize the tracker from the first RGB/IR frame pair.

        Args:
            frame_rgb: First RGB frame (converted with ``np.asarray``).
            frame_ir: First IR frame (converted with ``np.asarray``).
            bbox: One-based bounding box ``[x, y, width, height]``.
        """
        frame_rgb = np.asarray(frame_rgb)
        frame_ir = np.asarray(frame_ir)

        # Box centre, zero based.
        center_x = bbox[0] + bbox[2] / 2 - 1 / 2
        center_y = bbox[1] + bbox[3] / 2 - 1 / 2

        self.pos = np.array([center_x, center_y])
        self.target_sz = np.array([bbox[2], bbox[3]])  # width, height
        self.bbox = np.array([center_x, center_y, bbox[2], bbox[3]])
        # Reference size used by update() to bound how far the box may scale.
        self.origin_target_sz = np.array([bbox[2], bbox[3]])

        # Means are computed on the first frame only and reused by update():
        # per channel for RGB, a single scalar for IR.
        self.img_rgb_mean = np.mean(frame_rgb, axis=(0, 1))
        self.img_ir_mean = np.mean(frame_ir)

        exemplar_img_rgb, _, _ = self.data_loader.get_exemplar_image(frame_rgb,
                                                                     self.bbox,
                                                                     config.template_img_size,
                                                                     config.context_amount,
                                                                     self.img_rgb_mean)

        exemplar_img_ir, _, _ = self.data_loader.get_exemplar_image(frame_ir,
                                                                    self.bbox,
                                                                    config.template_img_size,
                                                                    config.context_amount,
                                                                    self.img_ir_mean)

        # Add the batch dimension expected by the network.
        exemplar_img_rgb = self.transforms(exemplar_img_rgb)[None, :, :, :]
        exemplar_img_ir = self.transforms(exemplar_img_ir)[None, :, :, :]

        if self.cuda:
            exemplar_img_rgb = exemplar_img_rgb.cuda()
            exemplar_img_ir = exemplar_img_ir.cuda()

        if self.modality == 1:
            self.model.track_init(exemplar_img_rgb)
        else:
            self.model.track_init(exemplar_img_rgb, exemplar_img_ir)

    def update(self, frame_rgb, frame_ir):
        """Track the target in a new RGB/IR frame pair.

        Args:
            frame_rgb: Current RGB frame (converted with ``np.asarray``).
            frame_ir: Current IR frame (converted with ``np.asarray``).

        Returns:
            np.ndarray ``[x1, y1, width, height]`` where ``(x1, y1)`` is the
            estimated centre shifted back by half the estimated size.
        """
        frame_rgb = np.asarray(frame_rgb)
        frame_ir = np.asarray(frame_ir)
        frame_h, frame_w = frame_rgb.shape[0], frame_rgb.shape[1]

        instance_img_rgb, _, _, scale_x = self.data_loader.get_instance_image(frame_rgb,
                                                                              self.bbox,
                                                                              config.template_img_size,
                                                                              config.detection_img_size,
                                                                              config.context_amount,
                                                                              self.img_rgb_mean)

        # scale_x is rebound here; both calls receive the same bbox and size
        # arguments, and the value from this IR call is the one used below.
        instance_img_ir, _, _, scale_x = self.data_loader.get_instance_image(frame_ir,
                                                                             self.bbox,
                                                                             config.template_img_size,
                                                                             config.detection_img_size,
                                                                             config.context_amount,
                                                                             self.img_ir_mean)

        instance_img_rgb = self.transforms(instance_img_rgb)[None, :, :, :]
        instance_img_ir = self.transforms(instance_img_ir)[None, :, :, :]

        if self.cuda:
            instance_img_rgb = instance_img_rgb.cuda()
            instance_img_ir = instance_img_ir.cuda()

        if self.modality == 1:
            pred_score, pred_regression = self.model.track(instance_img_rgb)
        else:
            pred_score, pred_regression = self.model.track(instance_img_rgb, instance_img_ir)

        # Rearrange to (batch, config.size, 2) scores and (batch, config.size, 4) offsets.
        pred_conf = pred_score.reshape(-1, 2, config.size).permute(0, 2, 1)
        pred_offset = pred_regression.reshape(-1, 4, config.size).permute(0, 2, 1)

        delta = pred_offset[0].cpu().detach().numpy()
        box_pred = util.box_transform_inv(self.anchors, delta)
        # Softmax over the two classes; index 1 is taken as the score.
        score_pred = F.softmax(pred_conf, dim=2)[0, :, 1].cpu().detach().numpy()

        s_c = util.change(util.sz(box_pred[:, 2], box_pred[:, 3]) / (util.sz_wh(self.target_sz * scale_x)))  # scale penalty
        r_c = util.change((self.target_sz[0] / self.target_sz[1]) / (box_pred[:, 2] / box_pred[:, 3]))  # ratio penalty
        penalty = np.exp(-(r_c * s_c - 1.) * config.penalty_k)

        # Blend the penalised score with the precomputed Hanning window.
        pscore = penalty * score_pred
        pscore = pscore * (1 - config.window_influence) + self.window * config.window_influence
        best_pscore_id = np.argmax(pscore)

        # Convert the winning box from crop scale back to frame scale.
        target = box_pred[best_pscore_id, :] / scale_x

        # Size update rate, scaled by penalty and score of the winning anchor.
        lr = penalty[best_pscore_id] * score_pred[best_pscore_id] * config.lr_box

        # target[0:2] is added to the previous centre; clamp to the frame.
        res_x = np.clip(target[0] + self.pos[0], 0, frame_w)
        res_y = np.clip(target[1] + self.pos[1], 0, frame_h)

        # Interpolate the size with rate lr and bound it relative to the initial size.
        res_w = np.clip(self.target_sz[0] * (1 - lr) + target[2] * lr,
                        config.min_scale * self.origin_target_sz[0],
                        config.max_scale * self.origin_target_sz[0])
        res_h = np.clip(self.target_sz[1] * (1 - lr) + target[3] * lr,
                        config.min_scale * self.origin_target_sz[1],
                        config.max_scale * self.origin_target_sz[1])

        self.pos = np.array([res_x, res_y])
        self.target_sz = np.array([res_w, res_h])

        # Centre-format state for the next crop; width/height are at least 10.
        center_box = np.array([res_x, res_y, res_w, res_h])
        self.bbox = (
            np.clip(center_box[0], 0, frame_w).astype(np.float64),
            np.clip(center_box[1], 0, frame_h).astype(np.float64),
            np.clip(center_box[2], 10, frame_w).astype(np.float64),
            np.clip(center_box[3], 10, frame_h).astype(np.float64))

        # Shift the centre by half the size for the returned box.
        return np.array([res_x - res_w / 2, res_y - res_h / 2, res_w, res_h])

    def track(self, img_rgb_files, img_ir_files, box, visualize=False):
        """Run the tracker over a sequence of paired RGB/IR image files.

        Args:
            img_rgb_files: Sequence of RGB image paths; its length sets the
                number of result rows.
            img_ir_files: Sequence of IR image paths, iterated in lockstep
                with ``img_rgb_files``.
            box: Initial bounding box, passed to ``init`` for the first frame
                and stored unchanged as the first result row.
            visualize: When true, every frame is passed to ``show_frame``.

        Returns:
            Tuple ``(boxes, times)``: a ``(frame_num, 4)`` array of boxes and
            a ``(frame_num,)`` array with the seconds spent in ``init`` or
            ``update`` for each frame (image loading is not timed).
        """
        frame_num = len(img_rgb_files)
        boxes = np.zeros((frame_num, 4))
        boxes[0] = box
        times = np.zeros(frame_num)

        plotter = None

        for f, (img_rgb_file, img_ir_file) in enumerate(zip(img_rgb_files, img_ir_files)):
            # RGB is loaded with three channels, IR as single-channel
            # greyscale; both are scaled by 1/255.
            img_rgb = np.asarray(Image.open(img_rgb_file).convert('RGB')) / 255.
            img_ir = np.asarray(Image.open(img_ir_file).convert('L')) / 255.

            start_time = time.time()
            if f == 0:
                self.init(img_rgb, img_ir, box)
            else:
                boxes[f, :] = self.update(img_rgb, img_ir)
            times[f] = time.time() - start_time

            if visualize:
                plotter = show_frame(img_ir,
                                     img_rgb,
                                     self.name,
                                     plotter,
                                     boxes[f, :]
                                     )

        return boxes, times