Example #1
    def track(self, state, im):
        p = state['p']
        net = state['net']
        avg_chans = state['avg_chans']
        window = state['window']
        target_pos = state['target_pos']
        target_sz = state['target_sz']

        hc_z = target_sz[1] + p.context_amount * sum(target_sz)
        wc_z = target_sz[0] + p.context_amount * sum(target_sz)
        s_z = np.sqrt(wc_z * hc_z)
        scale_z = p.exemplar_size / s_z
        d_search = (p.instance_size -
                    p.exemplar_size) / 2  # slightly different from rpn++
        pad = d_search / scale_z
        s_x = s_z + 2 * pad

        x_crop, _ = get_subwindow_tracking(im, target_pos, p.instance_size,
                                           python2round(s_x), avg_chans)
        state['x_crop'] = x_crop.clone()  # torch float tensor, (3,H,W)
        x_crop = self.normalize(x_crop)
        x_crop = x_crop.unsqueeze(0)
        debug = True  # hard-coded flag; the else branch below is unreachable as written
        if debug:
            target_pos, target_sz, _, cls_score = self.update(net,
                                                              x_crop.cuda(),
                                                              target_pos,
                                                              target_sz *
                                                              scale_z,
                                                              window,
                                                              scale_z,
                                                              p,
                                                              debug=debug)
            state['cls_score'] = cls_score
        else:
            target_pos, target_sz, _ = self.update(net,
                                                   x_crop.cuda(),
                                                   target_pos,
                                                   target_sz * scale_z,
                                                   window,
                                                   scale_z,
                                                   p,
                                                   debug=debug)
        target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
        target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
        target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
        target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
        state['target_pos'] = target_pos
        state['target_sz'] = target_sz
        state['p'] = p

        return state
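The search-region size above follows the usual SiamFC/SiamRPN context scheme: a margin of context_amount * (w + h) is added to the target on each axis, the exemplar scale comes from the geometric mean of the padded sides, and the crop is widened so it maps onto instance_size pixels. A minimal standalone sketch of that arithmetic, with assumed typical values (exemplar_size=127, instance_size=255, context_amount=0.5 are conventions from SiamFC-style trackers, not values read from this code):

    import numpy as np

    # Assumed config values, typical for SiamFC/SiamRPN-style trackers.
    exemplar_size = 127      # template crop resolution (pixels)
    instance_size = 255      # search crop resolution (pixels)
    context_amount = 0.5     # fraction of (w + h) added as context

    target_sz = np.array([64.0, 48.0])  # hypothetical target width, height

    wc_z = target_sz[0] + context_amount * target_sz.sum()  # width + context
    hc_z = target_sz[1] + context_amount * target_sz.sum()  # height + context
    s_z = np.sqrt(wc_z * hc_z)        # exemplar region side, in image pixels
    scale_z = exemplar_size / s_z     # image-to-exemplar scale factor
    d_search = (instance_size - exemplar_size) / 2
    pad = d_search / scale_z          # extra margin, mapped back to image pixels
    s_x = s_z + 2 * pad               # search region side, in image pixels

    print(s_z, scale_z, s_x)          # ~111.7, ~1.14, ~224.3 for this box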
Example #2
    def update(self, im, online_score=None, gt=None):
        '''Tracking Function'''

        im = np.array(im)  # PIL to numpy

        p = self.state['p']
        net = self.state['net']
        avg_chans = self.state['avg_chans']
        window = self.state['window']
        target_pos = self.state['target_pos']
        target_sz = self.state['target_sz']

        hc_z = target_sz[1] + p.context_amount * sum(target_sz)
        wc_z = target_sz[0] + p.context_amount * sum(target_sz)
        s_z = np.sqrt(wc_z * hc_z)
        scale_z = p.exemplar_size / s_z
        d_search = (p.instance_size -
                    p.exemplar_size) / 2  # slightly different from rpn++
        pad = d_search / scale_z
        s_x = s_z + 2 * pad

        x_crop, _ = get_subwindow_tracking(im, target_pos, p.instance_size,
                                           python2round(s_x), avg_chans)
        self.state['x_crop'] = x_crop.clone()  # torch float tensor, (3,H,W)
        x_crop = self.normalize(x_crop).unsqueeze(0)

        target_pos, target_sz, _ = self.forward(net, x_crop.cuda(), target_pos,
                                                target_sz * scale_z, window,
                                                scale_z, p)
        target_pos[0] = max(0, min(self.state['im_w'], target_pos[0]))
        target_pos[1] = max(0, min(self.state['im_h'], target_pos[1]))
        target_sz[0] = max(10, min(self.state['im_w'], target_sz[0]))
        target_sz[1] = max(10, min(self.state['im_h'], target_sz[1]))
        self.state['target_pos'] = target_pos
        self.state['target_sz'] = target_sz
        self.state['p'] = p

        location = cxy_wh_2_rect(self.state['target_pos'],
                                 self.state['target_sz'])
        return location
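The returned location converts the tracker's center/size state into a corner-based rectangle. cxy_wh_2_rect itself is not shown in this listing; the following is a sketch of the conventional conversion (the real helper may apply a 1-pixel offset convention):

    import numpy as np

    def cxy_wh_2_rect(pos, sz):
        """Convert (center_x, center_y), (w, h) to (x, y, w, h), top-left origin.

        Sketch of the helper used above, not the original implementation.
        """
        return np.array([pos[0] - sz[0] / 2, pos[1] - sz[1] / 2, sz[0], sz[1]])

    # Example: a 64x48 box centered at (100, 80)
    print(cxy_wh_2_rect(np.array([100.0, 80.0]), np.array([64.0, 48.0])))
    # -> [68. 56. 64. 48.]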
Example #3
    def track(self, state, im):
        p = state['p']
        net = state['net']
        avg_chans = state['avg_chans']
        window = state['window']
        target_pos = state['target_pos']
        target_sz = state['target_sz']

        wc_z = target_sz[0] + p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + p.context_amount * sum(target_sz)
        s_z = np.sqrt(wc_z * hc_z)
        scale_z = p.exemplar_size / s_z
        d_search = (p.instance_size - p.exemplar_size) / 2
        pad = d_search / scale_z
        s_x = s_z + 2 * pad

        # extract scaled crops for search region x at previous target position
        x_crop = Variable(
            get_subwindow_tracking(im, target_pos, p.instance_size,
                                   python2round(s_x), avg_chans).unsqueeze(0))
        if state["arch"] == "SiamRPNRes22":
            target_pos, target_sz, score = self.update(net, x_crop.cuda(),
                                                       target_pos,
                                                       target_sz * scale_z,
                                                       window, scale_z, p)
        elif state["arch"] == "CascadedSiamRPNRes22":
            target_pos, target_sz, score = self.update_stage12_mean(
                net, x_crop.cuda(), target_pos, target_sz * scale_z, window,
                scale_z, p)
        else:
            raise NotImplementedError
        target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
        target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
        target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
        target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
        state['target_pos'] = target_pos
        state['target_sz'] = target_sz
        state['score'] = score
        return state
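All three track/update variants end with the same clamping step: the predicted center is kept inside the frame and the predicted size is kept between 10 pixels and the frame dimensions. Factored out as a small helper (a sketch for clarity, not part of the original code):

    def clamp_state(target_pos, target_sz, im_w, im_h, min_sz=10):
        """Keep the predicted box inside the image and above a minimum size."""
        target_pos[0] = max(0, min(im_w, target_pos[0]))     # center x in frame
        target_pos[1] = max(0, min(im_h, target_pos[1]))     # center y in frame
        target_sz[0] = max(min_sz, min(im_w, target_sz[0]))  # width in [10, im_w]
        target_sz[1] = max(min_sz, min(im_h, target_sz[1]))  # height in [10, im_h]
        return target_pos, target_sz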
Example #4
    def init(self, im, target_pos, target_sz, model, hp=None):
        state = dict()
        state['im_h'] = im.shape[0]
        state['im_w'] = im.shape[1]
        p = RPNConfig()

        # single test

        prefix = [x for x in ['OTB', 'VOT'] if x in self.info.dataset]
        cfg_ = load_yaml('../experiments/test/{0}/{1}.yaml'.format(
            prefix[0], self.info.arch))
        # cfg_benchmark = cfg[self.info.dataset]
        # p.update(cfg_benchmark)
        # p.renew()

        net = model
        p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios,
                                   p.score_size)

        avg_chans = np.mean(im, axis=(0, 1))

        wc_z = target_sz[0] + p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + p.context_amount * sum(target_sz)
        s_z = python2round(np.sqrt(wc_z * hc_z))

        z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                        avg_chans)

        z = Variable(z_crop.unsqueeze(0))
        net.template(z.cuda())

        window = np.outer(np.hanning(p.score_size),
                          np.hanning(p.score_size))  # [17,17]

        window = np.expand_dims(window, axis=0)  # [1,17,17]

        window = np.repeat(window, p.anchor_num, axis=0)  # [5,17,17]
        if cfg.TRACK.USE_CLASSIFIER:
            # atom = ATOM().cuda().eval()
            # checkpoint_dict = torch.load("/home/zhuyi/Code/CRPN_511/atom_pretrain.pth")
            # atom.load_state_dict(checkpoint_dict["net"],strict=False)

            self.classifier = BaseClassifier(net)
            self.center_pos = np.array(target_pos)
            self.size = np.array(target_sz)
            bbox = [
                target_pos[0] - (target_sz[0] - 1) / 2,
                target_pos[1] - (target_sz[1] - 1) / 2, target_sz[0],
                target_sz[1]
            ]
            #bbox = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
            if cfg.TRACK.TEMPLATE_UPDATE:
                with torch.no_grad():
                    # NOTE: self.z_crop is never assigned in this method; it is
                    # presumably cached elsewhere (z_crop above is the same crop).
                    net.template_short_term(self.z_crop)

            s_xx = s_z * (cfg.TRACK.INSTANCE_SIZE * 2 /
                          cfg.TRACK.EXEMPLAR_SIZE)
            x_crop = get_subwindow_tracking(im, self.center_pos,
                                            cfg.TRACK.INSTANCE_SIZE * 2,
                                            round(s_xx), avg_chans)
            x_crop = x_crop.unsqueeze(0)
            self.classifier.initialize(x_crop.type(torch.FloatTensor), bbox)
        state['p'] = p
        state['net'] = net
        state['avg_chans'] = avg_chans
        state['window'] = window
        state['target_pos'] = target_pos
        state['target_sz'] = target_sz

        return state
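The cosine (Hanning) window built in init is the standard motion prior: a [score_size, score_size] outer product, replicated once per anchor, and later blended into the raw classification scores with a window-influence weight. A standalone sketch with assumed values (score_size=17, anchor_num=5, window_influence=0.4 are illustrative, not read from this code):

    import numpy as np

    score_size = 17          # assumed response-map resolution
    anchor_num = 5           # assumed number of anchors per location
    window_influence = 0.40  # assumed blending weight, tuned per benchmark

    hann = np.hanning(score_size)
    window = np.outer(hann, hann)                                     # [17, 17]
    window = np.repeat(window[np.newaxis, :, :], anchor_num, axis=0)  # [5, 17, 17]
    window = window.flatten()                 # matches a flattened score map

    # Typical use during update(): suppress responses far from the last position.
    score = np.random.rand(anchor_num * score_size * score_size)  # stand-in scores
    pscore = score * (1 - window_influence) + window * window_influence
    best = int(np.argmax(pscore))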