Example #1
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    """
    初始化跟踪器,根据目标的信息构建state 字典
    :param im: 当前处理的图像
    :param target_pos: 目标的位置
    :param target_sz: 目标的尺寸
    :param model: 训练好的网络模型
    :param hp: 超参数
    :param device: 硬件信息
    :return: 跟踪器的state字典数据
    """

    # 初始化state字典
    state = dict()
    # 设置图像的宽高
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    # 配置跟踪器的相关参数
    p = TrackerConfig()
    # 对参数进行更新
    p.update(hp, model.anchors)
    # 更新参数
    p.renew()
    # 获取网络模型
    net = model
    # 根据网络参数对跟踪器的参数进行更新,主要是anchors
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    # 生成锚点
    p.anchor = generate_anchor(model.anchors, p.score_size)
    # 图像的平均值
    avg_chans = np.mean(im, axis=(0, 1))
    # 根据设置的上下文比例,输入z 的宽高及尺寸
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # 初始化跟踪目标 initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)
    # 将其转换为Variable可在pythorch中进行反向传播
    z = Variable(z_crop.unsqueeze(0))
    # 专门处理模板
    net.template(z.to(device))
    # 设置使用的惩罚窗口
    if p.windowing == 'cosine':
        # 利用hanning窗的外积生成cosine窗口
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    # 每一个anchor都有一个对应的惩罚窗口
    window = np.tile(window.flatten(), p.anchor_num)
    # 将信息更新到state字典中
    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
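
A rough usage sketch for the initializer above, assuming a SiamMask model is already
constructed and loaded (as in the __init__/main examples below); the frame path and
box values are hypothetical:

# Hypothetical driver for siamese_init; `model` is a loaded SiamMask network.
import cv2
import numpy as np

frame = cv2.imread('frame0.jpg')               # first frame of the sequence
x, y, w, h = 100, 80, 60, 40                   # hypothetical initial bounding box
target_pos = np.array([x + w / 2, y + h / 2])  # box center, as in the demo code below
target_sz = np.array([w, h])
state = siamese_init(frame, target_pos, target_sz, model, hp=None, device='cpu')
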
Example #2
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    # print("------siamese_init-------")
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    # print("im.shape[0] ", im.shape[0])
    p = TrackerConfig()
    p.update(hp, model.anchors)

    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num

    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)
    # print("z size (patch) ", z_crop.size())
    z = Variable(z_crop.unsqueeze(0))
    # The network caches the resulting features (self.zf) from passing the patch z through the siamese backbone
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    # print("window = ", state['window'])
    return state
Example #3
    def __init__(self, args):
        super(PatchTrainer, self).__init__()

        # Setup device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        torch.backends.cudnn.benchmark = True

        # Setup tracker cfg
        cfg = load_config(args)
        p = TrackerConfig()
        p.renew()
        self.p = p

        # Setup tracker
        siammask = Tracker(p=p, anchors=cfg['anchors'])
        if args.resume:
            assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
            siammask = load_pretrain(siammask, args.resume)
        siammask.eval().to(self.device)
        self.model = siammask
Example #4
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)

    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)

    z = Variable(z_crop.unsqueeze(0))
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size),
                          np.hanning(p.score_size))  # the outer product is a matrix; an inner product would be a scalar
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
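
A quick numeric check of the context padding above (context_amount = 0.5, per the
comments in the later examples): for a 100x50 target, the square exemplar crop works
out to 148 px before being resized to exemplar_size:

# Worked example of the exemplar crop-size math; the numbers are illustrative.
import numpy as np

w, h = 100.0, 50.0
wc_z = w + 0.5 * (w + h)            # 175.0
hc_z = h + 0.5 * (w + h)            # 125.0
s_z = round(np.sqrt(wc_z * hc_z))   # round(147.90...) = 148
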
Example #5
def siamese_init(im,
                 model,
                 hp=None,
                 device='cpu',
                 targets=None,
                 detector=None):
    custom_objects = detector.CustomObjects(car=True, person=True)
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)

    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    # s_z = [ round(np.sqrt(target["target_sz"][1] + 0.123 * sum(target["target_sz"])*target["target_sz"][0] + 0.123 * sum(target["target_sz"]) ))  for target in targets ]
    # s_z = np.array(s_z)
    # print(targe)
    # targets.append(targe)
    # print(targets)
    BLUE = [255, 255, 255]  # note: this value is white, and the constant is unused below

    for i, target in enumerate(targets):
        wc_z = target["target_sz"][0] + p.context_amount * sum(
            target["target_sz"])
        hc_z = target["target_sz"][1] + p.context_amount * sum(
            target["target_sz"])
        target["s_z"] = round(np.sqrt(wc_z * hc_z))

    print("out")

    # initialize the exemplar
    targets = get_subwindow_tracking(
        im,
        p.exemplar_size,
        avg_chans,
        targets=targets,
    )

    # z_f = [ net.template(Variable(target["im_to_torch"].unsqueeze(0)).to(device))  for target in targets ]

    for i, target in enumerate(targets):
        # detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=target["im_patch"],input_type="array", output_image_path=os.path.join("image {} custom.jpg".format(i)),output_type="file", minimum_percentage_probability=30)
        # detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=target["img"],input_type="array", output_image_path=os.path.join(execution_path , "images.jpg"),output_type="file", minimum_percentage_probability=30)
        z = Variable(target["im_to_torch"].unsqueeze(0))
        target["zf"] = net.template(z.to(device))

        del target["im_to_torch"]
        # for eachObject in detections:
        #     print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
        #     target["detection"] = eachObject["box_points"]

        #     print("--------------------------------")

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state["targets"] = targets
    state["detector"] = detector
    # state["s_z"] = s_z
    # state["z_f"] = z_f
    return state
Example #6
class SingleTracker(object):
    def __init__(self, config_path, model_path):
        args = TrackArgs()
        args.config = config_path
        args.resume = model_path

        cfg = load_config(args)
        if args.arch == 'Custom':
            from custom import Custom
            self.model = Custom(anchors=cfg['anchors'])
        else:
            raise ValueError('invalid architecture: {}'.format(args.arch))

        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
            self.model = load_pretrain(self.model, args.resume)
        self.model.eval()
        self.device = torch.device('cuda' if (torch.cuda.is_available() and not args.cpu) else 'cpu')
        self.model = self.model.to(self.device)

        ################# Dangerous
        self.p = TrackerConfig()
        self.p.update(cfg['hp'] if 'hp' in cfg.keys() else None, self.model.anchors)
        self.p.renew()

        self.p.scales = self.model.anchors['scales']
        self.p.ratios = self.model.anchors['ratios']
        self.p.anchor_num = self.model.anchor_num
        self.p.anchor = generate_anchor(self.model.anchors, self.p.score_size)

        if self.p.windowing == 'cosine':
            self.window = np.outer(np.hanning(self.p.score_size), np.hanning(self.p.score_size))
        elif self.p.windowing == 'uniform':
            self.window = np.ones((self.p.score_size, self.p.score_size))
        self.window = np.tile(self.window.flatten(), self.p.anchor_num)
        ################


    def get_examplar_feature(self, img, target_pos, target_sz):
        avg_chans = np.mean(img, axis=(0, 1))

        wc_z = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_z = round(np.sqrt(wc_z * hc_z))
        # initialize the exemplar
        examplar = get_subwindow_tracking(img, target_pos, self.p.exemplar_size, s_z, avg_chans)

        z = Variable(examplar.unsqueeze(0))
        return self.model.template(z.to(self.device))

    def siamese_track(self, img, target_pos, target_sz, examplar_feature, debug=False, mask_enable=True, refine_enable=True):
        avg_chans = np.mean(img, axis=(0, 1))
        im_h = img.shape[0]
        im_w = img.shape[1]

        wc_x = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_x = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_x = np.sqrt(wc_x * hc_x)
        '''
        scale_x = self.p.exemplar_size / s_x
        d_search = (self.p.instance_size - self.p.exemplar_size) / 2
        pad = d_search / scale_x
        s_x = s_x + 2 * pad
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2, round(s_x), round(s_x)]
        '''
        # The commented-out block above (the original author's code) simplifies
        # to the following three lines:
        scale_x = self.p.exemplar_size / s_x
        s_x = self.p.instance_size / self.p.exemplar_size * s_x
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2, round(s_x), round(s_x)]


        # extract scaled crops for search region x at previous target position
        x_crop = Variable(get_subwindow_tracking(img, target_pos, self.p.instance_size, round(s_x), avg_chans).unsqueeze(0))

        if mask_enable:
            score, delta, mask = self.model.track_mask(examplar_feature, x_crop.to(self.device))
        else:
            score, delta = self.model.track(examplar_feature, x_crop.to(self.device))

        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
        score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0),
                          dim=1).data[:, 1].cpu().numpy()

        delta[0, :] = delta[0, :] * self.p.anchor[:, 2] + self.p.anchor[:, 0]
        delta[1, :] = delta[1, :] * self.p.anchor[:, 3] + self.p.anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * self.p.anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * self.p.anchor[:, 3]

        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            sz2 = (w + pad) * (h + pad)
            return np.sqrt(sz2)

        def sz_wh(wh):
            pad = (wh[0] + wh[1]) * 0.5
            sz2 = (wh[0] + pad) * (wh[1] + pad)
            return np.sqrt(sz2)

        # size penalty
        target_sz_in_crop = target_sz*scale_x
        s_c = change(sz(delta[2, :], delta[3, :]) / (sz_wh(target_sz_in_crop)))  # scale penalty
        r_c = change((target_sz_in_crop[0] / target_sz_in_crop[1]) / (delta[2, :] / delta[3, :]))  # ratio penalty

        penalty = np.exp(-(r_c * s_c - 1) * self.p.penalty_k)
        pscore = penalty * score

        # cos window (motion model)
        pscore = pscore * (1 - self.p.window_influence) + self.window * self.p.window_influence
        best_pscore_id = np.argmax(pscore)

        pred_in_crop = delta[:, best_pscore_id] / scale_x
        lr = penalty[best_pscore_id] * score[best_pscore_id] * self.p.lr  # lr for OTB

        res_x = pred_in_crop[0] + target_pos[0]
        res_y = pred_in_crop[1] + target_pos[1]

        res_w = target_sz[0] * (1 - lr) + pred_in_crop[2] * lr
        res_h = target_sz[1] * (1 - lr) + pred_in_crop[3] * lr

        target_pos = np.array([res_x, res_y])
        target_sz = np.array([res_w, res_h])

        # for Mask Branch
        if mask_enable:
            best_pscore_id_mask = np.unravel_index(best_pscore_id, (5, self.p.score_size, self.p.score_size))
            delta_x, delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1]

            if refine_enable:
                mask = self.model.track_refine((delta_y, delta_x)).to(self.device).sigmoid().squeeze().view(
                    self.p.out_size, self.p.out_size).cpu().data.numpy()
            else:
                mask = mask[0, :, delta_y, delta_x].sigmoid(). \
                    squeeze().view(self.p.out_size, self.p.out_size).cpu().data.numpy()

            def crop_back(image, bbox, out_sz, padding=-1):
                a = (out_sz[0] - 1) / bbox[2]
                b = (out_sz[1] - 1) / bbox[3]
                c = -a * bbox[0]
                d = -b * bbox[1]
                mapping = np.array([[a, 0, c],
                                    [0, b, d]]).astype(np.float64)  # np.float is removed in NumPy >= 1.24
                crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]),
                                    flags=cv2.INTER_LINEAR,
                                    borderMode=cv2.BORDER_CONSTANT,
                                    borderValue=padding)
                return crop

            s = crop_box[2] / self.p.instance_size
            sub_box = [crop_box[0] + (delta_x - self.p.base_size / 2) * self.p.total_stride * s,
                    crop_box[1] + (delta_y - self.p.base_size / 2) * self.p.total_stride * s,
                    s * self.p.exemplar_size, s * self.p.exemplar_size]
            s = self.p.out_size / sub_box[2]
            back_box = [-sub_box[0] * s, -sub_box[1] * s, im_w * s, im_h * s]
            mask_in_img = crop_back(mask, back_box, (im_w, im_h))

            target_mask = (mask_in_img > self.p.seg_thr).astype(np.uint8)
            if cv2.__version__[-5] == '4':
                contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            else:
                _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            cnt_area = [cv2.contourArea(cnt) for cnt in contours]
            if len(contours) != 0 and np.max(cnt_area) > 100:
                contour = contours[np.argmax(cnt_area)]  # use max area polygon
                polygon = contour.reshape(-1, 2)
                # pbox = cv2.boundingRect(polygon)  # Min Max Rectangle
                prbox = cv2.boxPoints(cv2.minAreaRect(polygon))  # Rotated Rectangle

                # box_in_img = pbox
                rbox_in_img = prbox
            else:  # empty mask
                location = cxy_wh_2_rect(target_pos, target_sz)
                rbox_in_img = np.array([[location[0], location[1]],
                                        [location[0] + location[2], location[1]],
                                        [location[0] + location[2], location[1] + location[3]],
                                        [location[0], location[1] + location[3]]])

        target_pos[0] = max(0, min(im_w, target_pos[0]))
        target_pos[1] = max(0, min(im_h, target_pos[1]))
        target_sz[0] = max(10, min(im_w, target_sz[0]))
        target_sz[1] = max(10, min(im_h, target_sz[1]))

        score = score[best_pscore_id]
        mask = mask_in_img if mask_enable else []
        return target_pos, target_sz, score, mask
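
A minimal sketch of driving this class over a video; the config/model paths, video
source, and initial box are hypothetical placeholders:

# Hypothetical tracking loop built on SingleTracker's own methods.
import cv2
import numpy as np

tracker = SingleTracker('config.json', 'model.pth')  # placeholder paths
cap = cv2.VideoCapture('video.mp4')                  # placeholder frame source
ok, frame = cap.read()
target_pos = np.array([320.0, 240.0])                # hypothetical initial center
target_sz = np.array([80.0, 60.0])                   # hypothetical initial size
zf = tracker.get_examplar_feature(frame, target_pos, target_sz)
while True:
    ok, frame = cap.read()
    if not ok:
        break
    target_pos, target_sz, score, mask = tracker.siamese_track(
        frame, target_pos, target_sz, zf)
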
Example #7
def main():

    # args.base_path = base_path
    args.resume = "../SiamMask/experiments/siammask_sharp/SiamMask_DAVIS.pth"
    args.config = "../SiamMask/experiments/siammask_sharp/config_davis.json"
    print(join(args.base_path, 'groundtruth_rect.txt'))

    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    # Setup Model
    cfg = load_config(args)
    p = TrackerConfig()
    p.renew()
    siammask = Tracker(p=p, anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
        siammask = load_pretrain(siammask, args.resume)
    siammask.eval().to(device)

    # Parse Image file
    img_files = sorted(glob.glob(join(join(args.base_path, 'imgs'), '*.jp*')))
    ims = [cv2.imread(imf) for imf in img_files]

    # Select ROI
    cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN)
    try:
        init_rect = cv2.selectROI('SiamMask', ims[0], False, False)
        gts = None
        x, y, w, h = init_rect
    except:
        exit()

    file1 = open(join(args.base_path, 'groundtruth_rect.txt'), 'w') 
    file1.write('{0:d},{1:d},{2:d},{3:d}\n'.format(x, y, w, h))

    toc = 0
    for f, im in enumerate(ims):
        tic = cv2.getTickCount()
        if f == 0:  # init
            target_pos = np.array([x + w / 2, y + h / 2])
            target_sz = np.array([w, h])
            state = tracker_init(im, target_pos, target_sz, siammask, device=device)  # init tracker
            state['gts'] = gts
            state['device'] = device
        elif f > 0:  # tracking
            state = tracker_track(state, im, siammask, device=device)  # track
            target_pos, target_sz = state['target_pos'], state['target_sz']
            x, y = (target_pos - target_sz/2).astype(int)
            x2, y2 = (target_pos + target_sz/2).astype(int)
            cv2.rectangle(im, (x, y), (x2, y2), (0, 255, 0), 4)
            cv2.imshow('SiamMask', im)
            key = cv2.waitKey(1)
            if key == ord('q'):
                break
            file1.write('{0:d},{1:d},{2:d},{3:d}\n'.format(x, y, x2-x, y2-y))
        toc += cv2.getTickCount() - tic
    file1.close() 

    toc /= cv2.getTickFrequency()
    fps = f / toc
    print('SiamMask Time: {:02.1f}s Speed: {:3.1f}fps (with visualization!)'.format(toc, fps))
Example #8
if __name__ == '__main__':

    # Setup cf and model file
    args.resume = "../SiamMask/experiments/siammask_sharp/SiamMask_DAVIS.pth"
    args.config = "../SiamMask/experiments/siammask_sharp/config_davis.json"

    cv2.namedWindow("template", cv2.WND_PROP_FULLSCREEN)
    cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN)

    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    # Setup Model
    cfg = load_config(args)
    p = TrackerConfig()
    p.renew()
    siammask = Tracker(p=p, anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(
            args.resume)
        siammask = load_pretrain(siammask, args.resume)
    siammask.eval().to(device)
    model = siammask

    # Setup Dataset
    dataloader = DataLoader(AttackDataset(root_dir='data/Phone1',
                                          step=1,
                                          test=True),
                            batch_size=100)
Example #9
def siamese_init(im,
                 search_shape,
                 target_pos,
                 target_sz,
                 model,
                 hp=None,
                 device='cpu'):
    """
    generate anchors, inference the template image, set up window
    :param im: whole image
    :param target_pos: target position that are selected
    :param target_sz: target size that are selected
    :param model: SiamMask model
    :param hp: hyper parameters
    :param device:
    :return:
    """
    state = dict()
    state['im_h'] = search_shape[0]
    state['im_w'] = search_shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)

    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(
        model.anchors, p.score_size)  # anchor size: (25*25*5, 4) --> (3125, 4)
    avg_chans = np.mean(im, axis=(0, 1))

    # wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    # hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    # s_z = round(np.sqrt(wc_z * hc_z))  # crop size = sqrt((w+(w+h)/2)*(h+(w+h)/2))
    ## initialize the exemplar
    #im_patch = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans, out_mode="numpy")
    im_patch = im
    im_patch = cv2.resize(im_patch, (p.exemplar_size, p.exemplar_size))

    cv2.imshow('crop_template', im_patch)
    cv2.waitKey(0)
    z_crop = im_to_torch(im_patch)

    z = Variable(z_crop.unsqueeze(0))
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
Example #10
def siamese_init(im,
                 target_pos,
                 target_sz,
                 model,
                 hp=None,
                 device='cpu'):  # target_pos, target_sz come from the axis-aligned ground-truth box
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()  # tracker configuration parameters
    # Update p with hp and model.anchors, i.e. apply the settings from config_vot.json
    p.update(hp, model.anchors)
    p.renew()
    #    p.score_size=25
    net = model
    p.scales = model.anchors['scales']  # attributes of Custom's parent class SiamMask
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num  # 5 on the VOT dataset

    # generate_anchor builds the anchors;
    # p.anchor.shape = (p.anchor_num*p.score_size*p.score_size, 4)
    p.anchor = generate_anchor(model.anchors, p.score_size)
    # Per-channel mean of this single frame, shape (3,) in (B, G, R) order
    avg_chans = np.mean(im, axis=(0, 1))

    # Preprocessing: grow the target box proportionally to capture some
    # context. p.context_amount = 0.5
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)  # wc_z = w + p.context_amount * (w+h)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)  # hc_z = h + p.context_amount * (w+h)
    # Enlarge the box roughly 2x around the object and crop a square of side s_z
    s_z = round(np.sqrt(wc_z * hc_z))  # banker's rounding: round(2.5) = 2, round(2.51) = 3
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)  # tensor<(3, 127, 127), float32, cpu>
    # p.exemplar_size is the input z size from TrackerConfig (127),
    # so z_crop has shape (3, 127, 127)

    # unsqueeze adds a batch dimension: tensor<(1, 3, 127, 127), float32, cpu>
    z = Variable(z_crop.unsqueeze(0))
    # Send z to the device and extract the template features (ResNet-50 output)
    net.template(z.to(device))

    if p.windowing == 'cosine':  # the default
        # Outer product: ndarray of shape (p.score_size, p.score_size), i.e. <(25, 25), float64>
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))

    # Tile window.flatten() p.anchor_num times along the first axis:
    # ndarray<(3125,), float64>, with p.anchor_num = 5
    window = np.tile(window.flatten(), p.anchor_num)
    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos  # unchanged from the input
    state['target_sz'] = target_sz  # unchanged from the input
    return state


def trackres(cfg, outputs12, track12):
    delta = outputs12['predict'][0]
    score = outputs12['predict'][1]
    delta = delta.contiguous().view(delta.shape[0], 4, -1).data.cpu().numpy()
    score = F.softmax(score.contiguous().view(score.shape[0], 2, -1),
                      dim=1).data[:, 1].cpu().numpy()

    anchor = generate_anchor(cfg['anchors'], 3)  # anchors: anchor cfg
    delta[:, 0, :] = delta[:, 0, :] * anchor[:, 2] + anchor[:, 0]
    delta[:, 1, :] = delta[:, 1, :] * anchor[:, 3] + anchor[:, 1]
    delta[:, 2, :] = np.exp(delta[:, 2, :]) * anchor[:, 2]
    delta[:, 3, :] = np.exp(delta[:, 3, :]) * anchor[:, 3]

    p = TrackerConfig()

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        sz2 = (w + pad) * (h + pad)
        return np.sqrt(sz2)

    def sz_wh(wh):
        pad = (wh[:, 0] + wh[:, 1]) * 0.5
        sz2 = (wh[:, 0] + pad) * (wh[:, 1] + pad)
        return np.sqrt(sz2)

    target_sz = np.array(track12['template_bbox'].cpu().numpy()[:, 2:])
    scale_x = np.ones(target_sz.shape[0])

    # size penalty
    target_sz_in_crop = target_sz * scale_x[:, None]
    s_c = change(
        sz(delta[:, 2, :], delta[:, 3, :]) /
        (sz_wh(target_sz_in_crop))[:, None])  # scale penalty
    r_c = change((target_sz_in_crop[:, 0] / target_sz_in_crop[:, 1])[:, None] /
                 (delta[:, 2, :] / delta[:, 3, :]))  # ratio penalty
    penalty = np.exp(-(r_c * s_c - 1) * p.penalty_k)
    pscore = penalty * score

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(3), np.hanning(3))
    elif p.windowing == 'uniform':
        window = np.ones((3, 3))
    window = np.tile(window.flatten(), p.anchor_num)
    # cos window (motion model)
    pscore = pscore * (1 - p.window_influence) + window * p.window_influence

    best_pscore_id = np.argmax(pscore, 1)
    pred_in_crop = delta[range(best_pscore_id.shape[0]), :,
                         best_pscore_id] / scale_x[:, None]
    # lr = penalty[range(best_pscore_id.shape[0]),best_pscore_id] * score[range(best_pscore_id.shape[0]),best_pscore_id] * p.lr  # lr for OTB

    res_cx = pred_in_crop[:, 0] + (track12['search'].shape[2] + 1) // 2
    res_cy = pred_in_crop[:, 1] + (track12['search'].shape[3] + 1) // 2
    res_w = pred_in_crop[:, 2]
    res_h = pred_in_crop[:, 3]
    target_pos = np.array([res_cx, res_cy]).T
    target_sz = np.array([res_w, res_h]).T

    def draw(image, box, name):
        image = np.transpose(image, (1, 2, 0)).copy()
        x1, y1, x2, y2 = map(lambda x: int(round(x)), box)
        image = cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0))
        cv2.imwrite(name, image)

    # NOTE: the debug branch below formats the builtin `iter` with {:06d};
    # wire in a real frame counter before enabling it.
    debug = False
    if debug:
        batch_id = 0
        img_search = track12['search'][batch_id].cpu().numpy()
        box = [
            res_cx[batch_id] - res_w[batch_id] / 2.,
            res_cy[batch_id] - res_h[batch_id] / 2.,
            res_cx[batch_id] + res_w[batch_id] / 2.,
            res_cy[batch_id] + res_h[batch_id] / 2.
        ]
        draw(img_search, box, "debug/{:06d}_pred.jpg".format(iter))
        img_temp = track12['template'][batch_id].cpu().numpy()
        box_temp = track12['template_bbox'][batch_id].cpu().numpy()
        draw(img_temp, box_temp, "debug/{:06d}_temp.jpg".format(iter))

    im_sz = track12['search'].shape[-2:]
    avg_chans = np.mean(track12['search'].cpu().numpy(), axis=(2, 3))
    sz = p.exemplar_size
    c = (sz + 1) / 2
    context_xmin = (target_pos[:, 0] - c).round()
    context_xmax = context_xmin + sz - 1
    context_ymin = (target_pos[:, 1] - c).round()
    context_ymax = context_ymin + sz - 1

    left_pad = int(max(0., -context_xmin.min()))
    top_pad = int(max(0., -context_ymin.min()))
    right_pad = int(max(0., context_xmax.max() - im_sz[1] + 1))
    bottom_pad = int(max(0., context_ymax.max() - im_sz[0] + 1))

    context_xmin = context_xmin + left_pad
    context_xmax = context_xmax + left_pad
    context_ymin = context_ymin + top_pad
    context_ymax = context_ymax + top_pad

    # zzp: a more easy speed version
    im = track12['search'].cpu().numpy()
    k, r, c = im.shape[-3:]
    if any([top_pad, bottom_pad, left_pad, right_pad]):
        te_im = np.zeros((args.batch, k, r + top_pad + bottom_pad,
                          c + left_pad + right_pad), np.uint8)
        te_im[:, :, top_pad:top_pad + r, left_pad:left_pad + c] = im
        if top_pad:
            te_im[:, :, 0:top_pad,
                  left_pad:left_pad + c] = avg_chans[:, :, None, None]
        if bottom_pad:
            te_im[:, :, r + top_pad:,
                  left_pad:left_pad + c] = avg_chans[:, :, None, None]
        if left_pad:
            te_im[:, :, :, 0:left_pad] = avg_chans[:, :, None, None]
        if right_pad:
            te_im[:, :, :, c + left_pad:] = avg_chans[:, :, None, None]
        im_patch_original = np.zeros((args.batch, k, sz, sz), np.uint8)
        for id in range(args.batch):
            im_patch_original[id] = te_im[
                id, :,
                int(context_ymin[id]):int(context_ymax[id] + 1),
                int(context_xmin[id]):int(context_xmax[id] + 1)]
    else:
        im_patch_original = np.zeros((args.batch, k, sz, sz), np.uint8)
        for id in range(args.batch):
            im_patch_original[id] = im[
                id, :,
                int(context_ymin[id]):int(context_ymax[id] + 1),
                int(context_xmin[id]):int(context_xmax[id] + 1)]
    im_patch = im_patch_original

    return im_patch
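
For reference, the size/ratio penalty used in both siamese_track and trackres can be
checked in isolation; the candidate numbers and the penalty_k value below are
illustrative assumptions:

# Worked example of the scale/ratio penalty; penalty_k = 0.04 is an assumed value.
import numpy as np

def change(r):
    return np.maximum(r, 1. / r)

def padded_side(w, h):                  # same form as sz()/sz_wh() above
    pad = (w + h) * 0.5
    return np.sqrt((w + pad) * (h + pad))

prev_w, prev_h = 80.0, 60.0             # previous target size (in crop scale)
cand_w, cand_h = 100.0, 50.0            # one candidate box from the regression head
s_c = change(padded_side(cand_w, cand_h) / padded_side(prev_w, prev_h))  # >= 1
r_c = change((prev_w / prev_h) / (cand_w / cand_h))                      # >= 1
penalty = np.exp(-(r_c * s_c - 1) * 0.04)  # in (0, 1]; 1 means no size change
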