Example #1
def SiamRPN_track_upd(state, im, updatenet):
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = state['target_pos']
    target_sz = state['target_sz']

    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = Variable(
        get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x),
                               avg_chans).unsqueeze(0))

    target_pos, target_sz, score = tracker_eval(net, x_crop.cuda(), target_pos,
                                                target_sz * scale_z, window,
                                                scale_z, p)

    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))

    # extract z template to update the z
    z_crop = Variable(
        get_subwindow_tracking(im, target_pos, p.exemplar_size, round(s_z),
                               avg_chans).unsqueeze(0))
    z_f = net.featextract(z_crop.cuda())

    #z_f_ = (1-zLR) * Variable(state['z_f']).cuda() + zLR * z_f
    #temp = np.concatenate((init, pre, cur), axis=1)
    temp = torch.cat(
        (Variable(state['z_0']).cuda(), Variable(state['z_f']).cuda(), z_f), 1)
    init_inp = Variable(state['z_0']).cuda()

    z_f_ = updatenet(temp, init_inp)

    # print('updatenet input: ', temp.shape, init_inp.shape)
    # print('updatenet output: ', z_f_.shape)

    net.kernel(z_f_)

    state['z_f'] = z_f_.cpu().data
    state['net'] = net
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    return state
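In Example #1, updatenet fuses the initial template z_0, the running template z_f and the current-frame features into an updated template. As a rough illustration of that interface only, here is a minimal sketch of such a module, assuming a 1x1-conv fusion and a hypothetical channel count; it is not the repository's actual update network.

import torch
import torch.nn as nn

class SimpleUpdateNet(nn.Module):
    # illustrative stand-in for updatenet(temp, init_inp); feat_channels is an assumption
    def __init__(self, feat_channels=512):
        super().__init__()
        # temp is the channel-wise concatenation of (initial, accumulated, current) templates
        self.fuse = nn.Sequential(
            nn.Conv2d(feat_channels * 3, 96, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, feat_channels, kernel_size=1),
        )

    def forward(self, temp, init_inp):
        # residual connection to the initial template keeps the update anchored to frame 0
        return self.fuse(temp) + init_inp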
Example #2
def SiamRPN_init_batch(exemplar_list, exemplar_cxy_list, net):
    train_config = dict()

    batch_size = len(exemplar_list)
    p = TrackerConfig()
    p.update(net.cfg)

    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios, int(p.score_size))
    avg_chans_list = [None for i in range(batch_size)]
    z_list = [None for i in range(batch_size)]
    z_large_list = [None for i in range(batch_size)]
    
    for batch in range(batch_size):
        target_pos = exemplar_cxy_list[batch][0]
        target_sz = exemplar_cxy_list[batch][1]

        avg_chans = np.mean(exemplar_list[batch], axis=(0, 1))
        avg_chans_list[batch] = avg_chans

        wc_z = target_sz[0] + p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + p.context_amount * sum(target_sz)
        s_z = round(np.sqrt(wc_z * hc_z))
        scale_z = p.exemplar_size / s_z
        d_search = (p.instance_size - p.exemplar_size) / 2
        pad = d_search / scale_z
        s_x = s_z + 2 * pad
        # initialize the exemplar
        z_crop = get_subwindow_tracking(exemplar_list[batch], target_pos, p.exemplar_size, s_z, avg_chans)
        z_crop_large = get_subwindow_tracking(exemplar_list[batch], target_pos, p.instance_size, round(s_x), avg_chans)
        z = z_crop.unsqueeze(0)
        z_large = z_crop_large.unsqueeze(0)
        z_list[batch] = z
        z_large_list[batch] = z_large

    z_batch = z_list[0]
    z_large_batch = z_large_list[0]
    for idx in range(1, batch_size):
        z_batch = torch.cat((z_batch, z_list[idx]), dim=0)
        z_large_batch = torch.cat((z_large_batch, z_large_list[idx]), dim=0)

    assert z_batch.size(0)==batch_size
    assert z_large_batch.size(0)==batch_size

    net.temple(z_batch.cuda(), z_large_batch.cuda())

    train_config['avg_chans_list'] = avg_chans_list
    train_config['p'] = p
    train_config['net'] = net

    return train_config
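Example #2 grows the batch by calling torch.cat repeatedly inside the loop; collecting the per-sample crops in a list and concatenating once gives the same result. A small equivalent sketch (stack_crops is a hypothetical helper, not part of the examples):

import torch

def stack_crops(crop_list):
    # each element is a (1, C, H, W) tensor, as produced by z_crop.unsqueeze(0) above
    batch = torch.cat(crop_list, dim=0)
    assert batch.size(0) == len(crop_list)
    return batch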
Example #3
def SiamRPN_track(state, im):
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = state['target_pos']
    target_sz = state['target_sz']
    ctr = state['ctr']

    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = Variable(get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0))

    target_pos, target_sz, score = tracker_eval(net, x_crop.cuda(), target_pos, target_sz * scale_z, window, scale_z, p)
    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    state['ctr'] = ctr+1
    
    if ctr % 50 == 4:
        label = np.zeros(im.shape)
        x_low, x_high = target_pos[0] - target_sz[0]/2, target_pos[0] + target_sz[0]/2
        y_low, y_high = target_pos[1] - target_sz[1]/2, target_pos[1] + target_sz[1]/2
        x_low, x_high, y_low, y_high = int(x_low), int(x_high), int(y_low), int(y_high)
        label[y_low:y_high,x_low:x_high,:] = 2
        label = (get_subwindow_tracking(label, target_pos, p.instance_size, round(s_x), 0, out_mode="image"))
        label = cv2.split(label)[0]
        if p.instance_size == 271:
            label = cv2.resize(label, (19, 19))
        else:
            label = cv2.resize(label, (21, 21))
        
        label = torch.Tensor([(2-label)]*5 + [label]*5).unsqueeze(0)
        
        net.make_at_small(x_crop.cuda(), label.cuda())

    return state
Example #4
    def init(self, frame, bbox):
        """ initialize siamfc tracker
        Args:
            frame: an RGB image
            bbox: zero-based bounding box [x, y, width, height]
        """
        self.pos = np.array([bbox[0] + bbox[2] / 2,
                             bbox[1] + bbox[3] / 2])  # center x, center y
        self.target_sz = np.array([bbox[2], bbox[3]])  # width, height

        wc_z = self.target_sz[0] + 0.5 * sum(self.target_sz)
        hc_z = self.target_sz[1] + 0.5 * sum(self.target_sz)
        self.s_z = np.sqrt(wc_z * hc_z)
        self.s_x = self.s_z * config.instance_size / config.exemplar_size

        # get exemplar img
        img_mean = tuple(map(int, frame.mean(axis=(0, 1))))
        exemplar_img = get_subwindow_tracking(frame, self.pos,
                                              config.exemplar_size,
                                              python2round(self.s_z), img_mean)
        exemplar_img = self.transforms(exemplar_img)[None, :, :, :]

        # get exemplar feature
        with torch.cuda.device(self.gpu_id):
            exemplar_img = Variable(exemplar_img.cuda(), requires_grad=False)
            self.model(exemplar_img, None)

        # create hanning window
        self.hann_window = np.outer(np.hanning(self.response_sz),
                                    np.hanning(self.response_sz))
        self.hann_window = np.tile(
            self.hann_window.flatten(),
            len(config.anchor_ratios) * len(config.anchor_scales))
        self.counter_re = 0
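Example #4's init expects a zero-based [x, y, width, height] box and immediately converts it into the center/size representation used throughout these examples (and converted back when reporting results, as in Example #29). A small illustrative pair of helpers for that conversion; the function names are hypothetical:

import numpy as np

def bbox_to_center_size(bbox):
    # bbox is zero-based [x, y, width, height]
    pos = np.array([bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2])  # cx, cy
    sz = np.array([bbox[2], bbox[3]])                               # w, h
    return pos, sz

def center_size_to_bbox(pos, sz):
    # inverse mapping back to [x, y, width, height]
    return np.array([pos[0] - sz[0] / 2, pos[1] - sz[1] / 2, sz[0], sz[1]])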
Example #5
def SiamRPN_set_source_batch(train_config, source_list, source_cxy_list):
    p = train_config['p']
    net = train_config['net']
    avg_chans_list = train_config['avg_chans_list']
    batch_size = len(source_list)
    x_batch = None

    for batch in range(batch_size):
        target_pos = source_cxy_list[batch][0]
        target_sz = source_cxy_list[batch][1]

        wc_z = target_sz[1] + p.context_amount * sum(target_sz)
        hc_z = target_sz[0] + p.context_amount * sum(target_sz)
        s_z = np.sqrt(wc_z * hc_z)
        scale_z = p.exemplar_size / s_z  #the ratio between the in-model sizes and the real sizes
        d_search = (p.instance_size - p.exemplar_size) / 2
        pad = d_search / scale_z
        s_x = s_z + 2 * pad

        # extract scaled crops for search region x at previous target position
        x_crop = get_subwindow_tracking(source_list[batch], target_pos, p.instance_size, round(s_x), avg_chans_list[batch]).unsqueeze(0)
        if type(x_batch)!=torch.Tensor:
            x_batch = x_crop
        else:
            x_batch = torch.cat((x_batch, x_crop), dim=0)

    assert x_batch.size(0)==batch_size, '{}'.format(x_batch.size())

    net(x_batch.cuda(), set_source = True)
Example #6
def SiamRPN_track(state, im):
    # read the network and tracker parameters from state
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    # target position and size tracked in the previous frame
    target_pos = state['target_pos']
    target_sz = state['target_sz']

    # update the search region for this frame based on the previous frame's result
    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    # scale factor from image pixels to model (exemplar) pixels
    scale_z = p.exemplar_size / s_z
    # enlarge the search region
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = Variable(get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0))

    # predict for this frame: target_pos - target position, target_sz - target size, score - confidence
    # note: target_sz * scale_z is the target size rescaled to the search crop
    target_pos, target_sz, score = tracker_eval(net, x_crop.cuda(), target_pos, target_sz * scale_z, window, scale_z, p)
    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    return state
Example #7
def SiamRPN_track_bbox(net, state, im, next_mask, conf_mask, index_1, index_2,
                       frame_num, data_dir, gtbbox):
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = state['target_pos']
    target_sz = state['target_sz']

    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = Variable(
        get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x),
                               avg_chans).unsqueeze(0))

    target_pos, target_sz, score, alternative = tracker_eval_record_data(
        net, x_crop.cuda(), target_pos, target_sz * scale_z, window, scale_z,
        p, im, next_mask, conf_mask, index_1, index_2, frame_num, data_dir,
        gtbbox)
    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    state['fg'] = alternative
    return state
Example #8
def SiamRPN_track(state, im):
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = state['target_pos']
    target_sz = state['target_sz']

    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = Variable(
        get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x),
                               avg_chans).unsqueeze(0))

    target_pos, target_sz, score = tracker_eval(net, x_crop.cuda(), target_pos,
                                                target_sz * scale_z, window,
                                                scale_z, p)
    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    return state
Example #9
def SiamRPN_init(im, target_pos, target_sz, net):
    state = dict()
    p = TrackerConfig()
    p.update(net.cfg)
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    if p.adaptive:
        if ((target_sz[0] * target_sz[1]) / float(state['im_h'] * state['im_w'])) < 0.004:
            p.instance_size = 287  # small object big search region
        else:
            p.instance_size = 271

    p.score_size = (p.instance_size - p.exemplar_size) / p.total_stride + 1
    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios, int(p.score_size))
    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)
    z_crop_large = get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x), avg_chans)

    z = z_crop.unsqueeze(0)         # removed the Variable interface
    z_large = z_crop_large.unsqueeze(0)
    net.temple(z.cuda(), z_large.cuda())

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
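Examples #8 and #9 form the usual init/track pair. A minimal sketch of how they are typically driven over a video; the video path, the initial box and the already-loaded net are placeholders, and the frame-reading code is illustrative only:

import cv2
import numpy as np

# net: a SiamRPN model already constructed, loaded with weights and moved to the GPU (assumed)
cap = cv2.VideoCapture('video.mp4')    # placeholder source
ok, first_frame = cap.read()

target_pos = np.array([320.0, 240.0])  # cx, cy of the initial box (placeholder)
target_sz = np.array([80.0, 60.0])     # w, h of the initial box (placeholder)
state = SiamRPN_init(first_frame, target_pos, target_sz, net)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    state = SiamRPN_track(state, frame)
    cx, cy = state['target_pos']
    w, h = state['target_sz']
    cv2.rectangle(frame, (int(cx - w / 2), int(cy - h / 2)),
                  (int(cx + w / 2), int(cy + h / 2)), (0, 255, 255), 2)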
Example #10
def SiamRPN_init(im, target_pos, target_sz, net, net_name):
    state = dict()
    if 'SiamRPNPP' in net_name:
        p = TrackerConfig_SiamRPNPP()
    else:
        p = TrackerConfig()
    p.update(net.cfg)
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    if p.adaptive:
        if ((target_sz[0] * target_sz[1]) /
                float(state['im_h'] * state['im_w'])) < 0.004:
            p.instance_size = 287  # small object big search region
        else:
            p.instance_size = 255

        # p.score_size = (p.instance_size - p.exemplar_size) / p.total_stride + 1

    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios,
                               int(p.score_size))

    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im,
                                    target_pos,
                                    p.exemplar_size,
                                    s_z,
                                    avg_chans,
                                    out_mode='np')

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    z = Variable(transform(z_crop).unsqueeze(0))

    net.temple(z.cuda())

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
Example #11
def SiamRPN_init(im, target_pos, target_sz, net):
    """
    SiamRPN_init: initialize the SiamRPN tracker
        :param im: image to run tracking on
        :param target_pos: center point of the target
        :param target_sz: width and height of the target region
        :param net: tracking network
    """
    state = dict()
    p = TrackerConfig()
    p.update(net.cfg)
    state['im_h'] = im.shape[0]  # image height
    state['im_w'] = im.shape[1]  # image width

    if p.adaptive:
        # adapt the search region to the target/image area ratio; below 0.4% use a larger search region
        if ((target_sz[0] * target_sz[1]) /
                float(state['im_h'] * state['im_w'])) < 0.004:
            p.instance_size = 287  # small object big search region
        else:
            p.instance_size = 271
        # compute the score map size from the network's total stride
        p.score_size = (p.instance_size - p.exemplar_size) / p.total_stride + 1
    # generate_anchor: build the anchor matrix, centered at the image center, in [cx, cy, w, h] format
    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios,
                               int(p.score_size))

    # per-channel (RGB) mean over the image, len(avg_chans) = 3
    avg_chans = np.mean(im, axis=(0, 1))

    # wc_z and hc_z are the context-padded width and height; s_z is the equivalent square side length
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    # initialize the exemplar
    # get_subwindow_tracking: pad and crop out the target
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)

    z = Variable(z_crop.unsqueeze(0))  # z.size=([1, 3, 127, 127])
    net.temple(z.cuda())  # run temple() to compute the template features

    # two window types
    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
Example #12
def SiamRPN_init(im, target_pos, target_sz, net):
    state = dict()  # create the state dict
    p = TrackerConfig()     # initialize the tracker config
    p.update(net.cfg)       # update the config for the given net (model)
    state['im_h'] = im.shape[0]     # image height
    state['im_w'] = im.shape[1]     # image width

    if p.adaptive:
        # adapt the search region to the target and image sizes
        if ((target_sz[0] * target_sz[1]) / float(state['im_h'] * state['im_w'])) < 0.004:  # target covers less than 0.4% of the image
            p.instance_size = 287  # small object big search region
        else:
            p.instance_size = 271

        p.score_size = (p.instance_size - p.exemplar_size) / p.total_stride + 1     # (271-127)/8 + 1 = 19

    # build the anchor matrix, centered at the image center, in [cx, cy, w, h] format
    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios, int(p.score_size))
    # for i in range(p.anchor.shape[0]):
    #     box = p.anchor[i]
    #     cv2.rectangle(im, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 255, 255), 0)
    # cv2.imshow('im', im)
    # cv2.waitKey(0)

    avg_chans = np.mean(im, axis=(0, 1))

    # p.context_amount * sum(target_sz) is the context padding; wc_z and hc_z are the padded width and height, s_z the equivalent square side length.
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))   # 202
    # initialize the exemplar: pad and crop out the target
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)

    # wrap the tensor and record the operations applied to it
    z = Variable(z_crop.unsqueeze(0))
    # run temple() to compute the template features
    net.temple(z.cuda())

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    # --------------------------------------------------------------------------------------------------------------
    state['s_z_original'] = s_z
    state['s_x'] = 0
    # --------------------------------------------------------------------------------------------------------------
    return state
Example #13
def SiamRPN_init(im, target_pos, target_sz, net):
    # create an empty state dict
    state = dict()
    # tracker configuration
    p = TrackerConfig()
    # load the network's config into the tracker config
    p.update(net.cfg)
    # store the image size in state
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    if p.adaptive:  # defaults to True
        if ((target_sz[0] * target_sz[1]) / float(state['im_h'] * state['im_w'])) < 0.004:
            p.instance_size = 287  # small object big search region
        else:
            p.instance_size = 271   # sets instance_size, used below to compute score_size

        # same computation as in the TrackerConfig class
        p.score_size = (p.instance_size - p.exemplar_size) / p.total_stride + 1 

    # generate the candidate anchors
    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios, int(p.score_size))   

    # compute the per-channel image mean
    avg_chans = np.mean(im, axis=(0, 1))

    # context_amount is already initialized; default 0.5
    wc_z = target_sz[0] + p.context_amount * sum(target_sz) # target_sz holds the template width and height, summed by sum()
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    # round to an integer
    s_z = round(np.sqrt(wc_z * hc_z))
    # initialize the exemplar (target template)
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)

    # add a batch dimension
    z = Variable(z_crop.unsqueeze(0))
    # move to the GPU and compute the template features
    net.temple(z.cuda())

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    # store all parameters in the state dict
    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    # return the initialization state as a dict
    return state
Example #14
 def _pad_crop_resize_detection(self):
     wc_z = self.ret['detection_target_sz'][1] + self.ret['p'].context_amount * sum(self.ret['detection_target_sz'])
     hc_z = self.ret['detection_target_sz'][0] + self.ret['p'].context_amount * sum(self.ret['detection_target_sz'])
     s_z = np.sqrt(wc_z * hc_z)
     scale_z = self.ret['p'].exemplar_size / s_z
     d_search = (self.ret['p'].instance_size - self.ret['p'].exemplar_size) / 2
     pad = d_search / scale_z
     s_x = s_z + 2 * pad
     avg_chans = np.mean(self.ret['img_detection'], axis=(0, 1))  # per-channel image mean
     # extract scaled crops for search region x at previous target position
     x_crop = Variable(get_subwindow_tracking(self.ret['img_detection'], self.ret['detection_target_pos'], self.ret['p'].instance_size, round(s_x), avg_chans).unsqueeze(0))
     self.ret['detection'] = x_crop
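Example #14 (like the SiamRPN_track functions above) derives the exemplar and search crop sizes from the target size. The arithmetic is easy to check with concrete numbers; a small standalone sketch, using an arbitrary 100x60 target and the default 0.5 / 127 / 271 configuration:

import numpy as np

context_amount, exemplar_size, instance_size = 0.5, 127, 271
target_sz = np.array([100.0, 60.0])                      # arbitrary example target (w, h)

wc_z = target_sz[0] + context_amount * target_sz.sum()   # 180.0
hc_z = target_sz[1] + context_amount * target_sz.sum()   # 140.0
s_z = np.sqrt(wc_z * hc_z)                               # ~158.7 px: side of the square exemplar crop
scale_z = exemplar_size / s_z                            # ~0.80: image pixels -> model pixels
d_search = (instance_size - exemplar_size) / 2           # 72.0 model pixels of extra search margin
pad = d_search / scale_z                                 # ~90.0 image pixels
s_x = s_z + 2 * pad                                      # ~338.7 px: side of the square search crop
print(round(s_z, 1), round(scale_z, 2), round(s_x, 1))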
Example #15
def SiamRPN_track(state, im):
    # read the needed variables from state
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = state['target_pos']
    target_sz = state['target_sz']
    # v-------------------------------------------------------------------------------------------------------------
    s_z_original = state['s_z_original']
    # ^-------------------------------------------------------------------------------------------------------------

    # compute the context-padded size; context_amount = 0.5, exemplar_size = 127, instance_size = 271 (or 287)
    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    # v-------------------------------------------------------------------------------------------------------------
    # zoom factor relative to the initial template scale
    zoom = s_z/s_z_original
    # ^-------------------------------------------------------------------------------------------------------------
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad     # depends on target_sz

    # extract scaled crops for search region x at the previous target position
    x_crop = Variable(get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0))

    # tracker_eval predicts the new position and score; .cuda() copies the data to GPU memory
    # target_pos, target_sz, score = tracker_eval(net, x_crop.cuda(), target_pos, target_sz * scale_z, window, scale_z, p)
    target_pos, target_sz, score = tracker_eval(net, x_crop.cuda(), target_pos, target_sz * scale_z, window, scale_z, p, im, zoom)
    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
    state['target_pos'] = target_pos    # updated target position (center)
    state['target_sz'] = target_sz
    state['score'] = score
    # v-------------------------------------------------------------------------------------------------------------
    state['s_x'] = round(s_x)
    # <----- template update on deformation ----->
    print(score)
    # if score <= 0.8:
    #     z_crop = Variable(get_subwindow_tracking(im, target_pos, p.exemplar_size, round(s_x), avg_chans).unsqueeze(0))
    #     net.temple(z_crop.cuda())
    # ^-------------------------------------------------------------------------------------------------------------
    return state
Example #16
def SiamRPN_init(im, target_pos, target_sz, net):
    state = dict()
    p = TrackerConfig()
    #p.zLR = zLR
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    if ((target_sz[0] * target_sz[1]) /
            float(state['im_h'] * state['im_w'])) < 0.004:
        p.instance_size = 287  # small object big search region
    else:
        p.instance_size = 271

    p.score_size = int((p.instance_size - p.exemplar_size) / p.total_stride +
                       1)

    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios,
                               p.score_size)

    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)
    z = Variable(z_crop.unsqueeze(0))
    z_f = net.featextract(z.cuda())
    net.kernel(z_f)
    #net.temple(z.cuda())

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['z_0'] = z_f.cpu().data
    state['z_f'] = z_f.cpu().data
    return state
Example #17
def SiamRPN_init(im, target_pos, target_sz, net):
    state = dict()
    p = TrackerConfig()
    p.update(net.cfg)
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    if p.adaptive:
        if ((target_sz[0] * target_sz[1]) /
                float(state['im_h'] * state['im_w'])) < 0.004:
            p.instance_size = 255  # small object big search region
        else:
            p.instance_size = 255

        p.score_size = (p.instance_size - p.exemplar_size) / p.total_stride + 1

    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios,
                               int(p.score_size))

    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)
    z_crop = z_crop / 256
    z_crop[0, :, :] = (z_crop[0, :, :] - 0.485) / 0.229
    z_crop[1, :, :] = (z_crop[1, :, :] - 0.456) / 0.224
    z_crop[2, :, :] = (z_crop[2, :, :] - 0.406) / 0.225
    z = Variable(z_crop.unsqueeze(0))
    net.temple(z.cuda())

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
Example #18
def SiamRPN_track(state, im, z_crop, ids, name):
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = state['target_pos']
    target_sz = state['target_sz']  #background bbox

    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z  #scale ratio of template
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    # here the target is assumed to remain at its previous position,
    # and an s_x-sized crop centered there is extracted and rescaled;
    # the drawback is that a fast-moving target may fall outside the crop
    x_crop = Variable(
        get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x),
                               avg_chans).unsqueeze(0))
    #print(x_crop.shape)#(1L, 3L, 271L, 271L)
    save_img = x_crop.data.squeeze(0).numpy().transpose(
        (1, 2, 0)).astype(np.int32)
    save_path = os.path.join('/home/ly/chz/srpn_tmp', name,
                             '{:03d}_detection_input.jpg'.format(ids))
    cv2.imwrite(save_path, save_img)
    #print('save detection input image @ {}'.format(save_path))

    target_pos, target_sz, score = tracker_eval(net, x_crop.cuda(),
                                                z_crop.cuda(), target_pos,
                                                target_sz * scale_z, window,
                                                scale_z, p, ids, name, im)
    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    return state
Example #19
def SiamRPN_train_batch(train_config, instance_list, source_cxy_list, instance_cxy_list):
    p = train_config['p']
    net = train_config['net']
    avg_chans_list = train_config['avg_chans_list']
    batch_size = len(instance_list)
    x_batch = None
    shift = np.zeros([batch_size, 2])
    gt_sz_list = np.zeros([batch_size, 2])
    boxB = np.zeros([batch_size, 4])

    for batch in range(batch_size):
        target_pos = source_cxy_list[batch][0]
        target_sz = source_cxy_list[batch][1]
        gt_pos = instance_cxy_list[batch][0]
        gt_sz = instance_cxy_list[batch][1]

        wc_z = target_sz[1] + p.context_amount * sum(target_sz)
        hc_z = target_sz[0] + p.context_amount * sum(target_sz)
        s_z = np.sqrt(wc_z * hc_z)
        #scale_z transfer
        scale_z = p.exemplar_size / s_z  #the ratio between the in-model sizes and the real sizes
        gt_sz = gt_sz*scale_z
        gt_sz_list[batch,:] = gt_sz
        target_sz = target_sz*scale_z

        d_search = (p.instance_size - p.exemplar_size) / 2
        pad = d_search / scale_z
        s_x = s_z + 2 * pad
        # extract scaled crops for search region x at previous target position
        x_crop = get_subwindow_tracking(instance_list[batch], target_pos, p.instance_size, round(s_x), avg_chans_list[batch]).unsqueeze(0)
        if type(x_batch)!=torch.Tensor:
            x_batch = x_crop
        else:
            x_batch = torch.cat((x_batch, x_crop), dim=0)

        shift[batch, :] = np.asarray([(gt_pos[0] - target_pos[0])*scale_z, (gt_pos[1] - target_pos[1])*scale_z], dtype=np.float32)
        boxB[batch, :] = np.asarray([shift[batch, 0]-gt_sz[0]/2, shift[batch, 1]-gt_sz[1]/2, \
                            shift[batch, 0]+gt_sz[0]/2, shift[batch, 1]+gt_sz[1]/2], dtype=np.float32)

    assert x_batch.size(0)==batch_size

    cls_loss, box_loss = tracker_train_batch(net, x_batch.cuda(), shift, boxB, gt_sz_list, p)
    return cls_loss, box_loss
Example #20
def SiamRPN_set_source(state, im, source_pos, source_sz):
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    target_pos = source_pos
    target_sz = source_sz

    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z  #the ratio between the in-model sizes and the real sizes
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0).cuda() 
    # x_crop removed the torch.Variable interface, due to the deprecated Variable in torch 0.4.0
    net(x_crop, set_source = True)
Example #21
def SiamRPN_track(state, im):
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = state['target_pos']
    target_sz = state['target_sz']

    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = get_subwindow_tracking(im,
                                    target_pos,
                                    p.instance_size,
                                    round(s_x),
                                    avg_chans,
                                    out_mode='np')

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    x_crop = Variable(transform(x_crop).unsqueeze(0))

    target_pos, target_sz, score = tracker_eval(net, x_crop.cuda(), target_pos,
                                                target_sz * scale_z, window,
                                                scale_z, p)
    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    return state
Example #22
 def _pad_crop_resize_template(self):
     self.ret['im_h'] = self.ret['img_template'].shape[0]
     self.ret['im_w'] = self.ret['img_template'].shape[1]
     self.ret['p'].score_size = (self.ret['p'].instance_size - self.ret['p'].exemplar_size) / self.ret['p'].total_stride + 1
     self.ret['p'].anchor = generate_anchor(self.ret['p'].total_stride, self.ret['p'].scales, self.ret['p'].ratios, int(self.ret['p'].score_size))
     avg_chans = np.mean(self.ret['img_template'], axis=(0, 1))  # per-channel image mean
     wc_z = self.ret['template_target_sz'][0] + self.ret['p'].context_amount * sum(self.ret['template_target_sz'])
     hc_z = self.ret['template_target_sz'][1] + self.ret['p'].context_amount * sum(self.ret['template_target_sz'])
     s_z = round(np.sqrt(wc_z * hc_z))
     # initialize the exemplar
     z_crop = get_subwindow_tracking(self.ret['img_template'], self.ret['template_target_pos'], self.ret['p'].exemplar_size, s_z, avg_chans)
     z = Variable(z_crop.unsqueeze(0))
     # net.temple(z.cuda())
     if self.ret['p'].windowing == 'cosine':
         window = np.outer(np.hanning(self.ret['p'].score_size), np.hanning(self.ret['p'].score_size))
     elif self.ret['p'].windowing == 'uniform':
         window = np.ones((self.ret['p'].score_size, self.ret['p'].score_size))
     window = np.tile(window.flatten(), self.ret['p'].anchor_num)
     self.ret['temple'] = z
     self.ret['avg_chans'] = avg_chans
     self.ret['window'] = window
Example #23
def SiamRPN_train(state, im, old_pos, old_sz, gt_pos, gt_sz):
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = old_pos  # actually the ground truth of the previous frame
    target_sz = old_sz

    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z  #the ratio between the in-model sizes and the real sizes
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0).cuda()

    cls_loss, box_loss = tracker_train(net, x_crop, target_pos, target_sz* scale_z, scale_z, p, gt_pos, gt_sz* scale_z)
    return cls_loss, box_loss
Example #24
def SiamRPN_track(state, im):
    p = state['p']  # tracking config
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']  # cosine window
    target_pos = state['target_pos']  # cx, cy of target in the previous frame
    target_sz = state['target_sz']  # w, h of target in the previous frame
    template_feat = state['template_feat']

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z

    ###'Local to Global': if failure mode is activated then expand d_search; otherwise set d_search to normal
    d_search = (p.instance_size - p.exemplar_size) / 2
    if state['score'] < 0.3:
        d_search *= 2

    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = Variable(
        get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x),
                               avg_chans).unsqueeze(0))
    # where the third argument is the model size and the fourth is the original size in the raw image.

    target_pos, target_sz, score = tracker_eval_distractor_aware(
        x_crop.cuda(), target_sz * scale_z, scale_z, state)

    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))

    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    return state
Example #25
def SiamRPN_track(state,
                  im,
                  f,
                  last_result,
                  att_per,
                  def_per,
                  image_save,
                  iter=10):
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    target_pos = state['target_pos']
    target_sz = state['target_sz']

    wc_z = target_sz[1] + p.context_amount * sum(target_sz)
    hc_z = target_sz[0] + p.context_amount * sum(target_sz)
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = Variable(
        get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x),
                               avg_chans).unsqueeze(0))

    x_crop = x_crop.cuda()

    # adversarial attack
    if type(att_per) != type(0):
        att_per = att_per.cpu().detach().numpy()
        att_per = np.resize(
            att_per, (1, x_crop.shape[1], x_crop.shape[2], x_crop.shape[3]))
        att_per = torch.from_numpy(att_per).cuda()
    x_crop_init = x_crop + att_per * 1
    x_crop_init = torch.clamp(x_crop_init, 0, 255)
    x_adv1 = rtaa_attack(net,
                         x_crop_init,
                         x_crop,
                         last_result,
                         target_pos,
                         target_sz,
                         scale_z,
                         p,
                         iteration=iter)
    att_per = x_adv1 - x_crop

    # adversarial defense
    if type(def_per) != type(0):
        def_per = def_per.cpu().detach().numpy()
        def_per = np.resize(
            def_per, (1, x_crop.shape[1], x_crop.shape[2], x_crop.shape[3]))
        def_per = torch.from_numpy(def_per).cuda()
    x_adv2_mask = x_adv1 + def_per * 0.01
    x_adv2_mask = torch.clamp(x_adv2_mask, 0, 255)
    x_adv2 = rtaa_defnese(net,
                          x_adv2_mask,
                          x_adv1,
                          last_result,
                          target_pos,
                          target_sz,
                          scale_z,
                          p,
                          iteration=iter)
    def_per = x_adv2 - x_adv1

    target_pos, target_sz, score = tracker_eval(net, x_adv2, target_pos,
                                                target_sz * scale_z, window,
                                                scale_z, p, f, last_result,
                                                state)

    target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
    target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
    target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
    target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = score
    return state, att_per, def_per
Example #26
    def redection(self, frame, s_x, img_mean, ratio):
        # get global instance img
        instance_img_global = get_subwindow_tracking(
            frame, self.pos, config.instance_size * ratio, ratio * s_x,
            img_mean)
        instance_img_global = self.transforms(instance_img_global)[
            None, :, :, :]
        # get global instance feature
        with torch.cuda.device(self.gpu_id):
            instance_img_global = Variable(instance_img_global.cuda())
            pred_cls, pred_reg = self.model(None, instance_img_global)

        # global response
        score_size = int((config.instance_size * ratio -
                          config.exemplar_size) / config.total_stride + 1)
        global_response = torch.sigmoid(pred_cls).squeeze().view(
            -1).detach().cpu().numpy()
        global_best_id = np.argmax(global_response)
        global_anchor_id = global_best_id // (score_size * score_size)

        extreme_points = extreme_point_detection(
            global_response.reshape(config.anchor_num, score_size,
                                    score_size)[global_anchor_id])

        # print(extreme_points)
        if len(extreme_points) <= 0:
            return None, None, None, None, None

        max_value = 0.5
        for p in extreme_points:
            p[0] = float(p[0] - 8) * (config.total_stride * s_x *
                                      ratio) / config.instance_size
            p[1] = float(p[1] - 8) * (config.total_stride * s_x *
                                      ratio) / config.instance_size
            candidate_pos = self.pos + p
            # get candidate instance img
            instance_img = get_subwindow_tracking(frame, candidate_pos,
                                                  config.instance_size, s_x,
                                                  img_mean)
            instance_img = self.transforms(instance_img)[None, :, :, :]
            # get candidate instance feature
            with torch.cuda.device(self.gpu_id):
                instance_img = Variable(instance_img.cuda())
                pred_cls, pred_reg = self.model(None, instance_img)

            # candidate offsets
            candidate_offsets = pred_reg.squeeze().view(
                4, -1).detach().cpu().numpy()
            candidate_offsets[0] = candidate_offsets[
                0] * self.anchors[:, 2] + self.anchors[:, 0]
            candidate_offsets[1] = candidate_offsets[
                1] * self.anchors[:, 3] + self.anchors[:, 1]
            candidate_offsets[2] = np.exp(
                candidate_offsets[2]) * self.anchors[:, 2]
            candidate_offsets[3] = np.exp(
                candidate_offsets[3]) * self.anchors[:, 3]

            candidate_response = torch.sigmoid(pred_cls).squeeze().view(
                -1).detach().cpu().numpy()
            candidate_response_raw = candidate_response
            candidate_response = (
                1 - config.window_influence
            ) * candidate_response + config.window_influence * self.hann_window
            candidate_best_id = np.argmax(candidate_response)
            candidate_anchor_id = candidate_best_id // 289
            candidate_response_map = candidate_response_raw[
                candidate_anchor_id * 289:candidate_anchor_id * 289 + 289]

            if candidate_response.max() > max_value:
                best_candidate_pos = candidate_pos
                best_candidate_id = candidate_best_id
                best_candidate_anchor_id = candidate_anchor_id
                best_candidate_offsets = candidate_offsets
                best_candidate_response_map = candidate_response_map
                max_value = candidate_response.max()

        if max_value == 0.5:
            return None, None, None, None, None

        return best_candidate_pos, best_candidate_id, best_candidate_anchor_id, best_candidate_offsets, best_candidate_response_map
Example #27
def SiamRPN_init(im, target_pos, target_sz, net, gtbox):
    state = dict()
    p = TrackerConfig()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    state['ctr'] = 0

    if ((target_sz[0] * target_sz[1]) / float(state['im_h'] * state['im_w'])) < 0.004:
        p.instance_size = 287  # small object big search region
    else:
        p.instance_size = 271

    p.score_size = (p.instance_size - p.exemplar_size) // p.total_stride + 1

    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios, p.score_size)

    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)

    z = Variable(z_crop.unsqueeze(0))
    net.temple(z.cuda())
    
    s_z = np.sqrt(wc_z * hc_z)
    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad

    # extract scaled crops for search region x at previous target position
    x_crop = Variable(get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0))
    
    label = np.zeros(im.shape)
    x_low, x_high = target_pos[0] - target_sz[0]/2, target_pos[0] + target_sz[0]/2
    y_low, y_high = target_pos[1] - target_sz[1]/2, target_pos[1] + target_sz[1]/2
    x_low, x_high, y_low, y_high = int(x_low), int(x_high), int(y_low), int(y_high)
    label[y_low:y_high,x_low:x_high,:] = 2
    label = (get_subwindow_tracking(label, target_pos, p.instance_size, round(s_x), 0, out_mode="image"))
    label = cv2.split(label)[0]
    if p.instance_size == 271:
        label = cv2.resize(label, (19, 19))
    else:
        label = cv2.resize(label, (21, 21))
    
    label = torch.Tensor([(2-label)]*5 + [label]*5).unsqueeze(0)
    
    net.make_at(x_crop.cuda(), label.cuda())
    
    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    
    return state
Example #28
def SiamRPN_init(im, target_pos, target_sz, net):
    ## target_pos is (cx, cy)
    ## target_sz is (w, h)

    state = dict()
    p = TrackerConfig()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    if ((target_sz[0] * target_sz[1]) /
            float(state['im_h'] * state['im_w'])) < 0.004:
        p.instance_size = 287  # small object big search region
    else:
        p.instance_size = 255  #271

    p.delta_score_size = int(
        (p.instance_size - p.exemplar_size) / p.total_stride +
        1)  # size of the last feature map, expected to be 17

    # all anchors of each aspect ratio and scale at each location are generated.
    p.anchors, _ = generate_all_anchors(
        (p.delta_score_size, p.delta_score_size),
        (p.instance_size, p.instance_size))
    # of shape (dropping from 2420 down to 433, 4)

    avg_chans = np.mean(im, axis=(0, 1))  # per-channel image mean

    wc_z = target_sz[0] + p.context_amount * sum(
        target_sz)  # adding some context info
    hc_z = target_sz[1] + p.context_amount * sum(
        target_sz)  # adding some context info
    s_z = round(np.sqrt(wc_z * hc_z))

    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)

    z = Variable(z_crop.unsqueeze(0))
    template_feat = net.template(z.cuda())

    if p.windowing == 'cosine':
        # return the outer product of two hanning vectors, which is a matrix of the same size as the feature map of search region
        window = np.outer(
            np.hanning(p.delta_score_size),
            np.hanning(p.delta_score_size))  ############### p.score_size???
    elif p.windowing == 'uniform':
        window = np.ones(
            (p.delta_score_size,
             p.delta_score_size))  ################## p.score_size???

    # flatten and replicate the cosine window
    window = np.tile(window.flatten(), p.basic_anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    state['score'] = 1.0

    # for distractor-aware incremental learning
    template_feat_cpu = template_feat.cpu().detach().numpy()
    state['template_feat'] = template_feat_cpu
    state['acc_beta_phi'] = template_feat_cpu
    state['acc_beta'] = 1.0
    state['acc_beta_alpha_phi'] = np.zeros_like(template_feat_cpu)

    return state
Example #29
    def update(self, frame, gt, clf, random_shift, frame_num):
        """track object based on the previous frame
        Args:
            frame: a BGR image

        Returns:
            bbox: tuple of 1-based bounding box (xmin, ymin, xmax, ymax)
        """

        #######################
        if random_shift:
            pos_ = np.array([gt[0] + gt[2] / 2, gt[1] + gt[3] / 2
                             ])  # center x, center y, zero based
            max_translate = 2 * (self.s_x /
                                 config.instance_size) * config.total_stride
            self.pos[0] = np.random.uniform(pos_[0] - max_translate,
                                            pos_[0] + max_translate)
            self.pos[1] = np.random.uniform(pos_[1] - max_translate,
                                            pos_[1] + max_translate)
        #########################
        # get instance img
        img_mean = tuple(map(int, frame.mean(axis=(0, 1))))
        instance_img = get_subwindow_tracking(frame, self.pos,
                                              config.instance_size,
                                              python2round(self.s_x), img_mean)
        instance_img = self.transforms(instance_img)[None, :, :, :]

        # get instance feature
        with torch.cuda.device(self.gpu_id):
            instance_img = Variable(instance_img.cuda(), requires_grad=False)
            pred_cls, pred_reg = self.model(None, instance_img)

        #offsets
        offsets = pred_reg.squeeze().view(4, -1).detach().cpu().numpy()
        offsets[0] = offsets[0] * self.anchors[:, 2] + self.anchors[:, 0]
        offsets[1] = offsets[1] * self.anchors[:, 3] + self.anchors[:, 1]
        offsets[2] = np.exp(offsets[2]) * self.anchors[:, 2]
        offsets[3] = np.exp(offsets[3]) * self.anchors[:, 3]

        # scale and ratio penalty
        penalty = self._create_penalty(self.target_sz, offsets)

        # response
        max_value = pred_cls.max().detach().cpu().numpy()
        response = torch.sigmoid(pred_cls).squeeze().view(
            -1).detach().cpu().numpy()
        response_raw = response
        response = response * penalty
        response = (1 - config.window_influence
                    ) * response + config.window_influence * self.hann_window
        best_id = np.argmax(response)

        # anomaly detection
        anchor_id = best_id // 289
        response_map = response_raw[anchor_id * 289:anchor_id * 289 + 289]
        vis_heatmap(response_map.reshape(17, 17), max_value)

        clf_output = clf(
            torch.from_numpy(response_map).float().cuda()).data.cpu().numpy()
        state = np.argmax(clf_output)
        # print(state)

        # response_label = self.create_response_label(response_map.reshape(17, 17), self.s_x, anchor_id)
        # dae_output = sigmoid(dae(torch.from_numpy(response_map).float().cuda()).data.cpu().numpy())
        # loss = np.mean((dae_output-response_label.flatten())**2)

        update_flag = 1
        if state == 0:
            # print(' frame:'+str(frame_num)+'  '+str(response_raw.max()))
            update_flag = 0
            # self.counter_re += 1
            # window_influence_re = 0.26
            # response_re = response_raw * penalty
            # response_re = (1 - window_influence_re) * response_re + window_influence_re * self.hann_window
            # best_id = np.argmax(response_re)
        #     if self.counter_re % 12 == 0 and self.counter_re != 0:
        #         best_candidate_pos, best_candidate_id, best_candidate_anchor_id, best_candidate_offsets, \
        #         best_candidate_response_map = self.redection(frame, self.s_x, img_mean, 2)
        #         if best_candidate_pos is not None:
        #             clf_output_re = clf(torch.from_numpy(best_candidate_response_map).float().cuda()).data.cpu().numpy()
        #             state_re = np.argmax(clf_output_re)
        #             if state_re ==1 :
        #                 self.pos = best_candidate_pos
        #                 offsets = best_candidate_offsets
        #                 best_id = best_candidate_id
        #                 self.counter_re = 0
        # else:
        #     self.counter_re = 0

        # peak location
        offset = offsets[:, best_id] * self.s_z / config.exemplar_size

        # update center
        self.pos += np.array([offset[0], offset[1]])
        self.pos = np.clip(self.pos, 0, [frame.shape[1], frame.shape[0]])

        # update scale
        lr = response[best_id] * config.scale_lr
        self.target_sz = (1 - lr) * self.target_sz + lr * np.array(
            [offset[2], offset[3]])
        self.target_sz = np.clip(self.target_sz, 10,
                                 [frame.shape[1], frame.shape[0]])
        wc_z = self.target_sz[1] + 0.5 * sum(self.target_sz)
        hc_z = self.target_sz[0] + 0.5 * sum(self.target_sz)
        self.s_z = np.sqrt(wc_z * hc_z)
        self.s_x = self.s_z * config.instance_size / config.exemplar_size

        #update_model
        if update_flag:
            exemplar_img = get_subwindow_tracking(frame, self.pos,
                                                  config.exemplar_size,
                                                  python2round(self.s_z),
                                                  img_mean)
            exemplar_img = self.transforms(exemplar_img)[None, :, :, :]
            with torch.cuda.device(self.gpu_id):
                exemplar_img = Variable(exemplar_img.cuda(),
                                        requires_grad=False)
                self.model.update_model(exemplar_img)

        # return 1-indexed and left-top based bounding box
        bbox = np.array([
            self.pos[0] - (self.target_sz[0]) / 2,
            self.pos[1] - (self.target_sz[1]) / 2,
            self.pos[0] + (self.target_sz[0]) / 2,
            self.pos[1] + (self.target_sz[1]) / 2
        ])

        return bbox, response_map
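Example #29 (and Example #26) index the flattened response with best_id // 289; 289 is 17 x 17, the number of score-map locations per anchor when instance_size is 255 and total_stride is 8. A small sketch of that index decoding, assuming 5 anchors:

import numpy as np

anchor_num, score_size = 5, 17                     # assumed config matching 289 = 17 * 17 above
response = np.random.rand(anchor_num * score_size * score_size)

best_id = int(np.argmax(response))
anchor_id = best_id // (score_size * score_size)   # which anchor shape fired
cell = best_id % (score_size * score_size)
row, col = divmod(cell, score_size)                # location on the 17 x 17 score map
print(anchor_id, row, col)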
Example #30
def SiamRPN_init(im, target_pos, target_sz, net):
    """
    Takes the first frame as input.
    target_pos [center_x, center_y]
    target_sz  [w, h]
    net
    return:
    state['im_h']
    state['im_w']
    state['p']  config for tracker
    state['net']
    state['avg_chans'] per-channel mean
    """
    state = dict()
    p = TrackerConfig()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    # Input size, if target is small, input should be large?
    if ((target_sz[0] * target_sz[1]) /
            float(state['im_h'] * state['im_w'])) < 0.004:
        p.instance_size = 287  # small object big search region
    else:
        p.instance_size = 271

    # Input size - Template size
    # number of sliding-window positions per row:
    # an exemplar_size window slides over the instance_size input
    # with a step of total_stride
    p.score_size = (p.instance_size - p.exemplar_size) / p.total_stride + 1

    p.anchor = generate_anchor(p.total_stride, p.scales, p.ratios,
                               p.score_size)

    # averaging over an axis removes that dimension
    # 1024 * 1024 * 3 => [x1, x2, x3]
    avg_chans = np.mean(im, axis=(0, 1))

    # enlarge the template region with context
    # and normalize it to a square
    # (the detection crop is not squared)
    # w_ -> w + (w+h)/2
    # h_ -> h + (w+h)/2
    # s_ -> sqrt(w_ * h_)
    # target_sz is the actual box, while s_z turns that box into a square
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    scale_z = p.exemplar_size / s_z
    d_search = (p.instance_size - p.exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad
    # initialize the exemplar
    # pad out-of-image regions with the channel means
    # target_pos is the center point
    # s_z is the squared crop size in the original image
    # exemplar_size is the 127 the crop is resized to
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)
    template = z_crop.numpy().transpose((1, 2, 0))
    state['template'] = template

    z = Variable(z_crop.unsqueeze(0))

    x_crop = Variable(
        get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x),
                               avg_chans).unsqueeze(0))

    #net.temple(z.cuda())
    net(z.cuda(), x_crop.cuda())
    if p.windowing == 'cosine':
        # np.outer(x1, x2): each element of x1 scales the row vector x2
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(),
                     p.anchor_num)  # np.tile repeats the flattened window anchor_num times

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz

    return state, z