Code Example #1
File: data_prov.py  Project: BruthYU/S-MDNet
    def __init__(self, img_list, gt, opts):
        self.img_list = np.asarray(img_list)
        self.gt = gt

        self.batch_frames = opts['batch_frames']
        self.batch_pos = opts['batch_pos']
        self.batch_neg = opts['batch_neg']

        self.overlap_pos = opts['overlap_pos']
        self.overlap_neg = opts['overlap_neg']

        self.crop_size = opts['img_size']
        self.padding = opts['padding']

        self.flip = opts.get('flip', False)
        self.rotate = opts.get('rotate', 0)
        self.blur = opts.get('blur', 0)

        self.index = np.random.permutation(len(self.img_list))
        self.pointer = 0

        image = Image.open(self.img_list[0]).convert('RGB')
        self.pos_generator = SampleGenerator('uniform', image.size,
                                             opts['trans_pos'],
                                             opts['scale_pos'])
        self.neg_generator = SampleGenerator('uniform', image.size,
                                             opts['trans_neg'],
                                             opts['scale_neg'])
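
A minimal usage sketch for the provider above. The enclosing class name (RegionDataset, as in upstream MDNet's data_prov.py) and the option values are assumptions in the spirit of MDNet's pretraining defaults, not this project's exact configuration.

opts = {
    'batch_frames': 8, 'batch_pos': 32, 'batch_neg': 96,
    'overlap_pos': [0.7, 1], 'overlap_neg': [0, 0.5],
    'img_size': 107, 'padding': 16,
    'trans_pos': 0.1, 'scale_pos': 1.3,
    'trans_neg': 2, 'scale_neg': 1.6,
}
dataset = RegionDataset(img_list, gt, opts)  # img_list: frame paths, gt: (N, 4) boxes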
Code Example #2
def init_actor(actor, image, gt):
    np.random.seed(123)
    torch.manual_seed(456)
    torch.cuda.manual_seed(789)

    batch_num = 64
    maxiter = 80
    actor = actor.cuda()
    actor.train()
    init_optimizer = torch.optim.Adam(actor.parameters(), lr=0.0001)
    loss_func = torch.nn.MSELoss()
    actor_samples = np.round(
        gen_samples(SampleGenerator('uniform', image.size, 0.3, 1.5, None), gt,
                    1500, [0.6, 1], [0.9, 1.1]))
    idx = np.random.permutation(actor_samples.shape[0])
    batch_img = getbatch_actor(np.array(image), actor_samples)
    batch_distance = cal_distance(actor_samples,
                                  np.tile(gt, [actor_samples.shape[0], 1]))
    batch_distance = np.array(batch_distance).astype(np.float32)
    while len(idx) < batch_num * maxiter:
        idx = np.concatenate(
            [idx, np.random.permutation(actor_samples.shape[0])])

    pointer = 0
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
    # equivalent resampling filter on newer Pillow versions.
    torch_image = loader(image.resize((225, 225),
                                      Image.ANTIALIAS)).unsqueeze(0).cuda()
    for it in range(maxiter):
        nxt = pointer + batch_num
        cur_idx = idx[pointer:nxt]
        pointer = nxt
        feat = actor(batch_img[cur_idx],
                     torch_image.repeat(batch_num, 1, 1, 1))
        loss = loss_func(
            feat,
            Variable(torch.FloatTensor(batch_distance[cur_idx])).cuda())

        actor.zero_grad()
        loss.backward()
        init_optimizer.step()
        if opts['show_train']:  # opts: the project's module-level options dict
            print("Iter %d, Loss %.10f" % (it, loss.item()))
        if loss.item() < 0.0001:
            deta_flag = 0
            return deta_flag
    # the loss never converged below the threshold within maxiter iterations
    deta_flag = 1
    return deta_flag
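
A hedged calling sketch for init_actor: the actor network and the ground-truth box below are placeholders, not defined in the snippet above; the returned flag is 0 if the loss converged below 1e-4 and 1 otherwise.

from PIL import Image
import numpy as np

image = Image.open(img_list[0]).convert('RGB')  # first frame of the sequence
gt = np.array([120.0, 80.0, 64.0, 48.0])        # (x, y, w, h), illustrative
deta_flag = init_actor(actor, image, gt)        # 0 = converged, 1 = ran to maxiter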
Code Example #3
File: MDNet_vot.py  Project: xjtuwh/MDNet_SME
target_bbox = np.array(gt_rect)
model = MDNet(opts['model_path'])
if opts['use_gpu']:
    model = model.cuda()

# Init criterion and optimizer
criterion = BCELoss()
model.set_learnable_params(opts['ft_layers'])
init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])
# Load first image
image = Image.open(imagefile).convert('RGB')
# Draw pos/neg samples
pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                               opts['scale_pos'])(target_bbox,
                                                  opts['n_pos_init'],
                                                  opts['overlap_pos_init'])

neg_examples = np.concatenate([
    SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                    opts['scale_neg_init'])(target_bbox,
                                            int(opts['n_neg_init'] * 0.5),
                                            opts['overlap_neg_init']),
    SampleGenerator('whole', image.size)(target_bbox,
                                         int(opts['n_neg_init'] * 0.5),
                                         opts['overlap_neg_init'])
])
neg_examples = np.random.permutation(neg_examples)

# Extract pos/neg features
pos_feats = forward_samples(model, image, pos_examples)
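
The SampleGenerator instances above are called as generator(target_bbox, n, overlap_range). A self-contained sketch of the underlying idea, Gaussian translation/scale jitter filtered by IoU against the target, follows; it illustrates the sampling scheme and is not the project's actual implementation.

import numpy as np

def overlap_ratio(a, b):
    # IoU of two (x, y, w, h) boxes.
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2 = min(a[0] + a[2], b[0] + b[2])
    y2 = min(a[1] + a[3], b[1] + b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    return inter / (a[2] * a[3] + b[2] * b[3] - inter)

def gaussian_samples(bbox, n, trans, scale, overlap_range, max_tries=10000):
    # Jitter the box position and size; keep candidates whose IoU with the
    # target falls inside overlap_range.
    x, y, w, h = bbox
    out, tries = [], 0
    while len(out) < n and tries < max_tries:
        tries += 1
        dx = trans * w * np.clip(0.5 * np.random.randn(), -1, 1)
        dy = trans * h * np.clip(0.5 * np.random.randn(), -1, 1)
        s = 1.05 ** (scale * np.clip(0.5 * np.random.randn(), -1, 1))
        cand = (x + dx, y + dy, w * s, h * s)
        if overlap_range[0] <= overlap_ratio(bbox, cand) <= overlap_range[1]:
            out.append(cand)
    return np.asarray(out)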
Code Example #4
def run_mdnet(img_list, init_bbox, gt=None, savefig_dir='', display=False):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update: these three are generators, not the samples they produce
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox: this becomes the next iteration's target_bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(
            model, image, samples, out_layer='fc6'
        )  # forward_samples scores every candidate on the current frame (one forward pass of the network)

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)  # average the top boxes into a single estimate
        success = target_score > 0

        # Expand search area at failure: adapt the sample-generation parameters on the fly to the tracking state
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression: regress only the current frame's top-ranked boxes (previous frames are not used) to produce a refined bbox
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update: on failure, retrain the model with the pos/neg samples accumulated over the last few frames
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps
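
A hedged driver for run_mdnet, assuming an OTB-style sequence layout (an img/ directory plus a comma-separated groundtruth_rect.txt); the paths are illustrative.

import os
import numpy as np

seq_dir = 'datasets/OTB/Basketball'
img_dir = os.path.join(seq_dir, 'img')
img_list = sorted(os.path.join(img_dir, f) for f in os.listdir(img_dir))
gt = np.loadtxt(os.path.join(seq_dir, 'groundtruth_rect.txt'), delimiter=',')

result, result_bb, fps = run_mdnet(img_list, gt[0], gt=gt, display=False)
print('fps: {:.1f}'.format(fps))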
Code Example #5
def run_mdnet(img_list,
              init_bbox,
              gt=None,
              savefig_dir='',
              display=False,
              model_path='models/model001.pth'):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    opts['model_path'] = model_path

    print('********')
    print('model:', opts['model_path'])
    print('********')

    assert model_path in ('models/model000.pth', 'models/model001.pth')

    if model_path == 'models/model000.pth':
        model = MDNet0(opts['model_path'])
    else:
        model = MDNet1(opts['model_path'])

    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    print(pos_feats)
    neg_feats = forward_samples(model, image, neg_examples)
    print(neg_feats)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)

        # for top 5 samples, maximize score using hill-climbing algorithm
        for j in range(5):
            sample_ = samples[top_idx[j]]
            last_top_score = None

            # hill-climbing search
            while True:
                sample_left_p = [
                    sample_[0] + 1, sample_[1], sample_[2] - 1, sample_[3]
                ]
                sample_left_n = [
                    sample_[0] - 1, sample_[1], sample_[2] + 1, sample_[3]
                ]
                sample_up_p = [
                    sample_[0], sample_[1] + 1, sample_[2], sample_[3] - 1
                ]
                sample_up_n = [
                    sample_[0], sample_[1] - 1, sample_[2], sample_[3] + 1
                ]
                sample_right_p = [
                    sample_[0], sample_[1], sample_[2] + 1, sample_[3]
                ]
                sample_right_n = [
                    sample_[0], sample_[1], sample_[2] - 1, sample_[3]
                ]
                sample_bottom_p = [
                    sample_[0], sample_[1], sample_[2], sample_[3] + 1
                ]
                sample_bottom_n = [
                    sample_[0], sample_[1], sample_[2], sample_[3] - 1
                ]

                all_samples = [
                    sample_left_p, sample_left_n, sample_up_p, sample_up_n,
                    sample_right_p, sample_right_n, sample_bottom_p,
                    sample_bottom_n
                ]

                hillClimbingSS = forward_samples(model,
                                                 image,
                                                 np.array(all_samples),
                                                 out_layer='fc6')
                top_score, top_index = hillClimbingSS[:, 1].topk(1)
                top_score_float = top_score.cpu().numpy()[0]

                # End of hill climbing: this is THE BEST!
                if last_top_score is not None and top_score_float < last_top_score:
                    break

                sample_ = all_samples[top_index]
                samples[top_idx[j]] = all_samples[top_index]
                last_top_score = top_score_float

        # modify sample scores array
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)

        # keep a copy of the original samples so they can be restored on failure
        sampleStore = samples.tolist()

        # if the mean of the top-5 scores is below 0, search a wide grid around the last bbox
        target_score = top_scores.mean()

        if target_score < 0:
            # print('')
            # print('last bbox:')
            # print(result[i-1])
            last_left = result[i - 1][0]
            last_top = result[i - 1][1]

            # print('')
            # for j in range(len(samples)): print(j, samples[j], sample_scores[j])
            # print('')
            # print('sample top scores (before):')
            # print(top_scores)
            # print(top_idx)

            cnt = 0
            rl = [32, 16]

            for scale_div in rl:
                everywhere_sample = []

                # find everywhere (near the last bbox)
                meanWidth = 0.0
                meanHeight = 0.0
                for j in range(len(samples)):
                    meanWidth += samples[j][2]
                    meanHeight += samples[j][3]
                meanWidth /= len(samples)
                meanHeight /= len(samples)

                width = image.size[0]
                height = image.size[1]

                for j in range(32):
                    for k in range(32):
                        jk = [
                            last_left + (31 - 2 * j) * meanWidth / scale_div,
                            last_top + (31 - 2 * k) * meanHeight / scale_div,
                            meanWidth, meanHeight
                        ]
                        # print(j, k, jk)
                        everywhere_sample.append(jk)

                everywhere_scores = forward_samples(
                    model, image, np.array(everywhere_sample), out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = \
                    everywhere_scores[:, 1].topk(5)

                # print('')
                # print('everywhere_sample:')
                # for j in range(len(everywhere_sample)): print(j, everywhere_sample[j], everywhere_scores[j])

                # print('')
                # print('everywhere top scores (before):')
                # print(everywhere_top_scores)
                # print(everywhere_top_idx)
                # for j in range(5): print(everywhere_sample[everywhere_top_idx[j]])

                # for top 5 samples in everywhere_sample, maximize score using hill-climbing algorithm
                for j in range(5):
                    # print('')
                    sample_ = everywhere_sample[everywhere_top_idx[j]]
                    last_top_score = None

                    # hill-climbing search
                    while True:
                        sample_left_p = [
                            sample_[0] + 1, sample_[1], sample_[2] - 1,
                            sample_[3]
                        ]
                        sample_left_n = [
                            sample_[0] - 1, sample_[1], sample_[2] + 1,
                            sample_[3]
                        ]
                        sample_up_p = [
                            sample_[0], sample_[1] + 1, sample_[2],
                            sample_[3] - 1
                        ]
                        sample_up_n = [
                            sample_[0], sample_[1] - 1, sample_[2],
                            sample_[3] + 1
                        ]
                        sample_right_p = [
                            sample_[0], sample_[1], sample_[2] + 1, sample_[3]
                        ]
                        sample_right_n = [
                            sample_[0], sample_[1], sample_[2] - 1, sample_[3]
                        ]
                        sample_bottom_p = [
                            sample_[0], sample_[1], sample_[2], sample_[3] + 1
                        ]
                        sample_bottom_n = [
                            sample_[0], sample_[1], sample_[2], sample_[3] - 1
                        ]

                        all_samples = [
                            sample_left_p, sample_left_n, sample_up_p,
                            sample_up_n, sample_right_p, sample_right_n,
                            sample_bottom_p, sample_bottom_n
                        ]

                        hillClimbingSS = forward_samples(model,
                                                         image,
                                                         np.array(all_samples),
                                                         out_layer='fc6')
                        top_score, top_index = hillClimbingSS[:, 1].topk(1)
                        top_score_float = top_score.cpu().numpy()[0]

                        # End of hill climbing: this is THE BEST!
                        if last_top_score is not None:
                            # print(last_top_score)
                            if top_score_float < last_top_score:
                                break

                        sample_ = all_samples[top_index]
                        everywhere_sample[
                            everywhere_top_idx[j]] = all_samples[top_index]
                        last_top_score = top_score_float

                everywhere_scores = forward_samples(
                    model, image, np.array(everywhere_sample), out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = \
                    everywhere_scores[:, 1].topk(5)

                # print('')
                # print('everywhere top scores (after):')
                # print(everywhere_top_scores)
                # print(everywhere_top_idx)
                # for j in range(5): print(everywhere_sample[everywhere_top_idx[j]])

                # merge 'samples' with everywhere samples
                everywhere_top5 = []
                for j in range(5):
                    everywhere_top5.append(
                        everywhere_sample[everywhere_top_idx[j]])
                samples = np.concatenate((samples, np.array(everywhere_top5)))

                sample_scores = forward_samples(model,
                                                image,
                                                samples,
                                                out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)

                if top_scores.mean() > 0:
                    # print('')
                    # for j in range(len(samples)): print(j, samples[j], sample_scores[j])
                    # print('')
                    # print('sample top scores (after):')
                    # print(top_scores)
                    # print(top_idx)
                    break
                cnt += 1

            # failure -> recover original samples
            if cnt == len(rl):  # every retry scale failed
                # print('recovered')
                samples = np.array(sampleStore)
                sample_scores = forward_samples(model,
                                                image,
                                                samples,
                                                out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)

        # finally modify sample scores array
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)

        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(
                    savefig_dir,
                    ('M' + model_path[14] + 'T3_' + '{:04d}.jpg'.format(i))),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    plt.close('all')
    return result, result_bb, fps, overlap
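
The two hill-climbing blocks in this example duplicate the same eight-neighbour search over (x, y, w, h). A compact sketch of that inner loop, written against the same forward_samples(model, image, samples, out_layer) interface (an illustration, not the project's code):

import numpy as np

def hill_climb(sample, model, image):
    # Greedy +/-1 edge moves on an (x, y, w, h) box until the fc6 score drops.
    last_score = None
    while True:
        x, y, w, h = sample
        neighbours = [
            [x + 1, y, w - 1, h], [x - 1, y, w + 1, h],  # left edge in/out
            [x, y + 1, w, h - 1], [x, y - 1, w, h + 1],  # top edge in/out
            [x, y, w + 1, h], [x, y, w - 1, h],          # right edge out/in
            [x, y, w, h + 1], [x, y, w, h - 1],          # bottom edge out/in
        ]
        scores = forward_samples(model, image, np.array(neighbours),
                                 out_layer='fc6')
        best_score, best_idx = scores[:, 1].topk(1)
        best_score = best_score.cpu().numpy()[0]
        if last_score is not None and best_score < last_score:
            return sample  # local maximum reached
        sample, last_score = neighbours[best_idx], best_score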
Code Example #6
    def initialize(self, image_file, box):
        self.frame_idx = 0

        # Load first image
        cur_image = Image.open(image_file).convert("RGB")
        cur_image = np.asarray(cur_image)

        self.target_bbox = np.array(box)

        # Draw pos/neg samples
        ishape = cur_image.shape
        pos_examples = gen_samples(
            SampleGenerator("gaussian", (ishape[1], ishape[0]), 0.1, 1.2),
            self.target_bbox,
            opts["n_pos_init"],
            opts["overlap_pos_init"],
        )
        neg_examples = gen_samples(
            SampleGenerator("uniform", (ishape[1], ishape[0]), 1, 2, 1.1),
            self.target_bbox,
            opts["n_neg_init"],
            opts["overlap_neg_init"],
        )
        neg_examples = np.random.permutation(neg_examples)

        cur_bbreg_examples = gen_samples(
            SampleGenerator("uniform", (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
            self.target_bbox,
            opts["n_bbreg"],
            opts["overlap_bbreg"],
            opts["scale_bbreg"],
        )

        # compute padded sample
        padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                     (opts["padding"] - 1.0) / 2.0).min()
        padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                     (opts["padding"] - 1.0) / 2.0).min()
        padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                     (opts["padding"] + 1.0) / 2.0).max()
        padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                     (opts["padding"] + 1.0) / 2.0).max()
        padded_scene_box = np.reshape(
            np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                        padded_y2 - padded_y1)),
            (1, 4),
        )

        scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
        if opts["jitter"]:
            # horizontal shift
            jittered_scene_box_horizon = np.copy(padded_scene_box)
            jittered_scene_box_horizon[0, 0] -= 4.0
            jitter_scale_horizon = 1.0

            # vertical shift
            jittered_scene_box_vertical = np.copy(padded_scene_box)
            jittered_scene_box_vertical[0, 1] -= 4.0
            jitter_scale_vertical = 1.0

            # scale reduction
            jittered_scene_box_reduce1 = np.copy(padded_scene_box)
            jitter_scale_reduce1 = 1.1**(-1)

            # scale enlargement
            jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
            jitter_scale_enlarge1 = 1.1**(1)

            # scale reduction
            jittered_scene_box_reduce2 = np.copy(padded_scene_box)
            jitter_scale_reduce2 = 1.1**(-2)
            # scale enlarge
            jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
            jitter_scale_enlarge2 = 1.1**(2)

            scene_boxes = np.concatenate(
                [
                    scene_boxes,
                    jittered_scene_box_horizon,
                    jittered_scene_box_vertical,
                    jittered_scene_box_reduce1,
                    jittered_scene_box_enlarge1,
                    jittered_scene_box_reduce2,
                    jittered_scene_box_enlarge2,
                ],
                axis=0,
            )
            jitter_scale = [
                1.0,
                jitter_scale_horizon,
                jitter_scale_vertical,
                jitter_scale_reduce1,
                jitter_scale_enlarge1,
                jitter_scale_reduce2,
                jitter_scale_enlarge2,
            ]
        else:
            jitter_scale = [1.0]

        self.model.eval()
        for bidx in range(0, scene_boxes.shape[0]):
            crop_img_size = (scene_boxes[bidx, 2:4] * (
                (opts["img_size"], opts["img_size"]) / self.target_bbox[2:4])
                             ).astype("int64") * jitter_scale[bidx]
            cropped_image, cur_image_var = self.img_crop_model.crop_image(
                cur_image, np.reshape(scene_boxes[bidx], (1, 4)),
                crop_img_size)
            cropped_image = cropped_image - 128.0

            feat_map = self.model(cropped_image, out_layer="conv3")

            rel_target_bbox = np.copy(self.target_bbox)
            rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

            batch_num = np.zeros((pos_examples.shape[0], 1))
            cur_pos_rois = np.copy(pos_examples)
            cur_pos_rois[:, 0:2] -= np.repeat(
                np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                cur_pos_rois.shape[0],
                axis=0,
            )
            scaled_obj_size = float(opts["img_size"]) * jitter_scale[bidx]
            cur_pos_rois = samples2maskroi(
                cur_pos_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                self.target_bbox[2:4],
                opts["padding"],
            )
            cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
            cur_pos_rois = Variable(
                torch.from_numpy(cur_pos_rois.astype("float32"))).cuda()
            cur_pos_feats = self.model.roi_align_model(feat_map, cur_pos_rois)
            cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                               -1).data.clone()

            batch_num = np.zeros((neg_examples.shape[0], 1))
            cur_neg_rois = np.copy(neg_examples)
            cur_neg_rois[:, 0:2] -= np.repeat(
                np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                cur_neg_rois.shape[0],
                axis=0,
            )
            cur_neg_rois = samples2maskroi(
                cur_neg_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                self.target_bbox[2:4],
                opts["padding"],
            )
            cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
            cur_neg_rois = Variable(
                torch.from_numpy(cur_neg_rois.astype("float32"))).cuda()
            cur_neg_feats = self.model.roi_align_model(feat_map, cur_neg_rois)
            cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                               -1).data.clone()

            # bbreg rois
            batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
            cur_bbreg_rois = np.copy(cur_bbreg_examples)
            cur_bbreg_rois[:, 0:2] -= np.repeat(
                np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                cur_bbreg_rois.shape[0],
                axis=0,
            )
            scaled_obj_size = float(opts["img_size"]) * jitter_scale[bidx]
            cur_bbreg_rois = samples2maskroi(
                cur_bbreg_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                self.target_bbox[2:4],
                opts["padding"],
            )
            cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois),
                                            axis=1)
            cur_bbreg_rois = Variable(
                torch.from_numpy(cur_bbreg_rois.astype("float32"))).cuda()
            cur_bbreg_feats = self.model.roi_align_model(
                feat_map, cur_bbreg_rois)
            cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0),
                                                   -1).data.clone()

            self.feat_dim = cur_pos_feats.size(-1)

            if bidx == 0:
                pos_feats = cur_pos_feats
                neg_feats = cur_neg_feats
                # bbreg feature
                bbreg_feats = cur_bbreg_feats
                bbreg_examples = cur_bbreg_examples
            else:
                pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
                # bbreg feature
                bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
                bbreg_examples = np.concatenate(
                    (bbreg_examples, cur_bbreg_examples), axis=0)

        if pos_feats.size(0) > opts["n_pos_init"]:
            pos_idx = np.asarray(range(pos_feats.size(0)))
            np.random.shuffle(pos_idx)
            pos_feats = pos_feats[pos_idx[0:opts["n_pos_init"]], :]
        if neg_feats.size(0) > opts["n_neg_init"]:
            neg_idx = np.asarray(range(neg_feats.size(0)))
            np.random.shuffle(neg_idx)
            neg_feats = neg_feats[neg_idx[0:opts["n_neg_init"]], :]

        # bbreg
        if bbreg_feats.size(0) > opts["n_bbreg"]:
            bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
            np.random.shuffle(bbreg_idx)
            bbreg_feats = bbreg_feats[bbreg_idx[0:opts["n_bbreg"]], :]
            bbreg_examples = bbreg_examples[bbreg_idx[0:opts["n_bbreg"]], :]
            # print bbreg_examples.shape

        # open images and crop patch from obj
        extra_obj_size = np.array((opts["img_size"], opts["img_size"]))
        extra_crop_img_size = extra_obj_size * (opts["padding"] + 0.6)
        replicateNum = 100
        for iidx in range(replicateNum):
            extra_target_bbox = np.copy(self.target_bbox)

            extra_scene_box = np.copy(extra_target_bbox)
            extra_scene_box_center = extra_scene_box[
                0:2] + extra_scene_box[2:4] / 2.0
            extra_scene_box_size = extra_scene_box[2:4] * (opts["padding"] +
                                                           0.6)
            extra_scene_box[
                0:2] = extra_scene_box_center - extra_scene_box_size / 2.0
            extra_scene_box[2:4] = extra_scene_box_size

            extra_shift_offset = np.clip(2.0 * np.random.randn(2), -4, 4)
            cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)

            extra_scene_box[0] += extra_shift_offset[0]
            extra_scene_box[1] += extra_shift_offset[1]
            extra_scene_box[2:4] *= cur_extra_scale[0]

            scaled_obj_size = float(opts["img_size"]) / cur_extra_scale[0]

            cur_extra_cropped_image, _ = self.img_crop_model.crop_image(
                cur_image, np.reshape(extra_scene_box, (1, 4)),
                extra_crop_img_size)
            cur_extra_cropped_image = cur_extra_cropped_image.detach()

            cur_extra_pos_examples = gen_samples(
                SampleGenerator("gaussian", (ishape[1], ishape[0]), 0.1, 1.2),
                extra_target_bbox,
                opts["n_pos_init"] / replicateNum,
                opts["overlap_pos_init"],
            )
            cur_extra_neg_examples = gen_samples(
                SampleGenerator("uniform", (ishape[1], ishape[0]), 0.3, 2,
                                1.1),
                extra_target_bbox,
                opts["n_neg_init"] / replicateNum / 4,
                opts["overlap_neg_init"],
            )

            # bbreg sample
            cur_extra_bbreg_examples = gen_samples(
                SampleGenerator("uniform", (ishape[1], ishape[0]), 0.3, 1.5,
                                1.1),
                extra_target_bbox,
                opts["n_bbreg"] / replicateNum / 4,
                opts["overlap_bbreg"],
                opts["scale_bbreg"],
            )

            batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1))
            cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
            cur_extra_pos_rois[:, 0:2] -= np.repeat(
                np.reshape(extra_scene_box[0:2], (1, 2)),
                cur_extra_pos_rois.shape[0],
                axis=0,
            )
            cur_extra_pos_rois = samples2maskroi(
                cur_extra_pos_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                extra_target_bbox[2:4],
                opts["padding"],
            )
            cur_extra_pos_rois = np.concatenate(
                (batch_num, cur_extra_pos_rois), axis=1)

            batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
            cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
            cur_extra_neg_rois[:, 0:2] -= np.repeat(
                np.reshape(extra_scene_box[0:2], (1, 2)),
                cur_extra_neg_rois.shape[0],
                axis=0,
            )
            cur_extra_neg_rois = samples2maskroi(
                cur_extra_neg_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                extra_target_bbox[2:4],
                opts["padding"],
            )
            cur_extra_neg_rois = np.concatenate(
                (batch_num, cur_extra_neg_rois), axis=1)

            # bbreg rois
            batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
            cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
            cur_extra_bbreg_rois[:, 0:2] -= np.repeat(
                np.reshape(extra_scene_box[0:2], (1, 2)),
                cur_extra_bbreg_rois.shape[0],
                axis=0,
            )
            cur_extra_bbreg_rois = samples2maskroi(
                cur_extra_bbreg_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                extra_target_bbox[2:4],
                opts["padding"],
            )
            cur_extra_bbreg_rois = np.concatenate(
                (batch_num, cur_extra_bbreg_rois), axis=1)

            if iidx == 0:
                extra_cropped_image = cur_extra_cropped_image

                extra_pos_rois = np.copy(cur_extra_pos_rois)
                extra_neg_rois = np.copy(cur_extra_neg_rois)
                # bbreg rois
                extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
                extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
            else:
                extra_cropped_image = torch.cat(
                    (extra_cropped_image, cur_extra_cropped_image), dim=0)

                extra_pos_rois = np.concatenate(
                    (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
                extra_neg_rois = np.concatenate(
                    (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
                # bbreg rois
                extra_bbreg_rois = np.concatenate(
                    (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0)
                extra_bbreg_examples = np.concatenate(
                    (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)),
                    axis=0)

        extra_pos_rois = Variable(
            torch.from_numpy(extra_pos_rois.astype("float32"))).cuda()
        extra_neg_rois = Variable(
            torch.from_numpy(extra_neg_rois.astype("float32"))).cuda()
        # bbreg rois
        extra_bbreg_rois = Variable(
            torch.from_numpy(extra_bbreg_rois.astype("float32"))).cuda()

        extra_cropped_image -= 128.0

        extra_feat_maps = self.model(extra_cropped_image, out_layer="conv3")
        # Draw pos/neg samples
        ishape = cur_image.shape

        extra_pos_feats = self.model.roi_align_model(extra_feat_maps,
                                                     extra_pos_rois)
        extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0),
                                               -1).data.clone()

        extra_neg_feats = self.model.roi_align_model(extra_feat_maps,
                                                     extra_neg_rois)
        extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0),
                                               -1).data.clone()
        # bbreg feat
        extra_bbreg_feats = self.model.roi_align_model(extra_feat_maps,
                                                       extra_bbreg_rois)
        extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0),
                                                   -1).data.clone()

        # concatenate extra features to original_features
        pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
        neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
        # concatenate extra bbreg feats to original_bbreg_feats
        bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
        bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples),
                                        axis=0)

        torch.cuda.empty_cache()
        self.model.zero_grad()

        # NOTE: `volatile` is a legacy (pre-0.4) PyTorch flag with no effect on
        # modern versions; torch.no_grad() is the current equivalent.
        self.P4 = torch.autograd.Variable(torch.eye(512 * 3 * 3 + 1).type(
            self.dtype),
                                          volatile=True)
        self.P5 = (torch.autograd.Variable(torch.eye(512 + 1).type(self.dtype),
                                           volatile=True) * 10)
        self.P6 = torch.autograd.Variable(torch.eye(512 + 1).type(self.dtype),
                                          volatile=True)

        self.W4 = torch.autograd.Variable(torch.zeros(512 * 3 * 3 + 1,
                                                      512).type(self.dtype),
                                          volatile=True)
        self.W5 = torch.autograd.Variable(torch.zeros(512 + 1,
                                                      512).type(self.dtype),
                                          volatile=True)
        self.W6 = torch.autograd.Variable(torch.zeros(512 + 1,
                                                      2).type(self.dtype),
                                          volatile=True)

        self.flag_old = 0

        # Initial training
        self.flag_old = train_owm(
            self.model,
            self.criterion,
            self.init_optimizer,
            pos_feats,
            neg_feats,
            opts["maxiter_init"],
            self.P4,
            self.P5,
            self.P6,
            self.W4,
            self.W5,
            self.W6,
            self.flag_old,
        )

        # bbreg train
        if bbreg_feats.size(0) > opts["n_bbreg"]:
            bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
            np.random.shuffle(bbreg_idx)
            bbreg_feats = bbreg_feats[bbreg_idx[0:opts["n_bbreg"]], :]
            bbreg_examples = bbreg_examples[bbreg_idx[0:opts["n_bbreg"]], :]
        self.bbreg = BBRegressor((ishape[1], ishape[0]))
        self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

        if pos_feats.size(0) > opts["n_pos_update"]:
            pos_idx = np.asarray(range(pos_feats.size(0)))
            np.random.shuffle(pos_idx)
            self.pos_feats_all = [
                pos_feats.index_select(
                    0,
                    torch.from_numpy(pos_idx[0:opts["n_pos_update"]]).cuda())
            ]
        if neg_feats.size(0) > opts["n_neg_update"]:
            neg_idx = np.asarray(range(neg_feats.size(0)))
            np.random.shuffle(neg_idx)
            self.neg_feats_all = [
                neg_feats.index_select(
                    0,
                    torch.from_numpy(neg_idx[0:opts["n_neg_update"]]).cuda())
            ]

        self.trans_f = opts["trans_f"]
Code Example #7
    def track(self, image_file):
        self.frame_idx += 1

        # Load image
        cur_image = Image.open(image_file).convert("RGB")
        cur_image = np.asarray(cur_image)

        # Estimate target bbox
        ishape = cur_image.shape
        samples = gen_samples(
            SampleGenerator(
                "gaussian",
                (ishape[1], ishape[0]),
                self.trans_f,
                opts["scale_f"],
                valid=True,
            ),
            self.target_bbox,
            opts["n_samples"],
        )

        padded_x1 = (samples[:, 0] - samples[:, 2] *
                     (opts["padding"] - 1.0) / 2.0).min()
        padded_y1 = (samples[:, 1] - samples[:, 3] *
                     (opts["padding"] - 1.0) / 2.0).min()
        padded_x2 = (samples[:, 0] + samples[:, 2] *
                     (opts["padding"] + 1.0) / 2.0).max()
        padded_y2 = (samples[:, 1] + samples[:, 3] *
                     (opts["padding"] + 1.0) / 2.0).max()
        padded_scene_box = np.asarray(
            (padded_x1, padded_y1, padded_x2 - padded_x1,
             padded_y2 - padded_y1))

        if padded_scene_box[0] > cur_image.shape[1]:
            padded_scene_box[0] = cur_image.shape[1] - 1
        if padded_scene_box[1] > cur_image.shape[0]:
            padded_scene_box[1] = cur_image.shape[0] - 1
        if padded_scene_box[0] + padded_scene_box[2] < 0:
            padded_scene_box[2] = -padded_scene_box[0] + 1
        if padded_scene_box[1] + padded_scene_box[3] < 0:
            padded_scene_box[3] = -padded_scene_box[1] + 1

        crop_img_size = (padded_scene_box[2:4] *
                         ((opts["img_size"], opts["img_size"]) /
                          self.target_bbox[2:4])).astype("int64")
        cropped_image, cur_image_var = self.img_crop_model.crop_image(
            cur_image, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.0

        self.model.eval()
        feat_map = self.model(cropped_image, out_layer="conv3")

        # relative target bbox with padded_scene_box
        rel_target_bbox = np.copy(self.target_bbox)
        rel_target_bbox[0:2] -= padded_scene_box[0:2]

        # Extract sample features and get target location
        batch_num = np.zeros((samples.shape[0], 1))
        sample_rois = np.copy(samples)
        sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2],
                                                    (1, 2)),
                                         sample_rois.shape[0],
                                         axis=0)
        sample_rois = samples2maskroi(
            sample_rois,
            self.model.receptive_field,
            (opts["img_size"], opts["img_size"]),
            self.target_bbox[2:4],
            opts["padding"],
        )
        sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
        sample_rois = Variable(torch.from_numpy(
            sample_rois.astype("float32"))).cuda()
        sample_feats = self.model.roi_align_model(feat_map, sample_rois)
        sample_feats = sample_feats.view(sample_feats.size(0), -1).clone()
        sample_scores = self.model(sample_feats, in_layer="fc4")
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.data.cpu().numpy()
        target_score = top_scores.data.mean()
        self.target_bbox = samples[top_idx].mean(axis=0)

        success = target_score > opts["success_thr"]

        # Expand search area at failure
        if success:
            self.trans_f = opts["trans_f"]
        else:
            self.trans_f = opts["trans_f_expand"]

        # Bbox regression
        if success:
            bbreg_feats = sample_feats[top_idx, :]
            bbreg_samples = samples[top_idx]
            bbreg_samples = self.bbreg.predict(bbreg_feats.data, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = self.target_bbox

        # Data collect
        if success:

            # Draw pos/neg samples
            pos_examples = gen_samples(
                SampleGenerator("gaussian", (ishape[1], ishape[0]), 0.1, 1.2),
                self.target_bbox,
                opts["n_pos_update"],
                opts["overlap_pos_update"],
            )
            neg_examples = gen_samples(
                SampleGenerator("uniform", (ishape[1], ishape[0]), 1.5, 1.2),
                self.target_bbox,
                opts["n_neg_update"],
                opts["overlap_neg_update"],
            )

            padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                         (opts["padding"] - 1.0) / 2.0).min()
            padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                         (opts["padding"] - 1.0) / 2.0).min()
            padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                         (opts["padding"] + 1.0) / 2.0).max()
            padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                         (opts["padding"] + 1.0) / 2.0).max()
            padded_scene_box = np.reshape(
                np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                            padded_y2 - padded_y1)),
                (1, 4),
            )

            scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
            jitter_scale = [1.0]

            for bidx in range(0, scene_boxes.shape[0]):
                crop_img_size = (scene_boxes[bidx, 2:4] *
                                 ((opts["img_size"], opts["img_size"]) /
                                  self.target_bbox[2:4])
                                 ).astype("int64") * jitter_scale[bidx]
                cropped_image, cur_image_var = self.img_crop_model.crop_image(
                    cur_image, np.reshape(scene_boxes[bidx], (1, 4)),
                    crop_img_size)
                cropped_image = cropped_image - 128.0

                feat_map = self.model(cropped_image, out_layer="conv3")

                rel_target_bbox = np.copy(self.target_bbox)
                rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

                batch_num = np.zeros((pos_examples.shape[0], 1))
                cur_pos_rois = np.copy(pos_examples)
                cur_pos_rois[:, 0:2] -= np.repeat(
                    np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                    cur_pos_rois.shape[0],
                    axis=0,
                )
                scaled_obj_size = float(opts["img_size"]) * jitter_scale[bidx]
                cur_pos_rois = samples2maskroi(
                    cur_pos_rois,
                    self.model.receptive_field,
                    (scaled_obj_size, scaled_obj_size),
                    self.target_bbox[2:4],
                    opts["padding"],
                )
                cur_pos_rois = np.concatenate((batch_num, cur_pos_rois),
                                              axis=1)
                cur_pos_rois = Variable(
                    torch.from_numpy(cur_pos_rois.astype("float32"))).cuda()
                cur_pos_feats = self.model.roi_align_model(
                    feat_map, cur_pos_rois)
                cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                                   -1).data.clone()

                batch_num = np.zeros((neg_examples.shape[0], 1))
                cur_neg_rois = np.copy(neg_examples)
                cur_neg_rois[:, 0:2] -= np.repeat(
                    np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                    cur_neg_rois.shape[0],
                    axis=0,
                )
                cur_neg_rois = samples2maskroi(
                    cur_neg_rois,
                    self.model.receptive_field,
                    (scaled_obj_size, scaled_obj_size),
                    self.target_bbox[2:4],
                    opts["padding"],
                )
                cur_neg_rois = np.concatenate((batch_num, cur_neg_rois),
                                              axis=1)
                cur_neg_rois = Variable(
                    torch.from_numpy(cur_neg_rois.astype("float32"))).cuda()
                cur_neg_feats = self.model.roi_align_model(
                    feat_map, cur_neg_rois)
                cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                                   -1).data.clone()

                self.feat_dim = cur_pos_feats.size(-1)

                if bidx == 0:
                    pos_feats = cur_pos_feats  # index select
                    neg_feats = cur_neg_feats
                else:
                    pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                    neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)

            if pos_feats.size(0) > opts["n_pos_update"]:
                pos_idx = np.asarray(range(pos_feats.size(0)))
                np.random.shuffle(pos_idx)
                pos_feats = pos_feats.index_select(
                    0,
                    torch.from_numpy(pos_idx[0:opts["n_pos_update"]]).cuda())
            if neg_feats.size(0) > opts["n_neg_update"]:
                neg_idx = np.asarray(range(neg_feats.size(0)))
                np.random.shuffle(neg_idx)
                neg_feats = neg_feats.index_select(
                    0,
                    torch.from_numpy(neg_idx[0:opts["n_neg_update"]]).cuda())

            self.pos_feats_all.append(pos_feats)
            self.neg_feats_all.append(neg_feats)

            if len(self.pos_feats_all) > opts["n_frames_long"]:
                del self.pos_feats_all[0]
            if len(self.neg_feats_all) > opts["n_frames_short"]:
                del self.neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts["n_frames_short"], len(self.pos_feats_all))
            pos_data = torch.stack(self.pos_feats_all[-nframes:],
                                   0).view(-1, self.feat_dim)
            neg_data = torch.stack(self.neg_feats_all,
                                   0).view(-1, self.feat_dim)
            self.flag_old = train(
                self.model,
                self.criterion,
                self.update_optimizer,
                pos_data,
                neg_data,
                opts["maxiter_update"],
                self.W4,
                self.W5,
                self.W6,
                self.flag_old,
            )

        # Long term update
        elif self.frame_idx % opts["long_interval"] == 0:
            nframes = min(opts["n_frames_short"], len(self.pos_feats_all))
            pos_data = torch.stack(self.pos_feats_all[-nframes:],
                                   0).view(-1, self.feat_dim)
            neg_data = torch.stack(self.neg_feats_all,
                                   0).view(-1, self.feat_dim)
            self.flag_old = train_owm(
                self.model,
                self.criterion,
                self.update_optimizer_owm,
                pos_data,
                neg_data,
                opts["maxiter_update"],
                self.P4,
                self.P5,
                self.P6,
                self.W4,
                self.W5,
                self.W6,
                self.flag_old,
            )

        return bbreg_bbox
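
Every success test and data-collection step in these examples leans on `overlap_ratio`, the (x, y, w, h) intersection-over-union helper. As a reference point, here is a minimal sketch of its usual MDNet-style semantics (an assumption about the utility's behavior, not the repository's exact file):

import numpy as np

def overlap_ratio(rect1, rect2):
    # Boxes are (x, y, w, h); lift to 2-D so single boxes broadcast too.
    rect1 = np.atleast_2d(rect1).astype(float)
    rect2 = np.atleast_2d(rect2).astype(float)
    left = np.maximum(rect1[:, 0], rect2[:, 0])
    right = np.minimum(rect1[:, 0] + rect1[:, 2], rect2[:, 0] + rect2[:, 2])
    top = np.maximum(rect1[:, 1], rect2[:, 1])
    bottom = np.minimum(rect1[:, 1] + rect1[:, 3], rect2[:, 1] + rect2[:, 3])
    intersect = np.maximum(0.0, right - left) * np.maximum(0.0, bottom - top)
    union = rect1[:, 2] * rect1[:, 3] + rect2[:, 2] * rect2[:, 3] - intersect
    return intersect / np.maximum(union, 1e-12)
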
Code example #8
    def search_track(self, track_num, frame_idx, init_bbox, previous_num,
                     init_conf):
        # img_list, init_bbox

        # Init bbox

        # result = np.zeros((len(img_list), 4))
        # result_bb = np.zeros((len(img_list), 4))
        # result[0] = target_bbox
        # result_bb[0] = target_bbox

        # superorange params
        track_list = [(-1, -1)] * (self.TRACKLET_NUM + 1)
        bbox_list = [(frame_idx, init_bbox, init_conf)]
        IOU_count = 0
        # last_bbox = -1
        # next_frame = -1
        #        frameA_path = os.path.join(self.FILE_PATH, str(frame_idx) + "."+ext)

        frameA_path = os.path.join(self.FILE_PATH,
                                   str(frame_idx).zfill(5) + ".png")

        target_bbox = np.array(init_bbox)
        # Init model
        model = MDNet(opts['model_path'])
        if opts['use_gpu']:
            model = model.cuda()

        # Init criterion and optimizer
        criterion = BCELoss()
        model.set_learnable_params(opts['ft_layers'])
        init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
        update_optimizer = set_optimizer(model, opts['lr_update'],
                                         opts['lr_mult'])

        tic = time.time()
        # Load first image
        image = Image.open(frameA_path).convert('RGB')

        # Draw pos/neg samples
        pos_examples = SampleGenerator('gaussian', image.size,
                                       opts['trans_pos'], opts['scale_pos'])(
                                           target_bbox, opts['n_pos_init'],
                                           opts['overlap_pos_init'])

        neg_examples = np.concatenate([
            SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                            opts['scale_neg_init'])(target_bbox,
                                                    int(opts['n_neg_init'] *
                                                        0.5),
                                                    opts['overlap_neg_init']),
            SampleGenerator('whole', image.size)(target_bbox,
                                                 int(opts['n_neg_init'] * 0.5),
                                                 opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        # Extract pos/neg features
        if len(pos_examples) == 0 or len(neg_examples) == 0:
            print("!! no pos/neg examples generated; skipping this track !!")
            return
        pos_feats = self.forward_samples(model, image, pos_examples)
        neg_feats = self.forward_samples(model, image, neg_examples)

        # Initial training
        self.train(model, criterion, init_optimizer, pos_feats, neg_feats,
                   opts['maxiter_init'])
        del init_optimizer, neg_feats
        torch.cuda.empty_cache()

        # Train bbox regressor
        bbreg_examples = SampleGenerator(
            'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
            opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                                  opts['overlap_bbreg'])
        bbreg_feats = self.forward_samples(model, image, bbreg_examples)
        bbreg = BBRegressor(image.size)
        bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
        del bbreg_feats
        torch.cuda.empty_cache()

        # Init sample generators for update
        sample_generator = SampleGenerator('gaussian', image.size,
                                           opts['trans'], opts['scale'])
        pos_generator = SampleGenerator('gaussian', image.size,
                                        opts['trans_pos'], opts['scale_pos'])
        neg_generator = SampleGenerator('uniform', image.size,
                                        opts['trans_neg'], opts['scale_neg'])

        # Init pos/neg features for update
        neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                     opts['overlap_neg_init'])
        neg_feats = self.forward_samples(model, image, neg_examples)
        pos_feats_all = [pos_feats]
        neg_feats_all = [neg_feats]

        spf_total = time.time() - tic

        # fps = len(img_list) / spf_total
        # return result, result_bb, fps

        # Main loop
        for i in range(0, self.TRACKLET_NUM):  # next TRACKLET_NUM frames
            frameB_idx = frame_idx + i + 1
            #    print("frameB_idx="+str(frameB_idx))
            if frameB_idx > self.frame_num:
                break
            else:

                frameB_path = os.path.join(self.FILE_PATH,
                                           str(frameB_idx).zfill(5) + ".png")

                # ------------track by MDNet------------
                # Load image
                image = Image.open(frameB_path).convert('RGB')

                # Estimate target bbox
                samples = sample_generator(target_bbox, opts['n_samples'])
                sample_scores = self.forward_samples(model,
                                                     image,
                                                     samples,
                                                     out_layer='fc6')

                top_scores, top_idx = sample_scores[:, 1].topk(5)
                top_idx = top_idx.cpu()
                target_score = top_scores.mean()
                target_bbox = samples[top_idx]
                if top_idx.shape[0] > 1:
                    target_bbox = target_bbox.mean(axis=0)
                success = target_score > 0

                # Expand search area at failure
                if success:
                    sample_generator.set_trans(opts['trans'])
                else:
                    sample_generator.expand_trans(opts['trans_limit'])

                # Bbox regression
                if success:
                    bbreg_samples = samples[top_idx]
                    if top_idx.shape[0] == 1:
                        bbreg_samples = bbreg_samples[None, :]
                    bbreg_feats = self.forward_samples(model, image,
                                                       bbreg_samples)
                    bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
                    bbreg_bbox = bbreg_samples.mean(axis=0)
                else:
                    bbreg_bbox = target_bbox

                # Save result
                # result[i] = target_bbox
                # result_bb[i] = bbreg_bbox

        #       print(target_bbox)

                # Data collect
                if success:
                    pos_examples = pos_generator(target_bbox,
                                                 opts['n_pos_update'],
                                                 opts['overlap_pos_update'])
                    pos_feats = self.forward_samples(model, image,
                                                     pos_examples)
                    pos_feats_all.append(pos_feats)
                    if len(pos_feats_all) > opts['n_frames_long']:
                        del pos_feats_all[0]

                    neg_examples = neg_generator(target_bbox,
                                                 opts['n_neg_update'],
                                                 opts['overlap_neg_update'])
                    neg_feats = self.forward_samples(model, image,
                                                     neg_examples)
                    neg_feats_all.append(neg_feats)
                    if len(neg_feats_all) > opts['n_frames_short']:
                        del neg_feats_all[0]

                # Short term update
                if not success:
                    nframes = min(opts['n_frames_short'], len(pos_feats_all))
                    pos_data = torch.cat(pos_feats_all[-nframes:], 0)
                    neg_data = torch.cat(neg_feats_all, 0)
                    self.train(model, criterion, update_optimizer, pos_data,
                               neg_data, opts['maxiter_update'])

                # Long term update
                elif i % opts['long_interval'] == 0:
                    pos_data = torch.cat(pos_feats_all, 0)
                    neg_data = torch.cat(neg_feats_all, 0)
                    self.train(model, criterion, update_optimizer, pos_data,
                               neg_data, opts['maxiter_update'])

                torch.cuda.empty_cache()

                bboxT = bbreg_bbox
                anyIOU = False

                for bbox_id, bboxD in enumerate(frameBBoxList[frameB_idx]):
                    if not bboxD.match:
                        IOU_ratio = overlap_ratio(bboxD.getRec(), bboxT)
                        print("IOUratio={}".format(IOU_ratio))
                        #             print(IOU_ratio)
                        if IOU_ratio > 0.3:
                            #                print("overlap")
                            track_list[i] = (frameB_idx, bbox_id)
                            IOU_count = IOU_count + 1
                            bbox_list.append(
                                (frameB_idx, bboxD.getRec(), bboxD.confidence))
                            anyIOU = True
                            break
                if not anyIOU:
                    bbox_list.append((frameB_idx, bboxT, 0))

        print("bbox_list=====")
        print(bbox_list)

        print("track_list=====")
        print(track_list)

        ##debug   show track_list
        # for idx in range(0,10):
        #    print(track_list[idx])
        #    print(bbox_list[idx])

        if IOU_count >= self.IOU_count_th:  # add track
            for idx in range(0, self.TRACKLET_NUM + 1):
                bbox_id = track_list[idx][1]
                if bbox_id != -1:
                    # print(frame_idx+idx+1)
                    # print(bbox_id)
                    frameBBoxList[frame_idx + idx + 1][bbox_id].setMatch()

            next_track_frame = -1
            init_bbox_next = []
            bbox_length = len(bbox_list)
            #!!! track_list length=10, bbox_list length=11
            for ii in range(1, bbox_length):

                idx = bbox_length - ii
                print("{} {} {}".format(ii, idx, track_list[idx][0]))
                if track_list[idx - 1][0] != -1:
                    next_track_frame = (-1) * ii  # count from back
                    print("idx ={} ii={} next_track_frame={}".format(
                        idx, ii, next_track_frame))
                    init_bbox_next = bbox_list[idx]
                    break

            # remove overlap range
            start_rm = -1
            while start_rm >= previous_num:
                del self.GLOBAL_TRACK_LIST[track_num][-1]  # rm 1 per move
                # GLOBAL_TRACK_LIST[track_num].remove(-1)
                start_rm = start_rm - 1

            if previous_num == 0:
                self.GLOBAL_TRACK_LIST.append(bbox_list)
            else:
                self.GLOBAL_TRACK_LIST[track_num].extend(bbox_list)

            next_start_frame = frame_idx + bbox_length + next_track_frame

            init_bbox_next = bbox_list[bbox_length + next_track_frame][1]
            init_conf_next = bbox_list[bbox_length + next_track_frame][2]
            # print(GLOBAL_TRACK_LIST)
            print("( " + str(track_num) + " " + str(next_start_frame) + " " +
                  str(next_track_frame) + ")")
            print("init_bbox_next:" + str(init_bbox_next))
            self.search_track(
                track_num, next_start_frame, init_bbox_next, next_track_frame,
                init_conf_next
            )  # next_start_frame = 310  next_track_frame=-3(will be deleted)
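
The `SampleGenerator(...)(bbox, n, overlap_range)` calls above draw candidate boxes by jittering the previous box's center and scale. A stripped-down gaussian variant under MDNet-style conventions (a hypothetical helper for illustration; the real class additionally filters candidates by their overlap with the reference box):

import numpy as np

def gaussian_samples(bbox, n, img_size, trans=0.6, scale=1.05):
    # bbox is (x, y, w, h); img_size is (W, H), as with PIL's Image.size.
    x, y, w, h = bbox
    cx, cy = x + w / 2.0, y + h / 2.0
    mean_wh = (w + h) / 2.0
    # Jitter the center proportionally to object size; jitter scale log-normally.
    dx = trans * mean_wh * np.clip(0.5 * np.random.randn(n), -1, 1)
    dy = trans * mean_wh * np.clip(0.5 * np.random.randn(n), -1, 1)
    ds = scale ** np.clip(0.5 * np.random.randn(n), -1, 1)
    ws, hs = w * ds, h * ds
    xs = np.clip(cx + dx - ws / 2.0, 0, img_size[0] - ws)
    ys = np.clip(cy + dy - hs / 2.0, 0, img_size[1] - hs)
    return np.stack([xs, ys, ws, hs], axis=1)
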
Code example #9
def run_mdnet(img_list,
              init_bbox,
              gt=None,
              savefig_dir='',
              display=False,
              loss_index=1,
              model_path=opts['model_path'],
              seq_name=None):

    #def run_mdnet(k, img_list, init_bbox, gt=None, savefig_dir='', display=False,
    #              loss_index=1, model_path=opts['model_path'], seq_name=None):

    ############################
    if fewer_images:
        num_images = min(sequence_len_limit, len(img_list))
    else:
        num_images = len(img_list)
    ############################

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    # Init iou and pred_iou
    iou_list = np.zeros((len(img_list), 1))  # shape: (len(img_list), 1); per-frame IoUs
    iou_list[0] = 1.0  ### in first frame gt=result_bb (by definition)

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    # model = MDNet(model_path=opts['model_path'],use_gpu=opts['use_gpu'])
    model = MDNet(model_path=model_path, use_gpu=opts['use_gpu'])
    if opts['use_gpu']:
        model = model.cuda()

    print('Init criterion and optimizer')
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    print('Draw pos/neg samples')
    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    print('Extract pos/neg features')
    # Extract pos/neg features
    # fewer_images only limits the frame count above; extraction is identical
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    print('Initial training')
    # Initial training
    train(model,
          criterion,
          init_optimizer,
          pos_feats,
          neg_feats,
          opts['maxiter_init'],
          loss_index=loss_index)  ### iou_pred_list
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    print('Train bbox regressor')
    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(
        model, image,
        bbreg_examples)  # calc features ### shape: [927, 4608] ###
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    print('Init pos/neg features for update')
    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)
            #################
            num_gts = np.minimum(gt.shape[0], num_images)
            # print('num_gts.shape: ', num_gts.shape)
            gt_centers = gt[:num_gts, :2] + gt[:num_gts, 2:] / 2
            result_centers = np.zeros_like(gt[:num_gts, :2])
            result_centers[0] = gt_centers[0]
            result_ious = np.zeros(num_gts, dtype='float64')
            result_ious[0] = 1.
            #################

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    print('Main Loop')
    # Main loop
    spf_total = 0  # exclude initialization time from the FPS accounting
    for i in tqdm(range(1, num_images)):
        # for i in range(1, len(img_list)):
        #print('Frame: ', i)
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        #print('Estimate target bbox (in run_mdnet)')
        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        #print('Bbox regression (in run_mdnet)')
        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        if gt is not None:
            iou_list[i] = overlap_ratio(gt[i], result_bb[i])

        ###########################################
        # identify tracking failure and abort when in VOT mode

        IoU = overlap_ratio(result_bb[i], gt[i])[0] if gt is not None else 1.0
        if (IoU == 0) and init_after_loss:
            print('    * lost track in frame %d since init*' % (i))
            result_distances = scipy.spatial.distance.cdist(
                result_centers[:i], gt_centers[:i],
                metric='euclidean').diagonal()
            num_images_tracked = i - 1  # we don't count frame 0 and current frame (lost track)

            im.set_data(image)
            if gt is not None:
                if i < gt.shape[0]:
                    gt_rect.set_xy(gt[i, :2])
                    gt_rect.set_width(gt[i, 2])
                    gt_rect.set_height(gt[i, 3])
                else:
                    gt_rect.set_xy(np.array([np.nan, np.nan]))
                    gt_rect.set_width(np.nan)
                    gt_rect.set_height(np.nan)

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            plt.pause(.01)
            plt.draw()

            print(
                'Finished identify tracking failure and abort when in VOT mode'
            )
            return (result[:i], result_bb[:i], num_images_tracked, spf_total,
                    result_distances, result_ious[:i], True)
        ########################################

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf
        #print('Time: ', spf)

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

                #################
                result_ious[i] = overlap_ratio(result_bb[i], gt[i])[0]
                result_centers[i] = result_bb[i, :2] + result_bb[i, 2:] / 2
                #################

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        ####################################
        if detailed_printing:
            if gt is None:
                print("      Frame %d/%d, Score %.3f, Time %.3f" % \
                      (i, num_images-1, target_score, spf))
            else:
                if i < gt.shape[0]:
                    print("      Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                        (i, num_images-1, overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))
                else:
                    print("      Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                        (i, num_images-1, overlap_ratio(np.array([np.nan,np.nan,np.nan,np.nan]), result_bb[i])[0], target_score, spf))
        ####################################

        # if gt is None:
        #     print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'
        #         .format(i, len(img_list), target_score, spf))
        # else:
        #     overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
        #     print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'
        #         .format(i, len(img_list), overlap[i], target_score, spf))

    ########################
    plt.close()
    result_distances = scipy.spatial.distance.cdist(
        result_centers, gt_centers, metric='euclidean').diagonal()
    num_images_tracked = num_images - 1  # frame 0 (initialization) is not counted
    print('    main loop finished, %d frames' % (num_images))

    print('mean IoU: ', iou_list.mean())
    print('Finished run_mdnet()')

    return result, result_bb, num_images_tracked, spf_total, result_distances, result_ious, False
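
One note on the bookkeeping above: `scipy.spatial.distance.cdist(result_centers, gt_centers).diagonal()` builds a full N x N distance matrix only to keep its diagonal. The per-frame center error can be computed directly, with the same result (assuming both arrays hold one (x, y) center per frame):

import numpy as np

def center_errors(result_centers, gt_centers):
    # Euclidean distance between predicted and ground-truth centers, per frame.
    return np.linalg.norm(result_centers - gt_centers, axis=1)
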
Code example #10
File: neomake_6891_1.py Project: abnerwang/VTAAN
def run_vtaan(img_list, init_bbox, gt=None, savefig_dir='', display=False):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    model_g = NetG()
    if opts['use_gpu']:
        model = model.cuda()
        model_g = model_g.cuda()
    GBP = guided_backprop.GuidedBackprop(model, 1)

    # Init criterion and optimizer
    criterion = BCELoss()
    criterion_g = torch.nn.MSELoss(reduction='sum')
    model.set_learnable_params(opts['ft_layers'])
    model_g.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    pos_imgids = np.array([[0]] * pos_feats.size(0))
    neg_imgids = np.array([[0]] * neg_feats.size(0))

    feat_dim = pos_feats.size(-1)

    # Initial training
    train(model, None, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], pos_imgids, pos_examples, neg_imgids,
          neg_examples, img_list, GBP)
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()
    g_pretrain(model, model_g, criterion_g, pos_feats)
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    neg_feats_all = [neg_feats[:opts['n_neg_update']]]

    pos_examples_all = [pos_examples[:opts['n_pos_update']]]
    neg_examples_all = [neg_examples[:opts['n_neg_update']]]

    pos_imgids_all = [pos_imgids[:opts['n_pos_update']]]
    neg_imgids_all = [neg_imgids[:opts['n_neg_update']]]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
                del pos_examples_all[0]
                del pos_imgids_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)

            pos_examples_all.append(pos_examples)
            neg_examples_all.append(neg_examples)

            pos_imgids_all.append(np.array([[i]] * pos_feats.size(0)))
            neg_imgids_all.append(np.array([[i]] * neg_feats.size(0)))

            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]
                del neg_examples_all[0]
                del neg_imgids_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:],
                                   0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)

            pos_examples_data = torch.from_numpy(
                np.stack(pos_examples_all[-nframes:], 0)).view(-1, 4).numpy()
            neg_examples_data = torch.from_numpy(np.stack(neg_examples_all,
                                                          0)).view(-1,
                                                                   4).numpy()

            pos_imgids_data = torch.from_numpy(
                np.stack(pos_imgids_all[-nframes:], 0)).view(-1, 1).numpy()
            neg_imgids_data = torch.from_numpy(np.stack(neg_imgids_all,
                                                        0)).view(-1,
                                                                 1).numpy()

            train(model, None, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'], pos_imgids_data, pos_examples_data,
                  neg_imgids_data, neg_examples_data, img_list, GBP)

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)

            pos_examples_data = torch.from_numpy(np.stack(pos_examples_all,
                                                          0)).view(-1,
                                                                   4).numpy()
            neg_examples_data = torch.from_numpy(np.stack(neg_examples_all,
                                                          0)).view(-1,
                                                                   4).numpy()

            pos_imgids_data = torch.from_numpy(np.stack(pos_imgids_all,
                                                        0)).view(-1,
                                                                 1).numpy()
            neg_imgids_data = torch.from_numpy(np.stack(neg_imgids_all,
                                                        0)).view(-1,
                                                                 1).numpy()

            # train(model, model_g, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'],
            #       pos_imgids_data, pos_examples_data, neg_imgids_data, neg_examples_data, img_list, GBP)
            train(model, model_g, criterion, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'], None, None, None, None,
                  img_list, GBP)

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i + 1, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i + 1, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps
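
All of the main loops above maintain the same pair of bounded buffers: positive features kept for `n_frames_long` frames and consumed by the long-term update every `long_interval` frames, negatives kept for `n_frames_short` frames and consumed by the short-term update on failure. A condensed sketch of that bookkeeping with illustrative names:

from collections import deque

class FeatureMemory:
    def __init__(self, n_frames_long=100, n_frames_short=30):
        self.pos = deque(maxlen=n_frames_long)   # deque drops the oldest entry itself
        self.neg = deque(maxlen=n_frames_short)

    def collect(self, pos_feats, neg_feats):
        self.pos.append(pos_feats)
        self.neg.append(neg_feats)

    def short_term(self, n_frames_short=30):
        # Most recent positives only, plus every retained negative.
        return list(self.pos)[-n_frames_short:], list(self.neg)

    def long_term(self):
        return list(self.pos), list(self.neg)
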
Code example #11
    def init(self, image, init_bbox):
        self.rate = init_bbox[2] / init_bbox[3]
        self.target_bbox = np.array(init_bbox)
        self.init_bbox = np.array(init_bbox)

        self.result.append(self.target_bbox)
        self.result_bb.append(self.target_bbox)
        image = np.asarray(image)

        # Init model
        bbreg_examples = gen_samples(
            SampleGenerator('uniform', image.shape, 0.3, 1.5,
                            1.1), self.target_bbox, opts['n_bbreg'],
            opts['overlap_bbreg'], opts['scale_bbreg'])
        bbreg_feats = forward_samples(self.model, image, bbreg_examples)
        # image is now a numpy array, so PIL-style .size (W, H) is unavailable;
        # shape[1::-1] yields (W, H)
        self.bbreg = BBRegressor(image.shape[1::-1])
        self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

        pos_examples = gen_samples(
            SampleGenerator('gaussian', image.shape, 0.1, 1.2),
            self.target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
        neg_examples = np.concatenate([
            gen_samples(SampleGenerator('uniform', image.shape, 1, 2,
                                        1.1), self.target_bbox,
                        opts['n_neg_init'] // 2, opts['overlap_neg_init']),
            gen_samples(SampleGenerator('whole', image.shape, 0, 1.2,
                                        1.1), self.target_bbox,
                        opts['n_neg_init'] // 2, opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        pos_feats = forward_samples(self.model, image, pos_examples)
        neg_feats = forward_samples(self.model, image, neg_examples)
        train(self.model, self.criterion, self.init_optimizer, pos_feats,
              neg_feats, opts['maxiter_init'])
        self.deta_flag = init_actor(self.actor, image, self.target_bbox)
        self.init_generator = SampleGenerator('gaussian',
                                              image.shape,
                                              opts['trans_f'],
                                              1,
                                              valid=False)
        self.sample_generator = SampleGenerator('gaussian',
                                                image.shape,
                                                opts['trans_f'],
                                                opts['scale_f'],
                                                valid=False)
        self.pos_generator = SampleGenerator('gaussian', image.shape, 0.1, 1.2)
        self.neg_generator = SampleGenerator('uniform', image.shape, 1.5, 1.2)
        self.pos_feats_all = [pos_feats[:opts['n_pos_update']]]
        self.neg_feats_all = [neg_feats[:opts['n_neg_update']]]
        pos_score = forward_samples(self.model,
                                    image,
                                    np.array(init_bbox).reshape([1, 4]),
                                    out_layer='fc6')
        self.img_learn = [image]
        self.pos_learn = [init_bbox]
        self.score_pos = [pos_score.cpu().numpy()[0][1]]
        self.frame_learn = [0]
        self.pf_frame = []
        self.imageVar_first = cv2.Laplacian(
            crop_image_blur(np.array(image), self.target_bbox),
            cv2.CV_64F).var()
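
The `cv2.Laplacian(..., cv2.CV_64F).var()` value stored in `imageVar_first` is the common variance-of-Laplacian focus measure: blurry crops have weak second derivatives and therefore low variance. A self-contained version of the test (the threshold of 100 mirrors the `imageVar > 100` check in the next example's `update`; `crop_image_blur` above is the project's cropping helper):

import cv2
import numpy as np

def is_blurry(crop: np.ndarray, threshold: float = 100.0) -> bool:
    # Variance of the Laplacian response; small values mean few sharp edges.
    return cv2.Laplacian(crop, cv2.CV_64F).var() < threshold
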
Code example #12
class ACTTracker(Tracker):
    def __init__(self, net_path=None):
        super().__init__(name='ACTTracker', is_deterministic=True)
        np.random.seed(123)
        torch.manual_seed(456)
        torch.cuda.manual_seed(789)
        self.model = MDNet()
        self.actor = Actor()
        self.result = []
        self.result_bb = []
        self.success = 1
        if opts['use_gpu']:
            self.model = self.model.cuda()
            self.actor = self.actor.cuda()
        self.model.set_learnable_params(opts['ft_layers'])
        self.criterion = BinaryLoss()
        self.init_optimizer = set_optimizer(self.model, opts['lr_init'])
        self.update_optimizer = set_optimizer(self.model, opts['lr_update'])
        self.detection = 0
        self.frame = 0

    def init(self, image, init_bbox):
        self.rate = init_bbox[2] / init_bbox[3]
        self.target_bbox = np.array(init_bbox)
        self.init_bbox = np.array(init_bbox)

        self.result.append(self.target_bbox)
        self.result_bb.append(self.target_bbox)
        image = np.asarray(image)

        # Init model
        bbreg_examples = gen_samples(
            SampleGenerator('uniform', image.shape, 0.3, 1.5,
                            1.1), self.target_bbox, opts['n_bbreg'],
            opts['overlap_bbreg'], opts['scale_bbreg'])
        bbreg_feats = forward_samples(self.model, image, bbreg_examples)
        # image is now a numpy array, so PIL-style .size (W, H) is unavailable;
        # shape[1::-1] yields (W, H)
        self.bbreg = BBRegressor(image.shape[1::-1])
        self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

        pos_examples = gen_samples(
            SampleGenerator('gaussian', image.shape, 0.1, 1.2),
            self.target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
        neg_examples = np.concatenate([
            gen_samples(SampleGenerator('uniform', image.shape, 1, 2,
                                        1.1), self.target_bbox,
                        opts['n_neg_init'] // 2, opts['overlap_neg_init']),
            gen_samples(SampleGenerator('whole', image.shape, 0, 1.2,
                                        1.1), self.target_bbox,
                        opts['n_neg_init'] // 2, opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        pos_feats = forward_samples(self.model, image, pos_examples)
        neg_feats = forward_samples(self.model, image, neg_examples)
        train(self.model, self.criterion, self.init_optimizer, pos_feats,
              neg_feats, opts['maxiter_init'])
        self.deta_flag = init_actor(self.actor, image, self.target_bbox)
        self.init_generator = SampleGenerator('gaussian',
                                              image.shape,
                                              opts['trans_f'],
                                              1,
                                              valid=False)
        self.sample_generator = SampleGenerator('gaussian',
                                                image.shape,
                                                opts['trans_f'],
                                                opts['scale_f'],
                                                valid=False)
        self.pos_generator = SampleGenerator('gaussian', image.shape, 0.1, 1.2)
        self.neg_generator = SampleGenerator('uniform', image.shape, 1.5, 1.2)
        self.pos_feats_all = [pos_feats[:opts['n_pos_update']]]
        self.neg_feats_all = [neg_feats[:opts['n_neg_update']]]
        pos_score = forward_samples(self.model,
                                    image,
                                    np.array(init_bbox).reshape([1, 4]),
                                    out_layer='fc6')
        self.img_learn = [image]
        self.pos_learn = [init_bbox]
        self.score_pos = [pos_score.cpu().numpy()[0][1]]
        self.frame_learn = [0]
        self.data_frame = [0]  # frames whose features are already cached; read in update()
        self.pf_frame = []
        self.imageVar_first = cv2.Laplacian(
            crop_image_blur(np.array(image), self.target_bbox),
            cv2.CV_64F).var()

    def update(self, image):
        # image = loader(image.resize((225,225),Image.ANTIALIAS)).unsqueeze(0).cuda()
        self.frame += 1
        update_length = 10
        np_image = np.array(image)
        if self.imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np_image, self.target_bbox), cv2.CV_64F).var()
        else:
            imageVar = 200
        img_l = getbatch_actor(np_image, self.target_bbox.reshape([1, 4]))
        torch_image = loader(image.resize(
            (225, 225), Image.ANTIALIAS)).unsqueeze(0).cuda()
        deta_pos = self.actor(img_l, torch_image)
        deta_pos = deta_pos.data.clone().cpu().numpy()
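        # The actor's output is a 3-vector (dx, dy, ds); the checks below zero
        # the scale term whenever it looks unreliable: a flagged initialization,
        # an implausibly large scale jump, or the frame immediately after a
        # failure recorded in pf_frame.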
        if self.deta_flag:
            deta_pos[:, 2] = 0
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if len(self.pf_frame) and self.frame == (self.pf_frame[-1] + 1):
            deta_pos[:, 2] = 0
        pos_ = np.round(
            move_crop(self.target_bbox, deta_pos,
                      (image.size[1], image.size[0]), self.rate))
        r = forward_samples(self.model,
                            image,
                            np.array(pos_).reshape([1, 4]),
                            out_layer='fc6')
        r = r.cpu().numpy()
        if r[0][1] > 0 and imageVar > 100:
            self.target_bbox = pos_
            target_score = r[0][1]
            bbreg_bbox = pos_
            success = 1
            fin_score = r[0][1]
            self.img_learn.append(image)
            self.pos_learn.append(self.target_bbox)
            self.score_pos.append(fin_score)
            self.frame_learn.append(self.frame)
            while len(self.img_learn) > update_length * 2:
                del self.img_learn[0]
                del self.pos_learn[0]
                del self.score_pos[0]
                del self.frame_learn[0]
            # record this frame, growing the result lists when needed
            if len(self.result) <= self.frame:
                self.result.append(self.target_bbox)
                self.result_bb.append(bbreg_bbox)
            else:
                self.result[self.frame] = self.target_bbox
                self.result_bb[self.frame] = bbreg_bbox
        else:
            self.detection += 1
            success = 0  # mark failure so the later branches expand the search
            if len(self.pf_frame) == 0:
                self.pf_frame = [self.frame]
            else:
                self.pf_frame.append(self.frame)

        if (len(self.frame_learn) == update_length * 2 and self.data_frame[-1]
                not in self.frame_learn) or self.data_frame[-1] == 0:
            for num in range(max(0, len(self.img_learn) - update_length),
                             len(self.img_learn)):
                if self.frame_learn[num] not in self.data_frame:
                    gt_ = self.pos_learn[num]
                    image_ = self.img_learn[num]
                    pos_examples = np.round(
                        gen_samples(self.pos_generator, gt_,
                                    opts['n_pos_update'],
                                    opts['overlap_pos_update']))
                    neg_examples = np.round(
                        gen_samples(self.neg_generator, gt_,
                                    opts['n_neg_update'],
                                    opts['overlap_neg_update']))
                    pos_feats_ = forward_samples(self.model, image_,
                                                 pos_examples)
                    neg_feats_ = forward_samples(self.model, image_,
                                                 neg_examples)

                    self.pos_feats_all.append(pos_feats_)
                    self.neg_feats_all.append(neg_feats_)
                    self.data_frame.append(self.frame_learn[num])
                    if len(self.pos_feats_all) > 10:
                        del self.pos_feats_all[0]
                        del self.neg_feats_all[0]
                        del self.data_frame[0]
                else:
                    pos_feats_ = self.pos_feats_all[self.data_frame.index(
                        self.frame_learn[num])]
                    neg_feats_ = self.neg_feats_all[self.data_frame.index(
                        self.frame_learn[num])]

                if num == max(0, len(self.img_learn) - update_length):
                    pos_feats = pos_feats_
                    neg_feats = neg_feats_

                else:
                    pos_feats = torch.cat([pos_feats, pos_feats_], 0)
                    neg_feats = torch.cat([neg_feats, neg_feats_], 0)
            train(self.model, self.criterion, self.update_optimizer, pos_feats,
                  neg_feats, opts['maxiter_update'])

            if success:
                self.sample_generator.set_trans_f(opts['trans_f'])
            else:
                self.sample_generator.set_trans_f(opts['trans_f_expand'])
            if imageVar < 100:
                samples = gen_samples(self.init_generator, self.target_bbox,
                                      opts['n_samples'])
            else:
                samples = gen_samples(self.sample_generator, self.target_bbox,
                                      opts['n_samples'])

                if self.frame < 20 or ((self.init_bbox[2] * self.init_bbox[3]) > 1000
                              and
                              (self.target_bbox[2] * self.target_bbox[3] /
                               (self.init_bbox[2] * self.init_bbox[3]) > 2.5
                               or self.target_bbox[2] * self.target_bbox[3] /
                               (self.init_bbox[2] * self.init_bbox[3]) < 0.4)):
                    self.sample_generator.set_trans_f(opts['trans_f_expand'])
                    samples_ = np.round(
                        gen_samples(
                            self.sample_generator,
                            np.hstack([
                                self.target_bbox[0:2] +
                                self.target_bbox[2:4] / 2 -
                                self.init_bbox[2:4] / 2, self.init_bbox[2:4]
                            ]), opts['n_samples']))
                    samples = np.vstack([samples, samples_])

                sample_scores = forward_samples(self.model,
                                                image,
                                                samples,
                                                out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)
                top_idx = top_idx.cpu().numpy()
                target_score = top_scores.mean()
                self.target_bbox = samples[top_idx].mean(axis=0)
                success = target_score > opts['success_thr']

                # Bbox regression
                if success:
                    bbreg_samples = samples[top_idx]
                    bbreg_feats = forward_samples(self.model, image,
                                                  bbreg_samples)
                    bbreg_samples = self.bbreg.predict(bbreg_feats,
                                                       bbreg_samples)
                    bbreg_bbox = bbreg_samples.mean(axis=0)

                    self.img_learn.append(image)
                    self.pos_learn.append(self.target_bbox)
                    self.score_pos.append(target_score)
                    self.frame_learn.append(self.frame)
                    while len(self.img_learn) > 2 * update_length:
                        del self.img_learn[0]
                        del self.pos_learn[0]
                        del self.score_pos[0]
                        del self.frame_learn[0]

                else:
                    bbreg_bbox = self.target_bbox

                # Copy previous result at failure
                if not success:
                    self.target_bbox = self.result[-1]
                    bbreg_bbox = self.result_bb[-1]

                # Save result, growing the result lists when needed
                if len(self.result) <= self.frame:
                    self.result.append(self.target_bbox)
                    self.result_bb.append(bbreg_bbox)
                else:
                    self.result[self.frame] = self.target_bbox
                    self.result_bb[self.frame] = bbreg_bbox

        return self.target_bbox
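
Taken together, `ACTTracker` follows the usual two-call protocol: `init` once with a ground-truth box, then `update` once per subsequent frame. A minimal driver under that assumption (paths and image loading are illustrative):

from PIL import Image

def run_tracker(tracker, img_paths, init_bbox):
    results = [init_bbox]
    tracker.init(Image.open(img_paths[0]).convert('RGB'), init_bbox)
    for path in img_paths[1:]:
        bbox = tracker.update(Image.open(path).convert('RGB'))
        results.append(bbox)
    return results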