Esempio n. 1
0
def train_mdnet(opts):

    # Init dataset
    with open(opts['data_path'], 'rb') as fp:
        data = pickle.load(fp)
    K = len(data)
    dataset = [None] * K
    for k, seq in enumerate(data.values()):
        dataset[k] = RegionDataset(seq['images'], seq['gt'], opts)

    # Init model
    model = MDNet(opts['init_model_path'], K)
    if opts['use_gpu']:
        model = model.cuda()
    model.set_learnable_params(opts['ft_layers'])

    # Init criterion and optimizer
    criterion = BCELoss()
    evaluator = Precision()
    optimizer = set_optimizer(model, opts['lr'], opts['lr_mult'])

    # Main trainig loop
    for i in range(opts['n_cycles']):
        print('==== Start Cycle {:d}/{:d} ===='.format(i + 1, opts['n_cycles']))

        if i in opts.get('lr_decay', []):
            print('decay learning rate')
            for param_group in optimizer.param_groups:
                param_group['lr'] *= opts.get('gamma', 0.1)

        # Training
        model.train()
        prec = np.zeros(K)
        k_list = np.random.permutation(K)
        for j, k in enumerate(k_list):
            tic = time.time()
            # training
            pos_regions, neg_regions = dataset[k].next()
            if opts['use_gpu']:
                pos_regions = pos_regions.cuda()
                neg_regions = neg_regions.cuda()
            pos_score = model(pos_regions, k)
            neg_score = model(neg_regions, k)

            loss = criterion(pos_score, neg_score)

            batch_accum = opts.get('batch_accum', 1)
            if j % batch_accum == 0:
                model.zero_grad()
            loss.backward()
            if j % batch_accum == batch_accum - 1 or j == len(k_list) - 1:
                if 'grad_clip' in opts:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), opts['grad_clip'])
                optimizer.step()

            prec[k] = evaluator(pos_score, neg_score)

            toc = time.time()-tic
            print('Cycle {:2d}/{:2d}, Iter {:2d}/{:2d} (Domain {:2d}), Loss {:.3f}, Precision {:.3f}, Time {:.3f}'
                    .format(i, opts['n_cycles'], j, len(k_list), k, loss.item(), prec[k], toc))

        print('Mean Precision: {:.3f}'.format(prec.mean()))
        print('Save model to {:s}'.format(opts['model_path']))
        if opts['use_gpu']:
            model = model.cpu()
        states = {'shared_layers': model.layers.state_dict()}
        torch.save(states, opts['model_path'])
        if opts['use_gpu']:
            model = model.cuda()
Esempio n. 2
0
    sys.exit(0)

gt_rect = [selection.x, selection.y, selection.width, selection.height]

np.random.seed(0)
torch.manual_seed(0)

target_bbox = np.array(gt_rect)
model = MDNet(opts['model_path'])
if opts['use_gpu']:
    model = model.cuda()

# Init criterion and optimizer
criterion = BCELoss()
model.set_learnable_params(opts['ft_layers'])
init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])
# Load first image
image = Image.open(imagefile).convert('RGB')
# Draw pos/neg samples
pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                               opts['scale_pos'])(target_bbox,
                                                  opts['n_pos_init'],
                                                  opts['overlap_pos_init'])

neg_examples = np.concatenate([
    SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                    opts['scale_neg_init'])(target_bbox,
                                            int(opts['n_neg_init'] * 0.5),
                                            opts['overlap_neg_init']),
    SampleGenerator('whole', image.size)(target_bbox,
Esempio n. 3
0
def run_mdnet(img_list, init_bbox, gt=None, savefig_dir='', display=False):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update:这三个是【生成器】,不是产生的数据
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox:指的是下一次的target_bbox,用于迭代
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(
            model, image, samples, out_layer='fc6'
        )  #forward_samples是根据当前的给当前的多个sample打分数,【相当于执行了一次网络判断】

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)  #多个target_bbox进行均值优化
        success = target_score > 0

        # Expand search area at failure:在【跟踪的同时】,根据跟踪情况,采用不同的sample生成参数
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression :利用【当前:只是对当前帧图像进行回归,没有考虑前几帧】几名对应的box,最为regression的输入,来产生一个更好的Bbox
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update :每张图片都要更新一次模型,这次的输入数据就是【前几帧累计的正样本和负样本】
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps
Esempio n. 4
0
def run_mdnet(img_list,
              init_bbox,
              gt=None,
              savefig_dir='',
              display=False,
              model_path='models/model001.pth'):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    opts['model_path'] = model_path

    print('********')
    print('model:', opts['model_path'])
    print('********')

    assert (model_path == 'models/model000.pth'
            or model_path == 'models/model001.pth')

    if model_path == 'models/model000.pth': model = MDNet0(opts['model_path'])
    else: model = MDNet1(opts['model_path'])

    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    print(pos_feats)
    neg_feats = forward_samples(model, image, neg_examples)
    print(neg_feats)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)

        # for top 5 samples, maximize score using hill-climbing algorithm
        for j in range(5):
            sample_ = samples[top_idx[j]]
            last_top_score = None

            # hill-climbing search
            while True:
                sample_left_p = [
                    sample_[0] + 1, sample_[1], sample_[2] - 1, sample_[3]
                ]
                sample_left_n = [
                    sample_[0] - 1, sample_[1], sample_[2] + 1, sample_[3]
                ]
                sample_up_p = [
                    sample_[0], sample_[1] + 1, sample_[2], sample_[3] - 1
                ]
                sample_up_n = [
                    sample_[0], sample_[1] - 1, sample_[2], sample_[3] + 1
                ]
                sample_right_p = [
                    sample_[0], sample_[1], sample_[2] + 1, sample_[3]
                ]
                sample_right_n = [
                    sample_[0], sample_[1], sample_[2] - 1, sample_[3]
                ]
                sample_bottom_p = [
                    sample_[0], sample_[1], sample_[2], sample_[3] + 1
                ]
                sample_bottom_n = [
                    sample_[0], sample_[1], sample_[2], sample_[3] - 1
                ]

                all_samples = [
                    sample_left_p, sample_left_n, sample_up_p, sample_up_n,
                    sample_right_p, sample_right_n, sample_bottom_p,
                    sample_bottom_n
                ]

                hillClimbingSS = forward_samples(model,
                                                 image,
                                                 np.array(all_samples),
                                                 out_layer='fc6')
                top_score, top_index = hillClimbingSS[:, 1].topk(1)
                top_score_float = top_score.cpu().numpy()[0]

                # End of hill climbing: this is THE BEST!
                if last_top_score != None:
                    if top_score_float < last_top_score: break

                sample_ = all_samples[top_index]
                samples[top_idx[j]] = all_samples[top_index]
                last_top_score = top_score_float

        # modify sample scores array
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)

        sampleStore = []
        for j in range(len(samples)):
            temp = []
            for k in range(4):
                temp.append(samples[j][k])
            sampleStore.append(temp)

        # if mean score of bbox < 0, find everywhere
        target_score = top_scores.mean()

        if target_score < 0:
            # print('')
            # print('last bbox:')
            # print(result[i-1])
            last_left = result[i - 1][0]
            last_top = result[i - 1][1]

            # print('')
            # for j in range(len(samples)): print(j, samples[j], sample_scores[j])
            # print('')
            # print('sample top scores (before):')
            # print(top_scores)
            # print(top_idx)

            cnt = 0
            rl = [32, 16]

            for _ in range(len(rl)):
                everywhere_sample = []

                # find everywhere (near the last bbox)
                meanWidth = 0.0
                meanHeight = 0.0
                for j in range(len(samples)):
                    meanWidth += samples[j][2]
                    meanHeight += samples[j][3]
                meanWidth /= len(samples)
                meanHeight /= len(samples)

                width = image.size[0]
                height = image.size[1]

                for j in range(32):
                    for k in range(32):
                        jk = [
                            last_left + (31 - 2 * j) * meanWidth / rl[_],
                            last_top + (31 - 2 * k) * meanHeight / rl[_],
                            meanWidth, meanHeight
                        ]
                        # print(j, k, jk)
                        everywhere_sample.append(jk)

                everywhere_scores = forward_samples(
                    model, image, np.array(everywhere_sample), out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = everywhere_scores[:,
                                                                              1].topk(
                                                                                  5
                                                                              )

                # print('')
                # print('everywhere_sample:')
                # for j in range(len(everywhere_sample)): print(j, everywhere_sample[j], everywhere_scores[j])

                # print('')
                # print('everywhere top scores (before):')
                # print(everywhere_top_scores)
                # print(everywhere_top_idx)
                # for j in range(5): print(everywhere_sample[everywhere_top_idx[j]])

                # for top 5 samples in everywhere_sample, maximize score using hill-climbing algorithm
                for j in range(5):
                    # print('')
                    sample_ = everywhere_sample[everywhere_top_idx[j]]
                    last_top_score = None

                    # hill-climbing search
                    while True:
                        sample_left_p = [
                            sample_[0] + 1, sample_[1], sample_[2] - 1,
                            sample_[3]
                        ]
                        sample_left_n = [
                            sample_[0] - 1, sample_[1], sample_[2] + 1,
                            sample_[3]
                        ]
                        sample_up_p = [
                            sample_[0], sample_[1] + 1, sample_[2],
                            sample_[3] - 1
                        ]
                        sample_up_n = [
                            sample_[0], sample_[1] - 1, sample_[2],
                            sample_[3] + 1
                        ]
                        sample_right_p = [
                            sample_[0], sample_[1], sample_[2] + 1, sample_[3]
                        ]
                        sample_right_n = [
                            sample_[0], sample_[1], sample_[2] - 1, sample_[3]
                        ]
                        sample_bottom_p = [
                            sample_[0], sample_[1], sample_[2], sample_[3] + 1
                        ]
                        sample_bottom_n = [
                            sample_[0], sample_[1], sample_[2], sample_[3] - 1
                        ]

                        all_samples = [
                            sample_left_p, sample_left_n, sample_up_p,
                            sample_up_n, sample_right_p, sample_right_n,
                            sample_bottom_p, sample_bottom_n
                        ]

                        hillClimbingSS = forward_samples(model,
                                                         image,
                                                         np.array(all_samples),
                                                         out_layer='fc6')
                        top_score, top_index = hillClimbingSS[:, 1].topk(1)
                        top_score_float = top_score.cpu().numpy()[0]

                        # End of hill climbing: this is THE BEST!
                        if last_top_score != None:
                            # print(last_top_score)
                            if top_score_float < last_top_score: break

                        sample_ = all_samples[top_index]
                        everywhere_sample[
                            everywhere_top_idx[j]] = all_samples[top_index]
                        last_top_score = top_score_float

                everywhere_scores = forward_samples(
                    model, image, np.array(everywhere_sample), out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = everywhere_scores[:,
                                                                              1].topk(
                                                                                  5
                                                                              )

                # print('')
                # print('everywhere top scores (after):')
                # print(everywhere_top_scores)
                # print(everywhere_top_idx)
                # for j in range(5): print(everywhere_sample[everywhere_top_idx[j]])

                # merge 'samples' with everywhere samples
                everywhere_top5 = []
                for j in range(5):
                    everywhere_top5.append(
                        everywhere_sample[everywhere_top_idx[j]])
                samples = np.concatenate((samples, np.array(everywhere_top5)))

                sample_scores = forward_samples(model,
                                                image,
                                                samples,
                                                out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)

                if top_scores.mean() > 0:
                    # print('')
                    # for j in range(len(samples)): print(j, samples[j], sample_scores[j])
                    # print('')
                    # print('sample top scores (after):')
                    # print(top_scores)
                    # print(top_idx)
                    break
                cnt += 1

            # failure -> recover original samples
            if cnt == 2:
                # print('recovered')
                samples = np.array(sampleStore)
                sample_scores = forward_samples(model,
                                                image,
                                                samples,
                                                out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)

        # finally modify sample scores array
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)

        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(
                    savefig_dir,
                    ('M' + model_path[14] + 'T3_' + '{:04d}.jpg'.format(i))),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    plt.close('all')
    return result, result_bb, fps, overlap
Esempio n. 5
0
    def search_track(self, track_num, frame_idx, init_bbox, previous_num,
                     init_conf):
        # img_list, init_bbox

        # Init bbox

        # result = np.zeros((len(img_list), 4))
        # result_bb = np.zeros((len(img_list), 4))
        # result[0] = target_bbox
        # result_bb[0] = target_bbox

        # superorange params
        track_list = [(-1, -1)] * (self.TRACKLET_NUM + 1)
        bbox_list = [(frame_idx, init_bbox, init_conf)]
        IOU_count = 0
        # last_bbox = -1
        # next_frame = -1
        #        frameA_path = os.path.join(self.FILE_PATH, str(frame_idx) + "."+ext)

        frameA_path = os.path.join(self.FILE_PATH,
                                   str(frame_idx).zfill(5) + ".png")

        target_bbox = np.array(init_bbox)
        # Init model
        model = MDNet(opts['model_path'])
        if opts['use_gpu']:
            model = model.cuda()

        # Init criterion and optimizer
        criterion = BCELoss()
        model.set_learnable_params(opts['ft_layers'])
        init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
        update_optimizer = set_optimizer(model, opts['lr_update'],
                                         opts['lr_mult'])

        tic = time.time()
        # Load first image
        image = Image.open(frameA_path).convert('RGB')

        # Draw pos/neg samples
        pos_examples = SampleGenerator('gaussian', image.size,
                                       opts['trans_pos'], opts['scale_pos'])(
                                           target_bbox, opts['n_pos_init'],
                                           opts['overlap_pos_init'])

        neg_examples = np.concatenate([
            SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                            opts['scale_neg_init'])(target_bbox,
                                                    int(opts['n_neg_init'] *
                                                        0.5),
                                                    opts['overlap_neg_init']),
            SampleGenerator('whole', image.size)(target_bbox,
                                                 int(opts['n_neg_init'] * 0.5),
                                                 opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        # Extract pos/neg features
        if len(pos_examples) == 0 or len(neg_examples) == 0:
            print("!!pos_examples=0 skip!!")
            return
        pos_feats = self.forward_samples(model, image, pos_examples)
        neg_feats = self.forward_samples(model, image, neg_examples)

        # Initial training
        self.train(model, criterion, init_optimizer, pos_feats, neg_feats,
                   opts['maxiter_init'])
        del init_optimizer, neg_feats
        torch.cuda.empty_cache()

        # Train bbox regressor
        bbreg_examples = SampleGenerator(
            'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
            opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                                  opts['overlap_bbreg'])
        bbreg_feats = self.forward_samples(model, image, bbreg_examples)
        bbreg = BBRegressor(image.size)
        bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
        del bbreg_feats
        torch.cuda.empty_cache()

        # Init sample generators for update
        sample_generator = SampleGenerator('gaussian', image.size,
                                           opts['trans'], opts['scale'])
        pos_generator = SampleGenerator('gaussian', image.size,
                                        opts['trans_pos'], opts['scale_pos'])
        neg_generator = SampleGenerator('uniform', image.size,
                                        opts['trans_neg'], opts['scale_neg'])

        # Init pos/neg features for update
        neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                     opts['overlap_neg_init'])
        neg_feats = self.forward_samples(model, image, neg_examples)
        pos_feats_all = [pos_feats]
        neg_feats_all = [neg_feats]

        spf_total = time.time() - tic

        # fps = len(img_list) / spf_total
        # return result, result_bb, fps

        # Main loop
        for i in range(0, self.TRACKLET_NUM):  # next 10 frame
            frameB_idx = frame_idx + i + 1
            #    print("frameB_idx="+str(frameB_idx))
            if frameB_idx > self.frame_num:
                break
            else:

                frameB_path = os.path.join(self.FILE_PATH,
                                           str(frameB_idx).zfill(5) + ".png")

                # ------------track by MDNet------------
                # Load image
                image = Image.open(frameB_path).convert('RGB')

                # Estimate target bbox
                samples = sample_generator(target_bbox, opts['n_samples'])
                sample_scores = self.forward_samples(model,
                                                     image,
                                                     samples,
                                                     out_layer='fc6')

                top_scores, top_idx = sample_scores[:, 1].topk(5)
                top_idx = top_idx.cpu()
                target_score = top_scores.mean()
                target_bbox = samples[top_idx]
                if top_idx.shape[0] > 1:
                    target_bbox = target_bbox.mean(axis=0)
                success = target_score > 0

                # Expand search area at failure
                if success:
                    sample_generator.set_trans(opts['trans'])
                else:
                    sample_generator.expand_trans(opts['trans_limit'])

                # Bbox regression
                if success:
                    bbreg_samples = samples[top_idx]
                    if top_idx.shape[0] == 1:
                        bbreg_samples = bbreg_samples[None, :]
                    bbreg_feats = self.forward_samples(model, image,
                                                       bbreg_samples)
                    bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
                    bbreg_bbox = bbreg_samples.mean(axis=0)
                else:
                    bbreg_bbox = target_bbox

                # Save result
                # result[i] = target_bbox
                # result_bb[i] = bbreg_bbox

        #       print(target_bbox)

        # Data collect
                if success:
                    pos_examples = pos_generator(target_bbox,
                                                 opts['n_pos_update'],
                                                 opts['overlap_pos_update'])
                    pos_feats = self.forward_samples(model, image,
                                                     pos_examples)
                    pos_feats_all.append(pos_feats)
                    if len(pos_feats_all) > opts['n_frames_long']:
                        del pos_feats_all[0]

                    neg_examples = neg_generator(target_bbox,
                                                 opts['n_neg_update'],
                                                 opts['overlap_neg_update'])
                    neg_feats = self.forward_samples(model, image,
                                                     neg_examples)
                    neg_feats_all.append(neg_feats)
                    if len(neg_feats_all) > opts['n_frames_short']:
                        del neg_feats_all[0]

                # Short term update
                if not success:
                    nframes = min(opts['n_frames_short'], len(pos_feats_all))
                    pos_data = torch.cat(pos_feats_all[-nframes:], 0)
                    neg_data = torch.cat(neg_feats_all, 0)
                    self.train(model, criterion, update_optimizer, pos_data,
                               neg_data, opts['maxiter_update'])

                # Long term update
                elif i % opts['long_interval'] == 0:
                    pos_data = torch.cat(pos_feats_all, 0)
                    neg_data = torch.cat(neg_feats_all, 0)
                    self.train(model, criterion, update_optimizer, pos_data,
                               neg_data, opts['maxiter_update'])

                torch.cuda.empty_cache()

                bboxT = bbreg_bbox
                anyIOU = False

                for bbox_id, bboxD in enumerate(frameBBoxList[frameB_idx]):
                    if bboxD.match == False:
                        IOU_ratio = overlap_ratio(bboxD.getRec(), bboxT)
                        print("IOUratio={}".format(IOU_ratio))
                        #             print(IOU_ratio)
                        if IOU_ratio > 0.3:
                            #                print("overlap")
                            track_list[i] = (frameB_idx, bbox_id)
                            IOU_count = IOU_count + 1
                            bbox_list.append(
                                (frameB_idx, bboxD.getRec(), bboxD.confidence))
                            anyIOU = True
                            break
                if not anyIOU:
                    bbox_list.append((frameB_idx, bboxT, 0))

        print("bbox_list=====")
        print(bbox_list)

        print("track_list=====")
        print(track_list)

        ##debug   show track_list
        # for idx in range(0,10):
        #    print(track_list[idx])
        #    print(bbox_list[idx])

        if IOU_count >= self.IOU_count_th:  # add track
            for idx in range(0, self.TRACKLET_NUM + 1):
                bbox_id = track_list[idx][1]
                if bbox_id != -1:
                    # print(frame_idx+idx+1)
                    # print(bbox_id)
                    frameBBoxList[frame_idx + idx + 1][bbox_id].setMatch()

            next_track_frame = -1
            init_bbox_next = []
            bbox_length = len(bbox_list)
            #!!! track_list length=10, bbox_list length=11
            for ii in range(1, bbox_length):

                idx = bbox_length - ii
                print("{} {} {}".format(ii, idx, track_list[idx][0]))
                if track_list[idx - 1][0] != -1:
                    next_track_frame = (-1) * ii  # count from back
                    print("idx ={} ii={} next_track_frame={}".format(
                        idx, ii, next_track_frame))
                    init_bbox_next = bbox_list[idx]
                    break

            # remove overlap range
            start_rm = -1
            while start_rm >= previous_num:
                del self.GLOBAL_TRACK_LIST[track_num][-1]  # rm 1 per move
                # GLOBAL_TRACK_LIST[track_num].remove(-1)
                start_rm = start_rm - 1

            if previous_num == 0:
                self.GLOBAL_TRACK_LIST.append(bbox_list)
            else:
                self.GLOBAL_TRACK_LIST[track_num].extend(bbox_list)

            next_start_frame = frame_idx + bbox_length + next_track_frame

            init_bbox_next = bbox_list[bbox_length + next_track_frame][1]
            init_conf_next = bbox_list[bbox_length + next_track_frame][2]
            # print(GLOBAL_TRACK_LIST)
            print("( " + str(track_num) + " " + str(next_start_frame) + " " +
                  str(next_track_frame) + ")")
            print("init_bbox_next:" + str(init_bbox_next))
            self.search_track(
                track_num, next_start_frame, init_bbox_next, next_track_frame,
                init_conf_next
            )  # next_start_frame = 310  next_track_frame=-3(will be deleted)
Esempio n. 6
0
def run_mdnet(img_list,
              init_bbox,
              gt=None,
              savefig_dir='',
              display=False,
              loss_index=1,
              model_path=opts['model_path'],
              seq_name=None):

    #def run_mdnet(k, img_list, init_bbox, gt=None, savefig_dir='', display=False,
    #              loss_index=1, model_path=opts['model_path'], seq_name=None):

    ############################
    if fewer_images:
        num_images = min(sequence_len_limit, len(img_list))
    else:
        num_images = len(img_list)
    ############################

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    # Init iou and pred_iou
    iou_list = np.zeros((len(img_list), 1))  # shape: [113.1) ### list of ious
    iou_list[0] = 1.0  ### in first frame gt=result_bb (by definition)

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    # model = MDNet(model_path=opts['model_path'],use_gpu=opts['use_gpu'])
    model = MDNet(model_path=model_path, use_gpu=opts['use_gpu'])
    if opts['use_gpu']:
        model = model.cuda()

    print('Init criterion and optimizer')
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    print('Draw pos/neg samples')
    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    print('Extract pos/neg features')
    # Extract pos/neg features
    if fewer_images:  # shorter run in general, less accurate
        pos_feats = forward_samples(model, image, pos_examples)
        neg_feats = forward_samples(model, image, neg_examples)
    else:
        pos_feats = forward_samples(model, image, pos_examples)
        neg_feats = forward_samples(model, image, neg_examples)

    print('Initial training')
    # Initial training
    train(model,
          criterion,
          init_optimizer,
          pos_feats,
          neg_feats,
          opts['maxiter_init'],
          loss_index=loss_index)  ### iou_pred_list
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    print('Train bbox regressor')
    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(
        model, image,
        bbreg_examples)  # calc features ### shape: [927, 4608] ###
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    print('Init pos/neg features for update')
    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)
            #################
            num_gts = np.minimum(gt.shape[0], num_images)
            # print('num_gts.shape: ', num_gts.shape)
            gt_centers = gt[:num_gts, :2] + gt[:num_gts, 2:] / 2
            result_centers = np.zeros_like(gt[:num_gts, :2])
            result_centers[0] = gt_centers[0]
            result_ious = np.zeros(num_gts, dtype='float64')
            result_ious[0] = 1.
            #################

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    print('Main Loop')
    # Main loop
    spf_total = 0  # I don't want to take into account initialization
    for i in tqdm(range(1, num_images)):
        # for i in range(1, len(img_list)):
        #print('Frame: ', i)
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        #print('Estimate target bbox (in run_mdnet)')
        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        #print('Bbox regression (in run_mdnet)')
        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        iou_list[i] = overlap_ratio(gt[i], result_bb[i])

        ###########################################
        # identify tracking failure and abort when in VOT mode

        IoU = overlap_ratio(result_bb[i], gt[i])[0]
        if (IoU == 0) and init_after_loss:
            print('    * lost track in frame %d since init*' % (i))
            result_distances = scipy.spatial.distance.cdist(
                result_centers[:i], gt_centers[:i],
                metric='euclidean').diagonal()
            num_images_tracked = i - 1  # we don't count frame 0 and current frame (lost track)

            im.set_data(image)
            if gt is not None:
                if i < gt.shape[0]:
                    gt_rect.set_xy(gt[i, :2])
                    gt_rect.set_width(gt[i, 2])
                    gt_rect.set_height(gt[i, 3])
                else:
                    gt_rect.set_xy(np.array([np.nan, np.nan]))
                    gt_rect.set_width(np.nan)
                    gt_rect.set_height(np.nan)

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            plt.pause(.01)
            plt.draw()

            print(
                'Finished identify tracking failure and abort when in VOT mode'
            )
            return result[:
                          i], result_bb[:
                                        i], num_images_tracked, spf_total, result_distances, result_ious[:
                                                                                                         i], True
        ########################################

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf
        #print('Time: ', spf)

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

                #################
                result_ious[i] = overlap_ratio(result_bb[i], gt[i])[0]
                result_centers[i] = result_bb[i, :2] + result_bb[i, 2:] / 2
                #################

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        ####################################
        if detailed_printing:
            if gt is None:
                print("      Frame %d/%d, Score %.3f, Time %.3f" % \
                      (i, num_images-1, target_score, spf))
            else:
                if i < gt.shape[0]:
                    print("      Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                        (i, num_images-1, overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))
                else:
                    print("      Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                        (i, num_images-1, overlap_ratio(np.array([np.nan,np.nan,np.nan,np.nan]), result_bb[i])[0], target_score, spf))
        ####################################

        # if gt is None:
        #     print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'
        #         .format(i, len(img_list), target_score, spf))
        # else:
        #     overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
        #     print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'
        #         .format(i, len(img_list), overlap[i], target_score, spf))

    ########################
    plt.close()
    result_distances = scipy.spatial.distance.cdist(
        result_centers, gt_centers, metric='euclidean').diagonal()
    num_images_tracked = num_images - 1  # I don't want to count initialization frame (i.e. frame 0)
    print('    main loop finished, %d frames' % (num_images))

    print('mean IoU: ', iou_list.mean())
    print('Finished run_mdnet()')

    return result, result_bb, num_images_tracked, spf_total, result_distances, result_ious, False
Esempio n. 7
0
def run_vtaan(img_list, init_bbox, gt=None, savefig_dir='', display=False):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    model_g = NetG()
    if opts['use_gpu']:
        model = model.cuda()
        model_g = model_g.cuda()
    GBP = guided_backprop.GuidedBackprop(model, 1)

    # Init criterion and optimizer
    criterion = BCELoss()
    criterion_g = torch.nn.MSELoss(reduction='sum')
    model.set_learnable_params(opts['ft_layers'])
    model_g.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    pos_imgids = np.array([[0]] * pos_feats.size(0))
    neg_imgids = np.array([[0]] * neg_feats.size(0))

    feat_dim = pos_feats.size(-1)

    # Initial training
    train(model, None, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], pos_imgids, pos_examples, neg_imgids,
          neg_examples, img_list, GBP)
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()
    g_pretrain(model, model_g, criterion_g, pos_feats)
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    neg_feats_all = [neg_feats[:opts['n_neg_update']]]

    pos_examples_all = [pos_examples[:opts['n_pos_update']]]
    neg_examples_all = [neg_examples[:opts['n_neg_update']]]

    pos_imgids_all = [pos_imgids[:opts['n_pos_update']]]
    neg_imgids_all = [neg_imgids[:opts['n_neg_update']]]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
                del pos_examples_all[0]
                del pos_imgids_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)

            pos_examples_all.append(pos_examples)
            neg_examples_all.append(neg_examples)

            pos_imgids_all.append(np.array([[i]] * pos_feats.size(0)))
            neg_imgids_all.append(np.array([[i]] * neg_feats.size(0)))

            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]
                del neg_examples_all[0]
                del neg_imgids_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:],
                                   0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)

            pos_examples_data = torch.from_numpy(
                np.stack(pos_examples_all[-nframes:], 0)).view(-1, 4).numpy()
            neg_examples_data = torch.from_numpy(np.stack(neg_examples_all,
                                                          0)).view(-1,
                                                                   4).numpy()

            pos_imgids_data = torch.from_numpy(
                np.stack(pos_imgids_all[-nframes:], 0)).view(-1, 1).numpy()
            neg_imgids_data = torch.from_numpy(np.stack(neg_imgids_all,
                                                        0)).view(-1,
                                                                 1).numpy()

            train(model, None, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'], pos_imgids_data, pos_examples_data,
                  neg_imgids_data, neg_examples_data, img_list, GBP)

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = t.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = t.stack(neg_feats_all, 0).view(-1, feat_dim)

            pos_examples_data = torch.from_numpy(np.stack(pos_examples_all,
                                                          0)).view(-1,
                                                                   4).numpy()
            neg_examples_data = torch.from_numpy(np.stack(neg_examples_all,
                                                          0)).view(-1,
                                                                   4).numpy()

            pos_imgids_data = torch.from_numpy(np.stack(pos_imgids_all,
                                                        0)).view(-1,
                                                                 1).numpy()
            neg_imgids_data = torch.from_numpy(np.stack(neg_imgids_all,
                                                        0)).view(-1,
                                                                 1).numpy()

            # train(model, model_g, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'],
            #       pos_imgids_data, pos_examples_data, neg_imgids_data, neg_examples_data, img_list, GBP)
            train(model, model_g, criterion, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'], None, None, None, None,
                  img_list, GBP)

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i + 1, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i + 1, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps