def init_pymdnet(self, image, init_bbox):
    """Initialise the MDNet model on the first frame.

    Loads the pretrained MDNet weights, fine-tunes the learnable layers on
    positive/negative samples drawn around ``init_bbox``, trains the
    bounding-box regressor, and prepares the sample generators and feature
    pools that are stored on ``self`` for later use.

    Args:
        image: First frame. ``image.size`` is read here and the frame is
            otherwise handed unchanged to ``forward_samples``.
        init_bbox: Initial target box; converted with ``np.array``.
            NOTE(review): presumably (x, y, w, h) - confirm with the caller.

    Side effects:
        Sets ``last_result``, ``pymodel``, ``criterion``,
        ``update_optimizer``, ``feat_dim``, ``bbreg``, ``sample_generator``,
        ``pos_generator``, ``neg_generator``, ``pos_feats_all`` and
        ``neg_feats_all`` on ``self``.
    """
    target_bbox = np.array(init_bbox)
    self.last_result = target_bbox

    # Init model
    weights_path = os.path.join(
        base_path, 'DiMP_LTMU/pyMDNet/models/mdnet_imagenet_vid.pth')
    self.pymodel = MDNet(weights_path)
    if opts['use_gpu']:
        self.pymodel = self.pymodel.cuda()
    self.pymodel.set_learnable_params(opts['ft_layers'])

    # Init criterion and optimizers: one optimizer for the initial training
    # below, one kept on self for later updates.
    self.criterion = BCELoss()
    init_optimizer = set_optimizer(
        self.pymodel, opts['lr_init'], opts['lr_mult'])
    self.update_optimizer = set_optimizer(
        self.pymodel, opts['lr_update'], opts['lr_mult'])

    # Draw pos/neg samples. The order of the sampling calls is kept exactly
    # as in the original so that seeded runs draw the same samples.
    pos_examples = SampleGenerator(
        'gaussian', image.size, opts['trans_pos'], opts['scale_pos'])(
            target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])

    n_neg_half = int(opts['n_neg_init'] * 0.5)
    neg_uniform = SampleGenerator(
        'uniform', image.size,
        opts['trans_neg_init'], opts['scale_neg_init'])(
            target_bbox, n_neg_half, opts['overlap_neg_init'])
    neg_whole = SampleGenerator('whole', image.size)(
        target_bbox, n_neg_half, opts['overlap_neg_init'])
    neg_examples = np.concatenate([neg_uniform, neg_whole])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(self.pymodel, image, pos_examples, opts)
    neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
    self.feat_dim = pos_feats.size(-1)

    # Initial training
    train(self.pymodel, self.criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], opts=opts)
    # Drop the references that are no longer needed before asking the CUDA
    # allocator to release its cached blocks.
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(
            target_bbox, opts['n_bbreg'], opts['overlap_bbreg'])
    bbreg_feats = forward_samples(self.pymodel, image, bbreg_examples, opts)
    self.bbreg = BBRegressor(image.size)
    self.bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators
    self.sample_generator = SampleGenerator(
        'gaussian', image.size, opts['trans'], opts['scale'])
    self.pos_generator = SampleGenerator(
        'gaussian', image.size, opts['trans_pos'], opts['scale_pos'])
    self.neg_generator = SampleGenerator(
        'uniform', image.size, opts['trans_neg'], opts['scale_neg'])

    # Init pos/neg features for update. The positive pool starts with the
    # initial positives; the negative pool is re-sampled with the update
    # generator.
    neg_examples = self.neg_generator(
        target_bbox, opts['n_neg_update'], opts['overlap_neg_init'])
    neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
    self.pos_feats_all = [pos_feats]
    self.neg_feats_all = [neg_feats]
def init_metricnet(self, image, init_bbox):
    """Initialise the MDNet model and the metric model on the first frame.

    Performs the same MDNet initialisation as the plain MDNet path (load
    weights, initial training, bbox regressor, sample generators, feature
    pools) and additionally:

    * loads the metric model from ``opts['metric_model']`` and runs one
      warm-up forward pass,
    * stores the target's metric feature,
    * computes metric features/distances of the initial positive samples,
      fits ``self.clf`` with ``lof_fit`` and derives two thresholds,
    * stores the top-5 ``fc6`` scores of samples drawn around the target.

    Args:
        image: First frame. ``image.size`` and ``np.array(image)`` are used.
        init_bbox: Initial target box; converted with ``np.array``.
            NOTE(review): presumably (x, y, w, h) - confirm with the caller.

    Side effects:
        Sets many attributes on ``self`` (models, optimizer, generators,
        feature pools, ``clf``, ``top_scores``, ``spf_total``) and WRITES
        ``opts['pos_thresh']`` and ``opts['metric_similar_thresh']`` into
        the shared ``opts`` dict.

    NOTE(review): the warm-up tensor and ``pos_feature_center`` are moved to
    CUDA unconditionally, unlike the MDNet model which honours
    ``opts['use_gpu']`` - confirm whether a CPU path is expected to work.
    """
    target_bbox = np.array(init_bbox)
    self.last_result = target_bbox

    # Init model
    self.pymodel = MDNet(
        os.path.join(base_path, 'pyMDNet/models/mdnet_imagenet_vid.pth'))
    if opts['use_gpu']:
        self.pymodel = self.pymodel.cuda()
    self.pymodel.set_learnable_params(opts['ft_layers'])

    # Init criterion and optimizers
    self.criterion = BCELoss()
    init_optimizer = set_optimizer(
        self.pymodel, opts['lr_init'], opts['lr_mult'])
    self.update_optimizer = set_optimizer(
        self.pymodel, opts['lr_update'], opts['lr_mult'])

    # Metric model
    self.metric_model = model_load(opts['metric_model'])

    # Warm-up forward pass on a random batch; the output is discarded.
    # np.random.rand is kept on purpose: it advances the same global NumPy
    # RNG that np.random.permutation uses below, so seeded runs are unchanged.
    warmup_batch = torch.Tensor(np.random.rand(5, 3, 107, 107))
    warmup_batch = Variable(warmup_batch).type(torch.FloatTensor).cuda()
    self.metric_model.eval()
    self.metric_model(warmup_batch)

    self.target_metric_feature = get_target_feature(
        self.metric_model, target_bbox, np.array(image))
    self.target_metric_feature_all = [self.target_metric_feature]

    # Draw pos/neg samples
    pos_examples = SampleGenerator(
        'gaussian', image.size, opts['trans_pos'], opts['scale_pos'])(
            target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])

    n_neg_half = int(opts['n_neg_init'] * 0.5)
    neg_uniform = SampleGenerator(
        'uniform', image.size,
        opts['trans_neg_init'], opts['scale_neg_init'])(
            target_bbox, n_neg_half, opts['overlap_neg_init'])
    neg_whole = SampleGenerator('whole', image.size)(
        target_bbox, n_neg_half, opts['overlap_neg_init'])
    neg_examples = np.concatenate([neg_uniform, neg_whole])
    neg_examples = np.random.permutation(neg_examples)

    # Metric features/distances of the positives, computed in chunks of 50
    # samples. The feature buffer is allocated with 1024 columns.
    n_pos = pos_examples.shape[0]
    self.pos_all = np.zeros(n_pos)
    self.pos_all_feature = np.zeros((n_pos, 1024))
    for start in range(0, n_pos, 50):
        stop = start + 50
        with torch.no_grad():
            pos_metric_feature, pos_metric_dist = get_metric_dist_lof(
                self.metric_model, pos_examples[start:stop],
                np.array(image), self.target_metric_feature, opts)
        self.pos_all[start:stop] = pos_metric_dist.cpu().detach().numpy()
        self.pos_all_feature[start:stop] = (
            pos_metric_feature.cpu().detach().numpy())

    # Mean positive feature, kept as a (1, 1024) float CUDA tensor.
    self.pos_feature_center = torch.from_numpy(
        np.mean(self.pos_all_feature, axis=0).reshape((1, 1024))
    ).float().cuda()
    self.clf = lof_fit(self.pos_all_feature[0:opts['n_pos_update']],
                       k=opts['pos_k'], method=opts['method'])
    del pos_metric_feature, pos_metric_dist
    torch.cuda.empty_cache()

    # Thresholds are derived from the initial positives and written back
    # into the shared opts dict.
    opts['pos_thresh'] = self.pos_all.max() * opts['pos_rate']  # 2.5
    opts['metric_similar_thresh'] = (
        self.pos_all.mean() * opts['similar_rate'])

    # Extract pos/neg features
    pos_feats = forward_samples(self.pymodel, image, pos_examples, opts)
    neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
    self.feat_dim = pos_feats.size(-1)

    # Initial training
    train(self.pymodel, self.criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], opts=opts)
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(
            target_bbox, opts['n_bbreg'], opts['overlap_bbreg'])
    bbreg_feats = forward_samples(self.pymodel, image, bbreg_examples, opts)
    self.bbreg = BBRegressor(image.size)
    self.bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators
    self.sample_generator = SampleGenerator(
        'gaussian', image.size, opts['trans'], opts['scale'])
    self.pos_generator = SampleGenerator(
        'gaussian', image.size, opts['trans_pos'], opts['scale_pos'])
    self.neg_generator = SampleGenerator(
        'uniform', image.size, opts['trans_neg'], opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = self.neg_generator(
        target_bbox, opts['n_neg_update'], opts['overlap_neg_init'])
    neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
    self.pos_feats_all = [pos_feats]
    self.neg_feats_all = [neg_feats]

    # Top-5 fc6 scores (column 1) of samples drawn around the initial box.
    samples = self.sample_generator(target_bbox, opts['n_samples'])
    sample_scores = forward_samples(
        self.pymodel, image, samples, out_layer='fc6', opts=opts)
    self.top_scores, _ = sample_scores[:, 1].topk(5)
    self.spf_total = 0
def run_mdnet(img_list, init_bbox, gt=None, savefig_dir='', display=False):
    """Track a target through an image sequence with MDNet.

    The model is initialised on ``img_list[0]`` with ``init_bbox`` and then
    applied frame by frame. On each frame the target box is the mean of the
    five highest-scoring sampled boxes; when the mean score is positive the
    boxes are also refined by the bbox regressor and new training features
    are collected. The model is retrained on failure (short-term update) and
    every ``opts['long_interval']`` frames (long-term update).

    Args:
        img_list: Sequence of image paths, each opened with ``Image.open``.
        init_bbox: Target box in the first frame; converted with
            ``np.array`` and written to row 0 of both results.
        gt: Optional ground-truth array indexed as ``gt[i, :2]``,
            ``gt[i, 2]``, ``gt[i, 3]`` (drawn as xy, width, height). When
            given, per-frame overlap and the mean IoU are printed.
        savefig_dir: Directory for per-frame ``NNNN.jpg`` visualisations;
            the empty string disables saving.
        display: If true, show the visualisation interactively.

    Returns:
        Tuple ``(result, result_bb, fps)``: ``result`` holds the raw
        per-frame estimates, ``result_bb`` the regressor-refined boxes (both
        of shape ``(len(img_list), 4)``), and ``fps`` is
        ``len(img_list) / total_seconds``.
    """
    # Init bbox
    n_frames = len(img_list)
    target_bbox = np.array(init_bbox)
    result = np.zeros((n_frames, 4))
    result_bb = np.zeros((n_frames, 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(n_frames)
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()

    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator(
        'gaussian', image.size, opts['trans_pos'], opts['scale_pos'])(
            target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])

    n_neg_half = int(opts['n_neg_init'] * 0.5)
    neg_uniform = SampleGenerator(
        'uniform', image.size,
        opts['trans_neg_init'], opts['scale_neg_init'])(
            target_bbox, n_neg_half, opts['overlap_neg_init'])
    neg_whole = SampleGenerator('whole', image.size)(
        target_bbox, n_neg_half, opts['overlap_neg_init'])
    neg_examples = np.concatenate([neg_uniform, neg_whole])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(
            target_bbox, opts['n_bbreg'], opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator(
        'gaussian', image.size, opts['trans'], opts['scale'])
    pos_generator = SampleGenerator(
        'gaussian', image.size, opts['trans_pos'], opts['scale_pos'])
    neg_generator = SampleGenerator(
        'uniform', image.size, opts['trans_neg'], opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(
        target_bbox, opts['n_neg_update'], opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    # Initialisation time counts towards the total used for the fps figure.
    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        # Borderless figure whose single axes covers the whole canvas.
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(
                tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                linewidth=3, edgecolor="#00ff00", zorder=1, fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(
            tuple(result_bb[0, :2]), result_bb[0, 2], result_bb[0, 3],
            linewidth=3, edgecolor="#ff0000", zorder=1, fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, n_frames):
        tic = time.time()

        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox from the five best-scoring samples
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect: both pools are bounded, the oldest entry is dropped
        # once the configured number of frames is exceeded.
        if success:
            pos_examples = pos_generator(
                target_bbox, opts['n_pos_update'], opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(
                target_bbox, opts['n_neg_update'], opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(
                    os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                    dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))

    # pyplot keeps every figure it created registered until it is closed, so
    # release the one that was only used for saving frames. In display mode
    # the window is left open, exactly as before.
    if savefig and not display:
        plt.close(fig)

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps