def init_pymdnet(self, image, init_bbox):
    """Initialize the pyMDNet verifier on the first frame.

    Loads the MDNet weights, fine-tunes the model on pos/neg samples drawn
    around the initial box, trains a bounding-box regressor, and sets up the
    sample generators and feature banks used for later online updates.

    Args:
        image: first frame; `image.size` is used, so presumably a PIL Image
            (w, h) — TODO confirm against callers.
        init_bbox: initial target box; passed through np.array, apparently
            in (x, y, w, h) order — verify against caller.

    Side effects: sets self.pymodel, self.criterion, self.update_optimizer,
    self.feat_dim, self.bbreg, self.sample_generator, self.pos_generator,
    self.neg_generator, self.pos_feats_all, self.neg_feats_all,
    self.last_result.

    NOTE(review): source was whitespace-mangled; statement nesting below was
    reconstructed conservatively — confirm against upstream LTMU code.
    """
    target_bbox = np.array(init_bbox)
    self.last_result = target_bbox
    # `base_path` comes from enclosing module scope (not visible here).
    self.pymodel = MDNet(
        os.path.join(base_path, 'DiMP_LTMU/pyMDNet/models/mdnet_imagenet_vid.pth'))
    if opts['use_gpu']:
        self.pymodel = self.pymodel.cuda()
    self.pymodel.set_learnable_params(opts['ft_layers'])

    # Init criterion and optimizer.  The init optimizer is local (deleted
    # below); the update optimizer is kept for online fine-tuning.
    self.criterion = BCELoss()
    init_optimizer = set_optimizer(self.pymodel, opts['lr_init'], opts['lr_mult'])
    self.update_optimizer = set_optimizer(self.pymodel, opts['lr_update'],
                                          opts['lr_mult'])

    tic = time.time()

    # Draw pos/neg samples around the initial box.  Negatives are half
    # near-target ('uniform') and half image-wide ('whole'), then shuffled.
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(
        target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(
            target_bbox, int(opts['n_neg_init'] * 0.5), opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(
            target_bbox, int(opts['n_neg_init'] * 0.5), opts['overlap_neg_init'])])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features.
    pos_feats = forward_samples(self.pymodel, image, pos_examples, opts)
    neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
    self.feat_dim = pos_feats.size(-1)

    # Initial training (fine-tune fc layers on this frame).
    train(self.pymodel, self.criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], opts=opts)
    # Free the one-shot optimizer and negative features to reclaim GPU memory.
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor on features from jittered boxes.
    bbreg_examples = SampleGenerator('uniform', image.size, opts['trans_bbreg'],
                                     opts['scale_bbreg'], opts['aspect_bbreg'])(
        target_bbox, opts['n_bbreg'], opts['overlap_bbreg'])
    bbreg_feats = forward_samples(self.pymodel, image, bbreg_examples, opts)
    self.bbreg = BBRegressor(image.size)
    self.bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators (reused every frame for online updates).
    self.sample_generator = SampleGenerator('gaussian', image.size,
                                            opts['trans'], opts['scale'])
    self.pos_generator = SampleGenerator('gaussian', image.size,
                                         opts['trans_pos'], opts['scale_pos'])
    self.neg_generator = SampleGenerator('uniform', image.size,
                                         opts['trans_neg'], opts['scale_neg'])

    # Init pos/neg feature banks for update.
    # NOTE(review): uses 'overlap_neg_init' (not 'overlap_neg_update') here,
    # matching the code as written — confirm this is intentional.
    neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'],
                                      opts['overlap_neg_init'])
    neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
    self.pos_feats_all = [pos_feats]
    self.neg_feats_all = [neg_feats]

    # Wall-clock time spent on initialization (computed but unused here).
    spf_total = time.time() - tic
class Dimp_LTMU_Tracker(object):
    """Long-term tracker combining several components:

    - DiMP ('dimp50') as the short-term local tracker,
    - pyMDNet as an online-trained verifier,
    - GlobalTrack for image-wide re-detection when the target is lost,
    - an optional SiamMask branch for mask/box refinement,
    - a TF-1.x LSTM "meta-updater" that decides when to update the verifier,
    - a metric network measuring embedding distance to the initial target.

    Box conventions in this class are mixed: `self.last_gt` is
    (ymin, xmin, ymax, xmax) while most component APIs take (x, y, w, h).

    NOTE(review): source was whitespace-mangled; statement nesting was
    reconstructed conservatively — confirm against upstream LTMU code.
    """

    def __init__(self, image, region, p=None, groundtruth=None):
        """Build all sub-models and run one bootstrap tracking step.

        Args:
            image: first frame (numpy RGB array, judging by `image.shape`
                usage in local_track — TODO confirm).
            region: initial box object with .x/.y/.width/.height attributes.
            p: parameter object (use_mask, model_dir, checkpoint,
               start_frame, visualization, ...).
            groundtruth: optional GT boxes, kept only for visualization.
        """
        self.p = p
        self.i = 0          # frame counter
        self.t_id = 0       # verifier update counter (for long-interval updates)
        if groundtruth is not None:
            self.groundtruth = groundtruth

        # TF-1.x session for the meta-updater; cap GPU memory share at 30%.
        tfconfig = tf.ConfigProto()
        tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.3
        self.sess = tf.Session(config=tfconfig)

        init_gt1 = [region.x, region.y, region.width, region.height]
        init_gt = [
            init_gt1[1], init_gt1[0], init_gt1[1] + init_gt1[3],
            init_gt1[0] + init_gt1[2]
        ]  # ymin xmin ymax xmax
        self.last_gt = init_gt

        # Initialize every component on the first frame.
        self.init_pymdnet(image, init_gt1)
        self.local_init(image, init_gt1)
        self.Golbal_Track_init(image, init_gt1)
        if self.p.use_mask:
            self.siammask_init(image, init_gt1)
        self.tc_init(self.p.model_dir)
        self.metric_init(image, np.array(init_gt1))

        # Rolling histories fed to the meta-updater LSTM.
        self.dis_record = []     # metric distances to the anchor feature
        self.state_record = []   # normalized (x1, y1, x2, y2) states
        self.rv_record = []      # peak response values
        self.all_map = []        # raw score maps
        self.count = 0           # consecutive not-found frames

        # Bootstrap: run one local-track step on the first frame so that
        # score_map/score_max are available via get_first_state().
        local_state1, self.score_map, update, self.score_max, dis, flag, update_score = \
            self.local_track(image)

        # Re-seed the local tracker's pos/size from last_gt (center, h/w order).
        self.local_Tracker.pos = torch.FloatTensor([
            (self.last_gt[0] + self.last_gt[2] - 1) / 2,
            (self.last_gt[1] + self.last_gt[3] - 1) / 2
        ])
        self.local_Tracker.target_sz = torch.FloatTensor([
            (self.last_gt[2] - self.last_gt[0]),
            (self.last_gt[3] - self.last_gt[1])
        ])

    def get_first_state(self):
        """Return (score_map, score_max) produced by the bootstrap step."""
        return self.score_map, self.score_max

    def siammask_init(self, im, init_gt):
        """Load SiamMask and initialize its state on the first frame.

        NOTE(review): uses argparse defaults as a config mechanism; any real
        command-line args would leak into `parse_args()` here.
        """
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)  # SiamMask expects BGR
        parser = argparse.ArgumentParser(description='PyTorch Tracking Demo')
        parser.add_argument(
            '--resume',
            default='SiamMask/experiments/siammask/SiamMask_VOT_LD.pth',
            type=str, metavar='PATH',
            help='path to latest checkpoint (default: none)')
        parser.add_argument(
            '--config', dest='config',
            default='SiamMask/experiments/siammask/config_vot19lt.json',
            help='hyper-parameter of SiamMask in json format')
        args = parser.parse_args()
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        torch.backends.cudnn.benchmark = True

        # Setup model and restore pretrained weights.
        cfg = load_config(args)
        self.siammask = Custom(anchors=cfg['anchors'])
        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(
                args.resume)
            self.siammask = load_pretrain(self.siammask, args.resume)
        self.siammask.eval().to(device)

        # Seed SiamMask state with center position and size from (x, y, w, h).
        x = init_gt[0]
        y = init_gt[1]
        w = init_gt[2]
        h = init_gt[3]
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        self.siamstate = siamese_init(im, target_pos, target_sz,
                                      self.siammask, cfg['hp'])

    def siammask_track(self, im):
        """Run one SiamMask step; return (peak score, boolean mask)."""
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)  # SiamMask expects BGR
        self.siamstate = siamese_track(self.siamstate, im,
                                       mask_enable=True,
                                       refine_enable=True)  # track
        score = np.max(self.siamstate['score'])
        # 'ploygon' is the (misspelled) key used by the SiamMask API.
        location = self.siamstate['ploygon'].flatten()  # computed but unused
        mask = self.siamstate['mask'] > self.siamstate['p'].seg_thr
        # (Removed dead commented-out OpenCV visualization code.)
        return score, mask

    def Golbal_Track_init(self, image, init_box):
        """Initialize the GlobalTrack re-detector (name typo kept: callers
        use 'Golbal_Track_init'). Converts (x, y, w, h) -> (x1, y1, x2, y2).
        """
        init_box = [
            init_box[0], init_box[1], init_box[0] + init_box[2],
            init_box[1] + init_box[3]
        ]
        cfg_file = 'Global_Track/configs/qg_rcnn_r50_fpn.py'
        ckp_file = 'Global_Track/checkpoints/qg_rcnn_r50_fpn_coco_got10k_lasot.pth'
        transforms = data.BasicPairTransforms(train=False)
        self.Global_Tracker = GlobalTrack(cfg_file, ckp_file, transforms,
                                          name_suffix='qg_rcnn_r50_fpn')
        self.Global_Tracker.init(image, init_box)

    def Global_Track_eval(self, image, num):
        """Return the top-`num` re-detection candidates as (x, y, w, h) rows.

        `results` rows are apparently (x1, y1, x2, y2, score) — the last
        column is sorted on and the first four are converted to xywh.
        """
        # xywh
        results = self.Global_Tracker.update(image)
        index = np.argsort(results[:, -1])[::-1]  # descending by score
        max_index = index[:num]
        can_boxes = results[max_index][:, :4]
        can_boxes = np.array([
            can_boxes[:, 0], can_boxes[:, 1],
            can_boxes[:, 2] - can_boxes[:, 0],
            can_boxes[:, 3] - can_boxes[:, 1]
        ]).transpose()
        return can_boxes

    def init_pymdnet(self, image, init_bbox):
        """Initialize the pyMDNet verifier on the first frame.

        Same procedure as the module-level variant, but loads weights from a
        relative './pyMDNet/...' path. Sets the verifier model, optimizers,
        bbox regressor, sample generators and pos/neg feature banks.
        """
        target_bbox = np.array(init_bbox)
        self.last_result = target_bbox
        self.pymodel = MDNet('./pyMDNet/models/mdnet_imagenet_vid.pth')
        if opts['use_gpu']:
            self.pymodel = self.pymodel.cuda()
        self.pymodel.set_learnable_params(opts['ft_layers'])

        # Init criterion and optimizer.
        self.criterion = BCELoss()
        init_optimizer = set_optimizer(self.pymodel, opts['lr_init'],
                                       opts['lr_mult'])
        self.update_optimizer = set_optimizer(self.pymodel, opts['lr_update'],
                                              opts['lr_mult'])

        tic = time.time()

        # Draw pos/neg samples (negatives: half near-target, half whole-image).
        pos_examples = SampleGenerator('gaussian', image.size,
                                       opts['trans_pos'], opts['scale_pos'])(
            target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
        neg_examples = np.concatenate([
            SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                            opts['scale_neg_init'])(
                target_bbox, int(opts['n_neg_init'] * 0.5),
                opts['overlap_neg_init']),
            SampleGenerator('whole', image.size)(
                target_bbox, int(opts['n_neg_init'] * 0.5),
                opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        # Extract pos/neg features.
        pos_feats = forward_samples(self.pymodel, image, pos_examples, opts)
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.feat_dim = pos_feats.size(-1)

        # Initial training.
        train(self.pymodel, self.criterion, init_optimizer, pos_feats,
              neg_feats, opts['maxiter_init'], opts=opts)
        del init_optimizer, neg_feats  # reclaim GPU memory
        torch.cuda.empty_cache()

        # Train bbox regressor.
        bbreg_examples = SampleGenerator(
            'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
            opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                                  opts['overlap_bbreg'])
        bbreg_feats = forward_samples(self.pymodel, image, bbreg_examples,
                                      opts)
        self.bbreg = BBRegressor(image.size)
        self.bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
        del bbreg_feats
        torch.cuda.empty_cache()

        # Init sample generators.
        self.sample_generator = SampleGenerator('gaussian', image.size,
                                                opts['trans'], opts['scale'])
        self.pos_generator = SampleGenerator('gaussian', image.size,
                                             opts['trans_pos'],
                                             opts['scale_pos'])
        self.neg_generator = SampleGenerator('uniform', image.size,
                                             opts['trans_neg'],
                                             opts['scale_neg'])

        # Init pos/neg feature banks for update.
        # NOTE(review): 'overlap_neg_init' used here (not *_update) — confirm.
        neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'],
                                          opts['overlap_neg_init'])
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.pos_feats_all = [pos_feats]
        self.neg_feats_all = [neg_feats]

        spf_total = time.time() - tic  # computed but unused

    def pymdnet_eval(self, image, samples):
        """Score candidate boxes with the verifier; return the positive-class
        (column 1) fc6 scores as a numpy array."""
        sample_scores = forward_samples(self.pymodel, image, samples,
                                        out_layer='fc6', opts=opts)
        return sample_scores[:, 1][:].cpu().numpy()

    # (Removed dead commented-out `pymdnet_track` implementation; the live
    # tracking path is `tracking()` below.)

    def collect_samples_pymdnet(self, image):
        """Harvest pos/neg training features around `self.last_gt` and push
        them into the bounded long/short-term feature banks."""
        self.t_id += 1
        # Convert last_gt (ymin, xmin, ymax, xmax) -> (x, y, w, h).
        target_bbox = np.array([
            self.last_gt[1], self.last_gt[0],
            self.last_gt[3] - self.last_gt[1],
            self.last_gt[2] - self.last_gt[0]
        ])
        pos_examples = self.pos_generator(target_bbox, opts['n_pos_update'],
                                          opts['overlap_pos_update'])
        if len(pos_examples) > 0:
            pos_feats = forward_samples(self.pymodel, image, pos_examples,
                                        opts)
            self.pos_feats_all.append(pos_feats)
        # Keep at most n_frames_long positive entries (FIFO).
        if len(self.pos_feats_all) > opts['n_frames_long']:
            del self.pos_feats_all[0]
        neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'],
                                          opts['overlap_neg_update'])
        if len(neg_examples) > 0:
            neg_feats = forward_samples(self.pymodel, image, neg_examples,
                                        opts)
            self.neg_feats_all.append(neg_feats)
        # Keep at most n_frames_short negative entries (FIFO).
        if len(self.neg_feats_all) > opts['n_frames_short']:
            del self.neg_feats_all[0]

    def pymdnet_short_term_update(self):
        """Fine-tune the verifier on the recent short-term feature window."""
        # Short term update
        nframes = min(opts['n_frames_short'], len(self.pos_feats_all))
        pos_data = torch.cat(self.pos_feats_all[-nframes:], 0)
        neg_data = torch.cat(self.neg_feats_all, 0)
        train(self.pymodel, self.criterion, self.update_optimizer, pos_data,
              neg_data, opts['maxiter_update'], opts=opts)

    def pymdnet_long_term_update(self):
        """Every `long_interval` collected updates, fine-tune the verifier on
        the full long-term feature banks. No-op otherwise."""
        if self.t_id % opts['long_interval'] == 0:
            # Long term update
            pos_data = torch.cat(self.pos_feats_all, 0)
            neg_data = torch.cat(self.neg_feats_all, 0)
            train(self.pymodel, self.criterion, self.update_optimizer,
                  pos_data, neg_data, opts['maxiter_update'], opts=opts)

    # (Removed dead commented-out `pymdnet_bbox_reg` implementation.)

    def metric_init(self, im, init_box):
        """Load the metric network and compute the anchor (target) embedding
        from the initial box. `init_box` must be an np.array (x, y, w, h)."""
        self.metric_model = ft_net(class_num=1120)
        path = '../utils/metric_net/metric_model/metric_model.pt'
        self.metric_model.eval()
        self.metric_model = self.metric_model.cuda()
        self.metric_model.load_state_dict(torch.load(path))
        # Warm-up forward pass with a dummy 107x107 input.
        tmp = np.random.rand(1, 3, 107, 107)
        tmp = (Variable(torch.Tensor(tmp))).type(torch.FloatTensor).cuda()
        # get target feature
        self.metric_model(tmp)
        init_box = init_box.reshape((1, 4))
        anchor_region = me_extract_regions(im, init_box)
        anchor_region = process_regions(anchor_region)
        anchor_region = torch.Tensor(anchor_region)
        anchor_region = (Variable(anchor_region)).type(
            torch.FloatTensor).cuda()
        self.anchor_feature, _ = self.metric_model(anchor_region)

    def metric_eval(self, im, boxes, anchor_feature):
        """Return L2 distances between the boxes' embeddings and the anchor
        embedding (smaller = more target-like)."""
        box_regions = me_extract_regions(np.array(im), boxes)
        box_regions = process_regions(box_regions)
        box_regions = torch.Tensor(box_regions)
        box_regions = (Variable(box_regions)).type(torch.FloatTensor).cuda()
        box_features, class_result = self.metric_model(box_regions)
        class_result = torch.softmax(class_result, dim=1)  # computed but unused
        ap_dist = torch.norm(anchor_feature - box_features, 2,
                             dim=1).view(-1)
        return ap_dist

    def tc_init(self, model_dir):
        """Build the TF-1.x meta-updater graph (LSTM over per-frame cues +
        a small conv net over 19x19 score maps) and restore its checkpoint.
        """
        self.tc_model = tclstm()
        self.X_input = tf.placeholder(
            "float", [None, tcopts['time_steps'], tcopts['lstm_num_input']])
        self.maps = tf.placeholder("float", [None, 19, 19, 1])
        self.map_logits = self.tc_model.map_net(self.maps)
        self.Inputs = tf.concat((self.X_input, self.map_logits), axis=2)
        self.logits, _ = self.tc_model.net(self.Inputs)

        # Restore only the meta-updater's own variables.
        variables_to_restore = [
            var for var in tf.global_variables()
            if (var.name.startswith('tclstm')
                or var.name.startswith('mapnet'))
        ]
        saver = tf.train.Saver(var_list=variables_to_restore)
        if self.p.checkpoint is None:
            checkpoint = tf.train.latest_checkpoint(
                os.path.join('./meta_updater', model_dir))
        else:
            checkpoint = './meta_updater/' + self.p.model_dir + \
                '/lstm_model.ckpt-' + str(self.p.checkpoint)
        saver.restore(self.sess, checkpoint)

    def local_init(self, image, init_bbox):
        """Create and initialize the DiMP-50 local tracker."""
        local_tracker = Tracker('dimp', 'dimp50')
        params = local_tracker.get_parameters()
        debug_ = getattr(params, 'debug', 0)
        params.debug = debug_
        params.tracker_name = local_tracker.name
        params.param_name = local_tracker.parameter_name
        self.local_Tracker = local_tracker.tracker_class(params)
        init_box = dict()
        init_box['init_bbox'] = init_bbox
        self.local_Tracker.initialize(image, init_box)

    def local_track(self, image):
        """One DiMP step plus meta-updater gating.

        Returns (state_xywh, score_map, update_decision, max_score,
        metric_distance, dimp_flag, update_score). Once enough history is
        accumulated (>= p.start_frame), the LSTM overrides DiMP's own
        update flag.
        """
        state, score_map, test_x, scale_ind, sample_pos, sample_scales, flag, s = \
            self.local_Tracker.track_updater(image)
        update_score = 0
        update_flag = flag not in ['not_found', 'uncertain']
        update = update_flag
        max_score = max(score_map.flatten())
        self.all_map.append(score_map)

        # Record per-frame cues for the meta-updater.
        local_state = np.array(state).reshape((1, 4))
        ap_dis = self.metric_eval(image, local_state, self.anchor_feature)
        self.dis_record.append(ap_dis.data.cpu().numpy()[0])
        h = image.shape[0]
        w = image.shape[1]
        self.state_record.append([
            local_state[0][0] / w, local_state[0][1] / h,
            (local_state[0][0] + local_state[0][2]) / w,
            (local_state[0][1] + local_state[0][3]) / h
        ])
        self.rv_record.append(max_score)

        # Let the LSTM decide whether to update, once history is long enough.
        if len(self.state_record) >= self.p.start_frame:
            dis = np.array(self.dis_record[-tcopts["time_steps"]:]).reshape(
                (tcopts["time_steps"], 1))
            rv = np.array(self.rv_record[-tcopts["time_steps"]:]).reshape(
                (tcopts["time_steps"], 1))
            state_tc = np.array(self.state_record[-tcopts["time_steps"]:])
            map_input = np.array(self.all_map[-tcopts["time_steps"]:])
            map_input = np.reshape(map_input,
                                   [tcopts['time_steps'], 1, 19, 19])
            map_input = map_input.transpose((0, 2, 3, 1))  # NCHW -> NHWC
            X_input = np.concatenate((state_tc, rv, dis), axis=1)
            logits = self.sess.run(self.logits,
                                   feed_dict={
                                       self.X_input:
                                           np.expand_dims(X_input, axis=0),
                                       self.maps: map_input
                                   })
            update = logits[0][0] < logits[0][1]  # class 1 = "do update"
            update_score = logits[0][1]

        hard_negative = (flag == 'hard_negative')
        learning_rate = getattr(self.local_Tracker.params,
                                'hard_negative_learning_rate',
                                None) if hard_negative else None

        if update:
            # Get train sample
            train_x = test_x[scale_ind:scale_ind + 1, ...]
            # Create target_box and label for spatial sample
            target_box = self.local_Tracker.get_iounet_box(
                self.local_Tracker.pos, self.local_Tracker.target_sz,
                sample_pos[scale_ind, :], sample_scales[scale_ind])
            # Update the classifier model
            self.local_Tracker.update_classifier(train_x, target_box,
                                                 learning_rate,
                                                 s[scale_ind, ...])
        # Store (ymin, xmin, ymax, xmax) from the xywh state.
        self.last_gt = [
            state[1], state[0], state[1] + state[3], state[0] + state[2]
        ]
        return state, score_map, update, max_score, ap_dis.data.cpu().numpy(
        )[0], flag, update_score

    def locate(self, image):
        """Localization-only half of a DiMP step (no model update).

        Returns (translation_vec, scale_ind, s, flag, sample_pos,
        sample_scales, test_x) for a later `local_update` call.
        """
        # Convert image
        im = numpy_to_torch(image)
        self.local_Tracker.im = im  # For debugging only

        # ------- LOCALIZATION ------- #
        # Get sample
        sample_pos = self.local_Tracker.pos.round()
        sample_scales = self.local_Tracker.target_scale * \
            self.local_Tracker.params.scale_factors
        test_x = self.local_Tracker.extract_processed_sample(
            im, self.local_Tracker.pos, sample_scales,
            self.local_Tracker.img_sample_sz)

        # Compute scores
        scores_raw = self.local_Tracker.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = \
            self.local_Tracker.localize_target(scores_raw)
        return translation_vec, scale_ind, s, flag, sample_pos, \
            sample_scales, test_x

    def local_update(self, sample_pos, translation_vec, scale_ind,
                     sample_scales, s, test_x, update_flag=None):
        """Model-update half of a DiMP step, mirroring pytracking's ATOM/DiMP
        update logic (memory update + filter optimization)."""
        # Check flags and set learning rate if hard negative
        if update_flag is None:
            update_flag = self.flag not in ['not_found', 'uncertain']
        hard_negative = (self.flag == 'hard_negative')
        learning_rate = self.local_Tracker.params.hard_negative_learning_rate \
            if hard_negative else None

        if update_flag:
            # Get train sample
            train_x = TensorList(
                [x[scale_ind:scale_ind + 1, ...] for x in test_x])
            # Create label for sample
            train_y = self.local_Tracker.get_label_function(
                sample_pos, sample_scales[scale_ind])
            # Update memory
            self.local_Tracker.update_memory(train_x, train_y, learning_rate)

        # Train filter: extra iterations on hard negatives, otherwise on the
        # regular train_skipping schedule.
        if hard_negative:
            self.local_Tracker.filter_optimizer.run(
                self.local_Tracker.params.hard_negative_CG_iter)
        elif (self.local_Tracker.frame_num - 1) % \
                self.local_Tracker.params.train_skipping == 0:
            self.local_Tracker.filter_optimizer.run(
                self.local_Tracker.params.CG_iter)

    def tracking(self, image):
        """Process one frame.

        Pipeline: local DiMP track -> verify with pyMDNet -> on success,
        optionally refine with SiamMask; on failure, re-detect with
        GlobalTrack (accepting a candidate only after 5 consecutive lost
        frames); finally collect samples / update the verifier.

        Returns ([x, y, w, h], score_map, 0, confidence_score, 0).
        """
        self.i += 1
        mask = None
        candidate_bboxes = None

        # Re-seed the local tracker from last_gt before tracking.
        self.local_Tracker.pos = torch.FloatTensor([
            (self.last_gt[0] + self.last_gt[2] - 1) / 2,
            (self.last_gt[1] + self.last_gt[3] - 1) / 2
        ])
        self.local_Tracker.target_sz = torch.FloatTensor([
            (self.last_gt[2] - self.last_gt[0]),
            (self.last_gt[3] - self.last_gt[1])
        ])

        tic = time.time()
        local_state, self.score_map, update, local_score, dis, flag, update_score = \
            self.local_track(image)

        # Verify the locally-tracked box with pyMDNet.
        md_score = self.pymdnet_eval(image,
                                     np.array(local_state).reshape([-1, 4]))[0]
        self.score_max = md_score

        if md_score > 0 and flag == 'normal':
            # Verified: optionally refine the box with SiamMask.
            self.flag = 'found'
            if self.p.use_mask:
                # SiamMask state is (x, y)-ordered; tracker pos/sz are (y, x),
                # hence the [::-1] flips on both directions.
                self.siamstate['target_pos'] = self.local_Tracker.pos.numpy(
                )[::-1]
                self.siamstate[
                    'target_sz'] = self.local_Tracker.target_sz.numpy()[::-1]
                siamscore, mask = self.siammask_track(
                    cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
                self.local_Tracker.pos = torch.FloatTensor(
                    self.siamstate['target_pos'][::-1].copy())
                self.local_Tracker.target_sz = torch.FloatTensor(
                    self.siamstate['target_sz'][::-1].copy())
                local_state = torch.cat(
                    (self.local_Tracker.pos[[1, 0]] -
                     (self.local_Tracker.target_sz[[1, 0]] - 1) / 2,
                     self.local_Tracker.target_sz[[1, 0]])).data.cpu().numpy()
            self.last_gt = np.array([
                local_state[1], local_state[0],
                local_state[1] + local_state[3],
                local_state[0] + local_state[2]
            ])
        elif md_score < 0 or flag == 'not_found':
            # Lost: global re-detection, verify candidates with pyMDNet.
            self.count += 1
            self.flag = 'not_found'
            candidate_bboxes = self.Global_Track_eval(image, 10)
            candidate_scores = self.pymdnet_eval(image, candidate_bboxes)
            max_id = np.argmax(candidate_scores)
            if candidate_scores[max_id] > 0:
                redet_bboxes = candidate_bboxes[max_id]
                # Only accept re-detection after 5 consecutive lost frames.
                if self.count >= 5:
                    self.last_gt = np.array([
                        redet_bboxes[1], redet_bboxes[0],
                        redet_bboxes[1] + redet_bboxes[3],
                        redet_bboxes[2] + redet_bboxes[0]
                    ])
                    self.local_Tracker.pos = torch.FloatTensor([
                        (self.last_gt[0] + self.last_gt[2] - 1) / 2,
                        (self.last_gt[1] + self.last_gt[3] - 1) / 2
                    ])
                    self.local_Tracker.target_sz = torch.FloatTensor([
                        (self.last_gt[2] - self.last_gt[0]),
                        (self.last_gt[3] - self.last_gt[1])
                    ])
                    self.score_max = candidate_scores[max_id]
                    self.count = 0

        # Online verifier maintenance (long-term update no-ops unless due).
        if update:
            self.collect_samples_pymdnet(image)
        self.pymdnet_long_term_update()

        width = self.last_gt[3] - self.last_gt[1]
        height = self.last_gt[2] - self.last_gt[0]
        toc = time.time() - tic
        print(toc)  # per-frame wall-clock time

        # Confidence combines local score with a squashed verifier score.
        confidence_score = np.clip(
            (local_score + np.arctan(0.2 * self.score_max) / math.pi + 0.5) /
            2, 0, 1)

        if self.p.visualization:
            show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                     np.array(self.last_gt, dtype=np.int32), '2',
                     groundtruth=self.groundtruth, update=update_score,
                     can_bboxes=candidate_bboxes, frame_id=self.i,
                     tracker_score=md_score, mask=mask)

        return [
            float(self.last_gt[1]), float(self.last_gt[0]), float(width),
            float(height)
        ], self.score_map, 0, confidence_score, 0
neg_feats = forward_samples(model, image, neg_examples) # Initial training train(model, criterion, init_optimizer, pos_feats, neg_feats, opts['maxiter_init']) del init_optimizer, neg_feats torch.cuda.empty_cache() # Train bbox regressor bbreg_examples = SampleGenerator('uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'], opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'], opts['overlap_bbreg']) bbreg_feats = forward_samples(model, image, bbreg_examples) bbreg = BBRegressor(image.size) bbreg.train(bbreg_feats, bbreg_examples, target_bbox) del bbreg_feats torch.cuda.empty_cache() # Init sample generators for update sample_generator = SampleGenerator('gaussian', image.size, opts['trans'], opts['scale']) pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'], opts['scale_pos']) neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'], opts['scale_neg']) # Init pos/neg features for update neg_examples = neg_generator(target_bbox, opts['n_neg_update'], opts['overlap_neg_init'])
def run_mdnet(img_list, init_bbox, gt=None, savefig_dir='', display=False):
    """Run the MDNet tracker over a full image sequence.

    Args:
        img_list: list of image file paths, ordered by frame.
        init_bbox: initial target box in frame 0, apparently (x, y, w, h)
            order — confirm against callers.
        gt: optional (N, 4) ground-truth boxes; used for overlap reporting
            and visualization only.
        savefig_dir: if non-empty, per-frame visualizations are saved there.
        display: if True, shows live matplotlib visualization.

    Returns:
        (result, result_bb, fps): raw tracked boxes, regressed boxes, and
        average frames-per-second over the sequence.

    NOTE(review): source was whitespace-mangled; statement nesting below was
    reconstructed conservatively — confirm against upstream pyMDNet code.
    """
    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox
    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer.  init_optimizer is used once (frame 0)
    # and deleted; update_optimizer drives all online updates.
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'],
                                     opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples.  Negatives are half near-target ('uniform') and
    # half image-wide ('whole'), then shuffled.
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats  # reclaim GPU memory
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update.
    # (Translated from Chinese: these three are *generators*, not the
    # generated sample data.)
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size,
                                    opts['trans_pos'], opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]
    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result_bb[0, :2]), result_bb[0, 2],
                             result_bb[0, 3], linewidth=3,
                             edgecolor="#ff0000", zorder=1, fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox.  (Translated from Chinese: this becomes the
        # next frame's target_bbox — the estimate is carried forward.)
        samples = sample_generator(target_bbox, opts['n_samples'])
        # (Translated from Chinese: forward_samples scores all candidate
        # samples in one network pass.)
        sample_scores = forward_samples(model, image, samples,
                                        out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            # (Translated from Chinese: average the top boxes to smooth.)
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure.  (Translated from Chinese: sampling
        # spread adapts to whether tracking currently succeeds.)
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression.  (Translated from Chinese: regression uses only
        # the current frame's top-ranked boxes — no temporal history.)
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect: harvest new pos/neg features only on success, with
        # bounded FIFO banks.
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update.  (Translated from Chinese: retrain on features
        # accumulated from the last few frames.)
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir,
                                         '{:04d}.jpg'.format(i)), dpi=dpi)

        # Per-frame progress: overlap is only computable with ground truth.
        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps
def _hill_climb(model, image, sample_):
    """Greedily refine one candidate box ``[x, y, w, h]`` by ±1-pixel edge moves.

    Each round builds the 8 single-pixel edge perturbations of the current box
    (left/top edge shifted with size compensated, right/bottom edge grown or
    shrunk), scores them through the classifier's 'fc6' layer, and jumps to the
    best-scoring neighbour.  The climb stops once the best neighbour scores
    below the previously accepted score; the very first step is always taken.

    Returns the last accepted box as a 4-element list.
    """
    best = sample_
    last_top_score = None
    while True:
        neighbors = [
            [sample_[0] + 1, sample_[1], sample_[2] - 1, sample_[3]],  # left edge in
            [sample_[0] - 1, sample_[1], sample_[2] + 1, sample_[3]],  # left edge out
            [sample_[0], sample_[1] + 1, sample_[2], sample_[3] - 1],  # top edge in
            [sample_[0], sample_[1] - 1, sample_[2], sample_[3] + 1],  # top edge out
            [sample_[0], sample_[1], sample_[2] + 1, sample_[3]],      # right edge out
            [sample_[0], sample_[1], sample_[2] - 1, sample_[3]],      # right edge in
            [sample_[0], sample_[1], sample_[2], sample_[3] + 1],      # bottom edge out
            [sample_[0], sample_[1], sample_[2], sample_[3] - 1],      # bottom edge in
        ]
        scores = forward_samples(model, image, np.array(neighbors), out_layer='fc6')
        top_score, top_index = scores[:, 1].topk(1)
        top_score_float = top_score.cpu().numpy()[0]
        # Stop when the climb no longer improves on the last accepted score.
        if last_top_score is not None and top_score_float < last_top_score:
            break
        sample_ = neighbors[top_index]
        best = sample_
        last_top_score = top_score_float
    return best


def run_mdnet(img_list, init_bbox, gt=None, savefig_dir='', display=False,
              model_path='models/model001.pth'):
    """Track a target through ``img_list`` with MDNet plus hill-climbing refinement.

    Per frame: draw candidate boxes around the previous estimate, score them
    through the network's 'fc6' layer, locally refine the top-5 with
    ``_hill_climb``, and — if the mean top score is still negative — run a
    grid-based "everywhere" re-detection around the last known position.
    Confident frames additionally get bbox regression and feed the online
    short/long-term model updates.

    Args:
        img_list: sequence of image file paths.
        init_bbox: initial target box [x, y, w, h] on frame 0.
        gt: optional ground-truth boxes (n_frames, 4); enables IoU reporting.
        savefig_dir: directory for per-frame renderings ('' disables saving).
        display: show the tracking visualisation interactively.
        model_path: one of 'models/model000.pth' / 'models/model001.pth',
            selecting MDNet0 or MDNet1 respectively.

    Returns:
        (result, result_bb, fps, overlap) — raw boxes, regressed boxes,
        frames/sec, and per-frame IoU (all zeros when ``gt`` is None).

    Raises:
        ValueError: if ``model_path`` is not one of the two supported paths.
    """
    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox
    # Always allocate `overlap`: it is part of the return value, so leaving it
    # undefined when gt is None caused an UnboundLocalError at the return.
    overlap = np.zeros(len(img_list))
    if gt is not None:
        overlap[0] = 1
    # Init model
    opts['model_path'] = model_path
    print('********')
    print('model:', opts['model_path'])
    print('********')
    # Explicit validation instead of `assert` (asserts vanish under -O).
    if model_path not in ('models/model000.pth', 'models/model001.pth'):
        raise ValueError('unsupported model_path: %s' % model_path)
    if model_path == 'models/model000.pth':
        model = MDNet0(opts['model_path'])
    else:
        model = MDNet1(opts['model_path'])
    if opts['use_gpu']:
        model = model.cuda()
    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])
    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')
    # Draw pos/neg samples around the initial box
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)
    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    print(pos_feats)  # debug dump, kept as-is
    neg_feats = forward_samples(model, image, neg_examples)
    print(neg_feats)  # debug dump, kept as-is
    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()
    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'], opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()
    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'], opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'], opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'], opts['scale_neg'])
    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'], opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]
    spf_total = time.time() - tic
    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result_bb[0, :2]), result_bb[0, 2],
                             result_bb[0, 3], linewidth=3,
                             edgecolor="#ff0000", zorder=1, fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)
    # Main loop
    for i in range(1, len(img_list)):
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')
        # Estimate target bbox: score candidates drawn around the last box
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        # Refine each of the top-5 candidates in place via hill climbing.
        for j in range(5):
            samples[top_idx[j]] = _hill_climb(model, image, samples[top_idx[j]])
        # Re-score after refinement
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        # Keep a copy of the candidates so they can be restored if the
        # "everywhere" re-detection below fails on both passes.
        sampleStore = samples.copy()
        # If even the best candidates score negatively, re-detect around the
        # last confident position.
        target_score = top_scores.mean()
        if target_score < 0:
            last_left = result[i - 1][0]
            last_top = result[i - 1][1]
            cnt = 0
            rl = [32, 16]  # grid-spacing divisors: tight pass, then wider pass
            for ri in range(len(rl)):
                everywhere_sample = []
                # Mean candidate size defines the probe-box size.
                meanWidth = 0.0
                meanHeight = 0.0
                for j in range(len(samples)):
                    meanWidth += samples[j][2]
                    meanHeight += samples[j][3]
                meanWidth /= len(samples)
                meanHeight /= len(samples)
                # 32x32 grid of mean-sized boxes around the last position.
                for j in range(32):
                    for k in range(32):
                        everywhere_sample.append([
                            last_left + (31 - 2 * j) * meanWidth / rl[ri],
                            last_top + (31 - 2 * k) * meanHeight / rl[ri],
                            meanWidth,
                            meanHeight,
                        ])
                everywhere_scores = forward_samples(
                    model, image, np.array(everywhere_sample), out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = everywhere_scores[:, 1].topk(5)
                # Refine the top-5 grid hits in place.
                for j in range(5):
                    everywhere_sample[everywhere_top_idx[j]] = _hill_climb(
                        model, image, everywhere_sample[everywhere_top_idx[j]])
                everywhere_scores = forward_samples(
                    model, image, np.array(everywhere_sample), out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = everywhere_scores[:, 1].topk(5)
                # Merge the refined top-5 into the candidate set and re-test.
                everywhere_top5 = []
                for j in range(5):
                    everywhere_top5.append(everywhere_sample[everywhere_top_idx[j]])
                samples = np.concatenate((samples, np.array(everywhere_top5)))
                sample_scores = forward_samples(model, image, samples, out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)
                if top_scores.mean() > 0:
                    break
                cnt += 1
                # Both passes failed -> recover the original candidate set.
                if cnt == 2:
                    samples = np.array(sampleStore)
                    sample_scores = forward_samples(model, image, samples, out_layer='fc6')
                    top_scores, top_idx = sample_scores[:, 1].topk(5)
        # Final scoring of the (possibly extended) candidate set
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0
        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])
        # Bbox regression (confident frames only)
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox
        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]
        # Short term update (after a failed frame)
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])
        # Long term update (periodic)
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])
        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf
        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                # model_path[14] is the model-id digit of 'models/model00X.pth'
                fig.savefig(os.path.join(
                    savefig_dir,
                    ('M' + model_path[14] + 'T3_' + '{:04d}.jpg'.format(i))),
                    dpi=dpi)
        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))
    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    plt.close('all')
    return result, result_bb, fps, overlap
def run_vtaan(img_list, init_bbox, gt=None, savefig_dir='', display=False):
    """Run the VTAAN tracker (MDNet discriminator + NetG generator) over a sequence.

    Per frame: sample candidates around the previous estimate, score them via
    'fc6', regress the box on confident frames, collect positive/negative
    features (plus their source-frame ids and example boxes), and fine-tune
    the model online — short-term after a failure, long-term (with the
    generator and guided backprop) every ``opts['long_interval']`` frames.

    Args:
        img_list: sequence of image file paths.
        init_bbox: initial target box [x, y, w, h] on frame 0.
        gt: optional ground-truth boxes (n_frames, 4); enables IoU reporting.
        savefig_dir: directory for per-frame renderings ('' disables saving).
        display: show the tracking visualisation interactively.

    Returns:
        (result, result_bb, fps): raw boxes, regressed boxes and frames/sec.
    """
    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox
    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1
    # Init model: discriminator (MDNet) + generator (NetG)
    model = MDNet(opts['model_path'])
    model_g = NetG()
    if opts['use_gpu']:
        model = model.cuda()
        model_g = model_g.cuda()
    GBP = guided_backprop.GuidedBackprop(model, 1)
    # Init criterion and optimizer
    criterion = BCELoss()
    criterion_g = torch.nn.MSELoss(reduction='sum')
    model.set_learnable_params(opts['ft_layers'])
    model_g.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])
    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')
    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)
    # Extract pos/neg features; imgids record the frame each sample came from
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)
    pos_imgids = np.array([[0]] * pos_feats.size(0))
    neg_imgids = np.array([[0]] * neg_feats.size(0))
    feat_dim = pos_feats.size(-1)
    # Initial training
    train(model, None, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], pos_imgids, pos_examples, neg_imgids,
          neg_examples, img_list, GBP)
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()
    # Pre-train the generator on the initial positive features
    g_pretrain(model, model_g, criterion_g, pos_feats)
    torch.cuda.empty_cache()
    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'], opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()
    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'], opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'], opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'], opts['scale_neg'])
    # Init pos/neg features (and parallel bookkeeping lists) for updates
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'], opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    neg_feats_all = [neg_feats[:opts['n_neg_update']]]
    pos_examples_all = [pos_examples[:opts['n_pos_update']]]
    neg_examples_all = [neg_examples[:opts['n_neg_update']]]
    pos_imgids_all = [pos_imgids[:opts['n_pos_update']]]
    neg_imgids_all = [neg_imgids[:opts['n_neg_update']]]
    spf_total = time.time() - tic
    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result_bb[0, :2]), result_bb[0, 2],
                             result_bb[0, 3], linewidth=3,
                             edgecolor="#ff0000", zorder=1, fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)
    # Main loop
    for i in range(1, len(img_list)):
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')
        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0
        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])
        # Bbox regression (confident frames only)
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox
        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
                del pos_examples_all[0]
                del pos_imgids_all[0]
            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            pos_examples_all.append(pos_examples)
            neg_examples_all.append(neg_examples)
            pos_imgids_all.append(np.array([[i]] * pos_feats.size(0)))
            neg_imgids_all.append(np.array([[i]] * neg_feats.size(0)))
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]
                del neg_examples_all[0]
                del neg_imgids_all[0]
        # Short term update (after a failed frame)
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:], 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            pos_examples_data = torch.from_numpy(
                np.stack(pos_examples_all[-nframes:], 0)).view(-1, 4).numpy()
            neg_examples_data = torch.from_numpy(
                np.stack(neg_examples_all, 0)).view(-1, 4).numpy()
            pos_imgids_data = torch.from_numpy(
                np.stack(pos_imgids_all[-nframes:], 0)).view(-1, 1).numpy()
            neg_imgids_data = torch.from_numpy(
                np.stack(neg_imgids_all, 0)).view(-1, 1).numpy()
            train(model, None, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'], pos_imgids_data, pos_examples_data,
                  neg_imgids_data, neg_examples_data, img_list, GBP)
        # Long term update (periodic, with the generator network)
        elif i % opts['long_interval'] == 0:
            # BUGFIX: was `t.stack` — an undefined alias; everything else in
            # this function uses the `torch` module directly.
            pos_data = torch.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            # NOTE: the following four arrays are computed but not passed to
            # train() below (it receives Nones); kept to preserve behaviour.
            pos_examples_data = torch.from_numpy(
                np.stack(pos_examples_all, 0)).view(-1, 4).numpy()
            neg_examples_data = torch.from_numpy(
                np.stack(neg_examples_all, 0)).view(-1, 4).numpy()
            pos_imgids_data = torch.from_numpy(
                np.stack(pos_imgids_all, 0)).view(-1, 1).numpy()
            neg_imgids_data = torch.from_numpy(
                np.stack(neg_imgids_all, 0)).view(-1, 1).numpy()
            train(model, model_g, criterion, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'], None, None, None, None,
                  img_list, GBP)
        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf
        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)
        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i + 1, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i + 1, len(img_list), overlap[i], target_score, spf))
    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps
def run_mdnet(img_list, init_bbox, gt=None, savefig_dir='', display=False,
              loss_index=1, model_path=opts['model_path'], seq_name=None):
    """Track a target through ``img_list`` with MDNet, VOT-style.

    Starting from ``init_bbox`` on frame 0, each frame alternates candidate
    sampling, 'fc6' scoring, bbox regression and online fine-tuning
    (short-term after a failure, long-term every ``opts['long_interval']``
    frames).  When the module-level flag ``init_after_loss`` is set and the
    IoU with ``gt`` drops to 0, the run aborts early and returns truncated
    results with a ``True`` flag (VOT re-initialisation protocol).

    Args:
        img_list: sequence of image file paths.
        init_bbox: initial target box [x, y, w, h] on frame 0.
        gt: ground-truth boxes, shape (n_frames, 4).
        savefig_dir: directory for per-frame renderings ('' disables saving).
        display: show the tracking visualisation interactively.
        loss_index: loss selector forwarded to the initial train() call.
        model_path: weight file handed to MDNet.
        seq_name: not used in this body; kept for caller compatibility.

    Returns:
        (result, result_bb, num_images_tracked, spf_total, result_distances,
         result_ious, aborted_early)

    NOTE(review): ``gt`` is indexed unconditionally in the main loop
    (``iou_list[i] = overlap_ratio(gt[i], ...)``), so despite the ``gt=None``
    default this function effectively requires ground truth — confirm with
    callers.
    NOTE(review): ``result_centers`` / ``gt_centers`` / ``result_ious`` are
    only created inside the ``display or savefig`` + ``gt is not None``
    branch, yet are used unconditionally at the end — calling with
    ``display=False, savefig_dir=''`` would raise NameError; verify intended
    usage.
    """
    ############################
    # Optionally cap the number of processed frames (module-level switches
    # `fewer_images` / `sequence_len_limit` — presumably debug knobs; confirm).
    if fewer_images:
        num_images = min(sequence_len_limit, len(img_list))
    else:
        num_images = len(img_list)
    ############################
    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))       # raw tracker output per frame
    result_bb = np.zeros((len(img_list), 4))    # bbox-regressed output per frame
    result[0] = target_bbox
    result_bb[0] = target_bbox
    # Init iou and pred_iou
    iou_list = np.zeros((len(img_list), 1))  # per-frame IoU vs. gt
    iou_list[0] = 1.0  # frame 0: result equals gt by definition
    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1
    # Init model
    # model = MDNet(model_path=opts['model_path'],use_gpu=opts['use_gpu'])
    model = MDNet(model_path=model_path, use_gpu=opts['use_gpu'])
    if opts['use_gpu']:
        model = model.cuda()
    print('Init criterion and optimizer')
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])
    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')
    print('Draw pos/neg samples')
    # Draw pos/neg samples around the initial box
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)
    print('Extract pos/neg features')
    # Extract pos/neg features
    # NOTE(review): both branches are identical — the `fewer_images` split
    # looks like a leftover (perhaps subsampling was intended); confirm
    # before simplifying.
    if fewer_images:  # shorter run in general, less accurate
        pos_feats = forward_samples(model, image, pos_examples)
        neg_feats = forward_samples(model, image, neg_examples)
    else:
        pos_feats = forward_samples(model, image, pos_examples)
        neg_feats = forward_samples(model, image, neg_examples)
    print('Initial training')
    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], loss_index=loss_index)
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()
    print('Train bbox regressor')
    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(
        model, image, bbreg_examples)  # features, e.g. shape [n_bbreg, 4608]
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()
    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])
    print('Init pos/neg features for update')
    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]
    spf_total = time.time() - tic
    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
            #################
            # Accounting arrays for the VOT-style early-abort path below.
            num_gts = np.minimum(gt.shape[0], num_images)
            gt_centers = gt[:num_gts, :2] + gt[:num_gts, 2:] / 2
            result_centers = np.zeros_like(gt[:num_gts, :2])
            result_centers[0] = gt_centers[0]
            result_ious = np.zeros(num_gts, dtype='float64')
            result_ious[0] = 1.
            #################
        rect = plt.Rectangle(tuple(result_bb[0, :2]), result_bb[0, 2],
                             result_bb[0, 3], linewidth=3,
                             edgecolor="#ff0000", zorder=1, fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)
    print('Main Loop')
    # Main loop
    spf_total = 0  # exclude initialization time from the fps measurement
    for i in tqdm(range(1, num_images)):
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')
        # Estimate target bbox: score candidates drawn around the last box
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0
        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])
        # Bbox regression (confident frames only)
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox
        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        iou_list[i] = overlap_ratio(gt[i], result_bb[i])  # requires gt (see docstring)
        ###########################################
        # identify tracking failure and abort when in VOT mode
        IoU = overlap_ratio(result_bb[i], gt[i])[0]
        if (IoU == 0) and init_after_loss:
            print(' * lost track in frame %d since init*' % (i))
            # Per-frame centre distance between prediction and gt so far.
            result_distances = scipy.spatial.distance.cdist(
                result_centers[:i], gt_centers[:i],
                metric='euclidean').diagonal()
            num_images_tracked = i - 1  # we don't count frame 0 and current frame (lost track)
            im.set_data(image)
            if gt is not None:
                if i < gt.shape[0]:
                    gt_rect.set_xy(gt[i, :2])
                    gt_rect.set_width(gt[i, 2])
                    gt_rect.set_height(gt[i, 3])
                else:
                    # gt exhausted: hide the gt rectangle
                    gt_rect.set_xy(np.array([np.nan, np.nan]))
                    gt_rect.set_width(np.nan)
                    gt_rect.set_height(np.nan)
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            plt.pause(.01)
            plt.draw()
            print(
                'Finished identify tracking failure and abort when in VOT mode'
            )
            # True flag signals early abort (caller may re-initialise).
            return result[:i], result_bb[:i], num_images_tracked, spf_total, result_distances, result_ious[:i], True
        ########################################
        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]
        # Short term update (after a failed frame)
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])
        # Long term update (periodic)
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])
        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf
        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
                #################
                result_ious[i] = overlap_ratio(result_bb[i], gt[i])[0]
                result_centers[i] = result_bb[i, :2] + result_bb[i, 2:] / 2
                #################
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir,
                                         '{:04d}.jpg'.format(i)), dpi=dpi)
        ####################################
        if detailed_printing:
            if gt is None:
                print(" Frame %d/%d, Score %.3f, Time %.3f" %
                      (i, num_images-1, target_score, spf))
            else:
                if i < gt.shape[0]:
                    print(" Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" %
                          (i, num_images-1,
                           overlap_ratio(gt[i], result_bb[i])[0],
                           target_score, spf))
                else:
                    # past the end of gt: IoU vs. an all-NaN box
                    print(" Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" %
                          (i, num_images-1,
                           overlap_ratio(np.array([np.nan,np.nan,np.nan,np.nan]),
                                         result_bb[i])[0],
                           target_score, spf))
        ####################################
    ########################
    plt.close()
    result_distances = scipy.spatial.distance.cdist(
        result_centers, gt_centers, metric='euclidean').diagonal()
    num_images_tracked = num_images - 1  # frame 0 (initialization) not counted
    print(' main loop finished, %d frames' % (num_images))
    print('mean IoU: ', iou_list.mean())
    print('Finished run_mdnet()')
    return result, result_bb, num_images_tracked, spf_total, result_distances, result_ious, False
def RTMDNet_init(model_path, image_file, init_bbox):
    """Initialize an RT-MDNet tracker on the first frame of a sequence.

    Loads the MDNet model, draws positive/negative/bbox-regression samples
    around the initial box, extracts RoI-aligned conv3 features (including
    jittered scene crops and 100 replicated augmented crops), runs the
    initial classifier training and fits the bbox regressor.

    Args:
        model_path: path to the pretrained MDNet weights.
        image_file: path to the first frame image.
        init_bbox: initial target box [x, y, w, h].

    Returns:
        dict holding everything later update/track steps need: model,
        bbox regressor, criterion, update optimizer, pos/neg feature
        banks, feat_dim, target_bbox, img_crop_model, trans_f, count.

    Fixes vs. original:
        * sample counts used Python-3 true division (`/`) and were passed
          to gen_samples as floats — changed to floor division (`//`),
          matching the `// 2` convention used elsewhere in this file.
        * pos_feats_all / neg_feats_all were unbound (NameError) when the
          feature banks were not larger than n_pos_update / n_neg_update —
          added else branches.
    """
    state = dict()
    target_bbox = np.array(init_bbox)
    model = MDNet(model_path)
    if opts['adaptive_align']:
        # Swap in the adaptive-max RoIAlign variant with the same geometry.
        align_h = model.roi_align_model.aligned_height
        align_w = model.roi_align_model.aligned_width
        spatial_s = model.roi_align_model.spatial_scale
        model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
    if opts['use_gpu']:
        model = model.cuda()
    model.set_learnable_params(opts['ft_layers'])

    # Init image crop model
    img_crop_model = imgCropper(1.)
    if opts['use_gpu']:
        img_crop_model.gpuEnable()

    # Init criterion and optimizer
    criterion = BinaryLoss()
    init_optimizer = set_optimizer(model, opts['lr_init'])
    update_optimizer = set_optimizer(model, opts['lr_update'])

    cur_image = Image.open(image_file).convert('RGB')
    cur_image = np.asarray(cur_image)

    # Draw pos/neg samples around the initial box.
    # SampleGenerator takes (width, height), hence (ishape[1], ishape[0]).
    ishape = cur_image.shape
    pos_examples = gen_samples(
        SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
        target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 1, 2, 1.1),
        target_bbox, opts['n_neg_init'], opts['overlap_neg_init'])
    neg_examples = np.random.permutation(neg_examples)

    cur_bbreg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
        target_bbox, opts['n_bbreg'], opts['overlap_bbreg'],
        opts['scale_bbreg'])

    # Compute the padded scene box that covers every negative sample.
    padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_scene_box = np.reshape(
        np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                    padded_y2 - padded_y1)), (1, 4))

    scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
    if opts['jitter']:
        ## horizontal shift
        jittered_scene_box_horizon = np.copy(padded_scene_box)
        jittered_scene_box_horizon[0, 0] -= 4.
        jitter_scale_horizon = 1.
        ## vertical shift
        jittered_scene_box_vertical = np.copy(padded_scene_box)
        jittered_scene_box_vertical[0, 1] -= 4.
        jitter_scale_vertical = 1.
        ## scale reduction (x1.1^-1)
        jittered_scene_box_reduce1 = np.copy(padded_scene_box)
        jitter_scale_reduce1 = 1.1**(-1)
        ## scale enlarge (x1.1)
        jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
        jitter_scale_enlarge1 = 1.1**(1)
        ## scale reduction (x1.1^-2)
        jittered_scene_box_reduce2 = np.copy(padded_scene_box)
        jitter_scale_reduce2 = 1.1**(-2)
        ## scale enlarge (x1.1^2)
        jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
        jitter_scale_enlarge2 = 1.1**(2)

        scene_boxes = np.concatenate([
            scene_boxes, jittered_scene_box_horizon,
            jittered_scene_box_vertical, jittered_scene_box_reduce1,
            jittered_scene_box_enlarge1, jittered_scene_box_reduce2,
            jittered_scene_box_enlarge2
        ], axis=0)
        jitter_scale = [
            1., jitter_scale_horizon, jitter_scale_vertical,
            jitter_scale_reduce1, jitter_scale_enlarge1,
            jitter_scale_reduce2, jitter_scale_enlarge2
        ]
    else:
        jitter_scale = [1.]

    model.eval()
    for bidx in range(0, scene_boxes.shape[0]):
        # Crop each (possibly jittered) scene box so the target maps to
        # img_size pixels, then extract one shared conv3 feature map.
        crop_img_size = (scene_boxes[bidx, 2:4] * (
            (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                         ).astype('int64') * jitter_scale[bidx]
        cropped_image, cur_image_var = img_crop_model.crop_image(
            cur_image, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.

        feat_map = model(cropped_image, out_layer='conv3')

        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

        # Positive RoIs: shift into scene-box coordinates, convert to
        # mask-RoIs on the feature map, pool and flatten.
        batch_num = np.zeros((pos_examples.shape[0], 1))
        cur_pos_rois = np.copy(pos_examples)
        cur_pos_rois[:, 0:2] -= np.repeat(
            np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
            cur_pos_rois.shape[0], axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
        cur_pos_rois = Variable(
            torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
        cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
        cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                           -1).data.clone()

        # Negative RoIs, same pipeline.
        batch_num = np.zeros((neg_examples.shape[0], 1))
        cur_neg_rois = np.copy(neg_examples)
        cur_neg_rois[:, 0:2] -= np.repeat(
            np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
            cur_neg_rois.shape[0], axis=0)
        cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
        cur_neg_rois = Variable(
            torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
        cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
        cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                           -1).data.clone()

        ## bbreg rois
        batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
        cur_bbreg_rois = np.copy(cur_bbreg_examples)
        cur_bbreg_rois[:, 0:2] -= np.repeat(
            np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
            cur_bbreg_rois.shape[0], axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_bbreg_rois = samples2maskroi(cur_bbreg_rois,
                                         model.receptive_field,
                                         (scaled_obj_size, scaled_obj_size),
                                         target_bbox[2:4], opts['padding'])
        cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1)
        cur_bbreg_rois = Variable(
            torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda()
        cur_bbreg_feats = model.roi_align_model(feat_map, cur_bbreg_rois)
        cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0),
                                               -1).data.clone()

        feat_dim = cur_pos_feats.size(-1)

        # Accumulate features over all scene boxes.
        if bidx == 0:
            pos_feats = cur_pos_feats
            neg_feats = cur_neg_feats
            ##bbreg feature
            bbreg_feats = cur_bbreg_feats
            bbreg_examples = cur_bbreg_examples
        else:
            pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
            neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
            ##bbreg feature
            bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
            bbreg_examples = np.concatenate(
                (bbreg_examples, cur_bbreg_examples), axis=0)

    # Randomly subsample down to the configured budgets.
    if pos_feats.size(0) > opts['n_pos_init']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']], :]
    if neg_feats.size(0) > opts['n_neg_init']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :]

    ##bbreg
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]

    ## open images and crop patch from obj: build 100 extra randomly
    ## shifted/scaled crops centered on the target for augmentation.
    extra_obj_size = np.array((opts['img_size'], opts['img_size']))
    extra_crop_img_size = extra_obj_size * (opts['padding'] + 0.6)
    replicateNum = 100
    for iidx in range(replicateNum):
        extra_target_bbox = np.copy(target_bbox)

        extra_scene_box = np.copy(extra_target_bbox)
        extra_scene_box_center = (extra_scene_box[0:2] +
                                  extra_scene_box[2:4] / 2.)
        extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6)
        extra_scene_box[0:2] = (extra_scene_box_center -
                                extra_scene_box_size / 2.)
        extra_scene_box[2:4] = extra_scene_box_size

        # Random jitter: shift clipped to +-4 px, scale 1.1^[-2, 2].
        extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4)
        cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)

        extra_scene_box[0] += extra_shift_offset[0]
        extra_scene_box[1] += extra_shift_offset[1]
        extra_scene_box[2:4] *= cur_extra_scale[0]

        scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0]

        cur_extra_cropped_image, _ = img_crop_model.crop_image(
            cur_image, np.reshape(extra_scene_box, (1, 4)),
            extra_crop_img_size)
        cur_extra_cropped_image = cur_extra_cropped_image.detach()

        # FIX: `//` instead of `/` — gen_samples needs integer counts
        # (Python 3 `/` is true division and yields floats).
        cur_extra_pos_examples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
            extra_target_bbox, opts['n_pos_init'] // replicateNum,
            opts['overlap_pos_init'])
        cur_extra_neg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2, 1.1),
            extra_target_bbox, opts['n_neg_init'] // replicateNum // 4,
            opts['overlap_neg_init'])

        ##bbreg sample
        cur_extra_bbreg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
            extra_target_bbox, opts['n_bbreg'] // replicateNum // 4,
            opts['overlap_bbreg'], opts['scale_bbreg'])

        # batch index iidx ties each RoI to its crop in the batched image.
        batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1))
        cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
        cur_extra_pos_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_pos_rois.shape[0], axis=0)
        cur_extra_pos_rois = samples2maskroi(
            cur_extra_pos_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_pos_rois = np.concatenate(
            (batch_num, cur_extra_pos_rois), axis=1)

        batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
        cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
        cur_extra_neg_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_neg_rois.shape[0], axis=0)
        cur_extra_neg_rois = samples2maskroi(
            cur_extra_neg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_neg_rois = np.concatenate(
            (batch_num, cur_extra_neg_rois), axis=1)

        ## bbreg rois
        batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
        cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
        cur_extra_bbreg_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_bbreg_rois.shape[0], axis=0)
        cur_extra_bbreg_rois = samples2maskroi(
            cur_extra_bbreg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_bbreg_rois = np.concatenate(
            (batch_num, cur_extra_bbreg_rois), axis=1)

        if iidx == 0:
            extra_cropped_image = cur_extra_cropped_image
            extra_pos_rois = np.copy(cur_extra_pos_rois)
            extra_neg_rois = np.copy(cur_extra_neg_rois)
            ##bbreg rois
            extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
            extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
        else:
            extra_cropped_image = torch.cat(
                (extra_cropped_image, cur_extra_cropped_image), dim=0)
            extra_pos_rois = np.concatenate(
                (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
            extra_neg_rois = np.concatenate(
                (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
            ##bbreg rois
            extra_bbreg_rois = np.concatenate(
                (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0)
            extra_bbreg_examples = np.concatenate(
                (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)),
                axis=0)

    extra_pos_rois = Variable(
        torch.from_numpy(extra_pos_rois.astype('float32'))).cuda()
    extra_neg_rois = Variable(
        torch.from_numpy(extra_neg_rois.astype('float32'))).cuda()
    ##bbreg rois
    extra_bbreg_rois = Variable(
        torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda()

    extra_cropped_image -= 128.

    # One forward pass over the whole batch of replicated crops.
    extra_feat_maps = model(extra_cropped_image, out_layer='conv3')
    ishape = cur_image.shape

    extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois)
    extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0),
                                           -1).data.clone()

    extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois)
    extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0),
                                           -1).data.clone()
    ##bbreg feat
    extra_bbreg_feats = model.roi_align_model(extra_feat_maps,
                                              extra_bbreg_rois)
    extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0),
                                               -1).data.clone()

    ## concatenate extra features to original_features
    pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
    neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
    ## concatenate extra bbreg feats to original_bbreg_feats
    bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
    bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples),
                                    axis=0)

    torch.cuda.empty_cache()
    model.zero_grad()

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])

    ##bbreg train
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
    bbreg = BBRegressor((ishape[1], ishape[0]))
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)

    # Seed the online-update feature banks (random subsample if over budget).
    # FIX: else branches so the banks are always defined.
    if pos_feats.size(0) > opts['n_pos_update']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats_all = [
            pos_feats.index_select(
                0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
        ]
    else:
        pos_feats_all = [pos_feats]
    if neg_feats.size(0) > opts['n_neg_update']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats_all = [
            neg_feats.index_select(
                0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
        ]
    else:
        neg_feats_all = [neg_feats]

    state['trans_f'] = opts['trans_f']
    state['count'] = 1
    state['model'] = model
    state['target_bbox'] = target_bbox
    state['img_crop_model'] = img_crop_model
    state['bbreg'] = bbreg
    state['criterion'] = criterion
    state['update_optimizer'] = update_optimizer
    state['pos_feats_all'] = pos_feats_all
    state['neg_feats_all'] = neg_feats_all
    state['feat_dim'] = feat_dim
    return state
def run_(model, criterion, target, init_optimizer, img_files, init_rect,
         video, gt=None):
    """Run the tracker over a full image sequence.

    Initializes the bbox regressor and classifier on the first frame, then
    tracks frame by frame with candidate sampling, top-5 score averaging,
    bbox regression and short/long-term online model updates.

    Args:
        model: the tracking network (exposes 'capsule' output layer).
        criterion, target, init_optimizer: training objects passed to train().
        img_files: ordered list of frame image paths.
        init_rect: initial target box [x, y, w, h] for frame 0.
        video: sequence name (unused in the visible body).
        gt: optional ground-truth boxes (unused in the visible body).

    Returns:
        (result, result_bb, fps, spf_total): raw and regressed per-frame
        boxes, frames-per-second, and total elapsed seconds.
    """
    # Init bbox
    box_ = np.array(init_rect)
    result = np.zeros((len(img_files), 4))
    result_bb = np.zeros((len(img_files), 4))
    result[0] = box_
    bbreg_bbox = box_
    result_bb[0] = bbreg_bbox

    tic = time.time()
    # Load first image
    image = Image.open(img_files[0]).convert('RGB')  #[W,H] RGB
    image_np = np.asarray(image)  #[H, W, 3]

    # Init bbox regressor on the cropped first-frame region.
    # bbreg_examples_roi is [x_min, y_min, x_max, y_max]
    region, crop_region_sz, bbreg_examples_roi, bbreg_im_index, box__roi, box__crop, coe = acquire_rois_bb(
        SampleGenerator('uniform', 0.3, 1.5, 1.1, True), image_np, opts,
        box_, opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg'])
    # bbreg_examples_reg is [x_min, y_min, w, h]
    bbreg_examples_reg = np.hstack(
        ((bbreg_examples_roi[:, 0]).reshape(-1, 1),
         (bbreg_examples_roi[:, 1]).reshape(-1, 1),
         (bbreg_examples_roi[:, 2:] - bbreg_examples_roi[:, :2])))
    bbreg_feats = extract_feat(model, region, bbreg_examples_roi,
                               bbreg_im_index, fea_view=True)
    bbreg = BBRegressor((np.array(region.shape[2:])).reshape(-1, 2),
                        overlap=opts['overlap_bbreg'],
                        scale=opts['scale_bbreg'])
    bbreg.train(bbreg_feats, bbreg_examples_reg, box__roi)

    # Draw pos/neg samples (negatives: half uniform, half whole-image).
    pos_examples, pos_examples_roi, pos_im_index = acquire_roi_samples(
        SampleGenerator('gaussian', 0.1, 1.2, valid=True), coe, box__crop,
        crop_region_sz, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples, neg_examples_roi, neg_im_index = acquire_roi_samples(
        SampleGenerator('uniform', 1, 2, 1.1, valid=True), coe, box__crop,
        crop_region_sz, opts['n_neg_init'] // 2, opts['overlap_neg_init'])
    neg_examples_whole, neg_examples_roi_whole, neg_im_index_whole = acquire_roi_samples(
        SampleGenerator('whole', 0, 1.2, 1.1, valid=True), coe, box__crop,
        crop_region_sz, opts['n_neg_init'] // 2, opts['overlap_neg_init'])
    neg_examples_roi = np.concatenate(
        (neg_examples_roi, neg_examples_roi_whole), axis=0)
    neg_examples_roi = np.random.permutation(neg_examples_roi)
    neg_im_index = np.concatenate((neg_im_index, neg_im_index_whole), axis=0)

    # Extract pos/neg features
    pos_feats = extract_feat(model, region, pos_examples_roi, pos_im_index)
    neg_feats = extract_feat(model, region, neg_examples_roi, neg_im_index)
    feat_dim = pos_feats.size(-1)     # spatial size of pooled features
    channel_dim = pos_feats.size(-3)  # channel count of pooled features

    # Initial training, then freeze the configured layers for updates.
    train(model, criterion, target, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    model.stop_learnable_params(opts['stop_layers'])
    update_optimizer = set_optimizer(model, opts['lr_update'])

    # Init sample generators
    sample_generator = SampleGenerator('gaussian', opts['trans_f'],
                                       opts['scale_f'], valid=True)
    pos_generator = SampleGenerator('gaussian', 0.1, 1.2, valid=True)
    neg_generator = SampleGenerator('uniform', 1.5, 1.2, valid=True)

    # Init pos/neg features for update
    pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    neg_feats_all = [neg_feats[:opts['n_neg_update']]]

    spf_total = time.time() - tic

    # Start tracking
    unsuccess_num = 0  # consecutive-failure counter
    for i in range(1, len(img_files)):
        tic = time.time()
        # Load image
        image = Image.open(img_files[i]).convert('RGB')
        image_np = np.asarray(image)  #[H, W, 3]

        # Cropping: search region around the previous box, then score
        # candidate samples and average the top 5 as the new estimate.
        region, crop_region_sz, coe, box__crop = acquire_region(
            image_np, box_, opts)
        samples, samples_roi, samples_im_index = acquire_roi_samples(
            sample_generator, coe, box__crop, crop_region_sz,
            opts['n_samples'])
        sample_scores, sta_g_weight, sta_penalty, atten_map, conv3_fea = extract_feat(
            model, region, samples_roi, samples_im_index,
            out_layer='capsule')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu().numpy()
        target_score = top_scores.mean()
        samples_topk = samples[top_idx]
        # Map top samples from crop coordinates back to image coordinates.
        samples_topk[:, :2] = samples_topk[:, :2] - box__crop[:2].reshape(
            -1, 2) + box_[:2].reshape(-1, 2)
        box__copy = box_.copy()
        box_ = samples_topk.mean(
            axis=0)  # Take the mean value of top 5 as the tracking result

        success = target_score > opts['success_thr']

        # Expand search area when failure occurs
        if success:
            unsuccess_num = 0
            sample_generator.set_trans_f(opts['trans_f'])
        else:
            unsuccess_num += 1
            sample_generator.set_trans_f(opts['trans_f_expand'])

        # Bbox regression on the top-5 samples (crop coords -> image coords
        # via the per-axis scale coefficients in `coe`).
        if success:
            bbreg_samples_roi = samples_roi[top_idx]
            bbreg_samples_reg = np.hstack(
                ((bbreg_samples_roi[:, 0]).reshape(-1, 1),
                 (bbreg_samples_roi[:, 1]).reshape(-1, 1),
                 (bbreg_samples_roi[:, 2:] - bbreg_samples_roi[:, :2])))
            bbreg_feats = extract_feat(model, region, bbreg_samples_roi,
                                       samples_im_index[top_idx],
                                       fea_view=True)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples_reg)
            bbreg_bbox = bbreg_samples.mean(axis=0)
            bbreg_bbox = np.array([
                bbreg_bbox[0] * coe[0], bbreg_bbox[1] * coe[1],
                bbreg_bbox[2] * coe[0], bbreg_bbox[3] * coe[1]
            ])
            bbreg_bbox[:2] = np.array(bbreg_bbox[:2] - box__crop[:2] +
                                      box__copy[:2])
        else:
            bbreg_bbox = box_

        # Copy previous result at failure
        if not success:
            box_ = result[i - 1]
            bbreg_bbox = result_bb[i - 1]

        # Save result
        result[i] = box_
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            # Draw pos/neg samples around the updated box.
            region, crop_region_sz, coe, box__crop = acquire_region(
                image_np, box_, opts)
            pos_examples, pos_examples_roi, pos_im_index = acquire_roi_samples(
                pos_generator, coe, box__crop, crop_region_sz,
                opts['n_pos_update'], opts['overlap_pos_update'])
            neg_examples, neg_examples_roi, neg_im_index = acquire_roi_samples(
                neg_generator, coe, box__crop, crop_region_sz,
                opts['n_neg_update'], opts['overlap_neg_update'])
            # Extract pos/neg features
            pos_feats = extract_feat(model, region, pos_examples_roi,
                                     pos_im_index)
            neg_feats = extract_feat(model, region, neg_examples_roi,
                                     neg_im_index)
            pos_feats_all.append(pos_feats)
            neg_feats_all.append(neg_feats)
            # Accumulate updating features
            if len(pos_feats_all) > opts['n_frames_long']:
                # index 1 (not 0): keep the first frame's features forever
                del pos_feats_all[1]
            if len(neg_feats_all) > opts['n_frames_short']:
                # drops the oldest, including frame 0's negatives
                del neg_feats_all[0]

        # Short term update (only while failures are recent, < 15 in a row)
        if (not success) & (unsuccess_num < 15):
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:],
                                   0).view(-1, channel_dim, feat_dim,
                                           feat_dim)  # [20*50, 512,7,7]
            neg_data = torch.stack(neg_feats_all, 0).view(
                -1, channel_dim, feat_dim,
                feat_dim)  # [20 or less *200, 512,7,7]
            train(model, criterion, target, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.stack(pos_feats_all,
                                   0).view(-1, channel_dim, feat_dim,
                                           feat_dim)
            neg_data = torch.stack(neg_feats_all,
                                   0).view(-1, channel_dim, feat_dim,
                                           feat_dim)
            train(model, criterion, target, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'])

        spf = time.time() - tic
        spf_total += spf

    fps = len(img_files) / spf_total
    print("Speed: %.3f" % (fps))
    return result, result_bb, fps, spf_total
# train bb regression print('==> bb regression') dataset = SimpleSampler(train_bbox, img_transforms, num=[FINETUNE_POS_NUM, 0], threshold=FINETUNE_IOU_THRESHOLD)(train_img, train_bbox) loader = data.DataLoader(dataset, len(dataset)) imgs, boxes = next(iter(loader)) feat = Variable(imgs).cuda() feat = featNet(feat) bbreg = BBRegressor() bbreg.fit(feat, boxes, train_bbox) print('done') """ predicting """ print('==> predicting') recent_scores = [] recent_regions = [] last_bbox = train_bbox for frame_idx, (frame_img, _) in enumerate(folder, 1): print(f'Frame[{frame_idx}]', end='\t')
def __init__(self, init_bbox, first_frame):
    """Initialize the MDNet tracker state from the first frame.

    NOTE(review): this definition duplicates Tracker.__init__ defined
    later in the file — presumably a stray copy; confirm which one is
    actually used.

    Args:
        init_bbox: initial target box [x, y, w, h].
        first_frame: first frame as a PIL image (``.size`` is (W, H)).
    """
    self.frame_idx = 0
    self.target_bbox = np.array(init_bbox)
    self.bbreg_bbox = self.target_bbox

    # Init model
    self.model = MDNet(opts['model_path'])
    if opts['use_gpu']:
        self.model = self.model.cuda()
    self.model.set_learnable_params(opts['ft_layers'])

    # Init criterion and optimizer
    self.criterion = BinaryLoss()
    self.init_optimizer = set_optimizer(self.model, opts['lr_init'])
    self.update_optimizer = set_optimizer(self.model, opts['lr_update'])

    # Train bbox regressor on samples drawn around the initial box.
    bbreg_examples = gen_samples(
        SampleGenerator('uniform', first_frame.size, 0.3, 1.5, 1.1),
        self.target_bbox, opts['n_bbreg'], opts['overlap_bbreg'],
        opts['scale_bbreg'])
    assert len(bbreg_examples) > 0
    bbreg_feats = forward_samples(self.model, first_frame, bbreg_examples)
    assert len(bbreg_feats) > 0
    self.bbreg = BBRegressor(first_frame.size)
    self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

    # Draw pos/neg samples (negatives: half uniform near the target,
    # half from the whole image).
    pos_examples = gen_samples(
        SampleGenerator('gaussian', first_frame.size, 0.1, 1.2),
        self.target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        gen_samples(
            SampleGenerator('uniform', first_frame.size, 1, 2, 1.1),
            self.target_bbox, opts['n_neg_init'] // 2,
            opts['overlap_neg_init']),
        gen_samples(
            SampleGenerator('whole', first_frame.size, 0, 1.2, 1.1),
            self.target_bbox, opts['n_neg_init'] // 2,
            opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(self.model, first_frame, pos_examples)
    neg_feats = forward_samples(self.model, first_frame, neg_examples)
    self.feat_dim = pos_feats.size(-1)

    # Initial training
    train(self.model, self.criterion, self.init_optimizer, pos_feats,
          neg_feats, opts['maxiter_init'])

    # Init sample generators
    self.sample_generator = SampleGenerator('gaussian', first_frame.size,
                                            opts['trans_f'],
                                            opts['scale_f'], valid=True)
    self.pos_generator = SampleGenerator('gaussian', first_frame.size,
                                         0.1, 1.2)
    self.neg_generator = SampleGenerator('uniform', first_frame.size,
                                         1.5, 1.2)

    # Init pos/neg features for update
    self.pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    self.neg_feats_all = [neg_feats[:opts['n_neg_update']]]
class Tracker:
    """Online MDNet tracker: per-frame candidate scoring, bbox regression,
    and short/long-term online updates of the classification layers."""

    def __init__(self, init_bbox, first_frame):
        """Initialize the tracker on the first frame.

        Args:
            init_bbox: initial target box [x, y, w, h].
            first_frame: first frame as a PIL image (``.size`` is (W, H)).
        """
        self.frame_idx = 0
        self.target_bbox = np.array(init_bbox)
        self.bbreg_bbox = self.target_bbox

        # Init model
        self.model = MDNet(opts['model_path'])
        if opts['use_gpu']:
            self.model = self.model.cuda()
        self.model.set_learnable_params(opts['ft_layers'])

        # Init criterion and optimizer
        self.criterion = BinaryLoss()
        self.init_optimizer = set_optimizer(self.model, opts['lr_init'])
        self.update_optimizer = set_optimizer(self.model,
                                              opts['lr_update'])

        # Train bbox regressor on samples drawn around the initial box.
        bbreg_examples = gen_samples(
            SampleGenerator('uniform', first_frame.size, 0.3, 1.5, 1.1),
            self.target_bbox, opts['n_bbreg'], opts['overlap_bbreg'],
            opts['scale_bbreg'])
        assert len(bbreg_examples) > 0
        bbreg_feats = forward_samples(self.model, first_frame,
                                      bbreg_examples)
        assert len(bbreg_feats) > 0
        self.bbreg = BBRegressor(first_frame.size)
        self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

        # Draw pos/neg samples (negatives: half uniform, half whole-image).
        pos_examples = gen_samples(
            SampleGenerator('gaussian', first_frame.size, 0.1, 1.2),
            self.target_bbox, opts['n_pos_init'],
            opts['overlap_pos_init'])
        neg_examples = np.concatenate([
            gen_samples(
                SampleGenerator('uniform', first_frame.size, 1, 2, 1.1),
                self.target_bbox, opts['n_neg_init'] // 2,
                opts['overlap_neg_init']),
            gen_samples(
                SampleGenerator('whole', first_frame.size, 0, 1.2, 1.1),
                self.target_bbox, opts['n_neg_init'] // 2,
                opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        # Extract pos/neg features
        pos_feats = forward_samples(self.model, first_frame, pos_examples)
        neg_feats = forward_samples(self.model, first_frame, neg_examples)
        self.feat_dim = pos_feats.size(-1)

        # Initial training
        train(self.model, self.criterion, self.init_optimizer, pos_feats,
              neg_feats, opts['maxiter_init'])

        # Init sample generators
        self.sample_generator = SampleGenerator('gaussian',
                                                first_frame.size,
                                                opts['trans_f'],
                                                opts['scale_f'],
                                                valid=True)
        self.pos_generator = SampleGenerator('gaussian', first_frame.size,
                                             0.1, 1.2)
        self.neg_generator = SampleGenerator('uniform', first_frame.size,
                                             1.5, 1.2)

        # Init pos/neg features for update
        self.pos_feats_all = [pos_feats[:opts['n_pos_update']]]
        self.neg_feats_all = [neg_feats[:opts['n_neg_update']]]

    def track(self, image):
        """Track the target in one new frame.

        Args:
            image: current frame as a PIL image.

        Returns:
            (bbreg_bbox, target_score): the regressed box estimate and
            the mean classifier score of the top-5 candidates.
        """
        self.frame_idx += 1

        # Estimate target bbox: score candidates, average the top 5.
        samples = gen_samples(self.sample_generator, self.target_bbox,
                              opts['n_samples'])
        sample_scores = forward_samples(self.model, image, samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu().numpy()
        target_score = top_scores.mean()
        success = target_score > opts['success_thr']

        # Expand search area at failure
        if success:
            self.sample_generator.set_trans_f(opts['trans_f'])
        else:
            self.sample_generator.set_trans_f(opts['trans_f_expand'])

        # Save result at success.
        if success:
            self.target_bbox = samples[top_idx].mean(axis=0)

            # Bbox regression over the top-5 samples; on failure the
            # previous self.bbreg_bbox is returned unchanged.
            bbreg_samples = samples[top_idx]
            bbreg_feats = forward_samples(self.model, image, bbreg_samples)
            bbreg_samples = self.bbreg.predict(bbreg_feats, bbreg_samples)
            self.bbreg_bbox = bbreg_samples.mean(axis=0)

        # Data collect
        if success:
            # Draw pos/neg samples around the new estimate.
            pos_examples = gen_samples(self.pos_generator,
                                       self.target_bbox,
                                       opts['n_pos_update'],
                                       opts['overlap_pos_update'])
            neg_examples = gen_samples(self.neg_generator,
                                       self.target_bbox,
                                       opts['n_neg_update'],
                                       opts['overlap_neg_update'])

            # Extract pos/neg features; cap the rolling feature banks.
            pos_feats = forward_samples(self.model, image, pos_examples)
            neg_feats = forward_samples(self.model, image, neg_examples)
            self.pos_feats_all.append(pos_feats)
            self.neg_feats_all.append(neg_feats)
            if len(self.pos_feats_all) > opts['n_frames_long']:
                del self.pos_feats_all[0]
            if len(self.neg_feats_all) > opts['n_frames_short']:
                del self.neg_feats_all[0]

        # Short term update (on failure): recent frames only.
        if not success:
            nframes = min(opts['n_frames_short'],
                          len(self.pos_feats_all))
            pos_data = torch.stack(self.pos_feats_all[-nframes:],
                                   0).view(-1, self.feat_dim)
            neg_data = torch.stack(self.neg_feats_all,
                                   0).view(-1, self.feat_dim)
            train(self.model, self.criterion, self.update_optimizer,
                  pos_data, neg_data, opts['maxiter_update'])

        # Long term update: every long_interval successful frames.
        elif self.frame_idx % opts['long_interval'] == 0:
            pos_data = torch.stack(self.pos_feats_all,
                                   0).view(-1, self.feat_dim)
            neg_data = torch.stack(self.neg_feats_all,
                                   0).view(-1, self.feat_dim)
            train(self.model, self.criterion, self.update_optimizer,
                  pos_data, neg_data, opts['maxiter_update'])

        return self.bbreg_bbox, target_score