class SiamMaskWrapper():
    """Convenience wrapper that owns a SiamMask network and its tracking state.

    Typical use: construct once, call ``select_region`` on the first frame,
    then call ``predict`` on every following frame.
    """

    def __init__(self, base_path=DEFAULT_BASE_PATH, config=DEFAULT_CONFIG,
                 resume=DEFAULT_RESUME, cpu=False):
        """Build the network and optionally load pretrained weights.

        base_path : str
            Stored in the argument namespace handed to ``load_config``.
        config : str
            Path of the configuration file read by ``load_config``.
        resume : str
            Checkpoint path; when falsy no weights are loaded.
        cpu : bool
            Force CPU execution even when CUDA is available.
        """
        args = Namespace(base_path=base_path, config=config, resume=resume, cpu=cpu)

        # Honour the ``cpu`` flag; previously it was accepted but ignored.
        use_cuda = torch.cuda.is_available() and not cpu
        self.device = torch.device('cuda' if use_cuda else 'cpu')
        torch.backends.cudnn.benchmark = True

        # Tracking state; stays None until select_region() is called.
        self.state = None
        self.cfg = load_config(args)  # TODO figure out the important parts of this
        self.siammask = Custom(anchors=self.cfg['anchors'])
        if args.resume:
            assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
            self.siammask = load_pretrain(self.siammask, args.resume)
        self.siammask.eval().to(self.device)

    def select_region(self, image, xywh=None):
        """
        Initialise the tracker on the first frame.

        image : 3 channel image
            The initial image with the object
        xywh : ArrayLike
            the position of the initial bounding rectangle as [x, y, w, h]
            If unspecified, a pop up selection will be used
        """
        if xywh is None:
            xywh = cv2.selectROI('SiamMask', image, False, False)
        x, y, w, h = xywh  # simply expand for convenience
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        # init tracker
        self.state = siamese_init(image, target_pos, target_sz, self.siammask,
                                  self.cfg['hp'], device=self.device)

    def predict(self, image, visualize=False, verbose=False):
        """Track the object into ``image`` and draw the result onto it.

        ``image`` is modified in place: the third channel is set to 255
        wherever the mask is on, and the predicted polygon is drawn.

        Returns ``(ltwh, score, image)`` where ``ltwh`` is the axis-aligned
        bounding box ``[left, top, width, height]`` enclosing the predicted
        polygon, ``score`` is ``state['score']`` and ``image`` is the
        annotated input.

        Raises RuntimeError if ``select_region`` has not been called yet.
        """
        if self.state is None:
            raise RuntimeError('select_region() must be called before predict()')

        self.state = siamese_track(self.state, image, mask_enable=True,
                                   refine_enable=True, device=self.device)
        score = self.state["score"]
        # 'ploygon' (sic) is the key this class reads from the tracker state.
        polygon = self.state['ploygon'].reshape(-1, 2)

        # compute as ltwh: enclose all corners so that width and height are
        # non-negative whichever corner the polygon happens to start at.
        left_top = polygon.min(axis=0)
        ltwh = np.concatenate((left_top, polygon.max(axis=0) - left_top))

        # Integer corner coordinates for drawing (np.int0 no longer exists
        # in NumPy 2.x).
        transformed_loc = [polygon.astype(np.int32).reshape((-1, 1, 2))]
        if verbose:
            print("transformed loc : {}".format(transformed_loc))

        mask = self.state['mask'] > self.state['p'].seg_thr
        image[:, :, 2] = (mask > 0) * 255 + (mask == 0) * image[:, :, 2]
        cv2.polylines(image, transformed_loc, True, (0, 255, 0), 3)
        if visualize:
            cv2.imshow('SiamMask', image)
            cv2.waitKey(10000)
        return ltwh, score, image  # TODO the image should be a crop
def main():
    """Evaluate a tracker on a VOT or VOS dataset and log summary metrics.

    Parses the command line into the module-level ``args``, builds the
    network, runs ``track_vos`` or ``track_vot`` on every selected video and
    finally logs mIoU per segmentation threshold (VOS) or the total number of
    lost frames (VOT), plus the mean speed.
    """
    global args, logger, v_id
    args = parser.parse_args()
    cfg = load_config(args)

    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info(args)

    # setup model
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    device = torch.device('cuda' if (torch.cuda.is_available()) else 'cpu')
    model = model.to(device)

    # setup dataset
    dataset = load_dataset(args.dataset)

    # VOS or VOT?
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    # Tracker hyper-parameters are optional in the config.
    hp = cfg['hp'] if 'hp' in cfg else None

    total_lost = 0  # VOT
    iou_lists = []  # VOS
    speed_list = []

    for v_id, video in enumerate(dataset.keys(), start=1):
        if args.video != '' and video != args.video:
            continue

        if vos_enable:
            iou_list, speed = track_vos(model, dataset[video], hp, args.mask, args.refine,
                                        args.dataset in ['DAVIS2017', 'ytb_vos'], device=device)
            iou_lists.append(iou_list)
        else:
            lost, speed = track_vot(model, dataset[video], hp, args.mask, args.refine,
                                    device=device)
            total_lost += lost
        speed_list.append(speed)

    # Nothing was evaluated (e.g. --video matched no sequence): the summary
    # below would fail on the empty lists, so report and stop instead.
    if not speed_list:
        logger.warning('No video was evaluated (dataset: {}, video filter: {!r})'.format(
            args.dataset, args.video))
        return

    # report final result
    if vos_enable:
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))

    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))
def __init__(self, sample_im, base_dir='', x=0, y=0, w=10, h=10,
             use_tensorrt=False, fp16_mode=True, features_trt=True,
             rpn_trt=False, mask_trt=False, refine_trt=False,
             trt_weights_path='/root/msl_raptor_ws/src/msl_raptor/src/front_end/SiamMask/weights_trt'):
    """Load SiamMask, initialise it on ``sample_im`` and optionally enable TensorRT.

    sample_im : image used to initialise the tracker state.
    base_dir : prefix concatenated directly in front of the
        ``SiamMask/experiments/...`` paths, so it must be empty or end with
        a path separator.
    x, y, w, h : initial bounding box (top-left corner plus size).
    use_tensorrt : when true, ``init_trt`` is called on the network stored
        in the tracker state with the remaining ``*_trt`` / ``fp16_mode``
        flags.
    trt_weights_path : directory handed to ``init_trt``; defaults to the
        location that used to be hard-coded here.
    """
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    # Setup Model
    args = argparse.Namespace()
    args.config = base_dir + 'SiamMask/experiments/siammask_sharp/config_vot.json'
    args.resume = base_dir + 'SiamMask/experiments/siammask_sharp/SiamMask_VOT.pth'
    self.cfg = load_config(args)
    from custom import Custom
    siammask = Custom(anchors=self.cfg['anchors'])

    # Verify the checkpoint exists *before* loading it, and load it once
    # (it used to be loaded ahead of the check and then a second time).
    assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
    siammask = load_pretrain(siammask, args.resume)
    siammask.eval().to(self.device)

    target_pos = np.array([x + w / 2, y + h / 2])
    target_sz = np.array([w, h])
    self.state = siamese_init(sample_im, target_pos, target_sz, siammask,
                              self.cfg['hp'], device=self.device)  # init tracker
    if use_tensorrt:
        self.state['net'].init_trt(fp16_mode, features_trt, rpn_trt, mask_trt, refine_trt,
                                   trt_weights_path=trt_weights_path)

    # State entries kept per tracked object ('ploygon' is spelled as the
    # key is written in this file).
    self.keys_to_share = ['target_pos', 'target_sz', 'score', 'mask', 'ploygon']
    self.states_each_object = []
    self.current_classes = []
def get_siammask():
    """Build a ``Custom`` network from the module-level ``cfg``/``args``/``device``.

    If ``args.resume`` is set, the checkpoint must exist and its weights are
    loaded. The network is switched to eval mode, moved to ``device`` and
    returned.
    """
    net = Custom(anchors=cfg['anchors'])
    checkpoint = args.resume
    if checkpoint:
        assert isfile(checkpoint), 'Please download {} first.'.format(checkpoint)
        net = load_pretrain(net, checkpoint)

    net.eval().to(device)
    return net
def track_init(img, x, y, w, h, device):
    """Create a SiamMask tracker state for the box ``(x, y, w, h)`` in ``img``.

    Loads the configuration from ``config_davis.json`` and the weights from
    ``SiamMask_VOT.pth`` (both relative to the working directory), then
    returns whatever ``siamese_init`` produces for the box centre and size.

    NOTE(review): other call sites in this file hand ``load_config`` an
    argument namespace; confirm that a bare path string is accepted here.
    """
    cfg = load_config('config_davis.json')

    # Setup Model
    from custom import Custom
    net = load_pretrain(Custom(anchors=cfg['anchors']), 'SiamMask_VOT.pth')
    net.eval().to(device)

    centre = np.array([x + w / 2, y + h / 2])
    size = np.array([w, h])
    return siamese_init(img, centre, size, net, cfg['hp'], device=device)
def __init__(self):
    """Create the SiamMask network described by ``FakeArgParser``.

    Stores the network, the argument object and the loaded configuration on
    the instance; ``self.state`` starts as ``None``. The selected torch
    device is only used to place the network and is not stored.
    """
    # Setup device
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)
    torch.backends.cudnn.benchmark = True

    # Setup Model
    args = FakeArgParser()
    cfg = libsiam.load_config(args)
    net = Custom(anchors=cfg['anchors'])
    checkpoint = args.resume
    if checkpoint:
        assert libsiam.isfile(checkpoint), '{} is not a valid file'.format(checkpoint)
        net = libsiam.load_pretrain(net, checkpoint)
    net.eval().to(device)

    # -- Output
    self.siammask = net
    self.args = args
    self.cfg = cfg
    self.state = None
class SiamTracker(object):
    """SiamMask-based tracker that keeps per-object state outside the class.

    ``get_state`` creates a state for a new object and ``track`` advances an
    existing state by one frame. The network is converted with ``.half()``
    before being moved to the selected device.
    """

    def __init__(self,
                 resume=os.path.join(os.path.dirname(__file__),
                                     'experiments/siammask_sharp/SiamMask_DAVIS.pth'),
                 config=os.path.join(os.path.dirname(__file__),
                                     'experiments/siammask_sharp/config_davis.json')):
        """Load the configuration and the pretrained network.

        resume : checkpoint path; it must exist.
        config : configuration path read by ``load_config``.
        """
        self.args = EasyDict(resume=resume, config=config)
        device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device_name)
        torch.backends.cudnn.benchmark = True

        # Setup Model
        self.cfg = load_config(self.args)
        self.siammask = Custom(anchors=self.cfg['anchors'])
        checkpoint = self.args.resume
        assert isfile(checkpoint), 'Please download {} first.'.format(checkpoint)
        self.siammask = load_pretrain(self.siammask, checkpoint)
        self.siammask = self.siammask.eval().half().to(self.device)

    def get_state(self, im, bbox):
        """Initialise a track for ``bbox`` in ``im``.

        ``bbox`` is read as ``[x1, y1, x2, y2]``: the width and height are
        obtained by subtracting the first two entries from the last two.
        The returned state additionally carries a random ``track_id``, the
        original ``box`` and a ``score`` of 1.
        """
        left, top = bbox[0], bbox[1]
        width = bbox[2] - left
        height = bbox[3] - top
        centre = np.array([left + width / 2, top + height / 2])
        size = np.array([width, height])

        state = siamese_init(im, centre, size, self.siammask, self.cfg['hp'],
                             device=self.device)
        state['track_id'] = np.random.randint(1000000)
        state['box'] = bbox
        state['score'] = 1.
        return state

    def track(self, state, im):
        """Advance ``state`` to frame ``im`` (mask and refinement disabled).

        The returned state keeps the incoming ``track_id`` and gets a ``box``
        built from the new centre minus/plus half of the new size.
        """
        tracked = siamese_track(state, im, self.siammask, mask_enable=False,
                                refine_enable=False, device=self.device)
        tracked['track_id'] = state['track_id']

        centre = tracked['target_pos']
        half_size = tracked['target_sz'] / 2
        tracked['box'] = np.concatenate((centre - half_size, centre + half_size))
        return tracked
def main():
    """Run ``tune`` for every hyper-parameter combination on every video.

    The search space comes from ``args`` (``penalty_k``, ``window_influence``,
    ``lr`` and ``search_region``). Videos are visited in random order and the
    first three parameter lists are reshuffled in place for each video. The
    run stops as soon as a file named ``finish.flag`` exists in the working
    directory. The current video's data is published through the
    module-level ``ims``, ``gt`` and ``image_files``.
    """
    global ims, gt, image_files

    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    params = {
        'penalty_k': args.penalty_k,
        'window_influence': args.window_influence,
        'lr': args.lr,
        'instance_size': args.search_region,
    }

    # Size of the search grid: product of the four list lengths.
    num_search = 1
    for name in ('penalty_k', 'window_influence', 'lr', 'instance_size'):
        num_search *= len(params[name])

    print(params)
    print(num_search)

    cfg = load_config(args)
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        model = models.__dict__[args.arch](anchors=cfg['anchors'])

    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    model = model.to(device)

    default_hp = cfg.get('hp', {})

    # Checkpoint file name without directory and extension.
    checkpoint_stem = args.resume.split('/')[-1].split('.')[0]
    p = {
        'network': model,
        'network_name': args.arch + '_' + checkpoint_stem,
        'dataset': args.dataset,
    }

    dataset_info = load_dataset(args.dataset)
    videos = list(dataset_info.keys())
    np.random.shuffle(videos)

    for video in videos:
        print(video)
        if isfile('finish.flag'):
            return

        p['video'] = video
        ims = None
        video_info = dataset_info[video]
        image_files = video_info['image_files']
        gt = video_info['gt']

        # Reshuffle the visiting order of three of the four axes in place.
        for name in ('penalty_k', 'window_influence', 'lr'):
            np.random.shuffle(params[name])

        for penalty_k in params['penalty_k']:
            for window_influence in params['window_influence']:
                for lr in params['lr']:
                    for instance_size in params['instance_size']:
                        hp = default_hp.copy()
                        hp.update({
                            'penalty_k': penalty_k,
                            'window_influence': window_influence,
                            'lr': lr,
                            'instance_size': instance_size,
                        })
                        p['hp'] = hp
                        tune(p)
class Dimp_LTMU_Tracker(object):
    """Long-term tracker combining several components.

    A local tracker proposes a box each frame, an MDNet model verifies it, a
    global tracker re-detects the target after repeated failures, a metric
    network and an LSTM decide whether the local model should be updated,
    and SiamMask optionally refines the box.

    Boxes appear in two layouts in this class: ``[x, y, w, h]`` for the
    public input/output and ``[ymin, xmin, ymax, xmax]`` for ``self.last_gt``.
    """

    def __init__(self, image, region, p=None, groundtruth=None):
        """Initialise every sub-tracker on the first frame.

        image : first frame (indexed as ``image.shape[0]``/``[1]`` for
            height/width elsewhere in this class).
        region : object with ``x``, ``y``, ``width`` and ``height``.
        p : parameter object; although it defaults to ``None`` it is
            required in practice because ``p.use_mask``, ``p.model_dir``
            etc. are read.
        groundtruth : optional annotations, only forwarded to ``show_res``.
        """
        self.p = p
        self.i = 0
        self.t_id = 0
        # Always define the attribute: tracking() forwards it to show_res()
        # whenever visualisation is on, which used to raise AttributeError
        # when no ground truth had been supplied.
        self.groundtruth = groundtruth

        tfconfig = tf.ConfigProto()
        tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.3
        self.sess = tf.Session(config=tfconfig)

        init_gt1 = [region.x, region.y, region.width, region.height]
        init_gt = [
            init_gt1[1], init_gt1[0],
            init_gt1[1] + init_gt1[3], init_gt1[0] + init_gt1[2]
        ]  # ymin xmin ymax xmax
        self.last_gt = init_gt

        self.init_pymdnet(image, init_gt1)
        self.local_init(image, init_gt1)
        self.Golbal_Track_init(image, init_gt1)
        if self.p.use_mask:
            self.siammask_init(image, init_gt1)
        self.tc_init(self.p.model_dir)
        self.metric_init(image, np.array(init_gt1))

        # Per-frame history consumed by the LSTM in local_track().
        self.dis_record = []
        self.state_record = []
        self.rv_record = []
        self.all_map = []
        self.count = 0  # consecutive 'not_found' frames since the last reset

        # Run the local tracker once on the first frame to seed the records.
        _, self.score_map, _, self.score_max, _, _, _ = self.local_track(image)

        # local_track() overwrote last_gt; re-centre the local tracker on it.
        self.local_Tracker.pos = torch.FloatTensor([
            (self.last_gt[0] + self.last_gt[2] - 1) / 2,
            (self.last_gt[1] + self.last_gt[3] - 1) / 2
        ])
        self.local_Tracker.target_sz = torch.FloatTensor([
            (self.last_gt[2] - self.last_gt[0]),
            (self.last_gt[3] - self.last_gt[1])
        ])

    def get_first_state(self):
        """Return ``(score_map, score_max)`` as currently stored."""
        return self.score_map, self.score_max

    def siammask_init(self, im, init_gt):
        """Load SiamMask and initialise it on ``im`` with box ``[x, y, w, h]``."""
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        parser = argparse.ArgumentParser(description='PyTorch Tracking Demo')
        parser.add_argument(
            '--resume',
            default='SiamMask/experiments/siammask/SiamMask_VOT_LD.pth',
            type=str,
            metavar='PATH',
            help='path to latest checkpoint (default: none)')
        parser.add_argument(
            '--config',
            dest='config',
            default='SiamMask/experiments/siammask/config_vot19lt.json',
            help='hyper-parameter of SiamMask in json format')
        # parse_known_args: this parser only knows two options, so the host
        # script's own command-line flags must not abort tracker set-up.
        args, _ = parser.parse_known_args()

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        torch.backends.cudnn.benchmark = True

        # Setup Model
        cfg = load_config(args)
        self.siammask = Custom(anchors=cfg['anchors'])
        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(
                args.resume)
            self.siammask = load_pretrain(self.siammask, args.resume)
        self.siammask.eval().to(device)

        x = init_gt[0]
        y = init_gt[1]
        w = init_gt[2]
        h = init_gt[3]
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        self.siamstate = siamese_init(im, target_pos, target_sz,
                                      self.siammask, cfg['hp'])

    def siammask_track(self, im):
        """Advance SiamMask by one frame; return ``(score, boolean mask)``."""
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        self.siamstate = siamese_track(self.siamstate, im, mask_enable=True,
                                       refine_enable=True)  # track
        score = np.max(self.siamstate['score'])
        mask = self.siamstate['mask'] > self.siamstate['p'].seg_thr
        return score, mask

    def Golbal_Track_init(self, image, init_box):
        """Create the global re-detection tracker; ``init_box`` is ``[x, y, w, h]``."""
        # The global tracker is initialised with corner coordinates.
        init_box = [
            init_box[0], init_box[1],
            init_box[0] + init_box[2], init_box[1] + init_box[3]
        ]
        cfg_file = 'Global_Track/configs/qg_rcnn_r50_fpn.py'
        ckp_file = 'Global_Track/checkpoints/qg_rcnn_r50_fpn_coco_got10k_lasot.pth'
        transforms = data.BasicPairTransforms(train=False)
        self.Global_Tracker = GlobalTrack(cfg_file, ckp_file, transforms,
                                          name_suffix='qg_rcnn_r50_fpn')
        self.Global_Tracker.init(image, init_box)

    def Global_Track_eval(self, image, num):
        """Return the ``num`` best global candidates as ``[x, y, w, h]`` rows.

        The last column of the tracker output is used as the ranking score
        and the first four columns as corner coordinates.
        """
        results = self.Global_Tracker.update(image)
        index = np.argsort(results[:, -1])[::-1]
        max_index = index[:num]
        can_boxes = results[max_index][:, :4]
        # corners -> xywh
        can_boxes = np.array([
            can_boxes[:, 0], can_boxes[:, 1],
            can_boxes[:, 2] - can_boxes[:, 0],
            can_boxes[:, 3] - can_boxes[:, 1]
        ]).transpose()
        return can_boxes

    def init_pymdnet(self, image, init_bbox):
        """Fine-tune MDNet on the first frame and prepare its sample generators."""
        target_bbox = np.array(init_bbox)
        self.last_result = target_bbox
        self.pymodel = MDNet('./pyMDNet/models/mdnet_imagenet_vid.pth')
        if opts['use_gpu']:
            self.pymodel = self.pymodel.cuda()
        self.pymodel.set_learnable_params(opts['ft_layers'])

        # Init criterion and optimizer
        self.criterion = BCELoss()
        init_optimizer = set_optimizer(self.pymodel, opts['lr_init'],
                                       opts['lr_mult'])
        self.update_optimizer = set_optimizer(self.pymodel, opts['lr_update'],
                                              opts['lr_mult'])

        # NOTE(review): for a NumPy image ``.size`` is the element count, not
        # (width, height) - confirm what SampleGenerator/BBRegressor expect.
        img_size = image.size

        # Draw pos/neg samples
        pos_examples = SampleGenerator('gaussian', img_size, opts['trans_pos'],
                                       opts['scale_pos'])(
                                           target_bbox, opts['n_pos_init'],
                                           opts['overlap_pos_init'])

        neg_examples = np.concatenate([
            SampleGenerator('uniform', img_size, opts['trans_neg_init'],
                            opts['scale_neg_init'])(
                                target_bbox, int(opts['n_neg_init'] * 0.5),
                                opts['overlap_neg_init']),
            SampleGenerator('whole', img_size)(
                target_bbox, int(opts['n_neg_init'] * 0.5),
                opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        # Extract pos/neg features
        pos_feats = forward_samples(self.pymodel, image, pos_examples, opts)
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.feat_dim = pos_feats.size(-1)

        # Initial training
        train(self.pymodel, self.criterion, init_optimizer, pos_feats,
              neg_feats, opts['maxiter_init'], opts=opts)
        del init_optimizer, neg_feats
        torch.cuda.empty_cache()

        # Train bbox regressor
        bbreg_examples = SampleGenerator(
            'uniform', img_size, opts['trans_bbreg'], opts['scale_bbreg'],
            opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                                  opts['overlap_bbreg'])
        bbreg_feats = forward_samples(self.pymodel, image, bbreg_examples, opts)
        self.bbreg = BBRegressor(img_size)
        self.bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
        del bbreg_feats
        torch.cuda.empty_cache()

        # Init sample generators
        self.sample_generator = SampleGenerator('gaussian', img_size,
                                                opts['trans'], opts['scale'])
        self.pos_generator = SampleGenerator('gaussian', img_size,
                                             opts['trans_pos'],
                                             opts['scale_pos'])
        self.neg_generator = SampleGenerator('uniform', img_size,
                                             opts['trans_neg'],
                                             opts['scale_neg'])

        # Init pos/neg features for update
        neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'],
                                          opts['overlap_neg_init'])
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.pos_feats_all = [pos_feats]
        self.neg_feats_all = [neg_feats]

    def pymdnet_eval(self, image, samples):
        """Score ``samples`` with MDNet; returns column 1 of the ``fc6`` output."""
        sample_scores = forward_samples(self.pymodel, image, samples,
                                        out_layer='fc6', opts=opts)
        return sample_scores[:, 1][:].cpu().numpy()

    def collect_samples_pymdnet(self, image):
        """Add positive/negative MDNet features sampled around ``last_gt``."""
        self.t_id += 1
        # last_gt is [ymin, xmin, ymax, xmax]; the generators take xywh.
        target_bbox = np.array([
            self.last_gt[1], self.last_gt[0],
            self.last_gt[3] - self.last_gt[1],
            self.last_gt[2] - self.last_gt[0]
        ])

        pos_examples = self.pos_generator(target_bbox, opts['n_pos_update'],
                                          opts['overlap_pos_update'])
        if len(pos_examples) > 0:
            pos_feats = forward_samples(self.pymodel, image, pos_examples, opts)
            self.pos_feats_all.append(pos_feats)
        if len(self.pos_feats_all) > opts['n_frames_long']:
            del self.pos_feats_all[0]

        neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'],
                                          opts['overlap_neg_update'])
        if len(neg_examples) > 0:
            neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
            self.neg_feats_all.append(neg_feats)
        if len(self.neg_feats_all) > opts['n_frames_short']:
            del self.neg_feats_all[0]

    def pymdnet_short_term_update(self):
        """Retrain MDNet on the most recent positives and all stored negatives."""
        # Short term update
        nframes = min(opts['n_frames_short'], len(self.pos_feats_all))
        pos_data = torch.cat(self.pos_feats_all[-nframes:], 0)
        neg_data = torch.cat(self.neg_feats_all, 0)
        train(self.pymodel, self.criterion, self.update_optimizer, pos_data,
              neg_data, opts['maxiter_update'], opts=opts)

    def pymdnet_long_term_update(self):
        """Retrain MDNet on all stored features every ``long_interval`` samples."""
        if self.t_id % opts['long_interval'] == 0:
            # Long term update
            pos_data = torch.cat(self.pos_feats_all, 0)
            neg_data = torch.cat(self.neg_feats_all, 0)
            train(self.pymodel, self.criterion, self.update_optimizer,
                  pos_data, neg_data, opts['maxiter_update'], opts=opts)

    def metric_init(self, im, init_box):
        """Load the metric network and store the feature of the initial box."""
        self.metric_model = ft_net(class_num=1120)
        path = '../utils/metric_net/metric_model/metric_model.pt'
        self.metric_model.eval()
        self.metric_model = self.metric_model.cuda()
        self.metric_model.load_state_dict(torch.load(path))

        # One forward pass on random input before the real one.
        tmp = np.random.rand(1, 3, 107, 107)
        tmp = (Variable(torch.Tensor(tmp))).type(torch.FloatTensor).cuda()
        self.metric_model(tmp)

        # get target feature
        init_box = init_box.reshape((1, 4))
        anchor_region = me_extract_regions(im, init_box)
        anchor_region = process_regions(anchor_region)
        anchor_region = torch.Tensor(anchor_region)
        anchor_region = (Variable(anchor_region)).type(
            torch.FloatTensor).cuda()
        self.anchor_feature, _ = self.metric_model(anchor_region)

    def metric_eval(self, im, boxes, anchor_feature):
        """Return the L2 distance between each box feature and ``anchor_feature``."""
        box_regions = me_extract_regions(np.array(im), boxes)
        box_regions = process_regions(box_regions)
        box_regions = torch.Tensor(box_regions)
        box_regions = (Variable(box_regions)).type(torch.FloatTensor).cuda()
        # The classification output is not needed for the distance.
        box_features, _ = self.metric_model(box_regions)

        ap_dist = torch.norm(anchor_feature - box_features, 2, dim=1).view(-1)
        return ap_dist

    def tc_init(self, model_dir):
        """Build the update-decision LSTM graph and restore its weights.

        The checkpoint is looked up below ``./meta_updater/<model_dir>``:
        the latest one when ``p.checkpoint`` is None, otherwise
        ``lstm_model.ckpt-<p.checkpoint>``.
        """
        self.tc_model = tclstm()
        self.X_input = tf.placeholder(
            "float", [None, tcopts['time_steps'], tcopts['lstm_num_input']])
        self.maps = tf.placeholder("float", [None, 19, 19, 1])
        self.map_logits = self.tc_model.map_net(self.maps)
        self.Inputs = tf.concat((self.X_input, self.map_logits), axis=2)
        self.logits, _ = self.tc_model.net(self.Inputs)

        variables_to_restore = [
            var for var in tf.global_variables()
            if var.name.startswith(('tclstm', 'mapnet'))
        ]
        saver = tf.train.Saver(var_list=variables_to_restore)
        if self.p.checkpoint is None:
            checkpoint = tf.train.latest_checkpoint(
                os.path.join('./meta_updater', model_dir))
        else:
            # Use the argument in both branches (the constructor passes
            # p.model_dir, so its behaviour is unchanged).
            checkpoint = './meta_updater/' + model_dir + '/lstm_model.ckpt-' + str(
                self.p.checkpoint)
        saver.restore(self.sess, checkpoint)

    def local_init(self, image, init_bbox):
        """Create and initialise the local ('dimp', 'dimp50') tracker."""
        local_tracker = Tracker('dimp', 'dimp50')
        params = local_tracker.get_parameters()

        debug_ = getattr(params, 'debug', 0)
        params.debug = debug_

        params.tracker_name = local_tracker.name
        params.param_name = local_tracker.parameter_name
        self.local_Tracker = local_tracker.tracker_class(params)
        init_box = dict()
        init_box['init_bbox'] = init_bbox
        self.local_Tracker.initialize(image, init_box)

    def local_track(self, image):
        """Run the local tracker on ``image`` and decide whether to update it.

        Returns ``(state, score_map, update, max_score, distance, flag,
        update_score)``. ``update`` comes from the tracker flag until
        ``p.start_frame`` frames have been recorded; afterwards it is decided
        by the LSTM from the last ``time_steps`` boxes, scores, metric
        distances and score maps. Also refreshes ``self.last_gt``.
        """
        state, score_map, test_x, scale_ind, sample_pos, sample_scales, flag, s = \
            self.local_Tracker.track_updater(image)
        update_score = 0
        update = flag not in ['not_found', 'uncertain']
        max_score = max(score_map.flatten())
        self.all_map.append(score_map)

        local_state = np.array(state).reshape((1, 4))
        ap_dis = self.metric_eval(image, local_state, self.anchor_feature)
        self.dis_record.append(ap_dis.data.cpu().numpy()[0])

        # Box corners divided by the image width/height.
        h = image.shape[0]
        w = image.shape[1]
        self.state_record.append([
            local_state[0][0] / w, local_state[0][1] / h,
            (local_state[0][0] + local_state[0][2]) / w,
            (local_state[0][1] + local_state[0][3]) / h
        ])
        self.rv_record.append(max_score)

        if len(self.state_record) >= self.p.start_frame:
            steps = tcopts["time_steps"]
            dis = np.array(self.dis_record[-steps:]).reshape((steps, 1))
            rv = np.array(self.rv_record[-steps:]).reshape((steps, 1))
            state_tc = np.array(self.state_record[-steps:])
            map_input = np.array(self.all_map[-steps:])
            map_input = np.reshape(map_input, [tcopts['time_steps'], 1, 19, 19])
            map_input = map_input.transpose((0, 2, 3, 1))
            X_input = np.concatenate((state_tc, rv, dis), axis=1)
            logits = self.sess.run(self.logits,
                                   feed_dict={
                                       self.X_input: np.expand_dims(X_input, axis=0),
                                       self.maps: map_input
                                   })
            update = logits[0][0] < logits[0][1]
            update_score = logits[0][1]

        hard_negative = (flag == 'hard_negative')
        learning_rate = getattr(self.local_Tracker.params,
                                'hard_negative_learning_rate',
                                None) if hard_negative else None

        if update:
            # Get train sample
            train_x = test_x[scale_ind:scale_ind + 1, ...]

            # Create target_box and label for spatial sample
            target_box = self.local_Tracker.get_iounet_box(
                self.local_Tracker.pos, self.local_Tracker.target_sz,
                sample_pos[scale_ind, :], sample_scales[scale_ind])

            # Update the classifier model
            self.local_Tracker.update_classifier(train_x, target_box,
                                                 learning_rate,
                                                 s[scale_ind, ...])

        # xywh -> [ymin, xmin, ymax, xmax]
        self.last_gt = [
            state[1], state[0], state[1] + state[3], state[0] + state[2]
        ]
        return state, score_map, update, max_score, ap_dis.data.cpu().numpy(
        )[0], flag, update_score

    def locate(self, image):
        """Localise the target with the local tracker without updating it."""
        # Convert image
        im = numpy_to_torch(image)
        self.local_Tracker.im = im  # For debugging only

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.local_Tracker.pos.round()
        sample_scales = self.local_Tracker.target_scale * self.local_Tracker.params.scale_factors
        test_x = self.local_Tracker.extract_processed_sample(
            im, self.local_Tracker.pos, sample_scales,
            self.local_Tracker.img_sample_sz)

        # Compute scores
        scores_raw = self.local_Tracker.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = self.local_Tracker.localize_target(
            scores_raw)
        return translation_vec, scale_ind, s, flag, sample_pos, sample_scales, test_x

    def local_update(self,
                     sample_pos,
                     translation_vec,
                     scale_ind,
                     sample_scales,
                     s,
                     test_x,
                     update_flag=None):
        """Update the local tracker's memory and filter from a located sample.

        Reads ``self.flag``, which is assigned in ``tracking``.
        """
        # Check flags and set learning rate if hard negative
        if update_flag is None:
            update_flag = self.flag not in ['not_found', 'uncertain']
        hard_negative = (self.flag == 'hard_negative')
        learning_rate = self.local_Tracker.params.hard_negative_learning_rate if hard_negative else None

        if update_flag:
            # Get train sample
            train_x = TensorList(
                [x[scale_ind:scale_ind + 1, ...] for x in test_x])

            # Create label for sample
            train_y = self.local_Tracker.get_label_function(
                sample_pos, sample_scales[scale_ind])

            # Update memory
            self.local_Tracker.update_memory(train_x, train_y, learning_rate)

        # Train filter
        if hard_negative:
            self.local_Tracker.filter_optimizer.run(
                self.local_Tracker.params.hard_negative_CG_iter)
        elif (self.local_Tracker.frame_num -
              1) % self.local_Tracker.params.train_skipping == 0:
            self.local_Tracker.filter_optimizer.run(
                self.local_Tracker.params.CG_iter)

    def tracking(self, image):
        """Track the target into ``image``.

        Returns ``([x, y, w, h], score_map, 0, confidence_score, 0)``.
        """
        self.i += 1
        mask = None
        candidate_bboxes = None

        # Re-centre the local tracker on the last accepted box.
        self.local_Tracker.pos = torch.FloatTensor([
            (self.last_gt[0] + self.last_gt[2] - 1) / 2,
            (self.last_gt[1] + self.last_gt[3] - 1) / 2
        ])
        self.local_Tracker.target_sz = torch.FloatTensor([
            (self.last_gt[2] - self.last_gt[0]),
            (self.last_gt[3] - self.last_gt[1])
        ])

        tic = time.time()
        local_state, self.score_map, update, local_score, _, flag, update_score = \
            self.local_track(image)

        # Verify the local result with MDNet.
        md_score = self.pymdnet_eval(image,
                                     np.array(local_state).reshape([-1, 4]))[0]
        self.score_max = md_score

        if md_score > 0 and flag == 'normal':
            self.flag = 'found'
            if self.p.use_mask:
                self.siamstate['target_pos'] = self.local_Tracker.pos.numpy()[::-1]
                self.siamstate['target_sz'] = self.local_Tracker.target_sz.numpy()[::-1]
                # NOTE(review): siammask_track() applies COLOR_RGB2BGR again,
                # so the frame is channel-swapped twice here whereas
                # siammask_init() swaps once - confirm this is intended.
                _, mask = self.siammask_track(
                    cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
                self.local_Tracker.pos = torch.FloatTensor(
                    self.siamstate['target_pos'][::-1].copy())
                self.local_Tracker.target_sz = torch.FloatTensor(
                    self.siamstate['target_sz'][::-1].copy())

                local_state = torch.cat(
                    (self.local_Tracker.pos[[1, 0]] -
                     (self.local_Tracker.target_sz[[1, 0]] - 1) / 2,
                     self.local_Tracker.target_sz[[1, 0]])).data.cpu().numpy()
                self.last_gt = np.array([
                    local_state[1], local_state[0],
                    local_state[1] + local_state[3],
                    local_state[0] + local_state[2]
                ])
        elif md_score < 0 or flag == 'not_found':
            self.count += 1
            self.flag = 'not_found'
            candidate_bboxes = self.Global_Track_eval(image, 10)
            candidate_scores = self.pymdnet_eval(image, candidate_bboxes)
            max_id = np.argmax(candidate_scores)
            if candidate_scores[max_id] > 0:
                redet_bboxes = candidate_bboxes[max_id]
                # Only jump to the re-detection after 5 failed frames.
                if self.count >= 5:
                    self.last_gt = np.array([
                        redet_bboxes[1], redet_bboxes[0],
                        redet_bboxes[1] + redet_bboxes[3],
                        redet_bboxes[2] + redet_bboxes[0]
                    ])
                    self.local_Tracker.pos = torch.FloatTensor([
                        (self.last_gt[0] + self.last_gt[2] - 1) / 2,
                        (self.last_gt[1] + self.last_gt[3] - 1) / 2
                    ])
                    self.local_Tracker.target_sz = torch.FloatTensor([
                        (self.last_gt[2] - self.last_gt[0]),
                        (self.last_gt[3] - self.last_gt[1])
                    ])
                    self.score_max = candidate_scores[max_id]
                    self.count = 0

        if update:
            self.collect_samples_pymdnet(image)

        self.pymdnet_long_term_update()

        width = self.last_gt[3] - self.last_gt[1]
        height = self.last_gt[2] - self.last_gt[0]
        toc = time.time() - tic
        print(toc)

        confidence_score = np.clip(
            (local_score + np.arctan(0.2 * self.score_max) / math.pi + 0.5) /
            2, 0, 1)
        if self.p.visualization:
            show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                     np.array(self.last_gt, dtype=np.int32),
                     '2',
                     groundtruth=self.groundtruth,
                     update=update_score,
                     can_bboxes=candidate_bboxes,
                     frame_id=self.i,
                     tracker_score=md_score,
                     mask=mask)

        return [
            float(self.last_gt[1]),
            float(self.last_gt[0]),
            float(width),
            float(height)
        ], self.score_map, 0, confidence_score, 0
class SingleTracker(object):
    """SiamMask tracker that keeps the template feature outside the class.

    ``get_examplar_feature`` computes the template embedding once per target
    and ``siamese_track`` locates that target in a new frame.
    """

    def __init__(self, config_path, model_path):
        """Load the network and pre-compute anchors and the score window.

        config_path : configuration file passed to ``load_config``.
        model_path : checkpoint path; skipped when falsy.

        Raises ValueError when the configured windowing mode is neither
        ``'cosine'`` nor ``'uniform'``.
        """
        args = TrackArgs()
        args.config = config_path
        args.resume = model_path
        cfg = load_config(args)
        if args.arch == 'Custom':
            from custom import Custom
            self.model = Custom(anchors=cfg['anchors'])
        else:
            # NOTE(review): relies on a module-level ``parser`` that is not
            # created in this class - confirm it exists in this module.
            parser.error('invalid architecture: {}'.format(args.arch))

        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
            self.model = load_pretrain(self.model, args.resume)
        self.model.eval()
        self.device = torch.device('cuda' if (torch.cuda.is_available() and not args.cpu) else 'cpu')
        self.model = self.model.to(self.device)

        ################# Dangerous
        self.p = TrackerConfig()
        self.p.update(cfg['hp'] if 'hp' in cfg else None, self.model.anchors)
        self.p.renew()

        self.p.scales = self.model.anchors['scales']
        self.p.ratios = self.model.anchors['ratios']
        self.p.anchor_num = self.model.anchor_num
        self.p.anchor = generate_anchor(self.model.anchors, self.p.score_size)

        if self.p.windowing == 'cosine':
            self.window = np.outer(np.hanning(self.p.score_size), np.hanning(self.p.score_size))
        elif self.p.windowing == 'uniform':
            self.window = np.ones((self.p.score_size, self.p.score_size))
        else:
            # Fail with a clear message instead of an AttributeError below.
            raise ValueError('unsupported windowing mode: {!r}'.format(self.p.windowing))
        # One copy of the window per anchor, flattened like the score vector.
        self.window = np.tile(self.window.flatten(), self.p.anchor_num)
        ################

    def get_examplar_feature(self, img, target_pos, target_sz):
        """Return the network's template embedding for the given target.

        target_pos / target_sz : centre and size of the target in ``img``.
        """
        avg_chans = np.mean(img, axis=(0, 1))
        wc_z = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_z = round(np.sqrt(wc_z * hc_z))
        # initialize the exemplar
        examplar = get_subwindow_tracking(img, target_pos, self.p.exemplar_size, s_z, avg_chans)
        z = Variable(examplar.unsqueeze(0))
        return self.model.template(z.to(self.device))

    @staticmethod
    def _crop_back(image, bbox, out_sz, padding=-1):
        """Affine-warp ``image`` so that ``bbox`` fills an ``out_sz`` canvas."""
        a = (out_sz[0] - 1) / bbox[2]
        b = (out_sz[1] - 1) / bbox[3]
        c = -a * bbox[0]
        d = -b * bbox[1]
        # np.float64 is what the removed ``np.float`` alias resolved to.
        mapping = np.array([[a, 0, c],
                            [0, b, d]]).astype(np.float64)
        crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]),
                              flags=cv2.INTER_LINEAR,
                              borderMode=cv2.BORDER_CONSTANT,
                              borderValue=padding)
        return crop

    def siamese_track(self, img, target_pos, target_sz, examplar_feature, debug=False,
                      mask_enable=True, refine_enable=True):
        """Locate the target in ``img`` starting from its previous position.

        img : frame, indexed as ``img.shape[0]``/``[1]`` for height/width.
        target_pos, target_sz : previous centre and size of the target.
        examplar_feature : value returned by ``get_examplar_feature``.
        debug : accepted for compatibility; not used.
        mask_enable : also predict a mask (``[]`` is returned otherwise).
        refine_enable : use the network's refinement head for the mask.

        Returns ``(target_pos, target_sz, score, mask)``. The position is
        clamped to the image and the size to ``[10, image extent]``. ``mask``
        is the mask probability map warped back to image size; it is not
        thresholded. No polygon is part of the return value, so none is
        extracted from the mask.
        """
        avg_chans = np.mean(img, axis=(0, 1))
        im_h = img.shape[0]
        im_w = img.shape[1]

        wc_x = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_x = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_x = np.sqrt(wc_x * hc_x)

        # Equivalent to padding the exemplar-sized window by
        # (instance_size - exemplar_size) / 2 on each side at scale_x:
        # the search window is s_x scaled by instance_size / exemplar_size.
        scale_x = self.p.exemplar_size / s_x
        s_x = self.p.instance_size / self.p.exemplar_size * s_x
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2,
                    round(s_x), round(s_x)]

        # extract scaled crops for search region x at previous target position
        x_crop = Variable(get_subwindow_tracking(img, target_pos, self.p.instance_size,
                                                 round(s_x), avg_chans).unsqueeze(0))

        if mask_enable:
            score, delta, mask = self.model.track_mask(examplar_feature, x_crop.to(self.device))
        else:
            score, delta = self.model.track(examplar_feature, x_crop.to(self.device))

        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
        score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0),
                          dim=1).data[:, 1].cpu().numpy()

        # Decode the regression output relative to the anchors.
        delta[0, :] = delta[0, :] * self.p.anchor[:, 2] + self.p.anchor[:, 0]
        delta[1, :] = delta[1, :] * self.p.anchor[:, 3] + self.p.anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * self.p.anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * self.p.anchor[:, 3]

        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            sz2 = (w + pad) * (h + pad)
            return np.sqrt(sz2)

        def sz_wh(wh):
            pad = (wh[0] + wh[1]) * 0.5
            sz2 = (wh[0] + pad) * (wh[1] + pad)
            return np.sqrt(sz2)

        # size penalty
        target_sz_in_crop = target_sz * scale_x
        s_c = change(sz(delta[2, :], delta[3, :]) / (sz_wh(target_sz_in_crop)))  # scale penalty
        r_c = change((target_sz_in_crop[0] / target_sz_in_crop[1]) /
                     (delta[2, :] / delta[3, :]))  # ratio penalty

        penalty = np.exp(-(r_c * s_c - 1) * self.p.penalty_k)
        pscore = penalty * score

        # cos window (motion model)
        pscore = pscore * (1 - self.p.window_influence) + self.window * self.p.window_influence
        best_pscore_id = np.argmax(pscore)

        pred_in_crop = delta[:, best_pscore_id] / scale_x
        lr = penalty[best_pscore_id] * score[best_pscore_id] * self.p.lr  # lr for OTB

        res_x = pred_in_crop[0] + target_pos[0]
        res_y = pred_in_crop[1] + target_pos[1]

        res_w = target_sz[0] * (1 - lr) + pred_in_crop[2] * lr
        res_h = target_sz[1] * (1 - lr) + pred_in_crop[3] * lr

        target_pos = np.array([res_x, res_y])
        target_sz = np.array([res_w, res_h])

        # for Mask Branch
        if mask_enable:
            # The score vector is laid out as (anchor, row, column); use the
            # configured anchor count rather than a literal 5.
            best_pscore_id_mask = np.unravel_index(
                best_pscore_id, (self.p.anchor_num, self.p.score_size, self.p.score_size))
            delta_x, delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1]

            if refine_enable:
                mask = self.model.track_refine((delta_y, delta_x)).to(self.device).sigmoid().squeeze().view(
                    self.p.out_size, self.p.out_size).cpu().data.numpy()
            else:
                mask = mask[0, :, delta_y, delta_x].sigmoid(). \
                    squeeze().view(self.p.out_size, self.p.out_size).cpu().data.numpy()

            # Map the mask patch back into full-image coordinates.
            s = crop_box[2] / self.p.instance_size
            sub_box = [crop_box[0] + (delta_x - self.p.base_size / 2) * self.p.total_stride * s,
                       crop_box[1] + (delta_y - self.p.base_size / 2) * self.p.total_stride * s,
                       s * self.p.exemplar_size, s * self.p.exemplar_size]
            s = self.p.out_size / sub_box[2]
            back_box = [-sub_box[0] * s, -sub_box[1] * s, im_w * s, im_h * s]
            mask_in_img = self._crop_back(mask, back_box, (im_w, im_h))

        target_pos[0] = max(0, min(im_w, target_pos[0]))
        target_pos[1] = max(0, min(im_h, target_pos[1]))
        target_sz[0] = max(10, min(im_w, target_sz[0]))
        target_sz[1] = max(10, min(im_h, target_sz[1]))

        score = score[best_pscore_id]
        mask = mask_in_img if mask_enable else []

        return target_pos, target_sz, score, mask
def main():
    """Run the tracker over the videos of the dataset named on the command line.

    Parses the arguments, loads the configuration, sets up the 'global'
    logger, builds the ``Custom`` network (loading weights from
    ``args.resume`` when given), moves it to the selected device and calls
    ``track_vos`` or ``track_vot`` once per video.

    ``args``, ``logger`` and ``v_id`` are published as module globals.
    This variant does not aggregate or report IoU / speed results; only
    ``total_lost`` is accumulated.
    """
    global args, logger, v_id

    args = parser.parse_args()
    cfg = load_config(args)

    # Logging goes to the 'global' logger, plus a file when args.log is set.
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)
    logger = logging.getLogger('global')
    logger.info(args)

    # Build the network; only the 'Custom' architecture is accepted.
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()

    # CUDA is used only when it is available and args.cpu is not set.
    use_cuda = torch.cuda.is_available() and not args.cpu
    device = torch.device('cuda' if use_cuda else 'cpu')
    model = model.to(device)

    dataset = load_dataset(args.dataset)

    # Mask (VOS) evaluation needs one of these datasets and args.mask.
    vos_enable = bool(
        args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask)

    hyper_params = cfg['hp'] if 'hp' in cfg.keys() else None
    total_lost = 0  # VOT

    for v_id, video in enumerate(dataset.keys(), start=1):
        # An explicit args.video restricts the run to that single video.
        filtered_out = args.video != '' and video != args.video
        if filtered_out:
            continue

        sequence = dataset[video]
        if not vos_enable:
            lost, speed = track_vot(model, sequence, hyper_params,
                                    args.mask, args.refine, device=device)
            total_lost += lost
            continue

        multi_object = args.dataset in ['DAVIS2017', 'ytb_vos']
        iou_list, speed = track_vos(model, sequence, hyper_params,
                                    args.mask, args.refine, multi_object,
                                    device=device)
if __name__ == '__main__':
    # Demo entry point: build the SiamMask network, load a slice of the image
    # sequence and let the user draw the initial target rectangle.
    # NOTE(review): `args` is not assigned in this block; it is presumably a
    # module-level result of argument parsing -- confirm.

    # Setup device: CUDA when available, otherwise CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    # Setup Model
    cfg = load_config(args)
    from custom import Custom
    siammask = Custom(anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
        siammask = load_pretrain(siammask, args.resume)

    siammask.eval().to(device)

    # Parse Image file: files matching '*.PN*' under args.base_path, sorted
    # by name; only entries 130..149 of that list are actually loaded.
    img_files = sorted(glob.glob(join(args.base_path, '*.PN*')))
    print(img_files)
    ims = [cv2.imread(imf) for imf in img_files[130:150]]
    #img_files = sorted(glob.glob(join(args.base_path, '*.jp*')))
    #ims = [cv2.imread(imf) for imf in img_files]

    # Select ROI on the first loaded image. `ims[0]` raises IndexError when
    # the slice above is empty (fewer than 131 matching files).
    cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN)
    # cv2.setWindowProperty("SiamMask", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
    init_rect = cv2.selectROI('SiamMask', ims[0], False, False)
    x, y, w, h = init_rect
if __name__ == '__main__':
    # Demo entry point (variant with the device handling commented out): the
    # network is built and put in eval mode but never moved to a device here.
    # NOTE(review): `args` is not assigned in this block; it is presumably a
    # module-level result of argument parsing -- confirm.

    # Setup device
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # torch.backends.cudnn.benchmark = True

    # Setup Model
    cfg = load_config(args)
    from custom import Custom
    siammask = Custom(anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        siammask = load_pretrain(siammask, args.resume)

    # siammask.eval().to(device)
    siammask.eval()

    # Parse Image file: every file matching '*.jp*' under args.base_path,
    # sorted by name.
    img_files = sorted(glob.glob(join(args.base_path, '*.jp*')))
    ims = [cv2.imread(imf) for imf in img_files]

    # Select ROI on the first image.
    cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN)
    # cv2.setWindowProperty("SiamMask", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
    try:
        init_rect = cv2.selectROI('SiamMask', ims[0], False, False)
        x, y, w, h = init_rect
    # NOTE(review): the bare `except` turns every failure (including an empty
    # `ims` list and Ctrl-C) into a silent exit; consider narrowing it.
    except:
        exit()

    toc = 0
    for f, im in enumerate(ims):
        # NOTE(review): the body of this loop is not part of this excerpt.
# Setup device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') #大部分情况下,设置这个 flag 可以让内置的 cuDNN 的 auto-tuner 自动寻找最适合当前配置的高效算法,来达到优化运行效率的问题 torch.backends.cudnn.benchmark = True # Setup Model cfg = load_config(args) from custom import Custom siammask = Custom(anchors=cfg['anchors']) # anchors从哪里获得??? if args.resume: assert isfile(args.resume), 'Please download {} first.'.format( args.resume) siammask = load_pretrain(siammask, args.resume) siammask.eval().to(device) #eval属于切换到预测模式,并推送到GPU或CPU上运行 # Parse Image file img_files = sorted(glob.glob(join(args.base_path, '*.jp*'))) ims = [cv2.imread(imf) for imf in img_files] # Select ROI cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN) # cv2.setWindowProperty("SiamMask", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN) try: init_rect = cv2.selectROI('SiamMask', ims[0], False, False) x, y, w, h = init_rect # 返回来4个值 except: exit() toc = 0
def main():
    """Evaluate the tracker on a dataset and log the aggregated results.

    Steps: parse the command line, load the configuration, configure the
    'global' logger, build the ``Custom`` network (loading weights from
    ``args.resume`` when given), move it to the selected device, then run
    ``track_vos`` (mask evaluation) or ``track_vot`` on every selected video.

    At the end the mean IoU per segmentation threshold (VOS) or the total
    ``lost`` value (VOT) is logged, followed by the mean speed. When no
    video was processed at all, a warning is logged instead of attempting
    to average empty result lists.

    ``args``, ``logger`` and ``v_id`` are published as module globals.
    """
    # Command-line arguments.
    global args, logger, v_id
    args = parser.parse_args()
    # Configuration loaded for these arguments (anchors, 'hp', ...).
    cfg = load_config(args)

    # Initialise logging; optionally mirror it to the file given in args.log.
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    # Record the effective arguments in the log.
    logger = logging.getLogger('global')
    logger.info(args)

    # setup model
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    # Load the pretrained weights.
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model = load_pretrain(model, args.resume)
    # Switch the network to evaluation (inference) mode.
    model.eval()
    # CUDA is used only when it is available and args.cpu is not set.
    device = torch.device('cuda' if (
        torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)

    # setup dataset
    dataset = load_dataset(args.dataset)

    # VOS or VOT? Mask output is enabled only for these three datasets.
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    hp = cfg['hp'] if 'hp' in cfg.keys() else None

    total_lost = 0  # VOT
    iou_lists = []  # VOS
    speed_list = []

    for v_id, video in enumerate(dataset.keys(), start=1):
        # An explicit args.video restricts the run to that single video.
        if args.video != '' and video != args.video:
            continue

        if vos_enable:
            # The last positional flag is True for DAVIS2017 / ytb_vos.
            iou_list, speed = track_vos(
                model, dataset[video], hp, args.mask, args.refine,
                args.dataset in ['DAVIS2017', 'ytb_vos'], device=device)
            iou_lists.append(iou_list)
        else:
            lost, speed = track_vot(model, dataset[video], hp, args.mask,
                                    args.refine, device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if not speed_list:
        # Nothing ran (empty dataset or args.video matched no video):
        # np.concatenate([]) would raise ValueError and np.mean([]) is nan.
        logger.warning(
            'No video was evaluated (args.video={!r}); nothing to report.'
            .format(args.video))
        return

    if vos_enable:
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(
                thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))

    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))
class SiamFaceTracker(object):
    """One SiamMask tracker slot that follows a single detection.

    A slot is "recruited" by ``set_state``, refreshed by ``update_state``,
    advanced one frame at a time by ``track_face`` and released by
    ``invalidate``. The label reported for the track is the most frequent
    label among the detections fed to the slot since it was last reset.
    """

    def __init__(self,
                 cfg,
                 min_iou=0.3,
                 scale_factor=2,
                 model="SiamMask_DAVIS.pth"):
        """Build the network and reset all tracking state.

        cfg : mapping
            Configuration; ``cfg['anchors']`` and ``cfg['hp']`` are read.
        min_iou : float
            A frame is accepted only if the IoU between its box and the
            previous frame's box is strictly greater than this value.
        scale_factor :
            Passed to ``scale_bbox`` for every incoming detection.
        model : str
            Path of the pretrained weights handed to ``load_pretrain``.
        """
        device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu'
        )  # to do - check if this should be instantiated for multiple SiamMasks objects
        self.internal_id = uuid.uuid4()  # object identity can also work
        self.cfg = cfg
        self.siammask = Custom(anchors=cfg['anchors'])
        self.siammask = load_pretrain(self.siammask, model)
        self.siammask.eval().to(device)
        self.state = None
        self.prev_bbox = None
        self.is_recruited = False
        self.class_id = None
        self.min_iou = min_iou
        self.iou = None
        self.frames_elapsed_from_set_state = 0
        self.last_tracking_result = None
        self.counter = Counter()
        self.scale_factor = scale_factor

    def _init_from_detection(self, im, detection):
        """(Re)initialise the tracker state from a detection; return its label.

        The scaled detection must provide the keys "left", "top", "right",
        "bottom" and "label". The frame counter is reset and the label's
        vote count is incremented.
        """
        scaled_bbox = scale_bbox(detection, self.scale_factor)
        x = scaled_bbox["left"]
        y = scaled_bbox["top"]
        w = abs(x - scaled_bbox["right"])
        h = abs(y - scaled_bbox["bottom"])
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        self.state = siamese_init(im, target_pos, target_sz, self.siammask,
                                  self.cfg['hp'])
        self.frames_elapsed_from_set_state = 0
        label = scaled_bbox["label"]
        self.counter[label] += 1
        return label

    def set_state(
            self, im, detection
    ):  # we can adapt this input to match the object detector bbox output
        """Recruit this slot: start tracking ``detection`` in frame ``im``."""
        self.class_id = self._init_from_detection(im, detection)
        self.is_recruited = True

    def update_state(self, im, detection):
        """Re-anchor the running track on a fresh ``detection`` in ``im``.

        Unlike ``set_state`` this does not touch ``class_id`` or
        ``is_recruited``; it only adds one vote for the detection's label.
        """
        self._init_from_detection(im, detection)

    def invalidate(self, reason):
        """Print ``reason`` and release the slot, clearing all track data."""
        print(reason)
        self.class_id = None
        self.is_recruited = False
        self.frames_elapsed_from_set_state = 0
        self.last_tracking_result = None
        self.prev_bbox = None
        self.iou = None
        self.counter = Counter()

    def track_face(self, im):
        """Advance the track by one frame.

        Returns None in every case. On success ``last_tracking_result``
        holds a ``TrackingResult`` for the frame; if the box overlaps the
        previous frame's box by ``min_iou`` or less the slot is invalidated.
        Does nothing when the slot is not recruited.
        """
        if not self.is_recruited:
            return None

        self.state = siamese_track(self.state, im, mask_enable=False)
        x, y = self.state["target_pos"]
        w, h = self.state["target_sz"]
        # Centre/size -> integer corner box (x1, y1, x2, y2).
        x = int(x - w / 2)
        y = int(y - h / 2)
        xw = int(x + w)
        yh = int(y + h)
        c_bbox = (x, y, xw, yh)

        if self.prev_bbox:
            self.iou = bb_iou(self.prev_bbox, c_bbox)
        self.prev_bbox = c_bbox
        self.frames_elapsed_from_set_state += 1
        # Majority vote over the labels seen so far.
        self.class_id = self.counter.most_common(1)[0][0]

        # `iou` is None only until a previous box exists. Test for None
        # explicitly so that an IoU of exactly 0.0 (no overlap at all) is
        # rejected rather than mistaken for "no IoU available yet".
        if self.iou is None or self.iou > self.min_iou:
            self.last_tracking_result = TrackingResult(self.class_id, c_bbox)
        else:
            self.invalidate(
                "invalidate: insufficient iou with previous frame")
def process_vedio(vedio_path, initRect):
    """Track one target through a video and record the annotated frames.

    The tracker is initialised on the first frame with ``initRect``. Every
    following frame is tracked, the predicted mask is painted into channel 2
    of the frame, the target polygon is drawn, and the frame is written to
    "../data/middle.mp4" and shown in the 'SiamMask' window. Processing
    stops at the end of the video or when a key is pressed.

    :param vedio_path: path of the video to process
    :param initRect: initial target rectangle as (x, y, w, h)
    :return: None
    :raises IOError: if the first frame cannot be read from ``vedio_path``
    """
    # 1. Setup device: GPU when available, otherwise CPU.
    # NOTE(review): `device` is only used to move the network; it is not
    # passed to siamese_init / siamese_track -- confirm their defaults match.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    # 2. Setup Model
    # 2.1 Load the configuration for the (module-level) parsed arguments.
    cfg = load_config(args)
    # 2.2 Build the Custom network.
    from custom import Custom
    siammask = Custom(anchors=cfg['anchors'])
    # 2.3 Load the weights file when one was given.
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(
            args.resume)
        siammask = load_pretrain(siammask, args.resume)
    # Evaluation mode, then move the network to the selected device.
    siammask.eval().to(device)

    # Target position in the first frame.
    x, y, w, h = initRect
    print(x)

    middlepath = "../data/middle.mp4"

    Cap = cv2.VideoCapture(vedio_path)
    vediowriter = None
    try:
        ret, im = Cap.read()
        if not ret:
            # Fail with a clear message instead of passing a None frame on.
            raise IOError(
                'Cannot read the first frame of {}'.format(vedio_path))

        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        state = siamese_init(im, target_pos, target_sz, siammask,
                             cfg['hp'])  # init tracker

        # NOTE(review): the output size is fixed at (320, 240); confirm the
        # input frames have that size.
        vediowriter = cv2.VideoWriter(
            middlepath, cv2.VideoWriter_fourcc('M', 'P', '4', 'V'), 10,
            (320, 240))

        while True:
            ret, im = Cap.read()  # next frame
            if not ret:
                break
            state = siamese_track(state, im, mask_enable=True,
                                  refine_enable=True)  # track
            location = state['ploygon'].flatten()
            mask = state['mask'] > state['p'].seg_thr

            # Saturate channel 2 wherever the mask is set.
            im[:, :, 2] = (mask > 0) * 255 + (mask == 0) * im[:, :, 2]
            # np.intp is the type np.int0 aliased; np.int0 itself was removed
            # in NumPy 2.0.
            cv2.polylines(im, [np.intp(location).reshape((-1, 1, 2))], True,
                          (0, 255, 0), 3)
            vediowriter.write(im)
            cv2.imshow('SiamMask', im)
            key = cv2.waitKey(1)
            if key > 0:
                break
    finally:
        # Always release both handles, even when tracking raised.
        if vediowriter is not None:
            vediowriter.release()
        Cap.release()
class TrackingManager:
    # Drives a multi-target SiamMask tracker over frames pulled from
    # get_detection_output_as_frames_generator().
    # NOTE(review): `history_queue` is used by track() but is not defined in
    # this class; it is presumably a module-level container -- confirm.

    def __init__(self):
        # Builds the network from hard-coded weight/config paths, moves it to
        # CUDA when available (CPU otherwise) and resets the tracking state.
        resume = '/home/saad/Root/vision/Computer_Vision/Tracking-systems/SiamMask_DAVIS.pth'
        config = '/home/saad/Root/vision/Computer_Vision/Tracking-systems/config_davis.json'
        self.cfg = load_config(config=config)
        self.siammask = Custom(anchors=self.cfg['anchors'])
        self.siammask = load_pretrain(self.siammask, resume)
        self.active_boxes = []  # boxes currently being tracked
        self.frames_generator = get_detection_output_as_frames_generator()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.siammask.eval().to(self.device)
        self.tracker_initialized = False
        self.state = None
        self.times = np_.asarray([])  # per-call processing times in seconds

    def track(self):
        # Generator: pulls ONE frame from self.frames_generator, tracks it,
        # draws on it, saves it as a JPEG and yields it.
        # When the pulled item is flagged as final, `1` is yielded first and
        # processing of that item continues when the generator is resumed
        # (there is no `return` after the yield).
        current_frame, final_one = next(self.frames_generator)
        if final_one:
            yield 1
        s_ = time.time()
        init_boxes = select_active_boxes(current_frame, history_queue, 0.1)
        # One target record per newly selected box: centre, size and the
        # original x/y/w/h (later used to match the box back to its ID).
        targets = []
        for box_ in init_boxes:
            target_pos = np.array([box_.x1 + box_.w / 2, box_.y1 + box_.h / 2])
            target_sz = np.array([box_.w, box_.h])
            # print("x1 = {}, y1 = {}, w = {}, h = {}".format(box.x1, box.y1, box.w, box.h))
            s = {"target_pos": target_pos, "target_sz": target_sz, "x": box_.x1, "y": box_.y1, "w": box_.w,
                 "h": box_.h}
            targets.append(s)
        self.active_boxes.extend(init_boxes)
        if len(init_boxes) > 0:
            # New boxes appeared: re-initialise the tracker with the new
            # targets plus the ones already being tracked.
            if self.state is not None:
                targets.extend(self.state['targets'])
            self.state = siamese_init(current_frame.img, self.siammask, self.cfg['hp'], device=self.device,
                                      targets=targets)  # init tracker
            self.tracker_initialized = True
        if self.tracker_initialized and self.state is not None and len(self.state['targets']) > 0:
            self.state = siamese_track(self.state, current_frame.img)
            # Manual index loop because entries may be deleted while walking.
            t = 0
            while t < len(self.state['targets']):
                # check that the tracked object still exist
                score = self.state['targets'][t]['score']
                if score <= .001:
                    print("remove box because its score is {}".format(self.state['targets'][t]['score']))
                    self.remove_gone_boxes(self.state['targets'][t])
                    del self.state['targets'][t]
                    # `t` is not advanced: the next entry moved into slot t.
                    continue
                target = self.state['targets'][t]
                boxx = select_matching_box(target['ploygon'], current_frame)
                # Replace the tracked polygon by the matched box's corners.
                self.state['targets'][t]['ploygon'] = [[boxx.x1, boxx.y1], [boxx.x1, boxx.y2],
                                                       [boxx.x2, boxx.y2], [boxx.x2, boxx.y1]]
                # assign ID to the tracked object
                x, y, w, h = target['x'], target['y'], target['w'], target['h']
                for o, active_box in enumerate(self.active_boxes):
                    if active_box.x1 == x and active_box.y1 == y and active_box.w == w and active_box.h == h:
                        boxx.ID = active_box.ID
                        boxx.type = active_box.type
                # frame.get_coord_depend_seg(mask,boxx.x1,boxx.y1,boxx.x2,boxx.y2, id)
                # NOTE(review): the drawing calls below are placed at
                # per-target level (after the ID loop) -- confirm placement.
                cv2.rectangle(current_frame.img, (int(boxx.x1), int(boxx.y1)), (int(boxx.x2), int(boxx.y2)),
                              (255, 0, 0), 2)
                center = [int(x) for x in target['target_pos']]
                cv2.putText(current_frame.img, str(boxx.ID), tuple(center), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0))
                t += 1
                # frame.add_polygon(target['ploygon'], id)
                # NOTE(review): add_box is assumed to run once per surviving
                # target (inside the while loop) -- confirm placement.
                current_frame.add_box(boxx)
        history_queue.append(current_frame)
        print(f"tracked in {time.time() - s_}")
        self.times = np_.append(self.times, [time.time() - s_], axis=0)
        cv2.putText(current_frame.img, "current frame", (20, 20), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0))
        current_frame.img = cv2.cvtColor(current_frame.img, cv2.COLOR_BGR2RGB)
        cv2.imwrite(f"/home/saad/Root/datasets/tracking/tracking_facepass_case/{current_frame.frame_indx}.jpg",
                    current_frame.img)
        yield current_frame

    def remove_gone_boxes(self, target):
        # Deletes the first active box whose x1/y1/w/h equal the target's
        # stored x/y/w/h; does nothing when no box matches.
        x, y, w, h = target['x'], target['y'], target['w'], target['h']
        i = 0
        for active_box in self.active_boxes:
            if active_box.x1 == x and active_box.y1 == y and active_box.w == w and active_box.h == h:
                del self.active_boxes[i]
                # Stop right away: the list was just modified during iteration.
                break
            i += 1

    def get_tracker_fps(self):
        # Returns (frames per second, average seconds per call) computed from
        # the times recorded by track().
        avg = np_.average(self.times)
        return 1 / avg, avg
def main():
    """Evaluate the tracker on a dataset and log the aggregated results.

    Steps: parse the command line, load (and print) the configuration,
    configure the 'global' logger, build the ``Custom`` network (loading
    weights from ``args.resume`` when given), move it to the selected
    device, then run ``track_vos`` (mask evaluation) or ``track_vot`` on the
    selected videos.

    At the end the mean IoU per segmentation threshold (VOS) or the total
    ``lost`` value (VOT) is logged, followed by the mean speed. When no
    video was processed at all, a warning is logged instead of attempting
    to average empty result lists.

    ``args``, ``logger`` and ``v_id`` are published as module globals.
    """
    global args, logger, v_id  # module-level globals
    args = parser.parse_args()  # command-line arguments of this script
    # Load the configuration for these arguments.
    # NOTE(review): the original author noted that load_config also sets
    # args.arch -- not visible from here, confirm.
    cfg = load_config(args)
    print(cfg)
    init_log('global', logging.INFO)
    if args.log != "":
        # Also send the 'global' log to the file named by args.log.
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info(args)

    # setup model: only the 'Custom' architecture is accepted.
    if args.arch == 'Custom':
        from custom import Custom
        # The anchor settings come from the loaded configuration.
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    if args.resume:
        # A weights path was given: it must be an existing file.
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    # CUDA is used only when it is available and args.cpu is not set.
    device = torch.device('cuda' if (
        torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)

    # setup dataset
    dataset = load_dataset(args.dataset)

    # VOS or VOT? Mask output is enabled only for these three datasets.
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    hp = cfg['hp'] if 'hp' in cfg.keys() else None

    total_lost = 0  # VOT: sum of the `lost` values returned by track_vot
    iou_lists = []  # VOS: one iou_list per processed video
    speed_list = []

    # v_id is the 1-based video index, video is the video name.
    for v_id, video in enumerate(dataset.keys(), start=1):
        # NOTE(review): this exits the whole program as soon as a second
        # video is reached, before any final result is reported -- looks
        # like a debugging leftover; confirm before removing.
        if v_id == 2:
            exit()
        # An explicit args.video restricts the run to that single video.
        if args.video != '' and video != args.video:
            continue

        if vos_enable:
            # Segmentation evaluation; the last positional flag is True for
            # DAVIS2017 / ytb_vos.
            iou_list, speed = track_vos(
                model, dataset[video], hp, args.mask, args.refine,
                args.dataset in ['DAVIS2017', 'ytb_vos'], device=device)
            iou_lists.append(iou_list)
        else:
            # Tracking evaluation.
            lost, speed = track_vot(model, dataset[video], hp, args.mask,
                                    args.refine, device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if not speed_list:
        # Nothing ran (empty dataset or args.video matched no video):
        # np.concatenate([]) would raise ValueError and np.mean([]) is nan.
        logger.warning(
            'No video was evaluated (args.video={!r}); nothing to report.'
            .format(args.video))
        return

    if vos_enable:
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(
                thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))

    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))