def __init__(self, threshold=0.65):
    """Store the threshold and build the base tracker.

    Args:
        threshold: Value kept on the instance as ``THRES`` (default 0.65).

    Note:
        ``snapshot_path`` is not defined in this method; it is read from the
        enclosing scope.
    """
    self.THRES = threshold

    # Create the base tracker.
    # The network is a torch.nn.Module; it is loaded from the snapshot and
    # then switched to CUDA and eval mode.
    network = ModelBuilder()
    network = load_pretrain(network, snapshot_path)
    network = network.cuda().eval()

    # The tracker object wraps the network together with its post-processing.
    self.base_tracker = build_tracker(network)
def __init__(self):
    """Build the tracker for the ``siammask_r50_l3`` experiment.

    The experiment's ``config.yaml`` and ``model.pth`` are looked up under the
    ``experiments`` folder that sits next to this source file. The config is
    merged into ``cfg_new`` before the network is created.
    """
    experiment = 'siammask_r50_l3'
    root_dir = os.path.dirname(__file__)
    config_file = os.path.join(root_dir, 'experiments/%s/config.yaml' % experiment)
    weights_file = os.path.join(root_dir, 'experiments/%s/model.pth' % experiment)

    # The configuration has to be in place before the model is instantiated.
    cfg_new.merge_from_file(config_file)

    # Instantiate the network, load its weights, then switch to CUDA/eval.
    network = ModelBuilder()
    network = load_pretrain(network, weights_file)
    network = network.cuda().eval()

    self.tracker = build_tracker(network)
def __init__(self, rf_model_code, enable_rf=True):
    """Set up the base tracker and, optionally, the refinement module.

    Args:
        rf_model_code: Value formatted into ``RF_type`` (for the tracker name)
            and into ``refine_path`` (for the refinement module).
        enable_rf: When false, the RF tag is stripped from the tracker name
            and no refinement module is created.
    """
    rf_tag = RF_type.format(rf_model_code)
    tracker_name = 'siamrpn_' + rf_tag
    if not enable_rf:
        tracker_name = tracker_name.replace(rf_tag, '')
    super(SiamRPNpp_RF, self).__init__(name=tracker_name)
    self.enable_rf = enable_rf

    # Base tracker: merge the experiment config, then load the network.
    weights_file = os.path.join(
        project_path_, 'experiments/%s/model.pth' % siam_model_)
    config_file = os.path.join(
        project_path_, 'experiments/%s/config.yaml' % siam_model_)
    cfg.merge_from_file(config_file)

    network = ModelBuilder()  # a sub-class of `torch.nn.Module`
    network = load_pretrain(network, weights_file)
    network = network.cuda().eval()
    # The tracker object wraps the network together with its post-processing.
    self.tracker = build_tracker(network)

    if not self.enable_rf:
        return

    # Refinement module, only built when refinement is enabled.
    self.RF_module = RefineModule(
        refine_path.format(rf_model_code),
        selector_path,
        search_factor=sr,
        input_sz=input_sz,
    )
def main(model_code):
    """Run the base tracker plus refinement module on a VOT dataset.

    For each video the tracker is (re-)initialised from the ground truth,
    every tracked box is passed through the refinement module, clipped to the
    image, and written back into the tracker state. When the overlap with the
    ground truth drops to zero the object counts as lost and tracking restarts
    five frames later.

    One result file ``<video>_001.txt`` is written per video under
    ``save_dir/<dataset>/<model_name>/baseline/<video>/``. Each line is either
    a box (comma-separated floats) or an integer code: ``1`` for an
    initialisation frame, ``2`` for a lost frame, ``0`` for a skipped frame.

    Nothing is evaluated unless ``args.dataset`` is one of ``VOT2016``,
    ``VOT2018`` or ``VOT2019``.

    Args:
        model_code: Value formatted into ``refine_path`` and ``RF_type`` to
            select the refinement model and name the result folder.
    """
    RF_module = RefineModule(refine_path.format(model_code), selector_path,
                             search_factor=sr, input_sz=input_sz)
    model_name = 'siamrpn_' + '{}-{}'.format(
        RF_type.format(model_code), selector_path) + '_%d' % (args.run_id)

    snapshot_path = os.path.join(
        project_path_, 'experiments/%s/model.pth' % args.tracker_name)
    config_path = os.path.join(
        project_path_, 'experiments/%s/config.yaml' % args.tracker_name)
    cfg.merge_from_file(config_path)

    # create model (a sub-class of `torch.nn.Module`)
    model = ModelBuilder()
    model = load_pretrain(model, snapshot_path).cuda().eval()

    # build tracker: an object consisting of the NN and some post-processing
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)

    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0  # index of the next (re-)initialisation frame
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # Expand a 4-value box (treated as x, y, w, h by the
                    # arithmetic below) into four corner points.
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1],
                        gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    # initialize refinement module for this specific video
                    RF_module.initialize(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                         np.array(gt_bbox_))
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    # refine tracking results
                    pred_bbox = RF_module.refine(
                        cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                        np.array(pred_bbox))
                    x1, y1, w, h = pred_bbox.tolist()
                    # add boundary and min size limit
                    x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                    w = x2 - x1
                    h = y2 - y1
                    pred_bbox = np.array([x1, y1, w, h])
                    # Write the refined box back so the next track() call
                    # starts from it.
                    tracker.center_pos = np.array([x1 + w / 2, y1 + h / 2])
                    tracker.size = np.array([w, h])
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    # np.int was removed in NumPy 1.24; int32 is the integer
                    # dtype OpenCV drawing functions accept for point arrays.
                    cv2.polylines(
                        img,
                        [np.array(gt_bbox, np.int32).reshape((-1, 1, 2))],
                        True, (0, 255, 0), 3)
                    bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

            # save results
            video_path = os.path.join(save_dir, args.dataset, model_name,
                                      'baseline', video.name)
            # exist_ok avoids the check-then-create race of isdir + makedirs.
            os.makedirs(video_path, exist_ok=True)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join(
                            [vot_float2str("%.4f", i) for i in x]) + '\n')

            # An empty video leaves toc at 0 and idx unbound; report 0 fps
            # instead of crashing (idx is only evaluated when toc > 0).
            speed = idx / toc if toc > 0 else 0.0
            print(
                '({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, speed, lost_number))
def main():
    """Run the plain SiamRPN tracker on a VOT dataset.

    For each video the tracker is (re-)initialised from the ground truth and
    then tracked frame by frame. When the overlap with the ground truth drops
    to zero the object counts as lost and tracking restarts five frames later.

    One result file ``<video>_001.txt`` is written per video under
    ``save_dir/<dataset>/siamRPN/baseline/<video>/``. Each line is either a
    box (comma-separated floats) or an integer code: ``1`` for an
    initialisation frame, ``2`` for a lost frame, ``0`` for a skipped frame.

    Nothing is evaluated unless ``args.dataset`` is one of ``VOT2016``,
    ``VOT2018`` or ``VOT2019``.
    """
    model_name = 'siamRPN'
    snapshot_path = os.path.join(
        project_path_, 'experiments/%s/model.pth' % args.tracker_name)
    config_path = os.path.join(
        project_path_, 'experiments/%s/config.yaml' % args.tracker_name)
    cfg.merge_from_file(config_path)

    # create model: a model is a neural network (a torch.nn.Module)
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, snapshot_path).cuda().eval()

    # build tracker: an object consisting of the NN and some post-processing
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)

    # NOTE(review): never updated or read in the code shown here — confirm
    # whether a per-dataset lost total was meant to be accumulated.
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0  # index of the next (re-)initialisation frame
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # Expand a 4-value box (treated as x, y, w, h by the
                    # arithmetic below) into four corner points.
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1]+gt_bbox[3]-1,
                               gt_bbox[0]+gt_bbox[2]-1, gt_bbox[1]+gt_bbox[3]-1,
                               gt_bbox[0]+gt_bbox[2]-1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    # np.int was removed in NumPy 1.24; int32 is the integer
                    # dtype OpenCV drawing functions accept for point arrays.
                    cv2.polylines(
                        img,
                        [np.array(gt_bbox, np.int32).reshape((-1, 1, 2))],
                        True, (0, 255, 0), 3)
                    bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (bbox[0], bbox[1]),
                                  (bbox[0]+bbox[2], bbox[1]+bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

            # save results
            video_path = os.path.join(save_dir, args.dataset, model_name,
                                      'baseline', video.name)
            # exist_ok avoids the check-then-create race of isdir + makedirs.
            os.makedirs(video_path, exist_ok=True)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join(
                            [vot_float2str("%.4f", i) for i in x])+'\n')

            # An empty video leaves toc at 0 and idx unbound; report 0 fps
            # instead of crashing (idx is only evaluated when toc > 0).
            speed = idx / toc if toc > 0 else 0.0
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                v_idx+1, video.name, toc, speed, lost_number))