def __init__(self, rf_model_code, enable_rf=True):
    _tracker_name, _tracker_param, model_name = self._get_setting()
    model_name = model_name.format(rf_model_code)
    if not enable_rf:
        model_name = model_name.replace(RF_type.format(rf_model_code), '')
    super(Pytracking_RF, self).__init__(name=model_name)
    self.enable_rf = enable_rf

    # create tracker
    tracker_info = Tracker(_tracker_name, _tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = False
    params.debug = False
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    self.tracker = tracker_info.tracker_class(params)

    # create Refinement module
    if self.enable_rf:
        self.RF_module = RefineModule(refine_path.format(rf_model_code), selector_path,
                                      search_factor=sr, input_sz=input_sz)
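# A minimal sketch of how this wrapper's per-frame step presumably chains the
# base tracker and Alpha-Refine, mirroring the evaluation loops later in this
# section (the track_frame helper is illustrative, not the class's confirmed API):

import numpy as np

def track_frame(wrapper, img_rgb):
    out = wrapper.tracker.track(img_rgb)  # base tracker proposal (x, y, w, h)
    bbox = out['target_bbox']
    if wrapper.enable_rf:
        # Alpha-Refine re-estimates the box around the base tracker's prediction
        bbox = wrapper.RF_module.refine(img_rgb, np.array(bbox))
    return bbox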
def __init__(self, tracker_name='dimp', para_name='dimp50_vot19',
             refine_model_name='ARcm_coco_seg', threshold=0.15):
    self.THRES = threshold
    '''create tracker'''
    '''DIMP'''
    tracker_info = Tracker(tracker_name, para_name, None)
    params = tracker_info.get_parameters()
    params.visualization = False
    params.debug = False
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    self.dimp = tracker_info.tracker_class(params)
    '''Alpha-Refine'''
    project_path = os.path.join(os.path.dirname(__file__), '..', '..')
    refine_root = os.path.join(project_path, 'ltr/checkpoints/ltr/ARcm_seg/')
    refine_path = os.path.join(refine_root, refine_model_name)
    '''2020.4.25 input size: 384x384'''
    self.alpha = ARcm_seg(refine_path, input_sz=384)
def __init__(self, tracker_name='dimp', para_name='super_dimp', threshold=0.65):
    self.THRES = threshold
    '''create tracker'''
    tracker_info = Tracker(tracker_name, para_name, None)
    params = tracker_info.get_parameters()
    params.visualization = False
    params.debug = False
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    self.base_tracker = tracker_info.tracker_class(params)
def local_init(self, image, init_bbox):
    local_tracker = Tracker('segm', 'default_params')
    params = local_tracker.get_parameters()
    debug_ = getattr(params, 'debug', 0)
    params.debug = debug_
    params.tracker_name = local_tracker.name
    params.param_name = local_tracker.parameter_name
    self.local_Tracker = local_tracker.tracker_class(params)
    self.local_Tracker.initialize(image, init_bbox)
def local_init(self, image, init_bbox):
    local_tracker = Tracker('dimp', 'dimp50')
    params = local_tracker.get_parameters()
    debug_ = getattr(params, 'debug', 0)
    params.debug = debug_
    params.tracker_name = local_tracker.name
    params.param_name = local_tracker.parameter_name
    self.local_Tracker = local_tracker.tracker_class(params)
    init_box = dict()
    init_box['init_bbox'] = init_bbox
    self.local_Tracker.initialize(image, init_box)
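# The two local_init variants above differ only in how the initial box is
# passed: the 'segm' tracker takes the raw bbox, while DiMP-style pytracking
# trackers expect an info dict keyed by 'init_bbox'. A minimal sketch of that
# convention (helper name and the example box are illustrative):

def make_init_info(bbox_xywh):
    """Wrap an (x, y, w, h) box in the dict expected by pytracking trackers."""
    return {'init_bbox': bbox_xywh}

# usage: self.local_Tracker.initialize(image, make_init_info([100.0, 50.0, 64.0, 48.0]))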
def main():
    # load config
    dataset_root = '/media/zxy/Samsung_T5/Data/DataSets/LaSOT/LaSOT_test'
    # create tracker
    '''Pytracking-RF tracker'''
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)
    '''Refinement module'''
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34_15sr_fcn/SEcmnet_ep0040-a.pth.tar"  # RF_CrsM_R34SR15FCN_a
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-a.pth.tar"  # dimp_dimp50RF_CrsM_R34SR20FCN_a-0_1
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-b.pth.tar"
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-c.pth.tar"
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-d.pth.tar"
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-e.pth.tar"
    refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEbcm/SEbcm-8gpu/SEbcmnet_ep0040.pth.tar"  # RF_CrsM_ARv1_d
    selector_path = 1
    branches = ['corner', 'mask'][0:1]  # use only the 'corner' branch
    sr = 2.0
    input_sz = int(128 * sr)  # 2.0 by default
    RF_module = RefineModule(refine_path, selector_path, branches=branches,
                             search_factor=sr, input_sz=input_sz)
    RF_type = 'RF_CrsM_R34SR20_e'
    # RF_type = 'RF_CrsM_ARv1'
    model_name = args.tracker_name + '_' + args.tracker_param + '{}-{}'.format(
        RF_type, selector_path) + '_%d' % (args.run_id)
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root,
                                            load_img=False)
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        total_lost = 0
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            '''time the refinement module'''
            toc_refine = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                '''get RGB format image'''
                img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
                if idx == frame_counter:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    '''Initialize'''
                    gt_bbox_np = np.array(gt_bbox_)
                    gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                    init_info = {}
                    init_info['init_bbox'] = gt_bbox_torch
                    _ = tracker.initialize(img_RGB, init_info)
                    '''##### initialize refinement module for specific video'''
                    RF_module.initialize(img_RGB, np.array(gt_bbox_))
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    '''Track'''
                    outputs = tracker.track(img_RGB)
                    pred_bbox = outputs['target_bbox']
                    '''##### refine tracking results #####'''
                    result_dict = RF_module.refine(img_RGB, np.array(pred_bbox))
                    bbox_report = result_dict['bbox_report']
                    bbox_state = result_dict['bbox_state']
                    '''report result and update state'''
                    pred_bbox = bbox_report
                    x1, y1, w, h = bbox_state.tolist()
                    '''add boundary and min size limit'''
                    x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                    w = x2 - x1
                    h = y2 - y1
                    new_pos = torch.from_numpy(np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                    new_target_sz = torch.from_numpy(np.array([h, w]).astype(np.float32))
                    new_scale = torch.sqrt(new_target_sz.prod() / tracker.base_target_sz.prod())
                    ##### update
                    tracker.pos = new_pos.clone()
                    tracker.target_sz = new_target_sz
                    tracker.target_scale = new_scale
                    overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    # np.int was removed from NumPy; the builtin int is used instead
                    cv2.polylines(img, [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    if len(pred_bbox) == 8:
                        cv2.polylines(img, [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join(save_dir, args.dataset, model_name, 'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                '''get RGB format image'''
                img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
                tic = cv2.getTickCount()
                if idx == 0:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    '''Initialize'''
                    gt_bbox_np = np.array(gt_bbox_)
                    gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                    init_info = {}
                    init_info['init_bbox'] = gt_bbox_torch
                    _ = tracker.initialize(img_RGB, init_info)
                    '''##### initialize refinement module for specific video'''
                    RF_module.initialize(img_RGB, np.array(gt_bbox_))
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    '''Track'''
                    outputs = tracker.track(img_RGB)
                    pred_bbox = outputs['target_bbox']
                    '''##### refine tracking results #####'''
                    pred_bbox = RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                                 np.array(pred_bbox))
                    x1, y1, w, h = pred_bbox.tolist()
                    '''add boundary and min size limit'''
                    x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                    w = x2 - x1
                    h = y2 - y1
                    new_pos = torch.from_numpy(np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                    new_target_sz = torch.from_numpy(np.array([h, w]).astype(np.float32))
                    new_scale = torch.sqrt(new_target_sz.prod() / tracker.base_target_sz.prod())
                    ##### update
                    tracker.pos = new_pos.clone()
                    tracker.target_sz = new_target_sz
                    tracker.target_scale = new_scale
                    pred_bboxes.append(pred_bbox)
                    # scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join(save_dir, args.dataset, model_name, 'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join(save_dir, args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join(save_dir, args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx + 1, video.name, toc, idx / toc))
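# bbox_clip is used throughout these scripts but not defined in this section.
# A minimal sketch consistent with the call sites above, i.e.
# bbox_clip(x1, y1, x2, y2, (H, W)); the exact min-size handling is an
# assumption, not necessarily the repository's implementation:

def bbox_clip(x1, y1, x2, y2, boundary, min_sz=10):
    """Clip box corners to the image boundary (H, W) and keep a minimum size."""
    H, W = boundary
    x1_new = max(0, min(x1, W - min_sz))
    y1_new = max(0, min(y1, H - min_sz))
    x2_new = max(min_sz, min(x2, W))
    y2_new = max(min_sz, min(y2, H))
    return x1_new, y1_new, x2_new, y2_new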
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)
    model_name = args.tracker_name + '_' + args.tracker_param + '_%d' % (args.run_id)
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root_,
                                            load_img=False)
    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if os.path.exists(os.path.join(save_dir, args.dataset, model_name,
                                       '{}.txt'.format(video.name))):
            continue
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                pred_bbox = gt_bbox_
                scores.append(None)
                if 'VOT2018-LT' == args.dataset:
                    pred_bboxes.append([1])
                else:
                    pred_bboxes.append(pred_bbox)
            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                # no refinement in this baseline script
                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                              (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                              (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results
        model_path = os.path.join(save_dir, args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)
    model_name = args.tracker_name + '_' + args.tracker_param
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root_,
                                            load_img=False)
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        total_lost = 0
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            '''time the refinement module (unused in this baseline script)'''
            toc_refine = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                '''get RGB format image'''
                img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
                if idx == frame_counter:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    '''Initialize'''
                    gt_bbox_np = np.array(gt_bbox_)
                    gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                    init_info = {}
                    init_info['init_bbox'] = gt_bbox_torch
                    _ = tracker.initialize(img_RGB, init_info)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    '''Track'''
                    outputs = tracker.track(img_RGB)
                    pred_bbox = outputs['target_bbox']
                    overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img, [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    if len(pred_bbox) == 8:
                        cv2.polylines(img, [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join(save_dir, args.dataset, model_name, 'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
def main():
    # load config
    model_name = args.tracker_name + '_' + args.tracker_param
    dataset_root = dataset_root_
    # create tracker
    '''Pytracking tracker'''
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)
    # create dataset
    frames_dir = os.path.join(dataset_root, 'frames')
    seq_list = sorted(os.listdir(frames_dir))
    # OPE tracking
    for v_idx, seq_name in enumerate(seq_list):
        if args.video != '':
            # test one special video
            if seq_name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        seq_frame_dir = os.path.join(frames_dir, seq_name)
        num_frames = len(os.listdir(seq_frame_dir))
        gt_file = os.path.join(dataset_root, 'anno', '%s.txt' % seq_name)
        gt_bbox = np.loadtxt(gt_file, dtype=np.float32, delimiter=',').squeeze()
        for idx in range(num_frames):
            frame_path = os.path.join(seq_frame_dir, '%d.jpg' % idx)
            img = cv2.imread(frame_path)
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)
            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                              (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(seq_name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results
        model_path = os.path.join(save_dir, 'trackingnet', model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(seq_name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, seq_name, toc, idx / toc))
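# The loop above infers a TrackingNet-style layout from its path handling;
# a sketch of the expected tree (directory names come from the code, the
# tree shape itself is inferred):
#
#   dataset_root/
#       frames/<seq_name>/0.jpg, 1.jpg, ...   # frames named by integer index
#       anno/<seq_name>.txt                   # first-frame box as one 'x,y,w,h' line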
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)
    model_name = 'atom_oracle'
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root_,
                                            load_img=False)
    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if os.path.exists(os.path.join(save_dir, args.dataset, model_name,
                                       '{}.txt'.format(video.name))):
            continue
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)
            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                x1, y1, w, h = pred_bbox
                '''add boundary and min size limit'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                pred_bbox = np.array([x1, y1, w, h])
                '''##### reset tracking results #####'''
                cx, cy, _, _ = get_axis_aligned_bbox(np.array(gt_bbox))
                if not gt_bbox == [0, 0, 0, 0]:
                    # oracle mode: recenter the tracker on the ground-truth center
                    new_pos = torch.from_numpy(np.array([cy, cx]).astype(np.float32))
                else:
                    new_pos = torch.from_numpy(np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                new_target_sz = torch.from_numpy(np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() / tracker.base_target_sz.prod())
                ##### update
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale
                pred_bboxes.append(pred_bbox)
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                              (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                              (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results
        model_path = os.path.join(save_dir, args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
def main():
    # refine_method = args.refine_method
    model_name = 'siamrpn_' + refine_method
    model_path = '/'
    snapshot_path = os.path.join(project_path_, 'experiments/%s/model.pth' % args.tracker_name)
    config_path = os.path.join(project_path_, 'experiments/%s/config.yaml' % args.tracker_name)
    cfg.merge_from_file(config_path)
    dataset_root = dataset_root_
    # create model
    '''a model is a Neural Network (a torch.nn.Module)'''
    model = ModelBuilder()
    # load model
    model = load_pretrain(model, snapshot_path).cuda().eval()
    # build tracker
    '''a tracker is an object consisting of not only a NN but also some post-processing'''
    tracker = build_tracker(model)
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root,
                                            load_img=False)
    '''##### build a refinement module #####'''
    if 'RF' in refine_method:
        RF_module = RefineModule(refine_path, selector_path, branches=branches,
                                 search_factor=sr, input_sz=input_sz)
    elif refine_method == 'iou_net':
        RF_info = Tracker('iou_net', 'iou_net_dimp', None)
        RF_params = RF_info.get_parameters()
        RF_params.visualization = False
        RF_params.debug = False
        RF_params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
        RF_module = RF_info.tracker_class(RF_params)
    elif refine_method == 'mask':
        RF_module = siammask()
    else:
        raise ValueError("refine_method should be 'RF' or 'iou_net' or 'mask'")
    # model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    '''##### initialize refinement module for specific video'''
                    if 'RF' in refine_method:
                        RF_module.initialize(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                             np.array(gt_bbox_))
                    elif refine_method == 'iou_net':
                        gt_bbox_np = np.array(gt_bbox_)
                        gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                        init_info = {}
                        init_info['init_bbox'] = gt_bbox_torch
                        RF_module.initialize(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), init_info)
                    elif refine_method == 'mask':
                        RF_module.initialize(img, np.array(gt_bbox_))
                    else:
                        raise ValueError("refine_method should be 'RF' or 'iou_net' or 'mask'")
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    '''##### refine tracking results #####'''
                    if 'RF' in refine_method or refine_method == 'iou_net':
                        pred_bbox = RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                                     np.array(pred_bbox))
                        x1, y1, w, h = pred_bbox.tolist()
                        '''add boundary and min size limit'''
                        x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                        w = x2 - x1
                        h = y2 - y1
                        pred_bbox = np.array([x1, y1, w, h])
                        '''pass new state back to base tracker'''
                        tracker.center_pos = np.array([x1 + w / 2, y1 + h / 2])
                        tracker.size = np.array([w, h])
                    elif refine_method == 'mask':
                        # boundary and min size limit are already handled inside refine()
                        pred_bbox, center_pos, size = RF_module.refine(img, np.array(pred_bbox),
                                                                       VOT=True)
                        '''pass new state back to base tracker'''
                        '''pred_bbox is a list with 8 elements'''
                        tracker.center_pos = center_pos
                        tracker.size = size
                    else:
                        raise ValueError("refine_method should be 'RF' or 'iou_net' or 'mask'")
                    overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img, [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    if refine_method == 'mask':
                        cv2.polylines(img, [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join(save_dir, args.dataset, model_name, 'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if video.name + '.txt' in os.listdir(model_path):
                continue
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    '''##### initialize refinement module for specific video'''
                    if 'RF' in refine_method:
                        RF_module.initialize(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                             np.array(gt_bbox_))
                    elif refine_method == 'iou_net':
                        gt_bbox_np = np.array(gt_bbox_)
                        gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                        init_info = {}
                        init_info['init_bbox'] = gt_bbox_torch
                        RF_module.initialize(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), init_info)
                    elif refine_method == 'mask':
                        RF_module.initialize(img, np.array(gt_bbox_))
                    else:
                        raise ValueError("refine_method should be 'RF' or 'iou_net' or 'mask'")
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    '''##### refine tracking results #####'''
                    if 'RF' in refine_method or refine_method == 'iou_net':
                        pred_bbox = RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                                     np.array(pred_bbox))
                    elif refine_method == 'mask':
                        pred_bbox = RF_module.refine(img, np.array(pred_bbox), VOT=False)
                    else:
                        raise ValueError("refine_method should be 'RF' or 'iou_net' or 'mask'")
                    x1, y1, w, h = pred_bbox.tolist()
                    '''add boundary and min size limit'''
                    x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                    w = x2 - x1
                    h = y2 - y1
                    pred_bbox = np.array([x1, y1, w, h])
                    tracker.center_pos = np.array([x1 + w / 2, y1 + h / 2])
                    tracker.size = np.array([w, h])
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join(save_dir, args.dataset, model_name, 'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join(save_dir, args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join(save_dir, args.dataset,
                                          model_name + '_' + str(selector_path))
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx + 1, video.name, toc, idx / toc))
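# Note the two state write-back conventions used above: pysot trackers store
# (center_pos, size) in pixel coordinates, while pytracking trackers store
# (pos, target_sz) as (y, x)-ordered tensors plus a relative scale. A minimal
# sketch of both for an (x1, y1, w, h) refined box (helper names are
# illustrative, the attribute assignments mirror the loops above):

import numpy as np
import torch

def writeback_pysot(tracker, x1, y1, w, h):
    # pysot convention: center (x, y) and size (w, h) as numpy arrays
    tracker.center_pos = np.array([x1 + w / 2, y1 + h / 2])
    tracker.size = np.array([w, h])

def writeback_pytracking(tracker, x1, y1, w, h):
    # pytracking convention: pos is (cy, cx), target_sz is (h, w),
    # and target_scale is measured against the size at initialization
    tracker.pos = torch.tensor([y1 + h / 2, x1 + w / 2], dtype=torch.float32)
    tracker.target_sz = torch.tensor([h, w], dtype=torch.float32)
    tracker.target_scale = torch.sqrt(tracker.target_sz.prod() / tracker.base_target_sz.prod())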
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)
    '''Refinement module'''
    RF_module = RefineModule(refine_path, selector_path, search_factor=sr, input_sz=input_sz)
    model_name = args.tracker_name + '_' + args.tracker_param + '{}-{}'.format(
        RF_type, selector_path) + '_%d' % (args.run_id)
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root_,
                                            load_img=False)
    # OPE tracking
    for v_idx, video in enumerate(dataset):
        color = np.array(COLORS[random.randint(0, len(COLORS) - 1)])[None, None, ::-1]
        vis_result = os.path.join(
            '/home/zxy/Desktop/AlphaRefine/CVPR21/material/quality_analysis/mask_vis',
            '{}'.format(video.name))
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        else:
            print()
        if not os.path.exists(vis_result):
            os.makedirs(vis_result)
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                '''##### initialize refinement module for specific video'''
                RF_module.initialize(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)
            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                '''##### refine tracking results #####'''
                pred_bbox = RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                             np.array(pred_bbox))
                x1, y1, w, h = pred_bbox.tolist()
                '''add boundary and min size limit'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                new_pos = torch.from_numpy(np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                new_target_sz = torch.from_numpy(np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() / tracker.base_target_sz.prod())
                ##### update
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale
                mask_pred = RF_module.get_mask(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                               np.array(pred_bbox))
                from external.pysot.toolkit.visualization import draw_mask
                draw_mask(img, mask_pred, idx=idx, show=True,
                          save_dir='dimpsuper_armask_crocodile-3')
                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                im4show = img
                mask_pred = np.uint8(mask_pred > 0.5)[:, :, None]
                contours, _ = cv2.findContours(mask_pred.squeeze(), cv2.RETR_LIST,
                                               cv2.CHAIN_APPROX_SIMPLE)
                im4show = im4show * (1 - mask_pred) + np.uint8(im4show * mask_pred / 2) \
                          + mask_pred * np.uint8(color) * 128
                pred_bbox = list(map(int, pred_bbox))
                # gt_bbox = list(map(int, gt_bbox))
                # cv2.rectangle(im4show, (gt_bbox[0], gt_bbox[1]),
                #               (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                # cv2.rectangle(im4show, (pred_bbox[0], pred_bbox[1]),
                #               (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                #               color[::-1].squeeze().tolist(), 3)
                cv2.drawContours(im4show, contours, -1, color[::-1].squeeze(), 2)
                cv2.putText(im4show, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                # cv2.imshow(video.name, im4show)
                cv2.imwrite(os.path.join(vis_result, '{:06}.jpg'.format(idx)), im4show)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
def main():
    # load config
    model_name = args.tracker_name + '_' + args.tracker_param + '_' + RF_type
    print(model_name)
    dataset_root = dataset_root_
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)
    # setup refine module
    RF_module = RefineModule(refine_path, selector_path, search_factor=sr, input_sz=input_sz)
    # create dataset
    frames_dir = os.path.join(dataset_root, 'frames')
    seq_list = sorted(os.listdir(frames_dir))
    # OPE tracking
    for v_idx, seq_name in enumerate(seq_list):
        if args.video != '':
            # test one special video
            if seq_name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        seq_frame_dir = os.path.join(frames_dir, seq_name)
        num_frames = len(os.listdir(seq_frame_dir))
        gt_file = os.path.join(dataset_root, 'anno', '%s.txt' % seq_name)
        gt_bbox = np.loadtxt(gt_file, dtype=np.float32, delimiter=',').squeeze()
        for idx in range(num_frames):
            frame_path = os.path.join(seq_frame_dir, '%d.jpg' % idx)
            img = cv2.imread(frame_path)
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                '''##### initialize refinement module for specific video'''
                RF_module.initialize(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)
            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                '''##### refine tracking results #####'''
                pred_bbox = RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                             np.array(pred_bbox))
                x1, y1, w, h = pred_bbox.tolist()
                '''add boundary and min size limit'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                new_pos = torch.from_numpy(np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                new_target_sz = torch.from_numpy(np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() / tracker.base_target_sz.prod())
                ##### update
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale
                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                              (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(seq_name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results
        model_path = os.path.join(save_dir, 'trackingnet', model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(seq_name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, seq_name, toc, idx / toc))
os.environ["CUDA_VISIBLE_DEVICES"] = "" import vot from vot import Rectangle, Polygon, Point del os.environ[ 'MKL_NUM_THREADS'] #note:todo when called from matlab, it should be added import torch import cv2 import PIL.Image as Image from pytracking.evaluation import Tracker tracker = Tracker('ATCAIS_cpu', 'default') tracker_name = tracker.name parameters = tracker.get_parameters() tracker_module = importlib.import_module('pytracking.tracker.{}'.format( tracker.name)) tracker_class = tracker_module.get_tracker_class() tracker = tracker_class(parameters) def overlay_boxes(image, state, image_file, tracker_name): state = torch.tensor(state).reshape(-1, 4) boxes = state.clone() boxes[:, 2:4] = state[:, 0:2] + state[:, 2:4] for box in boxes: box = box.to(torch.int64) top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)
    '''Refinement module'''
    RF_module = RefineModule(refine_path, selector_path, search_factor=sr, input_sz=input_sz)
    model_name = args.tracker_name + '_' + args.tracker_param + '{}-{}'.format(
        RF_type, selector_path) + '_%d' % (args.run_id)
    model_name = 'LaSOT_gt'  # overridden: this script records the ground truth as an oracle baseline
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root_,
                                            load_img=False)
    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if os.path.exists(os.path.join(save_dir, args.dataset, model_name,
                                       '{}.txt'.format(video.name))):
            continue
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        # toc/scores/track_times were missing in the original and are needed by
        # the (currently unreachable) tracking branch below
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            # oracle mode: record the axis-aligned gt box and skip tracking; everything
            # after the `continue` below is unreachable and kept only for reference
            cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
            gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
            pred_bboxes.append(gt_bbox_)
            continue
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                '''##### initialize refinement module for specific video'''
                RF_module.initialize(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)
            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                ''' refine tracking results '''
                pred_bbox = RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                             np.array(pred_bbox))
                x1, y1, w, h = pred_bbox.tolist()
                w, h = get_mean_wh(pred_bboxes, w, h)
                '''add boundary and min size limit'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                new_pos = torch.from_numpy(np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                new_target_sz = torch.from_numpy(np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() / tracker.base_target_sz.prod())
                # update
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale
                pred_bboxes.append(pred_bbox)
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                              (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                              (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                k = cv2.waitKey(0)
                if k == ord('q'):
                    exit()
                elif k == ord('s'):
                    cv2.imwrite(os.path.join(os.environ['HOME'], 'Desktop/demo',
                                             video.name + '_{}.jpg'.format(idx)), img)
        # save results
        model_path = os.path.join(save_dir, args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print(video.name)
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)
    # setup refine module
    RF_module = RefineModule(refine_path, selector_path, search_factor=sr, input_sz=input_sz)
    model_name = args.tracker_name + '_' + args.tracker_param + '{}-{}'.format(
        RF_type, selector_path) + '_%d' % (args.run_id)
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root_,
                                            load_img=False)
    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if os.path.exists(os.path.join(save_dir, args.dataset, model_name,
                                       '{}.txt'.format(video.name))):
            continue
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                '''##### initialize refinement module for specific video'''
                RF_module.initialize(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                scores.append(None)
                if 'VOT2018-LT' == args.dataset:
                    pred_bboxes.append([1])
                else:
                    pred_bboxes.append(pred_bbox)
            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                '''##### refine tracking results #####'''
                pred_bbox = RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                             np.array(pred_bbox))
                x1, y1, w, h = pred_bbox.tolist()
                '''add boundary and min size limit'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                new_pos = torch.from_numpy(np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                new_target_sz = torch.from_numpy(np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() / tracker.base_target_sz.prod())
                ##### update
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale
                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                              (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                              (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results
        if 'VOT2018-LT' == args.dataset:
            video_path = os.path.join(save_dir, args.dataset, model_name, 'longterm', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    f.write(','.join([str(i) for i in x]) + '\n')
            result_path = os.path.join(video_path, '{}_001_confidence.value'.format(video.name))
            with open(result_path, 'w') as f:
                for x in scores:
                    f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
            result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in track_times:
                    f.write("{:.6f}\n".format(x))
        elif 'GOT-10k' == args.dataset:
            video_path = os.path.join(save_dir, args.dataset, model_name, video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    f.write(','.join([str(i) for i in x]) + '\n')
            result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in track_times:
                    f.write("{:.6f}\n".format(x))
        else:
            model_path = os.path.join(save_dir, args.dataset, model_name)
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            result_path = os.path.join(model_path, '{}.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))