def __init__(self, rf_model_code, enable_rf=True):
    """Build the RT-MDNet base tracker and, optionally, a refinement module.

    Args:
        rf_model_code: Value substituted into the ``RF_type`` and
            ``refine_path`` format templates.
        enable_rf: When true, a ``RefineModule`` is created and stored as
            ``self.RF_module``; when false, only the base tracker is built.
    """
    rf_tag = RF_type.format(rf_model_code)
    model_name = 'RTMDNet' + '{}-{}'.format(rf_tag, selector_path)
    if not enable_rf:
        # Remove the refinement tag from the name reported to the base class.
        model_name = model_name.replace(rf_tag, '')
    super(RTMDNet_RF, self).__init__(name=model_name)

    self.enable_rf = enable_rf
    self.tracker = RT_MDNet()
    if not self.enable_rf:
        return
    self.RF_module = RefineModule(
        refine_path.format(rf_model_code),
        selector_path,
        search_factor=sr,
        input_sz=input_sz,
    )
def __init__(self, threshold=0.65):
    """Create the base RT-MDNet tracker and the Alpha-Refine module.

    Args:
        threshold: Stored unchanged as ``self.THRES``; how it is consumed is
            not visible in this block.
    """
    self.THRES = threshold

    # Create tracker. A tracker here is an object that bundles more than a
    # network plus some post-processing.
    tracker = RT_MDNet()
    self.base_tracker = tracker

    # Alpha-Refine
    # NOTE(review): other call sites in this file pass
    # (refine_path, selector_path, search_factor=sr, ...); here `sr` is the
    # second positional argument -- confirm against RefineModule's signature.
    self.alpha = RefineModule(refine_path, sr, input_sz=input_sz)
class RTMDNet_RF(GOT10kTracker):
    """GOT-10k style tracker wrapping RT-MDNet with an optional refinement step."""

    def __init__(self, rf_model_code, enable_rf=True):
        """Build the base tracker and, when requested, the refinement module.

        Args:
            rf_model_code: Value substituted into the ``RF_type`` and
                ``refine_path`` format templates.
            enable_rf: Whether to create and use ``self.RF_module``.
        """
        rf_tag = RF_type.format(rf_model_code)
        model_name = 'RTMDNet' + '{}-{}'.format(rf_tag, selector_path)
        if not enable_rf:
            # Remove the refinement tag from the reported tracker name.
            model_name = model_name.replace(rf_tag, '')
        super(RTMDNet_RF, self).__init__(name=model_name)

        self.enable_rf = enable_rf
        self.tracker = RT_MDNet()
        if self.enable_rf:
            self.RF_module = RefineModule(
                refine_path.format(rf_model_code),
                selector_path,
                search_factor=sr,
                input_sz=input_sz,
            )

    def init(self, image, box):
        """Initialise the tracker (and the refinement module) on the first frame.

        Args:
            image: First frame; converted with ``np.array`` and expected to
                unpack into three shape dimensions.
            box: Initial target annotation, forwarded to
                ``get_axis_aligned_bbox``.
        """
        frame = np.array(image)
        self.im_H, self.im_W, _ = frame.shape

        cx, cy, w, h = get_axis_aligned_bbox(np.array(box))
        # Convert centre/size into a top-left based [x, y, w, h] box.
        x0 = cx - (w - 1) / 2
        y0 = cy - (h - 1) / 2
        gt_bbox_ = [x0, y0, w, h]

        # initialize tracker
        self.tracker.initialize_seq(frame, np.array(gt_bbox_))
        # initialize refine module
        if self.enable_rf:
            self.RF_module.initialize(frame, np.array(gt_bbox_))
        self.box = box

    def update(self, image):
        """Track the target in a new frame and return the predicted box.

        NOTE(review): the nesting of this method was reconstructed from
        whitespace-collapsed source; clipping and the state update are taken
        to run regardless of ``enable_rf`` -- confirm against the original.
        """
        frame = np.array(image)
        pred_bbox = self.tracker.track(frame)
        if self.enable_rf:
            # refine tracking results
            pred_bbox = self.RF_module.refine(frame, np.array(pred_bbox))
        # boundary and size limit
        frame_size = (self.im_H, self.im_W)
        pred_bbox = bbox_clip(pred_bbox, frame_size)
        # update state: feed the final box back into the base tracker
        self.tracker.target_bbox = pred_bbox.copy()
        return pred_bbox
def main(model_code):
    """Run RT-MDNet plus a refinement module on a VOT dataset with restarts.

    For each video the tracker is (re-)initialised on the frame whose index
    equals ``frame_counter``. When the overlap with the ground truth is not
    positive the target counts as lost and ``frame_counter`` is moved five
    frames ahead, so the frames in between are skipped.

    One result entry is recorded per frame: ``1`` for an initialisation
    frame, ``2`` for a frame where the target was lost, ``0`` for a skipped
    frame, and the predicted box otherwise. Results are written to
    ``<save_dir>/<dataset>/<model_name>/baseline/<video>/<video>_001.txt``.

    Nothing is done when ``args.dataset`` is not one of VOT2016/2018/2019.

    Args:
        model_code: Value substituted into the ``refine_path`` and
            ``RF_type`` format templates.
    """
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)

    # build a refinement module (one instance, re-initialised per video)
    RF_module = RefineModule(refine_path.format(model_code), selector_path,
                             search_factor=sr, input_sz=input_sz)
    model_name = 'RT_MDNet' + '_{}-{}'.format(
        RF_type.format(model_code), selector_path) + '_%d' % (args.run_id)

    if args.dataset not in ['VOT2016', 'VOT2018', 'VOT2019']:
        return

    # restart tracking
    for v_idx, video in enumerate(dataset):
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        tracker = RT_MDNet()
        frame_counter = 0
        lost_number = 0
        toc = 0
        pred_bboxes = []
        for idx, (img, gt_bbox) in enumerate(video):
            img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # RGB format
            if len(gt_bbox) == 4:
                # Expand [x, y, w, h] into the four corner points.
                gt_bbox = [gt_bbox[0], gt_bbox[1],
                           gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                           gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                           gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
            tic = cv2.getTickCount()
            if idx == frame_counter:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                # initialize tracker
                tracker.initialize_seq(img_RGB, np.array(gt_bbox_))
                # initialize refine module for this specific video
                RF_module.initialize(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                pred_bboxes.append(1)
            elif idx > frame_counter:
                # track
                ori_bbox = tracker.track(img_RGB)
                # refine tracking result
                pred_bbox = RF_module.refine(img_RGB, np.array(ori_bbox))
                pred_bbox = bbox_clip(pred_bbox, (H, W))
                tracker.target_bbox = pred_bbox.copy()

                overlap = vot_overlap(pred_bbox, gt_bbox,
                                      (img.shape[1], img.shape[0]))
                if overlap > 0:
                    # not lost
                    pred_bboxes.append(pred_bbox)
                else:
                    # lost object
                    pred_bboxes.append(2)
                    frame_counter = idx + 5  # skip 5 frames
                    lost_number += 1
            else:
                pred_bboxes.append(0)
            toc += cv2.getTickCount() - tic
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > frame_counter:
                # np.int was removed in NumPy 1.24; use an explicit 32-bit
                # integer dtype for the polygon points.
                gt_polygon = np.array(gt_bbox, np.int32).reshape((-1, 1, 2))
                cv2.polylines(img, [gt_polygon], True, (0, 255, 0), 3)
                bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.putText(img, str(lost_number), (40, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        video_path = os.path.join(save_dir, args.dataset, model_name,
                                  'baseline', video.name)
        if not os.path.isdir(video_path):
            os.makedirs(video_path)
        result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                if isinstance(x, int):
                    # integer status marker (1 / 2 / 0)
                    f.write("{:d}\n".format(x))
                else:
                    f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
            v_idx+1, video.name, toc, idx / toc, lost_number))
def main():
    """Run RT-MDNet in one-pass evaluation with an oracle-corrected centre.

    After every tracked frame the tracker state (``tracker.target_bbox``) is
    overwritten with a box that keeps the predicted size but whose top-left
    corner is the ground-truth centre minus half that size. The box that is
    saved is the clipped tracker output, not the corrected one.

    Videos whose result file already exists are skipped. Results are written
    to ``<save_dir>/<dataset>/RTMDNet-oracle/<video>.txt``, one
    comma-separated box per line.
    """
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)
    model_name = 'RTMDNet-oracle'

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if os.path.exists(
                os.path.join(save_dir, args.dataset, model_name,
                             '{}.txt'.format(video.name))):
            continue
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue

        # build tracker
        tracker = RT_MDNet()
        toc = 0
        pred_bboxes = []
        for idx, (img, gt_bbox) in enumerate(video):
            img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # RGB format
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                # initialize tracker
                tracker.initialize_seq(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                pred_bboxes.append(pred_bbox)
            else:
                ori_bbox = tracker.track(img_RGB)
                pred_bbox = bbox_clip(ori_bbox, (H, W))
                # Oracle step: keep the predicted size, move the box so that
                # it is centred on the ground-truth centre.
                oracle_box = pred_bbox.copy()
                cx, cy, _, _ = get_axis_aligned_bbox(np.array(gt_bbox))
                oracle_box[:2] = np.array([cx, cy]) - oracle_box[2:] / 2
                tracker.target_bbox = oracle_box
                pred_bboxes.append(pred_bbox)
            toc += cv2.getTickCount() - tic
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                # Drawing needs integer pixel coordinates. Separate copies
                # are used so the tracker state and saved results are not
                # altered by the visualisation.
                gt_draw = list(map(int, gt_bbox))
                oracle_draw = list(map(int, oracle_box))
                pred_draw = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (gt_draw[0], gt_draw[1]),
                    (gt_draw[0] + gt_draw[2], gt_draw[1] + gt_draw[3]),
                    (0, 0, 255), 3)
                cv2.rectangle(
                    img, (oracle_draw[0], oracle_draw[1]),
                    (oracle_draw[0] + oracle_draw[2],
                     oracle_draw[1] + oracle_draw[3]),
                    (255, 0, 0), 3)
                cv2.rectangle(
                    img, (pred_draw[0], pred_draw[1]),
                    (pred_draw[0] + pred_draw[2], pred_draw[1] + pred_draw[3]),
                    (0, 255, 0), 3)
                cv2.putText(img, str(idx), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        model_path = os.path.join(save_dir, args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
def main():
    """Run RT-MDNet with a refinement module in one-pass evaluation.

    Every tracked box is refined, clipped to the frame and fed back into the
    tracker state. Videos whose result file already exists are skipped.
    Results go to ``<save_dir>/<dataset>/<model_name>/<video>.txt`` with one
    comma-separated entry per frame; for ``VOT2018-LT`` the first entry is
    ``1`` instead of the initial box.
    """
    # create dataset
    dataset = DatasetFactory.create_dataset(
        name=args.dataset, dataset_root=dataset_root_, load_img=False)

    # build a refinement module
    RF_module = RefineModule(
        refine_path, selector_path, search_factor=sr, input_sz=input_sz)
    model_name = 'RTMDNet' + '{}-{}'.format(RF_type, selector_path)
    model_path = os.path.join(save_dir, args.dataset, model_name)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        if os.path.exists(result_path):
            continue
        # optionally restrict the run to one named video
        if args.video != '' and video.name != args.video:
            continue

        # build tracker
        tracker = RT_MDNet()
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # RGB format
            tic = cv2.getTickCount()
            if idx > 0:
                ori_bbox = tracker.track(img_RGB)
                # refine tracking results
                refined = RF_module.refine(img_RGB, np.array(ori_bbox))
                # boundary and size limit
                pred_bbox = bbox_clip(refined, (H, W))
                # update state
                tracker.target_bbox = pred_bbox.copy()
                pred_bboxes.append(pred_bbox)
            else:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                # initialize tracker
                tracker.initialize_seq(img_RGB, np.array(gt_bbox_))
                # initialize refine module for this specific video
                RF_module.initialize(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                scores.append(None)
                first_entry = [1] if 'VOT2018-LT' == args.dataset else pred_bbox
                pred_bboxes.append(first_entry)
            toc += cv2.getTickCount() - tic
            elapsed = (cv2.getTickCount() - tic) / cv2.getTickFrequency()
            track_times.append(elapsed)

            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                # integer copies of the boxes for drawing
                gt_i = [int(v) for v in gt_bbox]
                ori_i = [int(v) for v in ori_bbox]
                pred_i = [int(v) for v in pred_bbox]
                for box, colour in ((gt_i, (0, 0, 255)),
                                    (ori_i, (255, 0, 0)),
                                    (pred_i, (0, 255, 0))):
                    cv2.rectangle(img, (box[0], box[1]),
                                  (box[0] + box[2], box[1] + box[3]),
                                  colour, 3)
                cv2.putText(img, str(idx), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        with open(result_path, 'w') as f:
            for row in pred_bboxes:
                f.write(','.join(str(v) for v in row) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
def __init__(self, threshold=0.65):
    """Store the threshold and create the base RT-MDNet tracker.

    Args:
        threshold: Stored unchanged as ``self.THRES``; how it is consumed is
            not visible in this block.
    """
    self.THRES = threshold

    # Create tracker. A tracker here is an object that bundles more than a
    # network plus some post-processing.
    tracker = RT_MDNet()
    self.base_tracker = tracker
def main():
    """Run plain RT-MDNet over sequences stored as numbered JPEG frames.

    Sequences are the sorted entries of ``<dataset_root>/frames``. Frames are
    read as ``<dataset_root>/frames/<seq>/<idx>.jpg`` for ``idx`` in
    ``range(number of entries in the sequence directory)``, and the
    annotation comes from ``<dataset_root>/anno/<seq>.txt``; only the first
    frame uses it, to initialise the tracker.

    Results are written to ``<save_dir>/trackingnet/RTMDNet/<seq>.txt``, one
    comma-separated box per line.

    Raises:
        OSError: If a frame image cannot be read.
    """
    # create dataset
    dataset_root = dataset_root_
    frames_dir = os.path.join(dataset_root, 'frames')
    seq_list = sorted(os.listdir(frames_dir))
    model_name = 'RTMDNet'

    # OPE tracking
    for v_idx, seq_name in enumerate(seq_list):
        if args.video != '':
            # test one special video
            if seq_name != args.video:
                continue

        # build tracker
        toc = 0
        pred_bboxes = []
        tracker = RT_MDNet()
        seq_frame_dir = os.path.join(frames_dir, seq_name)
        num_frames = len(os.listdir(seq_frame_dir))
        gt_file = os.path.join(dataset_root, 'anno', '%s.txt' % seq_name)
        gt_bbox = np.loadtxt(gt_file, dtype=np.float32,
                             delimiter=',').squeeze()
        for idx in range(num_frames):
            frame_path = os.path.join(seq_frame_dir, '%d.jpg' % idx)
            img = cv2.imread(frame_path)
            if img is None:
                # cv2.imread signals failure by returning None; fail with the
                # offending path instead of an opaque TypeError on slicing.
                raise OSError('Failed to read frame: {}'.format(frame_path))
            # get RGB format image
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                # initialize tracker
                tracker.initialize_seq(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                pred_bboxes.append(pred_bbox)
            else:
                pred_bbox = tracker.track(img_RGB)
                pred_bboxes.append(pred_bbox)
            toc += cv2.getTickCount() - tic
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                # integer copy for drawing; the saved box is left untouched
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (pred_bbox[0], pred_bbox[1]),
                    (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                    (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(seq_name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        model_path = os.path.join(save_dir, 'trackingnet', model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(seq_name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, seq_name, toc, idx / toc))