def main():
    """Run the interactive tracking demo and record the result to ``result.avi``.

    Reads the config and snapshot given in ``args``, builds the tracker, asks
    the user to select a ROI on the first frame and then tracks it on every
    following frame. Each tracked frame is drawn on, shown in a window and
    appended to the video writer.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model (the checkpoint stores the weights under 'state_dict')
    model.load_state_dict(
        torch.load(
            args.snapshot,
            map_location=lambda storage, loc: storage.cpu())['state_dict'])
    model.eval().to(device)

    # build tracker
    if cfg.RPN.TYPE == "YOLO":
        tracker = build_tracker(model, True)
    else:
        tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    # NOTE(review): frames are written without resizing; confirm the input
    # resolution matches the (480, 360) size the writer is opened with.
    writer = cv2.VideoWriter("result.avi",
                             cv2.VideoWriter_fourcc(*'XVID'),
                             25.0, (480, 360))
    try:
        for frame in get_frames(args.video_name):
            if first_frame:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                except Exception:
                    # ROI selection failed: leave the demo (the ``finally``
                    # below still closes the output file).
                    exit()
                tracker.init(frame, init_rect)
                first_frame = False
                continue

            outputs = tracker.track(frame)
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                # NOTE(review): `mask` is uint8 here, so `mask * 255` wraps
                # around instead of saturating -- confirm the intended colour.
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
            cv2.imshow(video_name, frame)
            writer.write(frame)
            cv2.waitKey(40)
    finally:
        # Always finalise the video file, even when the loop is interrupted.
        writer.release()
def main():
    """Print the mean and standard deviation of an exemplar crop.

    Builds the MobileNetV2 SiamRPN tracker, loads a fixed benchmark image and
    bounding box, crops the exemplar window with ``get_subwindow`` and prints
    the crop statistics.
    """
    # --- tracker initialisation -------------------------------------------
    cfg.merge_from_file("experiments/siamrpn_mobilev2_l234_dwxcorr/config.yaml")
    cfg.CUDA = torch.cuda.is_available()
    target_device = torch.device("cuda" if cfg.CUDA else "cpu")

    net = ModelBuilder()
    weights = torch.load(
        "experiments/siamrpn_mobilev2_l234_dwxcorr/model.pth",
        map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(target_device)
    tracker = build_tracker(net)

    # --- benchmark parameters ---------------------------------------------
    image = cv2.imread("image/benchmark_5.jpg")
    box = (131, 122, 92, 118)

    # per-channel average over both spatial axes
    mean_color = np.mean(image, axis=(0, 1))

    # EXEMPLAR_SIZE of MobileNetV2 is 127
    exemplar = get_subwindow(image, 127, box, mean_color)
    print(exemplar.mean(), "\t", exemplar.std())
def multi_processing(model, bbox, label, probs):
    """Track an object through ``image_files`` starting from a key-frame box.

    Args:
        model: track model; it is switched to eval mode and moved to 'cuda'.
        bbox: indexable with four entries read as (x1, y1, x2, y2); converted
            to ``[x1, y1, x2 - x1, y2 - y1]`` for tracker initialisation.
        label: object labels, forwarded to ``build_tracker``.
        probs: confidences, forwarded to ``build_tracker``.

    Returns:
        list: ``[track_start_time, track_end_time, total_time, stdOutput]``
        where ``stdOutput`` is ``[frame_index, output, elapsed]`` for the last
        tracked frame, or ``None`` when no frame was tracked (fewer than two
        images).
    """
    # set model on GPU
    model.eval().to('cuda')

    # build the tracker
    tracker = build_tracker(model, label, probs)
    init_rect = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]

    # NOTE(review): `image_files` is not defined in this function; it is
    # presumably a module-level list of image paths -- confirm.
    track_start_time = time.time()
    total_time = 0
    # Stays None when the loop never reaches a tracking step, instead of
    # raising UnboundLocalError at the return statement.
    stdOutput = None
    for f, image_file in enumerate(image_files):
        frame = cv2.imread(image_file)
        # initialize the tracker on the first image
        if f == 0:
            tracker.init(frame, init_rect)
            continue
        t1 = time.time()
        # track images
        output = tracker.track(frame)
        elapsed = time.time() - t1
        total_time += elapsed
        stdOutput = [f, output, elapsed]
    track_end_time = time.time()
    return [track_start_time, track_end_time, total_time, stdOutput]
def __init__(self, parent=None):
    """Connect the UI signals and build the default SiamMask tracker."""
    super(MyMainWindow, self).__init__(parent)

    # Hook each widget signal to its handler.
    signal_handlers = (
        (self.pushButton_locationLoading.clicked, self.location_loading),
        (self.pushButton_videoLoading.clicked, self.video_loading),
        (self.pushButton_cameraLoading.clicked, self.camera_loading),
        (self.pushButton_bboxSetting.clicked, self.bbox_setting),
        (self.pushButton_algorithmProcessing.clicked,
         self.algorithm_processing),
        (self.scrollBar.valueChanged, self.slider_change),
        (self.checkBox.stateChanged, self.checkbox_change),
    )
    for signal, handler in signal_handlers:
        signal.connect(handler)

    # Flags for the bbox/save message boxes.
    self.bbox_tips = True
    self.save_tips = True

    # Tracker files live next to each other in the experiment directory.
    model_location = './pysot/experiments/siammaske_r50_l3'
    self.config = model_location + '/config.yaml'
    self.snapshot = model_location + '/model.pth'
    self.tracker_name = model_location.rsplit('/', 1)[-1]
    self.video_name = ''

    cfg.merge_from_file(self.config)
    cfg.CUDA = torch.cuda.is_available()
    target_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    net = ModelBuilder()
    weights = torch.load(self.snapshot,
                         map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(target_device)
    self.tracker = build_tracker(net)

    # Remaining state starts out empty.
    self.vs = None
    self.analysis_box = None
    self.analysis_max = 10
    self.save_location = ''
    self.afterCamera = False
def main():
    """Initialise the tracker on the first frame of each video and save the result.

    For every video in the dataset only frame 0 is used: the ground-truth box
    is converted to a top-left ``[x, y, w, h]`` box, ``tracker.init`` is called
    and its return value is written to ``./RadioNNNNNN.jpg``.
    """
    # load config
    print("begin")
    cfg.merge_from_file(args.config)

    here = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(here, '../testing_dataset', args.dataset)
    # The computed location is overridden by this fixed path.
    dataset_root = "/data/VisDrone Challenge/Single-Object Tracking/VisDrone2019-SOT-val/"

    # create model and tracker
    net = ModelBuilder()
    tracker = build_tracker(net)

    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)
    print(dataset.dataset_root)

    saved = 0
    for video in dataset:
        for frame_idx, (img, gt_bbox) in enumerate(video):
            if frame_idx != 0:
                # only the first frame of each video is processed
                break
            cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
            # top-left corner, width, height form
            init_box = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
            # initialise the tracker with the box; keep what it returns
            crop = tracker.init(img, init_box)
            crop = crop.cpu().numpy()[0].transpose(1, 2, 0)
            cv2.imwrite("./Radio{:06d}.jpg".format(saved), crop)
            saved += 1
def __init__(self, classid=0, anchorid=0, init_frame=None, init_bbox=None):
    """Build a SiamRPN (ResNet-50) tracker and remember the initial target.

    Args:
        classid: stored as ``self.classId``.
        anchorid: stored as ``self.anchorId``.
        init_frame: stored as ``self.init_frame``.
        init_bbox: stored as ``self.init_bbox``.
    """
    # experiment files
    cfg_path = '../object_detection/pysot/experiments/siamrpn_r50_l234_dwxcorr/config.yaml'
    snapshot = '../object_detection/pysot/experiments/siamrpn_r50_l234_dwxcorr/model.pth'

    cfg.merge_from_file(cfg_path)
    # CUDA is used only when it is both available and enabled in the config.
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    target_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create the network and load its weights on the CPU first
    net = ModelBuilder()
    weights = torch.load(snapshot,
                         map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(target_device)

    self.tracker = build_tracker(net)
    self.classId = classid
    self.anchorId = anchorid
    self.init_bbox = init_bbox
    self.init_frame = init_frame
def main():
    """Run the interactive tracking demo, with optional transformer output.

    The user selects a ROI on the first frame; every later frame is tracked
    and displayed. When ``cfg.TRANSFORMER.TRANSFORMER`` is set the tracker
    output is unpacked as ``(acc, (x1, y1, x2, y2))``; otherwise it is a dict
    with either a 'polygon'/'mask' pair or a 'bbox'.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model (the checkpoint stores the weights under 'state_dict')
    model.load_state_dict(
        torch.load(
            args.snapshot,
            map_location=lambda storage, loc: storage.cpu())['state_dict'])
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                # ROI selection failed: leave the demo. KeyboardInterrupt and
                # SystemExit are no longer swallowed by this handler.
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        outputs = tracker.track(frame)
        if cfg.TRANSFORMER.TRANSFORMER:
            acc, (x1, y1, x2, y2) = outputs
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
            cv2.putText(frame, 'Acc: ' + acc.astype('str'), (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        elif 'polygon' in outputs:
            polygon = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            # NOTE(review): `mask` is uint8 here, so `mask * 255` wraps around
            # instead of saturating -- confirm the intended overlay colour.
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
        else:
            bbox = list(map(int, outputs['bbox']))
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 0), 3)
        # Every branch ends by showing the annotated frame.
        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def setup_tracker():
    """Build a CUDA tracker from ``cfg_file``/``model_file`` and warm it up.

    Returns:
        The tracker created by ``build_tracker``.
    """
    cfg.merge_from_file(cfg_file)

    net = ModelBuilder()
    net = load_pretrain(net, model_file)
    net = net.cuda()
    net = net.eval()

    tracker = build_tracker(net)
    # the warm-up pass runs on the model, after the tracker has been built
    warmup(net)
    return tracker
def build_model(self):
    """Create the model from ``args.snapshot`` and return a tracker for it.

    Returns:
        The tracker created by ``build_tracker``.
    """
    net = ModelBuilder()

    # load the weights onto the CPU, then move the network to `device`
    weights = torch.load(args.snapshot,
                         map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(device)

    return build_tracker(net)
def __init__(self, config_file, model_path):
    """Load the config and weights and keep a CUDA tracker on ``self.tracker``.

    Args:
        config_file: path merged into the global ``cfg``.
        model_path: checkpoint passed to ``torch.load``.
    """
    super().__init__()
    cfg.merge_from_file(config_file)

    net = ModelBuilder()
    weights = torch.load(model_path,
                         map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(weights)
    # this variant always runs on the GPU
    net.eval().cuda()

    self.tracker = build_tracker(net)
def main():
    """Run the tracking demo headlessly and dump each tracked frame as a JPEG.

    The ROI is selected on the first frame; every later frame is tracked,
    annotated and written to the output directory as ``<i>.jpg``.
    """
    import os  # local import: only needed to create the output directory

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'

    # cv2.imwrite does not create missing directories, so make sure the
    # target exists before the first frame is written.
    output_dir = "/home/tempuser1/pysot/demo/ouput/"
    os.makedirs(output_dir, exist_ok=True)

    i = 0
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                # ROI selection failed: leave the demo.
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        outputs = tracker.track(frame)
        if 'polygon' in outputs:
            polygon = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            # NOTE(review): `mask` is uint8 here, so `mask * 255` wraps around
            # instead of saturating -- confirm the intended overlay colour.
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
        else:
            bbox = list(map(int, outputs['bbox']))
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 0), 3)
        print(i)
        cv2.imwrite(filename=output_dir + str(i) + '.jpg', img=frame)
        i += 1
def build_model(self):
    """Create the model on this worker's device and return a tracker for it.

    With CUDA enabled the device index is ``self.index // self.num_workers``;
    otherwise the CPU is used. The chosen device is printed.

    Returns:
        The tracker created by ``build_tracker``.
    """
    net = ModelBuilder()

    # load the weights onto the CPU first
    weights = torch.load(args.snapshot,
                         map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(weights)

    if cfg.CUDA:
        gpu_index = int(self.index // self.num_workers)
        device_name = 'cuda:{}'.format(gpu_index)
    else:
        device_name = 'cpu'
    target_device = torch.device(device_name)
    print(target_device)
    net.eval().to(target_device)

    return build_tracker(net)
def load_tracker(self, tracker_config, tracker_snapshot):
    """Load the selected pysot tracker.

    The checkpoint is always deserialised onto the CPU and only afterwards
    moved to the selected device, so a snapshot saved from a GPU still loads
    on a CPU-only machine.

    Args:
        tracker_config (str): Path to pysot config file for the tracker.
        tracker_snapshot (str): Path to .pth file of pysot tracker.
    """
    cfg.merge_from_file(tracker_config)
    # CUDA is used only when it is both available and enabled in the config.
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    model = ModelBuilder()
    model.load_state_dict(
        torch.load(tracker_snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)
    self.tracker = build_tracker(model)
def __init__(self):
    """Set up the DROL tracker from the paths in ``path_config``."""
    super(DROL, self).__init__("DROL")

    # configuration and RNG seed
    cfg.merge_from_file(path_config.DROL_CONFIG)
    seed_torch(cfg.TRACK.SEED)

    # network: create, load pretrained weights, move to GPU, eval mode
    net = ModelBuilder()
    net = load_pretrain(net, path_config.DROL_SNAPSHOT)
    net = net.cuda()
    net = net.eval()

    self.tracker = build_tracker(net)
def main():
    """Run the interactive tracking demo in a resizable window.

    The user selects a ROI on the first frame; the tracker is initialised with
    it and then applied to every following frame, drawing either the polygon
    and mask or the bounding box returned by ``tracker.track``.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WINDOW_NORMAL)

    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                # let the user choose a rectangle as ROI
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                # ROI selection failed: leave the demo. KeyboardInterrupt and
                # SystemExit are no longer swallowed by this handler.
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        # outputs: bbox or polygon (+ mask)
        outputs = tracker.track(frame)
        if 'polygon' in outputs:
            polygon = np.array(outputs['polygon']).astype(np.int32)
            # polylines expects the vertices shaped as (n_vertices, 1, 2)
            cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            # NOTE(review): `mask` is uint8 here, so `mask * 255` wraps around
            # instead of saturating -- confirm the intended overlay colour.
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            # blend frame and mask; the weights control the transparency
            frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
        else:
            bbox = list(map(int, outputs['bbox']))  # float -> int
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 0), 3)
        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def PYSOTINIT():
    """Build and return a pysot tracker from ``tracker_config``/``snapshot``.

    Returns:
        The tracker created by ``build_tracker``.
    """
    # configuration
    cfg.merge_from_file(tracker_config)
    cfg.CUDA = torch.cuda.is_available()
    target_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # network and weights (deserialised on the CPU, then moved)
    net = ModelBuilder()
    weights = torch.load(snapshot,
                         map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(target_device)

    return build_tracker(net)
def fitness(config, reporter):
    """Evaluate one hyper-parameter setting and report its EAO.

    Only the VOT datasets are supported for now.

    Args:
        config: mapping with the keys 'penalty_k', 'window_influence', 'lr'
            and 'search_region'.
        reporter: callable invoked as ``reporter(EAO=eao)``.

    Raises:
        NotImplementedError: if ``args.dataset`` is not one of the VOT sets.
    """
    tracker_name = 'tracker_penalty_k={0:.3f},window_influence={1:.3f},lr={2:.3f},search_region={3}'.format(
        config['penalty_k'], config['window_influence'], config['lr'],
        config['search_region'])

    if args.dataset in ['VOT2016', 'VOT2017', 'VOT2018', 'VOT2019']:
        tracker = build_tracker(model, config)
        eao = calculate_eao(tracker, tracker_name)
        # The arguments follow the label order (penalty_k, lr,
        # window_influence) so each value is printed under its own name.
        print(
            "penalty_k: {0:.3f}, lr: {1:.3f}, window_influence: {2:.3f}, search_region: {3}, eao: {4:.5f}"
            .format(config['penalty_k'], config['lr'],
                    config['window_influence'], config['search_region'], eao))
        reporter(EAO=eao)
    else:
        raise NotImplementedError
def __init__(self, config, snapshot):
    """Load the model and tracker and reset the tracked position and size.

    Args:
        config: path merged into the global ``cfg``.
        snapshot: checkpoint passed to ``torch.load``.
    """
    cfg.merge_from_file(config)
    # CUDA is used only when it is both available and enabled in the config.
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    target_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # network and weights (deserialised on the CPU, then moved)
    self.model = ModelBuilder()
    weights = torch.load(snapshot,
                         map_location=lambda storage, _loc: storage.cpu())
    self.model.load_state_dict(weights)
    self.model.eval().to(target_device)

    self.tracker = build_tracker(self.model)

    # nothing is being tracked yet
    self.center_pos = None
    self.size = None
def __init__(self, config_file, model_file):
    """Remember the file paths and build the model and tracker from them.

    Args:
        config_file: path merged into the global ``cfg``.
        model_file: checkpoint passed to ``torch.load``.
    """
    self.config_file = config_file
    self.model_file = model_file

    # configuration; CUDA is used whenever it is available
    cfg.merge_from_file(self.config_file)
    cfg.CUDA = torch.cuda.is_available()
    self.device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # network and weights (deserialised on the CPU, then moved)
    self.model = ModelBuilder()
    weights = torch.load(model_file,
                         map_location=lambda storage, _loc: storage.cpu())
    self.model.load_state_dict(weights)
    self.model.eval().to(self.device)

    self.tracker = build_tracker(self.model)
def init_track(self):
    """Load the tracking model and report progress in the text browser.

    Reads ``self.config_path`` and ``self.snapshot_path``; stores the loaded
    checkpoint, the model and the tracker on ``self``.
    """
    def _to_cpu(storage, _loc):
        # deserialise every tensor onto the CPU
        return storage.cpu()

    # merge the configuration
    cfg.merge_from_file(self.config_path)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    target_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create the model
    self.textBws_show_process.append('模型对象创建...')
    self.checkpoint = torch.load(self.snapshot_path, map_location=_to_cpu)
    self.model = ModelBuilder()
    print('断点')

    # load the weights
    self.model.load_state_dict(self.checkpoint)
    self.model.eval().to(target_device)
    self.textBws_show_process.append('加载跟踪模型完毕!')

    # create the tracker
    self.tracker = build_tracker(self.model)
def test_snapshot(epoch: int, snapshot: str, test_path: str):
    """Run a snapshot on a few random COCO val pairs and save the drawings.

    A random folder below ``val2017`` is picked; for up to 8 sorted
    ``*.z.jpg``/``*.x.jpg`` pairs the tracker is initialised on the template,
    run on the search image, and both the prediction and the annotated box are
    drawn and written below ``test_path``.

    Args:
        epoch: not used by this function.
        snapshot: checkpoint path; its 'state_dict' entry is loaded.
        test_path: root directory for the written images.
    """
    max_img = 8

    # model (always evaluated on the CPU)
    net = ModelBuilder()
    checkpoint = torch.load(snapshot,
                            map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(checkpoint['state_dict'])
    net.eval().to(torch.device('cpu'))
    tracker = build_tracker(net)

    # dataset locations, relative to this file
    root = cfg.DATASET.COCO.ROOT
    here = os.path.dirname(os.path.realpath(__file__))
    root = os.path.join(here, '../../', root)
    anno_path = os.path.join(root, '../', "val2017.json")
    with open(anno_path, 'r') as f:
        anno = json.load(f)
    anno = filter_zero(anno)

    dataset = os.path.join(root, "val2017")
    folder = random.choice(glob.glob(f"{dataset}/**"))
    zs = sorted(glob.glob(f"{folder}/*.z.jpg"))
    xs = sorted(glob.glob(f"{folder}/*.x.jpg"))
    # pair every search image with its annotation
    xs = [(x, get_anno_from_img_path(anno, x)) for x in xs]

    for i, z_path in enumerate(zs[:max_img]):
        z = cv2.imread(z_path)
        x_path, bbox = xs[i]
        x = cv2.imread(x_path)

        tracker.init_(z)
        score, (x1, y1, x2, y2) = tracker.track(x)

        # prediction first, then the annotated box
        cv2.rectangle(x, (x1, y1), (x2, y2), (255, 0, 0), 2)
        a1, b1, a2, b2 = bbox
        cv2.rectangle(x, (a1, b1), (a2, b2), (0, 0, 255), 2)
        cv2.putText(x, 'Acc: ' + score.astype('str'), (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        parent_dir = f"{test_path}/{os.path.basename(Path(z_path).parent)}"
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        cv2.imwrite(f"{parent_dir}/{os.path.basename(x_path)}", x)
        cv2.imwrite(f"{parent_dir}/{os.path.basename(z_path)}", z)
def __init__(self, backbone, target):
    """Build the SiamRPN++ variant selected by ``backbone`` and ``target``.

    Args:
        backbone: one of "AlexNet", "ResNet-50", "MobileNetV2", "SiamMask".
        target: one of "OTB", "VOT", "VOTLT" (valid combinations are listed
            in the table below).

    Raises:
        ValueError: if the combination is not in the table.
    """
    super(SiamRPNPPGroup, self).__init__(f"SiamRPN++Group/{backbone}/{target}")

    # (backbone, target, config attribute, snapshot attribute) on path_config;
    # scanned in order, attributes are only looked up for the matching row.
    variants = (
        ("AlexNet", "OTB",
         "SIAMRPNPP_ALEXNET_OTB_CONFIG", "SIAMRPNPP_ALEXNET_OTB_SNAPSHOT"),
        ("AlexNet", "VOT",
         "SIAMRPNPP_ALEXNET_CONFIG", "SIAMRPNPP_ALEXNET_SNAPSHOT"),
        ("ResNet-50", "OTB",
         "SIAMRPNPP_RESNET_OTB_CONFIG", "SIAMRPNPP_RESNET_OTB_SNAPSHOT"),
        ("ResNet-50", "VOT",
         "SIAMRPNPP_RESNET_CONFIG", "SIAMRPNPP_RESNET_SNAPSHOT"),
        ("ResNet-50", "VOTLT",
         "SIAMRPNPP_RESNET_LT_CONFIG", "SIAMRPNPP_RESNET_LT_SNAPSHOT"),
        ("MobileNetV2", "VOT",
         "SIAMRPNPP_MOBILENET_CONFIG", "SIAMRPNPP_MOBILENET_SNAPSHOT"),
        ("SiamMask", "VOT",
         "SIAMPRNPP_SIAMMASK_CONFIG", "SIAMPRNPP_SIAMMASK_SNAPSHOT"),
    )
    for known_backbone, known_target, config_attr, snapshot_attr in variants:
        if backbone == known_backbone and target == known_target:
            config = getattr(path_config, config_attr)
            snapshot = getattr(path_config, snapshot_attr)
            break
    else:
        raise ValueError("Invalid backbone and target")

    # configuration; CUDA is used whenever it is available
    cfg.merge_from_file(config)
    cfg.CUDA = torch.cuda.is_available()
    target_device = torch.device("cuda" if cfg.CUDA else "cpu")

    # network and weights (deserialised on the CPU, then moved)
    self.model = ModelBuilder()
    weights = torch.load(snapshot,
                         map_location=lambda storage, _loc: storage.cpu())
    self.model.load_state_dict(weights)
    self.model.eval().to(target_device)

    self.tracker = build_tracker(self.model)
def __init__(self):
    """Set up the SiamRPN++ tracker from the paths in ``path_config``."""
    super(SiamRPNPP, self).__init__("SiamRPN++")

    # configuration; CUDA is used whenever it is available
    cfg.merge_from_file(path_config.SIAMRPNPP_CONFIG)
    cfg.CUDA = torch.cuda.is_available()
    target_device = torch.device("cuda" if cfg.CUDA else "cpu")

    # network and weights (deserialised on the CPU, then moved)
    self.model = ModelBuilder()
    weights = torch.load(path_config.SIAMRPNPP_SNAPSHOT,
                         map_location=lambda storage, _loc: storage.cpu())
    self.model.load_state_dict(weights)
    self.model.eval().to(target_device)

    self.tracker = build_tracker(self.model)
def __init__(self):
    """Create the ROS publisher, subscriber and service, then build the tracker.

    Topic names, the tracker type and the file paths all come from ``config``.
    """
    self.init_rect = None

    # ROS endpoints: tracking results out, frames in, init-rect service
    self.pysot_pub = rospy.Publisher(
        config.TRACK_PUB_TOPIC, Int32MultiArray, queue_size=10)
    self.img_sub = rospy.Subscriber(
        config.IMAGE_SUB_TOPIC, Image, self.receive_frame_and_track)
    self.service = rospy.Service("init_rect", InitRect, self.set_init_rect)

    # The tracker type is set before the config file is merged.
    cfg.TRACK.TYPE = config.TRACK_TYPE
    cfg.merge_from_file(config.CONFIG_PATH)
    cfg.CUDA = torch.cuda.is_available()
    target_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # network and weights (deserialised on the CPU, then moved)
    net = ModelBuilder()
    weights = torch.load(config.MODEL_PATH,
                         map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(target_device)

    self.tracker = build_tracker(net)
def __init__(self, tr):
    """Create the tracker implementation named by ``tr``.

    Lookup order: the OpenCV tracker table, then the pysot tracker set, then
    the fixed names handled below.

    Args:
        tr: tracker name.

    Raises:
        Exception: if ``tr`` names no supported tracker.
    """
    self.type = tr
    self.PYSOT_TRACKER_THRESHOLD = 0.3

    if tr in OPENCV_TRACKERS:
        self.tracker = OPENCV_TRACKERS[tr]()
        return
    if tr in PYSOT_TRACKERS:
        self.tracker = build_tracker(load_pysot_model(self.type))
        return

    # Remaining trackers, checked in order; the factories are only called
    # for the matching name.
    named_factories = (
        ("kalman", lambda: KalmanTracker()),
        ("flow_LK_mean", lambda: LKFlowTracker(strategy="mean")),
        ("flow_LK_median", lambda: LKFlowTracker(strategy="median")),
        ("flow_GF_mean", lambda: GFFlowTracker(strategy="mean")),
        ("flow_GF_median", lambda: GFFlowTracker(strategy="median")),
        ("iou", lambda: StaticTracker()),
    )
    for name, factory in named_factories:
        if tr == name:
            self.tracker = factory()
            return

    raise Exception("Tracker not supported")
def init_track(self):
    """Build the AlexNet SiamRPN tracker and store it on ``self.tracker``."""
    # config file and snapshot file of the model
    config_path = './models/siamrpn_alex_dwxcorr/config.yaml'
    snapshot_path = './models/siamrpn_alex_dwxcorr/model.pth'

    # merge the configuration; CUDA only when available and enabled
    cfg.merge_from_file(config_path)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    target_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # network and weights (deserialised on the CPU, then moved)
    net = ModelBuilder()
    weights = torch.load(snapshot_path,
                         map_location=lambda storage, _loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(target_device)

    # create the tracker
    self.tracker = build_tracker(net)
def main():
    """Run the tracker over a benchmark dataset and write the result files.

    For VOT2016/2018/2019 the tracker is restarted after a lost frame: it is
    re-initialised five frames after an overlap of zero, and the per-frame
    codes 1 (init), 2 (lost) and 0 (skipped) are written instead of a box.
    For every other dataset the tracker is initialised once on frame 0 (OPE)
    and the output layout depends on the dataset name ('VOT2018-LT',
    'GOT-10k', or the default flat layout).
    """
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    # create model
    model = ModelBuilder(cfg)

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # expand [x, y, w, h] into the four corner points
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1]+gt_bbox[3]-1,
                               gt_bbox[0]+gt_bbox[2]-1, gt_bbox[1]+gt_bbox[3]-1,
                               gt_bbox[0]+gt_bbox[2]-1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    # np.int32 replaces the removed `np.int` alias and is the
                    # dtype the demo code already uses for polylines.
                    cv2.polylines(
                        img,
                        [np.array(gt_bbox, np.int32).reshape((-1, 1, 2))],
                        True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, np.int32).reshape((-1, 1, 2))],
                            True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0]+bbox[2], bbox[1]+bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            os.makedirs(video_path, exist_ok=True)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x])+'\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                v_idx+1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic)/cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3]),
                                  (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                os.makedirs(video_path, exist_ok=True)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        # the init frame has no score: write an empty line
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          video.name)
                os.makedirs(video_path, exist_ok=True)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                os.makedirs(model_path, exist_ok=True)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx+1, video.name, toc, idx / toc))
def main():
    """Run restart-style tracking on a VOT-like dataset and log the losses.

    Requires a CUDA GPU. For every video the tracker is re-initialised
    ``args.skip_frames`` frames after the overlap drops to 0.85 or below; the
    per-frame codes 1 (init), 2 (lost) and 0 (skipped) are written instead of
    a box. Per-video and overall loss counts are appended to ``lost.txt`` one
    level above the per-video result directory. Datasets outside
    ``vot_like_dataset`` are not handled.

    Raises:
        RuntimeError: if no CUDA GPU is available.
    """
    is_gpu_cuda_available = torch.cuda.is_available()
    if not is_gpu_cuda_available:
        raise RuntimeError(
            'Failed to locate a CUDA GPU. Program cannot continue..')

    num_gpus = torch.cuda.device_count()
    gpu_type = torch.cuda.get_device_name(0)
    print(f"You have {num_gpus} available of type: {gpu_type}")
    print("This might take a few minutes...Grab a cup of coffee\n")

    # load config
    cfg.merge_from_file(args.config)

    dataset_root = os.path.join(args.dataset_directory, args.dataset)
    print(f"dataset root-->{dataset_root}")

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)
    model_name = args.model_name
    print(f"Model name is {model_name}")

    total_lost = 0
    if args.dataset in vot_like_dataset:
        # Stays None when no video is processed, so the summary write below
        # can be skipped instead of failing on an unbound name.
        save_path = None
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # expand [x, y, w, h] into the four corner points
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1],
                        gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0.85:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object: restart after args.skip_frames frames
                        pred_bboxes.append(2)
                        frame_counter = idx + args.skip_frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    # np.int32 replaces the removed `np.int` alias.
                    cv2.polylines(
                        img,
                        [np.array(gt_bbox, np.int32).reshape((-1, 1, 2))],
                        True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, np.int32).reshape((-1, 1, 2))],
                            True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            save_path = os.path.join(args.results_path, args.dataset,
                                     model_name, args.experiment_name,
                                     video.name)
            os.makedirs(save_path, exist_ok=True)
            result_path = os.path.join(save_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            with open(os.path.join(save_path, '..', 'lost.txt'), 'a+') as f:
                f.write(
                    f"{v_idx+1} Class: {video.name} | Time: {toc}s | Speed: {idx/toc}fps | Lost:{lost_number} \n"
                )
            print(
                '({:3d}) Class: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
        if save_path is not None:
            # The summary goes next to the per-video entries, located through
            # the directory of the last processed video.
            with open(os.path.join(save_path, '..', 'lost.txt'), 'a+') as f:
                f.write(
                    f"Model architeture used --> {model_name} \ntotal lost: {total_lost} \n"
                )
                f.write(f"SKIP FRAMES USED --> {args.skip_frames}")
    else:
        # OPE tracking
        # will be implemented if needed in future
        pass
# Module-level buffers read and written through `global` by the worker
# function(s). All are zero-filled uint8 arrays of shape (1280, 720[, 3]).
color_img = np.zeros((1280, 720, 3), dtype=np.uint8)
result_mask_img = np.zeros((1280, 720, 3), dtype=np.uint8)
result_bbox_img = np.zeros((1280, 720, 3), dtype=np.uint8)
result_mask = np.zeros((1280, 720), dtype=np.uint8)
pysot_img = np.zeros((1280, 720, 3), dtype=np.uint8)
# Set to 1 at the start of every run_maskrcnn iteration.
mask_rcnn_flag = 0
pysot_mask = np.zeros((1280, 720), dtype=np.uint8)
pysot_contour_img = np.zeros((1280, 720, 3), dtype=np.uint8)

# pysot tracker set-up, executed at import time.
cfg.merge_from_file('config.yaml')
cfg.CUDA = torch.cuda.is_available()
device = torch.device('cuda' if cfg.CUDA else 'cpu')
model_pysot = ModelBuilder()
# The tracker is built before the weights are loaded into `model_pysot`.
tracker = build_tracker(model_pysot)
model_pysot.load_state_dict(
    torch.load('model.pth',
               map_location=lambda storage, loc: storage.cpu()))
model_pysot.eval().to(device)


def run_maskrcnn():
    """Run the detector on ``color_img`` in an endless loop.

    NOTE(review): only the start of the loop body is visible here. `model`
    and `inds_len` are not defined in this span; presumably the detector and
    a result count defined elsewhere in the module -- confirm.
    """
    global color_img
    global result_mask_img
    global result_bbox_img
    global result_mask
    global mask_rcnn_flag
    global inds_len
    while 1:
        mask_rcnn_flag = 1
        result = inference_detector(model, color_img)
def main():
    """Run a tracking worker that talks to a server over ZeroMQ.

    The worker downloads the model if it is missing, builds the tracker,
    subscribes to the server's frame topic and to its own id, and registers
    itself once the subscriber handshake succeeds. It then handles these
    message types: 'FRAME' (track and push the result), 'SUPPORT' (initialise
    the tracker from a base64 image and a box), 'LOCATION' (update the
    tracker position) and 'PING' (answer with 'PONG'). On Ctrl+C it sends
    'FIN' and waits for the server to answer with 'FIN' before exiting.
    """
    torch.cuda.set_device(args.gpu_id)

    model_dir = "./experiments/siamrpn_r50_l234_dwxcorr/model.pth"
    model_config = "./experiments/siamrpn_r50_l234_dwxcorr/config.yaml"

    if os.path.isfile(model_dir):
        print("model file {} found".format(model_dir))
    else:
        # The message now has a placeholder for the path it is formatted with.
        print("model file {} not found, starting download".format(model_dir))
        os.system(
            "gdown https://drive.google.com/uc?id=1-tEtYQdT1G9kn8HsqKNDHVqjE16F8YQH")
        os.system("mv model.pth ./experiments/siamrpn_r50_l234_dwxcorr")

    # load config
    cfg.merge_from_file(model_config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(model_dir,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # create an unique identifier
    worker_id = uuid.uuid4()

    # build tracker
    tracker = build_tracker(model)

    # Socket to talk to server
    context = zmq.Context()
    sub_socket = context.socket(zmq.SUB)

    # set up frame listening socket
    sub_socket.connect("tcp://{}:5556".format(args.server_ip))
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, "frame_")
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, str(worker_id))

    # setup push socket
    context = zmq.Context()
    push_socket = context.socket(zmq.PUSH)
    push_socket.connect("tcp://{}:5557".format(args.server_ip))

    # event monitoring
    # used to register worker once connection is established
    EVENT_MAP = {
        getattr(zmq, name): name
        for name in dir(zmq) if name.startswith('EVENT_')
    }

    # monitor thread function
    def event_monitor(monitor):
        while monitor.poll():
            evt = recv_monitor_message(monitor)
            evt.update({'description': EVENT_MAP[evt['event']]})
            if evt['event'] == zmq.EVENT_HANDSHAKE_SUCCEEDED:
                push_socket.send_json(
                    {"type": "REGISTER", "id": str(worker_id)})
            if evt['event'] == zmq.EVENT_MONITOR_STOPPED:
                break
        monitor.close()

    # register monitor
    monitor = sub_socket.get_monitor_socket()
    # Daemon thread: it blocks in monitor.poll(), and a non-daemon thread
    # would keep the interpreter alive after exit(0) below.
    t = threading.Thread(target=event_monitor, args=(monitor,), daemon=True)
    t.start()

    # (frame, bbox) of the last SUPPORT message; None until the first one
    support = None

    try:
        while True:
            # wait for next message: topic frame, then the JSON header
            _ = sub_socket.recv()
            md = sub_socket.recv_json()

            if md['type'] == 'FRAME':
                msg = sub_socket.recv()
                buf = memoryview(msg)
                frame = np.frombuffer(
                    buf, dtype=md['dtype']).reshape(md['shape'])

                # frames are dropped until the tracker has been initialised
                if support is None:
                    continue

                outputs = tracker.track(frame)
                bbox = list(map(int, outputs['bbox']))

                # send result
                push_socket.send_json(
                    {
                        "type": "TRACK",
                        "bbox": bbox,
                        "score": outputs['best_score'].tolist(),
                        "time": md['time'],
                        "id": str(worker_id)
                    })
                print('message: {}'.format(md['time']), end='\r')

            elif md['type'] == 'SUPPORT':
                frame_raw = md['data']['img']  # base 64 png image
                # decode to RGB, then reverse the channel axis
                frame = np.array(
                    Image.open(
                        io.BytesIO(
                            base64.b64decode(frame_raw)
                        )
                    ).convert('RGB'))[:, :, ::-1]
                bbox = [int(float(i)) for i in md['data']['bbox'].split(",")]
                tracker.init(frame, bbox)
                support = (frame, bbox)
                print('Support received, tracking will now start')

            elif md['type'] == 'LOCATION':
                # make sure tracker has been initialized
                if support is not None:
                    center_pos = np.array(md['data'])
                    tracker.update(center_pos)

            elif md['type'] == 'PING':
                push_socket.send_json({"type": "PONG", "id": str(worker_id)})

            else:
                print('Invalid message type received: {}'.format(md['type']))

    except KeyboardInterrupt:
        print('Exiting... notifying server of disconnect')
        push_socket.send_json(
            {"type": "FIN", "id": str(worker_id)})

        # wait for the server to respond or let the user forcefully close
        print("Waiting for server response. Press CTRL+C again to forcefully close")
        while True:
            _ = sub_socket.recv()
            md = sub_socket.recv_json()
            if md['type'] == "FIN":
                print('Server responded, now exiting')
                exit(0)
            elif md['type'] == "FRAME":
                # we have to accept the incoming frame to properly accept
                # future messages
                msg = sub_socket.recv()