def main():
    """Entry point of the distributed training script.

    Initialises the distributed backend, merges the experiment config named
    by ``args.cfg``, builds the model, data loader, optimizer and LR
    scheduler, optionally resumes from ``cfg.TRAIN.RESUME`` and finally
    hands everything to ``train``.  Log handlers and the TensorBoard writer
    are only set up on rank 0.

    Raises:
        AssertionError: if ``cfg.TRAIN.RESUME`` is set but is not a file.
    """
    rank, world_size = dist_init()
    logger.info("init done")

    # load cfg
    cfg.merge_from_file(args.cfg)
    if rank == 0:
        # LOG_DIR may be empty (the checks below allow for that), and
        # os.makedirs('') raises, so only create the directory when one is
        # configured.  exist_ok avoids the exists()/makedirs() race.
        if cfg.TRAIN.LOG_DIR:
            os.makedirs(cfg.TRAIN.LOG_DIR, exist_ok=True)
        init_log('global', logging.INFO)
        if cfg.TRAIN.LOG_DIR:
            add_file_handler('global',
                             os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                             logging.INFO)

        logger.info("Version Information: \n{}\n".format(commit()))
        logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # create model
    model = ModelBuilder().cuda().train()
    dist_model = DistModule(model)

    # load pretrained backbone weights (path is relative to this file's
    # parent directory)
    if cfg.BACKBONE.PRETRAINED:
        cur_path = os.path.dirname(os.path.realpath(__file__))
        backbone_path = os.path.join(cur_path, '../', cfg.BACKBONE.PRETRAINED)
        load_pretrain(model.backbone, backbone_path)

    # create tensorboard writer (rank 0 only, and only with a log dir)
    if rank == 0 and cfg.TRAIN.LOG_DIR:
        tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)
    else:
        tb_writer = None

    # build dataset loader
    train_loader = build_data_loader()

    # build optimizer and lr_scheduler
    optimizer, lr_scheduler = build_opt_lr(dist_model.module,
                                           cfg.TRAIN.START_EPOCH)

    # resume training
    if cfg.TRAIN.RESUME:
        logger.info("resume from {}".format(cfg.TRAIN.RESUME))
        assert os.path.isfile(cfg.TRAIN.RESUME), \
            '{} is not a valid file.'.format(cfg.TRAIN.RESUME)
        model, optimizer, cfg.TRAIN.START_EPOCH = \
            restore_from(model, optimizer, cfg.TRAIN.RESUME)
        # restore_from hands back the model, so wrap it again before training
        dist_model = DistModule(model)

    logger.info(lr_scheduler)
    logger.info("model prepare done")

    # start training
    train(train_loader, dist_model, optimizer, lr_scheduler, tb_writer)
def main():
    """Run the interactive tracking demo.

    Merges the config named by ``args.config``, loads the snapshot
    ``args.snapshot`` into a freshly built model, asks the user to select
    the target on the first frame yielded by ``get_frames`` and then draws
    the tracker output (mask polygon or bounding box) on every later frame.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model; honour the device chosen above instead of calling .cuda()
    # unconditionally, which ignored cfg.CUDA
    model = load_pretrain(model, args.snapshot).to(device).eval()

    # build tracker
    tracker = build_tracker(model)

    # the window is named after the video file (without directory/extension)
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    first_frame = True
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except (Exception, KeyboardInterrupt):
                # selection failed or was interrupted: quit quietly
                raise SystemExit
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        outputs = tracker.track(frame)
        if 'polygon' in outputs:
            polygon = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            # NOTE(review): mask is uint8 holding 0/255, so `mask * 255`
            # wraps around in uint8 (255 -> 1); kept as-is so the overlay
            # colour does not change.
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
        else:
            # bbox is used as [x, y, w, h]
            bbox = list(map(int, outputs['bbox']))
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 0), 3)
        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def main():
    """Run the tracker over a benchmark dataset, save results, evaluate.

    The tracker name is derived from the snapshot file name and stored on
    ``args.tracker_name``.  For VOT2016/2018/2019 the restart protocol is
    used (the tracker is re-initialised a few frames after every loss);
    every other dataset is tracked one-pass (OPE).  Result files are written
    below ``results/<dataset>/<tracker_name>/`` and ``evaluate(args)`` is
    called once all videos are processed.
    """
    args.tracker_name = args.snapshot.split('/')[-1].split('.')[0]

    # load config
    cfg.merge_from_file(args.config)

    dataset_root = os.path.join('./datasets', args.dataset)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for video in tqdm(dataset):
            # test one special video
            if args.video != '' and video.name != args.video:
                continue
            frame_counter = 0
            lost_number = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # expand [x, y, w, h] into the 4-corner polygon form
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1],
                        gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                if idx == frame_counter:
                    # (re-)initialise the tracker on this frame
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    # [topx, topy, w, h]
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    # frame skipped while waiting for re-initialisation
                    pred_bboxes.append(0)
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    # np.int was removed from NumPy; int32 is what the
                    # demo code passes to cv2.polylines as well
                    cv2.polylines(
                        img,
                        [np.array(gt_bbox, np.int32).reshape((-1, 1, 2))],
                        True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox,
                                      np.int32).reshape((-1, 1, 2))],
                            True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)

            # save results
            video_path = os.path.join('results', args.dataset,
                                      args.tracker_name, 'baseline',
                                      video.name)
            os.makedirs(video_path, exist_ok=True)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        # 1 = init, 2 = lost, 0 = skipped (see loop above)
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join(
                            [vot_float2str("%.4f", i) for i in x]) + '\n')
    else:
        # OPE tracking
        # Iterate the dataset directly: the previous enumerate() wrapper made
        # `video` an (index, video) tuple, so `video.name` raised
        # AttributeError in the single-video filter and the VOT2018-LT branch.
        for video in tqdm(dataset):
            # test one special video
            if args.video != '' and video.name != args.video:
                continue
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    # [topx, topy, w, h]
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                track_times.append(
                    (cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(
                        img, (gt_bbox[0], gt_bbox[1]),
                        (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                        (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)

            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset,
                                          args.tracker_name, 'longterm',
                                          video.name)
                os.makedirs(video_path, exist_ok=True)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        # the init frame has no score: write an empty line
                        if x is None:
                            f.write('\n')
                        else:
                            f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset,
                                          args.tracker_name, video.name)
                os.makedirs(video_path, exist_ok=True)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset,
                                          args.tracker_name)
                os.makedirs(model_path, exist_ok=True)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')

    print(args.snapshot.split('/')[-1])
    evaluate(args)
* len(args.lr) \ * len(args.search_region) print("Total search number: {}".format(num_search)) cfg.merge_from_file(args.config) cur_dir = os.path.dirname(os.path.realpath(__file__)) dataset_root = os.path.join(cur_dir, '../datasets', args.dataset) # create dataset dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root, load_img=False) # create model model = ModelBuilder() # load model model = load_pretrain(model, args.snapshot).cuda().eval() # build tracker tracker = build_tracker(model) model_name = args.snapshot.split('/')[-1].split('.')[0] benchmark_path = os.path.join('hp_search_result', args.dataset) seqs = list(range(len(dataset))) np.random.shuffle(seqs) for idx in seqs: video = dataset[idx] # load image video.load_img()
def main():
    """Entry point of the single-process, multi-GPU training script.

    Merges the experiment config named by ``args.cfg``, builds the model
    (wrapped in ``nn.DataParallel``), the data loader, optimizer and LR
    scheduler, optionally resumes from ``cfg.TRAIN.RESUME`` and finally
    hands everything to ``train``.

    Raises:
        AssertionError: if ``cfg.TRAIN.RESUME`` is set but is not a file.
    """
    # (1) distributed initialisation is disabled in this variant; the rank
    # is fixed to 0.
    # rank, world_size = dist_init()
    rank = 0
    logger.info("init done")

    # load cfg
    cfg.merge_from_file(args.cfg)
    # rank == 0 means the script runs on a single node
    if rank == 0:
        # LOG_DIR may be empty (the checks below allow for that), and
        # os.makedirs('') raises, so only create the directory when one is
        # configured.  exist_ok avoids the exists()/makedirs() race.
        if cfg.TRAIN.LOG_DIR:
            os.makedirs(cfg.TRAIN.LOG_DIR, exist_ok=True)
        init_log('global', logging.INFO)
        if cfg.TRAIN.LOG_DIR:
            add_file_handler('global',
                             os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                             logging.INFO)

        logger.info("Version Information: \n{}\n".format(commit()))
        logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # (2) create model
    # NOTE(review): the GPU ids are hard-coded, so this needs two visible
    # devices — confirm this matches the intended hardware.
    device_ids = [0, 1]
    model = ModelBuilder().cuda().train()
    dist_model = nn.DataParallel(model, device_ids=device_ids)

    # load pretrained backbone weights (path is relative to this file's
    # parent directory)
    if cfg.BACKBONE.PRETRAINED:
        cur_path = os.path.dirname(os.path.realpath(__file__))
        backbone_path = os.path.join(cur_path, '../', cfg.BACKBONE.PRETRAINED)
        load_pretrain(model.backbone, backbone_path)

    # create tensorboard writer
    if rank == 0 and cfg.TRAIN.LOG_DIR:
        tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)
    else:
        tb_writer = None

    # build dataset loader (load the dataset)
    train_loader = build_data_loader()

    # build optimizer and lr_scheduler
    optimizer, lr_scheduler = build_opt_lr(dist_model.module,
                                           cfg.TRAIN.START_EPOCH)

    # resume training
    if cfg.TRAIN.RESUME:
        logger.info("resume from {}".format(cfg.TRAIN.RESUME))
        assert os.path.isfile(cfg.TRAIN.RESUME), \
            '{} is not a valid file.'.format(cfg.TRAIN.RESUME)
        # (1) continue training from a checkpoint.  restore_from is given
        # the bare model, so wrap its result again; previously the result
        # was bound straight to dist_model, dropping the DataParallel
        # wrapper.
        model, optimizer, cfg.TRAIN.START_EPOCH = \
            restore_from(model, optimizer, cfg.TRAIN.RESUME)
        dist_model = nn.DataParallel(model, device_ids=device_ids)
        # (2) alternative kept for reference: load the checkpoint as plain
        # pretrained weights via model.load_state_dict(ckpt, strict=False).

    logger.info(lr_scheduler)
    logger.info("model prepare done")

    # start training
    train(train_loader, dist_model, optimizer, lr_scheduler, tb_writer)
def main():
    """Run the tracking demo and print the average tracking speed.

    Parses ``--config``, ``--snapshot`` and ``--video_name`` (defaults come
    from the module-level ``config``, ``snapshot`` and ``video``), loads the
    model, asks the user to select the target on the first frame and then
    tracks it on every later frame while drawing the result.  When the input
    is exhausted the mean speed over the tracked frames is printed as
    ``FPS:<n>``.
    """
    # load parameters
    parser = argparse.ArgumentParser(description='tracking demo')
    parser.add_argument('--config', type=str, help='config file',
                        default=config)
    parser.add_argument('--snapshot', type=str, help='model name',
                        default=snapshot)
    parser.add_argument('--video_name', type=str,
                        help='videos or image files', default=video)
    args = parser.parse_args()

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model (weights are first mapped to CPU, then moved to `device`)
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    # NOTE(review): this creates a window called 'video', while everything
    # below draws into `video_name` — confirm whether that is intended.
    cv2.namedWindow('video', cv2.WND_PROP_FULLSCREEN)

    # variable initialisation
    timer = 0    # seconds spent in tracker.track()
    num = 0      # index of the current frame (1-based, shown on screen)
    tracked = 0  # number of frames that contributed to `timer`

    first_frame = True
    for frame in get_frames(args.video_name):
        start = cv2.getTickCount()
        num = num + 1
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except (Exception, KeyboardInterrupt):
                # selection failed or was interrupted: quit quietly
                raise SystemExit
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        outputs = tracker.track(frame)
        end = cv2.getTickCount()
        timer = timer + (end - start) / cv2.getTickFrequency()
        tracked += 1

        if 'polygon' in outputs:
            polygon = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            # NOTE(review): mask is uint8 holding 0/255, so `mask * 255`
            # wraps around in uint8 (255 -> 1); kept as-is so the overlay
            # colour does not change.
            mask = np.stack([mask, mask, mask * 255]).transpose(1, 2, 0)
            gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
            ret, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
            # findContours returns (image, contours, hierarchy) on
            # OpenCV 3.x and (contours, hierarchy) on 2.x/4.x; the contours
            # are always the second-to-last item.
            contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                        cv2.CHAIN_APPROX_NONE)[-2]
            frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            if len(contours) > 0:
                # contour enclosing the largest area
                largest = max(contours, key=cv2.contourArea)
                rect_new2 = cv2.boundingRect(largest)
                cv2.rectangle(
                    frame, (rect_new2[0], rect_new2[1]),
                    (rect_new2[0] + rect_new2[2],
                     rect_new2[1] + rect_new2[3]),
                    (0, 0, 255), 2)
        else:
            # bbox is used as [x, y, w, h]
            bbox = list(map(int, outputs['bbox']))
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 0), 2)

        cv2.putText(frame, imgname2, (5, 50), cv2.FONT_HERSHEY_COMPLEX, 2.0,
                    (255, 0, 0), 2)
        cv2.putText(frame, str(num), (5, 120), cv2.FONT_HERSHEY_COMPLEX, 2.0,
                    (255, 0, 0), 2)
        cv2.namedWindow(video_name, 0)
        cv2.resizeWindow(video_name, 1000, 800)
        cv2.imshow(video_name, frame)
        cv2.waitKey(30)

    # Only tracked frames are timed, so only they are counted; also guards
    # against a division by zero when no frame was tracked at all.
    fps = int(tracked / timer) if timer > 0 else 0
    print('FPS:%d' % (fps))