def main():
    rank, world_size = dist_init()  # rank = 0
    logger.info("init done")

    # load cfg
    cfg.merge_from_file(args.cfg)
    if rank == 0:
        if not os.path.exists(cfg.TRAIN.LOG_DIR):
            os.makedirs(cfg.TRAIN.LOG_DIR)
        init_log('global', logging.INFO)
        if cfg.TRAIN.LOG_DIR:
            add_file_handler('global',
                             os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                             logging.INFO)
        logger.info("Version Information: \n{}\n".format(commit()))
        logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # create model
    model = ModelBuilder().train()
    dist_model = nn.DataParallel(model).cuda()

    # load pretrained backbone weights
    if cfg.BACKBONE.PRETRAINED:
        cur_path = os.path.dirname(os.path.realpath(__file__))
        backbone_path = os.path.join(cur_path, '../', cfg.BACKBONE.PRETRAINED)
        load_pretrain(model.backbone, backbone_path)

    # create tensorboard writer
    if rank == 0 and cfg.TRAIN.LOG_DIR:
        tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)
    else:
        tb_writer = None

    # build dataset loader
    train_loader = build_data_loader()

    # build optimizer and lr_scheduler
    optimizer, lr_scheduler = build_opt_lr(dist_model.module,
                                           cfg.TRAIN.START_EPOCH)

    # resume training
    if cfg.TRAIN.RESUME:
        logger.info("resume from {}".format(cfg.TRAIN.RESUME))
        assert os.path.isfile(cfg.TRAIN.RESUME), \
            '{} is not a valid file.'.format(cfg.TRAIN.RESUME)
        model, optimizer, cfg.TRAIN.START_EPOCH = \
            restore_from(model, optimizer, cfg.TRAIN.RESUME)
    # load pretrain
    elif cfg.TRAIN.PRETRAINED:
        load_pretrain(model, cfg.TRAIN.PRETRAINED)
    dist_model = nn.DataParallel(model)

    logger.info(lr_scheduler)
    logger.info("model prepare done")

    # start training
    train(train_loader, dist_model, optimizer, lr_scheduler, tb_writer)
def save_siamese_rpn():
    # load config
    rpn_path = root_dir + 'experiments/siamrpn_alex_dwxcorr_16gpu/pre_train/checkpoint_e45.pth'
    gru_rpn = root_dir + 'experiments/siamrpn_alex_dwxcorr_16gpu/config.yaml'
    cfg.merge_from_file(gru_rpn)
    # create model
    model_rpn = ModelBuilder()
    model_rpn = load_pretrain(model_rpn, rpn_path).cuda().eval()

    gru_path = root_dir + 'experiments/siamrpn_alex_dwxcorr_16gpu/gru_snapshot/gru_10.pth'
    gru_cfg = root_dir + 'experiments/siamrpn_alex_dwxcorr_16gpu/config_gru.yaml'
    cfg.merge_from_file(gru_cfg)
    # create model
    model_gru = ModelBuilder()
    model_gru = load_pretrain(model_gru, gru_path).cuda().eval()

    for key, item in model_gru.named_parameters():
        # print(key.find("grus"))
        print(key, item.shape)
    for key, item in model_rpn.named_parameters():
        # print(key.find("grus"))
        print(key, item.shape)

    # keep the GRU weights ("fix"), copy everything else from the RPN
    # checkpoint ("change"); the legacy->alexnet remapping variant lives in
    # the other save_siamese_rpn below
    model_gru_dict = model_gru.state_dict()
    model_rpn_dict = model_rpn.state_dict()
    for key in model_gru_dict:
        if key.find("grus") != -1:
            print("fix:", key)
        else:
            print("change:", key)
            model_gru_dict[key] = model_rpn_dict[key]

    torch.save(model_gru_dict, "siamese_gru10_rpn45.pth")
def main():
    rank, world_size = dist_init()
    logger.info("init done")

    # load cfg
    cfg.merge_from_file(args.cfg)
    if rank == 0:
        if not os.path.exists(cfg.TRAIN.LOG_DIR):
            os.makedirs(cfg.TRAIN.LOG_DIR)
        init_log('global', logging.INFO)
        if cfg.TRAIN.LOG_DIR:
            add_file_handler('global',
                             os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                             logging.INFO)
        logger.info("Version Information: \n{}\n".format(commit()))
        logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # create model
    model = Template_Enhance().cuda().train()
    dist_model = DistModule(model)

    # load pretrained SiamRPN++ model
    if cfg.TSA.MODELBUILD_PATH:
        cur_path = os.path.dirname(os.path.realpath(__file__))
        backbone_path = os.path.join(cur_path, '../', cfg.TSA.MODELBUILD_PATH)
        load_pretrain(model.model, backbone_path)

    # create tensorboard writer
    if rank == 0 and cfg.TRAIN.LOG_DIR:
        tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)
    else:
        tb_writer = None

    # build datasets loader
    train_loader, val_loader = build_data_loader()

    # build optimizer and lr_scheduler
    optimizer, scheduler = build_opt_lr(dist_model.module)

    logger.info(scheduler)
    logger.info("model prepare done")

    # start estimation
    # train(train_loader, dist_model, optimizer, scheduler, tb_writer)
    estimation = Estimator(train_loader, val_loader, dist_model,
                           optimizer, scheduler, tb_writer)
    # estimation.evaluate(5000, 'loss')
    estimation.train()
def setup_tracker():
    cfg.merge_from_file(cfg_file)

    model = ModelBuilder()
    model = load_pretrain(model, model_file).cuda().eval()

    tracker = build_tracker(model)
    warmup(model)
    return tracker
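# `warmup` is referenced above but not defined in this snippet. A minimal
# sketch of what such a helper might do, assuming pysot's ModelBuilder API
# (model.template / model.track) and the usual 127/255 SiamRPN++ crop sizes:
import torch

def warmup(model, n_iters=10):
    # run a few dummy forward passes so CUDA kernel compilation and memory
    # allocation happen before any timed tracking starts
    with torch.no_grad():
        z = torch.zeros(1, 3, 127, 127).cuda()
        x = torch.zeros(1, 3, 255, 255).cuda()
        for _ in range(n_iters):
            model.template(z)
            model.track(x)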
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    # model.load_state_dict(torch.load(args.snapshot,
    #     map_location=lambda storage, loc: storage.cpu()))
    # model.eval().to(device)
    model = load_pretrain(model, args.snapshot).eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
                # MASK_THERSHOLD is pysot's own (misspelled) config key
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
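# `get_frames` is assumed by the demo loops in this file but never shown.
# A minimal sketch consistent with how it is called (webcam when no path is
# given, otherwise a video file or a directory of JPEG frames):
import os
from glob import glob
import cv2

def get_frames(video_name):
    if not video_name:
        cap = cv2.VideoCapture(0)  # default webcam
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            yield frame
    elif video_name.endswith('avi') or video_name.endswith('mp4'):
        cap = cv2.VideoCapture(video_name)
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            yield frame
    else:
        # assume a directory of frames named so that sorting gives temporal order
        for img in sorted(glob(os.path.join(video_name, '*.jp*'))):
            yield cv2.imread(img)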
def __init__(self):
    super(DROL, self).__init__("DROL")
    # load config
    cfg.merge_from_file(path_config.DROL_CONFIG)
    seed_torch(cfg.TRACK.SEED)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, path_config.DROL_SNAPSHOT).cuda().eval()

    # build tracker
    self.tracker = build_tracker(model)
def __init__(self, lr_u=0.2, lr_v=0.2, lambda_u=0.1, lambda_v=10.0,
             x_padding=0.5, z_ratio=1.2, features='gray', kernel='gaussian'):
    # note: was super(SFKCF).__init__(), which never calls the parent __init__
    super(SFKCF, self).__init__()
    self.x_padding = x_padding
    self.lambda_ = 1e-4
    self.features = features
    self.w2c = None
    if self.features == 'hog':
        self.interp_factor = 0.02
        self.sigma = 0.5
        self.cell_size = 4
        self.output_sigma_factor = 0.1
    elif self.features == 'sfres50':
        self.interp_factor = 0.02
        self.sigma = 0.5
        self.cell_size = 8.0
        self.output_sigma_factor = 0.1
        model = ModelBuilder()
        model = load_pretrain(model, cfg.BACKBONE.PRETRAINED).backbone
        self.model = model.cuda().eval()
    elif self.features == 'gray' or self.features == 'color':
        self.interp_factor = 0.075
        self.sigma = 0.2
        self.cell_size = 1
        self.output_sigma_factor = 0.1
    elif self.features == 'cn':
        self.interp_factor = 0.075
        self.sigma = 0.2
        self.cell_size = 1
        self.output_sigma_factor = 1. / 16
        self.padding = 1
    else:
        raise NotImplementedError

    self.kernel = kernel
    self.U = None
    self.V = None
    self.lr_u = lr_u
    self.lr_v = lr_v
    self.lambda_v = lambda_v
    self.lambda_u = lambda_u
    self.z_padding = z_ratio * x_padding
    self.vis = None
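# Quick usage sketch for the constructor above (class name taken from the
# super() call; the tracker's init/update methods are defined elsewhere):
tracker = SFKCF(features='hog', kernel='gaussian')  # HOG features, cell size 4
# the 'sfres50' variant additionally needs cfg.BACKBONE.PRETRAINED to point
# at a ResNet-50 backbone checkpoint and a CUDA device:
# deep_tracker = SFKCF(features='sfres50')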
def load_pysot_model(tracker_type):
    configpath = "./week3/kalman/pysot/experiments/" + \
        PYSOT_TRACKERS[tracker_type] + "/config.yaml"
    modelpath = "./week3/kalman/pysot/models/" + \
        PYSOT_TRACKERS[tracker_type] + ".pth"

    cfg.merge_from_file(configpath)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # load model; load_pretrain already copies the checkpoint weights, so the
    # earlier manual load_state_dict pass was redundant and has been dropped
    model = ModelBuilder()
    return load_pretrain(model, modelpath).cuda().eval()
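# Usage sketch: PYSOT_TRACKERS maps a short tracker name to its experiment
# folder, so the key below is hypothetical and must exist in that dict.
model = load_pysot_model("siamrpn_r50")
tracker = build_tracker(model)  # pysot's tracker factory, as used elsewhere here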
def __init__(self, dataset=''):
    if 'OTB' in dataset:
        cfg_file = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr_otb/config.yaml')
        snapshot = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr_otb/model.pth')
    elif 'LT' in dataset:
        cfg_file = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr_lt/config.yaml')
        snapshot = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr_lt/model.pth')
    else:
        cfg_file = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr/config.yaml')
        snapshot = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr/model.pth')
    # load config
    cfg.merge_from_file(cfg_file)
    # create model
    self.model = ModelBuilder()  # a neural network (a torch.nn.Module)
    # load model
    self.model = load_pretrain(self.model, snapshot).cuda().eval()
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).eval().to(device)

    # build tracker
    tracker = SiamAPNTracker(model, cfg.TRACK)

    hp = {'lr': 0.3, 'penalty_k': 0.04, 'window_lr': 0.4}

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame, hp)
            bbox = list(map(int, outputs['bbox']))
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 0), 3)
            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
def save_siamese_rpn():
    # load config
    cfg.merge_from_file(args.config)

    cfg.BACKBONE.TYPE = 'alexnetlegacy'
    # create model
    model_legacy = ModelBuilder()
    # load model
    model_legacy = load_pretrain(model_legacy, args.snapshot).cuda().eval()

    cfg.BACKBONE.TYPE = 'alexnet'
    # create model
    model_alexnet = ModelBuilder()

    # for key, item in model.named_parameters():
    #     print(key, item.shape)
    for key, item in model_alexnet.named_parameters():
        print(key, item.shape)

    # build a legacy-name -> alexnet-name map by walking both parameter lists
    # in parallel, then copy the weights across under the new names
    name_map = {}
    model_legacy_dict = model_legacy.state_dict()
    model_alexnet_dict = model_alexnet.state_dict()
    for para1, para2 in zip(model_legacy.named_parameters(),
                            model_alexnet.named_parameters()):
        # print(para1[0], para1[1].shape)
        print(para1[0])
        print(para2[0])
        print(para1[1].shape)
        print(para2[1].shape)
        print("--" * 40)
        # print("['{}'--->'{}']".format(para1[0], para2[0]), para1[1].shape, para2[1].shape)
        name_map[para1[0]] = para2[0]
    print(name_map)

    for key, val in name_map.items():
        model_alexnet_dict[val] = model_legacy_dict[key]
    torch.save(model_alexnet_dict, "siamese_alexnet_rpn.pth")
def train(video, v_idx, attack_region, template_dir):
    n_epochs = args.epochs
    epsilon = args.epsilon
    lr = args.lr

    track_model = ModelBuilder()
    track_model = load_pretrain(track_model, args.snapshot).cuda().eval()
    # build tracker
    tracker = build_tracker(track_model)
    # h, w, _ = video[0][0].shape
    attacker = ModelAttacker(args.batch, args.epsilon).cuda().train()
    optimizer = optim.Adam(attacker.parameters(), lr=lr)
    # optimizer = optim.SGD(attacker.parameters(), lr=lr, momentum=0.9)
    # freeze the tracker; only the attacker's perturbation is trained
    for name, param in tracker.model.named_parameters():
        param.requires_grad_(False)

    # load pretrained
    start_epoch = 0
    checkpoint_dir = os.path.join(args.savedir, args.dataset, 'checkpoint',
                                  attack_region, video.name)
    if os.path.exists(checkpoint_dir):
        entries = os.listdir(checkpoint_dir)
        if len(entries) > 0:
            entries.sort()
            start_epoch = int(entries[-1][-7:-4]) if entries[-1][-7:-4].isnumeric() else 0
            if start_epoch == args.epochs:
                return
            if start_epoch > 0:
                state = torch.load(os.path.join(checkpoint_dir, entries[-1]))
                attacker.load_state_dict(state['attacker'])
                optimizer.load_state_dict(state['optimizer'])
    # elif attack_region == 'search':
    #     checkpoint_dir = os.path.join(args.savedir, args.dataset, 'checkpoint', 'template', video.name)
    #     assert os.path.exists(os.path.join(checkpoint_dir, 'checkpoint_100.pth')), \
    #         ' missing ' + checkpoint_dir + ' in ' + video.name
    #     state = torch.load(os.path.join(checkpoint_dir, 'checkpoint_100.pth'))
    #     attacker.load_state_dict(state['attacker'])
    #     optimizer.load_state_dict(state['optimizer'])

    # generate cropping offset
    if attack_region == 'template':
        tracker.generate_transition(0, len(video))
    elif attack_region == 'search':
        tracker.generate_transition(0, len(video))

    # disable the gradient of the perturbation that is NOT being trained
    if attack_region == 'template':
        attacker.adv_x.requires_grad = False
    elif attack_region == 'search':
        attacker.adv_z.requires_grad = False

    training_data = MyDataset()
    num_frames = len(video)
    it = math.ceil((num_frames - 1) / args.batch)
    params = {'batch_size': args.batch, 'shuffle': False, 'num_workers': 6}
    for i in range(0, it - 1):
        for j in range(0, args.batch):
            indx = i * args.batch + j + 1
            training_data.add([video[indx][0], video[indx][1]])
    for j in range(args.batch * (it - 1), num_frames - 1):
        training_data.add([video[j + 1][0], video[j + 1][1]])
    img_names = [x.replace(args.dataset_dir, args.fabricated_dir)
                 for x in video.img_names]
    del img_names[0]
    data_loader = torch.utils.data.DataLoader(training_data, **params)

    toc = 0
    for epoch in range(0, args.epochs):
        if epoch < start_epoch:
            continue
        # initial frame
        img, gt_bbox = video[0]
        if attack_region == 'search':
            # img_name = video.img_names[0].replace(args.dataset_dir, args.fabricated_dir)
            img_name = os.path.join(args.savedir, args.dataset, video.name, '000099.jpg')
            img = cv2.imread(img_name)
        gt_bbox, gt_bbox_ = gt_bbox_adaptor(gt_bbox)
        state = {'img': img, 'gt_bbox': gt_bbox_, 'video_name': video.name}
        state, loss = adversarial_train(0, state, attacker, tracker, optimizer,
                                        gt_bbox_, attack_region, template_dir, epoch)

        pbar = tqdm(enumerate(data_loader), position=0, leave=True)
        _loss = []
        if attack_region == 'template':
            adv_z = []
        for (_idx, (idx, (imgs, gt_bboxes))) in enumerate(pbar):
            if len(gt_bboxes[0]) == 4:
                gt_bboxes = (gt_bboxes[:, 0], gt_bboxes[:, 1],
                             gt_bboxes[:, 0], gt_bboxes[:, 1] + gt_bboxes[:, 3] - 1,
                             gt_bboxes[:, 0] + gt_bboxes[:, 2] - 1,
                             gt_bboxes[:, 1] + gt_bboxes[:, 3] - 1,
                             gt_bboxes[:, 0] + gt_bboxes[:, 2] - 1, gt_bboxes[:, 1])
                gt_bboxes = torch.stack(gt_bboxes).float()
            cx, cy, w, h = get_axis_aligned_bbox_tensor(gt_bboxes)
            gt_bboxes_ = torch.stack([cx, cy, w, h])
            tic = cv2.getTickCount()
            state['img'] = imgs
            state, loss = adversarial_train(args.batch * idx + 1, state, attacker,
                                            tracker, optimizer, gt_bboxes_,
                                            attack_region, template_dir, epoch)
            toc += cv2.getTickCount() - tic
            if idx < 60:
                if idx > 0:
                    _loss.append(loss)
            # pbar.set_postfix_str('%d. Video: %s epoch: %d total %.3f %.3f %.3f %.3f %.3f' %
            #                      (v_idx + 1, video.name, epoch + 1, loss[0], loss[1],
            #                       loss[2], loss[3], attacker.adv_z.mean()))
            pbar.set_postfix_str('Video(%d): %s epoch: %d ' %
                                 (v_idx + 1, video.name, epoch + 1))
            # pbar.set_postfix_str('%d. Video: %s epoch: %d total %.3f %.3f %.3f %.3f' %
            #                      (v_idx + 1, video.name, epoch + 1, loss[0], loss[1], loss[2], loss[3]))
            if attack_region == 'search':
                fabricated_dir = '/'.join(img_names[0].split('/')[:-1])
                if not os.path.exists(fabricated_dir):
                    os.makedirs(os.path.join(fabricated_dir))
                for i in range(len(imgs)):
                    x_adv = attacker.add_noise(tracker.x_crops[i], attacker.adv_x[i])
                    x_adv = x_adv.unsqueeze(0)
                    save(imgs[i].data.cpu().numpy(), x_adv, state['s_x'],
                         gt_bboxes_[:, i], img_names[args.batch * idx + i],
                         shift=tracker.shift[:, args.batch * idx + i + 1].numpy(),
                         region=attack_region, save=True)
        toc /= cv2.getTickFrequency()

        if attack_region == 'template':
            z_adv = attacker.add_noise(tracker.z_crop, attacker.adv_z, epsilon)
            img_dir = os.path.join(args.savedir, args.dataset, state['video_name'])
            if not os.path.exists(img_dir):
                os.makedirs(os.path.join(img_dir))
            save(state['zimg'], z_adv, state['sz'], state['init_gt'], attack_region,
                 os.path.join(img_dir, str(epoch).zfill(6) + '.jpg'),
                 shift=None, region=attack_region, save=True)

        _loss = np.asarray(_loss)
        _loss_v = sum(_loss, 0) / _loss.shape[0]
        pbar.clear()
        print('%d. Video: %s Time: %.2fs epoch: %d total %.3f %.3f %.3f' %
              (v_idx + 1, video.name, toc, epoch + 1,
               _loss_v[0], _loss_v[1], _loss_v[2]))

        # save state dict
        state_dict = {'attacker': attacker.state_dict(),
                      'optimizer': optimizer.state_dict(),
                      'epoch': epoch + 1}
        checkpoint_path = os.path.join(args.savedir, args.dataset, 'checkpoint',
                                       attack_region, video.name)
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)
        torch.save(state_dict, os.path.join(checkpoint_path, 'checkpoint.pth'))
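# `gt_bbox_adaptor` is used by train() above and test() below but not defined
# in this section. A sketch matching its call sites, mirroring the
# polygon/axis-aligned conversion the VOT loops in this file perform inline:
import numpy as np

def gt_bbox_adaptor(gt_bbox):
    if len(gt_bbox) == 4:  # [x, y, w, h] -> 8-point polygon
        x, y, w, h = gt_bbox
        gt_bbox = [x, y, x, y + h - 1, x + w - 1, y + h - 1, x + w - 1, y]
    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]  # [x, y, w, h]
    return gt_bbox, gt_bbox_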
def main():
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    # create model
    model = Model2021()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            overlaps1 = []
            vars1 = []
            vars0 = []
            occl1 = []
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            frame_width = 960    # img.shape[1]
            frame_height = 540   # img.shape[0]
            video_loc = os.path.join('../results', model_name, video.name)
            out = cv2.VideoWriter(video_loc + '.avi',
                                  cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                  10, (frame_width, frame_height), True)
            if video.tags['occlusion'] == [] or (np.array(video.tags['occlusion']) == 1).sum() == 0:
                print("\t\tdiscard occlusion")
                continue
            video.tags['occlusion'] = video.tags['all']
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    box1 = gt_bbox_
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                    if idx == 0:
                        print(img.shape)
                elif idx > frame_counter:
                    outputs = tracker.track(img, mode)  # `mode` comes from the enclosing scope
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    # per-frame challenge attributes, comparing the initial
                    # box (box1) against the current gt box (box2)
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    box2 = gt_bbox_
                    w1, h1 = box1[2], box1[3]
                    w2, h2 = box2[2], box2[3]
                    cx1, cy1 = (img.shape[1] // 2, img.shape[0] // 2)
                    cx2, cy2 = (box2[2] / 2 + box2[0], box2[3] / 2 + box2[1])
                    # box1 = box2
                    # scale variation
                    s1 = np.sqrt(w1 * h1)
                    s2 = np.sqrt(w2 * h2)
                    sv = max(s1 / s2, s2 / s1)
                    # aspect ratio variation
                    r1, r2 = h1 / w1, h2 / w2
                    arv = max(r1 / r2, r2 / r1)
                    # fast motion
                    fm = np.sqrt((cx2 - cx1)**2 + (cy2 - cy1)**2) / np.sqrt(s1 * s2)
                    vars0.append(np.array([sv, arv, fm, outputs['cls2']]))
                    # occlusion
                    # print(idx, outputs['var'], np.array([sv, arv, fm]))
                    overlaps1.append(overlap)
                    vars1.append(outputs['cls2'])
                    if idx <= len(video.tags['occlusion']):
                        occl1.append(video.tags['occlusion'][idx])
                    else:
                        occl1.append(np.zeros(idx - len(video.tags['occlusion'])))
                    if overlap > 0.0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        # print("-------loss---------")
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                        for l in range(0, 5):
                            vars1.append(-0.2)
                            occl1.append(-0.2)
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img, [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (255, 0, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img, [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    print(idx)
                    cv2.putText(img, 'occl_gt:' + str(video.tags['occlusion'][idx - 1]),
                                (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
                    cv2.putText(img, 'proposed_TL:' + str(lost_number), (40, 160),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, 'occl_pred:' + str(vars1[idx - 1]), (40, 120),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    out.write(img)
                    cv2.imwrite(video_loc + str(idx) + '.png', img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            out.release()
            video_path = os.path.join(args.results, args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}, mIOU: {:0.4f}'.format(
                v_idx + 1, video.name, toc, idx / toc, lost_number,
                np.array(overlaps1).mean()))
            # plt.plot(overlaps1)
            # plt.plot(np.array(vars0)[:, 3])
            # plt.plot(np.array(occl1))
            # plt.plot(np.array(vars1))
            # print(np.correlate(overlaps1, np.array(vars1)[:, 2]))
            overlaps2.append(np.array(overlaps1).mean())
            # occl2.append(np.array(occl1))
            # vars2.append(np.array(vars1))
            # if args.video != '':
            #     v_idx = 0
            #     print(100 * (confusion_matrix(occl2[v_idx], vars2[v_idx]).ravel()))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
        # cv2.destroyAllWindows()
        # print("Total Mean IOU is %0.4f" % np.array(overlaps2).mean())
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                                  (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)  # was cv2.rewaitKey(1), which does not exist
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('../results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('../results', args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('../results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx + 1, video.name, toc, idx / toc))
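# The challenge attributes computed in the VOT loop above are three ratios;
# a tiny self-contained example with made-up boxes (not from any dataset):
import numpy as np

img_w, img_h = 960, 540                # assumed frame size
box1 = [100, 100, 40, 80]              # initial box  [x, y, w, h]
box2 = [130, 110, 60, 60]              # current gt box
w1, h1, w2, h2 = box1[2], box1[3], box2[2], box2[3]

s1, s2 = np.sqrt(w1 * h1), np.sqrt(w2 * h2)
sv = max(s1 / s2, s2 / s1)             # scale variation, always >= 1
r1, r2 = h1 / w1, h2 / w2
arv = max(r1 / r2, r2 / r1)            # aspect-ratio variation, always >= 1
# fast motion: displacement of the current box centre from the frame centre
# (where the loop above assumes the search region sits), normalised by scale
cx1, cy1 = img_w // 2, img_h // 2
cx2, cy2 = box2[2] / 2 + box2[0], box2[3] / 2 + box2[1]
fm = np.sqrt((cx2 - cx1) ** 2 + (cy2 - cy1) ** 2) / np.sqrt(s1 * s2)
print(sv, arv, fm)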
def main():
    rank, world_size = dist_init()
    logger.info("init done")

    # load cfg
    cfg.merge_from_file(args.cfg)
    if rank == 0:
        if not os.path.exists(cfg.TRAIN.LOG_DIR):
            os.makedirs(cfg.TRAIN.LOG_DIR)
        init_log('global', logging.INFO)
        if cfg.TRAIN.LOG_DIR:
            add_file_handler('global',
                             os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                             logging.INFO)
        # logger.info("Version Information: \n{}\n".format(commit()))
        logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # create model
    # model = ModelBuilder().cuda().train()
    model2 = SiamNet_D().cuda().train()
    # tracker = build_tracker(model)
    # dist_model = DistModule(model)
    dist_model2 = DistModule(model2)

    # load pretrained backbone weights
    if cfg.BACKBONE.PRETRAINED:
        cur_path = os.path.dirname(os.path.realpath(__file__))
        backbone_path = os.path.join(cur_path, '../', cfg.BACKBONE.PRETRAINED)
        load_pretrain(model2.netG.backbone, backbone_path)

    # create tensorboard writer
    if rank == 0 and cfg.TRAIN.LOG_DIR:
        tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)
    else:
        tb_writer = None

    # build dataset loader
    train_loader = build_data_loader()

    # build optimizer and lr_scheduler
    # optimizer, lr_scheduler, paramsG = build_opt_lr(dist_model2, cfg.TRAIN.START_EPOCH)
    optimizer2, lr_scheduler2 = build_opt_lr2(dist_model2, cfg.TRAIN.START_EPOCH)
    # print("gen:", paramsG)
    # print("disc", paramsD)

    # resume training
    # if cfg.TRAIN.RESUME:
    #     logger.info("resume from {}".format(cfg.TRAIN.RESUME))
    #     assert os.path.isfile(cfg.TRAIN.RESUME), \
    #         '{} is not a valid file.'.format(cfg.TRAIN.RESUME)
    #     model2, optimizer2, cfg.TRAIN.START_EPOCH = \
    #         restore_from(model2.module.netG, optimizer2, cfg.TRAIN.RESUME)
    # load pretrain
    # if cfg.TRAIN.PRETRAINED:
    #     load_pretrain(model2.netG, cfg.TRAIN.PRETRAINED)
    #     dist_model = DistModule(model)

    dist_model2 = DistModule(model2)
    # logger.info(lr_scheduler)
    logger.info("model prepare done")

    # start training
    train(train_loader, tb_writer, dist_model2, optimizer2, lr_scheduler2)
def test(video, v_idx, model_name, template_dir=None, img_names=None):
    # create model
    track_model = ModelBuilder()

    # load model
    track_model = load_pretrain(track_model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(track_model)

    # set writing video parameters
    height, width, channels = video[0][0].shape
    out = cv2.VideoWriter(
        os.path.join(args.savedir, args.dataset, video.name + '.avi'),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 15, (width, height))

    frame_counter = 0
    toc = 0
    pred_bboxes_adv = []
    adv_z = []
    pbar = tqdm(enumerate(video), position=0, leave=True)
    for idx, (img, gt_bbox) in pbar:
        gt_bbox, gt_bbox_ = gt_bbox_adaptor(gt_bbox)
        tic = cv2.getTickCount()
        pred_bbox, _lost, frame_counter = stoa_track(idx, frame_counter, img,
                                                     gt_bbox, tracker,
                                                     template_dir, img_names)
        pred_bboxes_adv.append(pred_bbox)
        toc += cv2.getTickCount() - tic
        if idx > 0:
            bbox = list(map(int, pred_bbox))
            cv2.rectangle(img, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 255), 3)
            __gt_bbox = list(map(int, gt_bbox_))
            cv2.rectangle(img, (__gt_bbox[0], __gt_bbox[1]),
                          (__gt_bbox[0] + __gt_bbox[2], __gt_bbox[1] + __gt_bbox[3]),
                          (0, 0, 0), 3)
            cv2.putText(img, str(idx), (40, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
            out.write(img)

    # save results
    if args.dataset not in ['VOT2016', 'VOT2018', 'VOT2019']:
        model_path = os.path.join('results', args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes_adv:
                f.write(','.join([str(i) for i in x]) + '\n')
    else:
        video_path = os.path.join('results', args.dataset, model_name,
                                  'baseline', video.name)
        if not os.path.isdir(video_path):
            os.makedirs(video_path)
        result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes_adv:
                if isinstance(x, int):
                    f.write("{:d}\n".format(x))
                else:
                    f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
def main():
    # load config
    cfg.merge_from_file(args.config)

    # !!! input your dataset path
    dataset_root = os.path.join(your_dataset_path, args.dataset)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    model_name = model_name + '_pk-{:.3f}'.format(cfg.TRACK.PENALTY_K) \
                            + '_wi-{:.3f}'.format(cfg.TRACK.WINDOW_INFLUENCE) \
                            + '_lr-{:.3f}'.format(cfg.TRACK.LR)
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx + 1, video.name, toc, idx / toc))
def main():
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(args.dataset_dir, args.dataset)
    epsilon = args.epsilon

    # create model
    model = ModelBuilder()

    # load model (train mode: gradients w.r.t. the input are needed)
    model = load_pretrain(model, args.snapshot).cuda().train()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False,
                                            config=cfg)
    # vid.name = {'ants1', 'ants3', ...}
    # img, bbox, cls, delta, delta_weight
    # vid[0][0], vid[0][1], vid[0][2], vid[0][3], vid[0][4]

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            # set writing video parameters
            height, width, channels = video[0][0].shape
            out = cv2.VideoWriter(
                os.path.join(args.savedir, video.name + '.avi'),
                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 15, (width, height))
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            data = {'template': None, 'search': None}
            for idx, (img, gt_bbox, cls, delta_cls, delta_w, _bbox,
                      cls_s, delta_cls_s, delta_w_s, _bbox_s) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                    nimg, sz, box, _ = tracker.crop(img, bbox=gt_bbox_, im_name='exemplar')
                    data['template'] = torch.autograd.Variable(nimg, requires_grad=True).cuda()
                elif idx > frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    nimg, sz, box, pad = tracker.crop(img, bbox=gt_bbox_,
                                                      is_template=False,
                                                      im_name='search' + str(idx))
                    [bT, bB, bL, bR] = box
                    sz = int(sz)
                    data['search'] = torch.autograd.Variable(nimg, requires_grad=True).cuda()
                    data['label_cls'] = torch.Tensor(cls_s).type(torch.LongTensor).cuda()
                    data['label_loc'] = torch.Tensor(delta_cls_s).type(torch.FloatTensor).cuda()
                    data['label_loc_weight'] = torch.Tensor(delta_w_s).cuda()
                    outputs = model(data)
                    cls_loss = outputs['cls_loss']
                    loc_loss = outputs['loc_loss']
                    total_loss = outputs['total_loss']
                    total_loss.backward()
                    data_grad = data['search'].grad
                    # torch.Tensor(img.transpose([2, 0, 1])).unsqueeze(dim=0)
                    perturb_data = fgsm_attack(data['search'], epsilon, data_grad)
                    # cv2.imwrite(os.path.join(args.savedir, 'original_' + str(idx) + '.jpg'), img)
                    # _img = perturb_data.data.cpu().numpy().squeeze().transpose([1, 2, 0])
                    # cv2.imwrite(os.path.join(args.savedir, 'perturb_' + str(idx) + '.jpg'), _img)
                    if not np.array_equal(cfg.TRACK.INSTANCE_SIZE, sz):
                        perturb_data = F.interpolate(perturb_data, size=sz)
                    _img = perturb_data.data.cpu().numpy().squeeze().transpose([1, 2, 0])
                    # cv2.imwrite(os.path.join(args.savedir, 'crop_full_' + str(idx) + '.jpg'), _img)
                    # paste the perturbed crop back into the full frame
                    nh, nw, _ = _img.shape
                    img[bT:bB + 1, bL:bR + 1, :] = _img[pad[0]:nh - pad[1], pad[2]:nw - pad[3], :]
                    # cv2.imwrite(os.path.join(args.savedir, 'perturb_full_' + str(idx) + '.jpg'), img)
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        print('*************** lost ***************')
                        import pdb
                        pdb.set_trace()
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                        print(idx, torch.sum(data_grad, (2, 3)))
                        print(idx, torch.sum(torch.abs(torch.sum(data_grad, (2, 3))), (0, 1)))
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img, [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img, [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
                # save tracking image
                bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.putText(img, str(lost_number), (40, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                # cv2.imwrite(os.path.join(args.savedir, 'track_' + str(idx) + '.jpg'), img)
                out.write(img)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                                  (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx + 1, video.name, toc, idx / toc))
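# `fgsm_attack` is called in the loop above but not defined here. The name
# suggests the standard fast-gradient-sign step; a sketch under that
# assumption (pysot feeds images as 0-255 float tensors, hence the clamp):
import torch

def fgsm_attack(image, epsilon, data_grad):
    # perturb each pixel by epsilon in the direction that increases the loss
    perturbed = image + epsilon * data_grad.sign()
    return torch.clamp(perturbed, 0, 255)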
def main():
    '''change save_path to yours'''
    save_path = '/home/masterbin-iiau/Desktop/AdvTrack-project/supplementary/%s' % args.video
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    # load config
    cfg.merge_from_file(args.config)

    dataset_root = os.path.join(dataset_root_, args.dataset)

    # create model
    '''a model is a neural network (a torch.nn.Module)'''
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    '''a tracker is an object consisting of not only a NN but also some post-processing'''
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    '''GAN'''
                    outputs = tracker.track_supp(img, GAN, save_path, idx)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img, [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img, [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track_supp(img, GAN, save_path, idx)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                                  (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
from pysot.utils.model_load import load_pretrain
from pysot.models.model_builder import ModelBuilder
from pysot.core.config import cfg
from pysot.utils.bbox import get_axis_aligned_bbox
from pysot.tracker.tracker_builder import build_tracker  # needed for build_tracker below
from toolkit.datasets import DatasetFactory
from matplotlib.image import imread
from PIL import Image
from got10k.datasets import GOT10k
from got10k.utils.viz import show_frame

if __name__ == '__main__':
    cfg.merge_from_file('/home/sourabhswain/Documents/SiamRPN/config.yaml')
    net_path = '/home/sourabhswain/Documents/SiamRPN/model.pth'

    model = ModelBuilder()
    model = load_pretrain(model, net_path).cuda().eval()
    tracker = build_tracker(model)

    dataset = GOT10k(root_dir='/home/sourabhswain/Documents/SiamRPN/dataset', subset='val')
    # dataset = DatasetFactory.create_dataset(name='GOT-10k',
    #                                         dataset_root='/home/sourabhswain/Documents/SiamRPN/dataset',
    #                                         load_img=False)

    # the evaluation loop below is truncated in the original source
    """
    for v_idx, (video, anno) in enumerate(dataset):
        toc = 0
        pred_bboxes = []
        scores = []
    """
def main(frame_interval, interpolation_rate):
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img, [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (0, 0, 255), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img, [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (255, 0, 0), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    # was moveWindow("Result", ...), a window that never existed
                    cv2.moveWindow(video.name, 100, 100)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # # save results
            # video_path = os.path.join('results', args.dataset, model_name,
            #                           'baseline', video.name)
            # if not os.path.isdir(video_path):
            #     os.makedirs(video_path)
            # result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            # with open(result_path, 'w') as f:
            #     for x in pred_bboxes:
            #         if isinstance(x, int):
            #             f.write("{:d}\n".format(x))
            #         else:
            #             f.write(','.join([vot_float2str("%.4f", i) for i in x])+'\n')
            # print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
            #     v_idx+1, video.name, toc, idx / toc, lost_number))
            # total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # FPS list
        fps_list = []
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            # parameters
            # frame_interval = 2
            # interpolation_rate = 0.005
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]  # (left-top, width, height)
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']  # (left-top, width, height)
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                    ######################################
                    # Adaptive template (exemplar update) #
                    ######################################
                    if idx % frame_interval == 0:
                        tracker.update_z(img, pred_bbox,
                                         interpolation_rate=interpolation_rate)
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                                  (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                                  (255, 0, 0), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.moveWindow(video.name, 20, 20)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path, '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                # OTB-100 results land here
                result_folder_name = "results_{0:d}frame_exemplar_update_rate_{1:s}".format(
                    frame_interval, str(interpolation_rate))
                model_path = os.path.join(result_save_base_path, result_folder_name,
                                          args.dataset, model_name)
                # model_path = os.path.join(result_save_base_path, 'results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx + 1, video.name, toc, idx / toc))
            # FPS result
            fps = idx / toc
            fps_list.append(fps)
        # make FPS result path
        fps_array = np.asarray(fps_list).reshape(-1, 1)
        fps_file_name = "model_fps__[{:3.1f}].txt".format(np.average(fps_array))
        model_fps_file = os.path.join(os.path.dirname(model_path), "../", fps_file_name)
        np.savetxt(model_fps_file, fps_array)
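# `tracker.update_z` is not part of stock pysot; a sketch of what such an
# exemplar-update method might look like. The self.zf / self.channel_average
# attribute names and the list-of-features layout are assumptions:
def update_z(self, img, bbox, interpolation_rate=0.005):
    # recrop the exemplar around the current prediction, as in
    # SiamRPNTracker.init(), then blend its features into the stored template
    cx = bbox[0] + (bbox[2] - 1) / 2
    cy = bbox[1] + (bbox[3] - 1) / 2
    w_z = bbox[2] + cfg.TRACK.CONTEXT_AMOUNT * (bbox[2] + bbox[3])
    h_z = bbox[3] + cfg.TRACK.CONTEXT_AMOUNT * (bbox[2] + bbox[3])
    s_z = round(np.sqrt(w_z * h_z))
    z_crop = self.get_subwindow(img, (cx, cy), cfg.TRACK.EXEMPLAR_SIZE,
                                s_z, self.channel_average)
    zf_new = self.model.backbone(z_crop)
    r = interpolation_rate
    # linear interpolation: old template dominates for small r
    self.zf = [(1 - r) * zf + r * zn for zf, zn in zip(self.zf, zf_new)]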
if __name__ == "__main__":
    torch.set_num_threads(1)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    # load config
    # cfg.merge_from_file(args.config)
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    # dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    dataset_root = os.path.join("/ssd", args.dataset)

    # create model
    net = models.__dict__[args.arch](anchors_nums=args.anchor_nums,
                                     cls_type=args.cls_type)
    net = load_pretrain(net, args.snapshot)
    net.eval()
    net = net.cuda()

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    # eval dataset
    # root = os.path.realpath(os.path.join(os.path.dirname(__file__),
    #                                      '../testing_dataset'))
    root = "/ssd"
    root = os.path.join(root, args.dataset)
    if 'OTB' in args.dataset:
        dataset_eval = OTBDataset(args.dataset, root)
def main():
    # load config
    cfg.merge_from_file(args.config)
    # cur_dir = os.path.dirname(os.path.realpath(__file__))
    # dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    # UAVDTdataset = '/media/li/CA5CF8AE5CF89683/research/UAVDT/'
    # UAVDTdataset = '/media/li/DATA/VisDrone2019-SOT/'
    UAVDTdataset = args.datasetroot
    dataset_root = os.path.join(UAVDTdataset)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker_f(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-2].split('.')[0]
    torch.cuda.synchronize()

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        # if v_idx > 40:
        o_path = os.path.join('results_rt_raw_f', args.dataset, model_name)
        if not os.path.isdir(o_path):
            os.makedirs(o_path)
        out_path = os.path.join('results_rt_raw_f', args.dataset, model_name,
                                video.name + '.pkl')
        if os.path.isfile(out_path):
            print('({:3d}) Video: {:12s} already done!'.format(v_idx + 1, video.name))
            continue
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        input_fidx = []
        runtime = []
        timestamps = []
        last_fidx = None
        n_frame = len(video)
        t_total = n_frame / args.fps
        t_start = perf_counter()
        while 1:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed > t_total:
                break
            # identify the latest available frame
            fidx_continous = t_elapsed * args.fps
            fidx = int(np.floor(fidx_continous))
            # if the tracker finishes the current frame before the next one
            # arrives, busy-wait for the next frame
            if fidx == last_fidx:
                continue
            last_fidx = fidx
            tic = cv2.getTickCount()
            (img, gt_bbox) = video[fidx]
            if fidx == 0:
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                tracker.init(img, gt_bbox_)
                torch.cuda.synchronize()
                t2 = perf_counter()
                t_elapsed = t2 - t_start
                timestamps.append(t_elapsed)
                runtime.append(t2 - t1)
                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)
                input_fidx.append(fidx)
            else:
                # forecast the last box forward to the frame actually processed
                box_f = tracker.forecaster.forecast(fidx, input_fidx[-1],
                                                    np.array([pred_bboxes[-1]]))
                outputs = tracker.track(img, box_f[0])
                torch.cuda.synchronize()
                t2 = perf_counter()
                t_elapsed = t2 - t_start
                timestamps.append(t_elapsed)
                runtime.append(t2 - t1)
                pred_bbox = outputs['bbox']
                pred_bboxes.append(pred_bbox)
                scores.append(outputs['best_score'])
                input_fidx.append(fidx)
                if t_elapsed > t_total:
                    break
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())

        # save results and run time
        if args.overwrite or not os.path.isfile(out_path):
            pickle.dump({
                'results_raw': pred_bboxes,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }, open(out_path, 'wb'))
        toc /= cv2.getTickFrequency()
        # save results
        # model_path = os.path.join('results', args.dataset, model_name)
        # if not os.path.isdir(model_path):
        #     os.makedirs(model_path)
        # result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        # with open(result_path, 'w') as f:
        #     for x in pred_bboxes:
        #         f.write(','.join([str(i) for i in x])+'\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, fidx / toc))
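# The real-time protocol above always processes the newest available frame:
# with stream rate args.fps and elapsed wall time t, the frame index is
# fidx = floor(t * fps), so a slow tracker skips frames instead of lagging.
# Toy numbers (30 fps assumed):
import numpy as np
for t_elapsed in (0.05, 0.20):
    print(int(np.floor(t_elapsed * 30)))  # -> 1, then 6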
def main():
    is_gpu_cuda_available = torch.cuda.is_available()
    if not is_gpu_cuda_available:
        raise RuntimeError(
            'Failed to locate a CUDA GPU. Program cannot continue.')
    num_gpus = torch.cuda.device_count()
    gpu_type = torch.cuda.get_device_name(0)
    print(f"You have {num_gpus} GPU(s) available of type: {gpu_type}")
    print("This might take a few minutes...grab a cup of coffee\n")

    # load config
    cfg.merge_from_file(args.config)
    dataset_root = os.path.join(args.dataset_directory, args.dataset)
    print(f"dataset root-->{dataset_root}")

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.model_name
    print(f"Model name is {model_name}")
    total_lost = 0
    if args.dataset in vot_like_dataset:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # convert [x, y, w, h] to the 8-value corner polygon
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1,
                               gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0.85:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + args.skip_frames  # skip frames after a loss
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img,
                                  [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img,
                                      [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

            # save results
            save_path = os.path.join(args.results_path, args.dataset,
                                     model_name, args.experiment_name,
                                     video.name)
            if not os.path.isdir(save_path):
                os.makedirs(save_path)
            result_path = os.path.join(save_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            with open(os.path.join(save_path, '..', 'lost.txt'), 'a+') as f:
                f.write(f"{v_idx+1} Class: {video.name} | Time: {toc}s | "
                        f"Speed: {idx/toc}fps | Lost: {lost_number} \n")
            print('({:3d}) Class: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps '
                  'Lost: {:d}'.format(v_idx + 1, video.name, toc,
                                      idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
        with open(os.path.join(save_path, '..', 'lost.txt'), 'a+') as f:
            f.write(f"Model architecture used --> {model_name} \n"
                    f"total lost: {total_lost} \n")
            f.write(f"SKIP FRAMES USED --> {args.skip_frames}")
    else:
        # OPE tracking: will be implemented if needed in the future
        pass
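# Several of these scripts repeat the same inline conversion from an
# [x, y, w, h] rectangle to the 8-value corner polygon the VOT toolkit
# expects. A standalone sketch of that conversion (the helper name is
# illustrative, not part of the toolkit):
def rect_to_polygon(bbox):
    # corners ordered top-left, bottom-left, bottom-right, top-right,
    # matching the inline conversion in the loops above
    x, y, w, h = bbox
    return [x, y,
            x, y + h - 1,
            x + w - 1, y + h - 1,
            x + w - 1, y]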
def main():
    # load config
    # save_siamese_rpn()
    cfg.merge_from_file(args.config)
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    # dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    dataset_root = datasets_root + args.dataset

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()
    # save_backbone(model)

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    # multi-pass tracking: the protocol re-initializes the tracker after a loss
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            # pred_bboxes holds two kinds of entries. Type 1: integers 1, 2
            # and 0, marking tracking start, tracking loss, and the
            # placeholder for frames skipped after a loss. Type 2: float
            # bboxes, i.e. the actual tracking results.
            pred_bboxes = []
            gru_seq_len = tracker.model.grus.seq_in_len
            video_len = len(video)
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # if the gt is [x, y, w, h], convert it to 8 coordinates
                    # (x1, y1, x2, y2, x3, y3, x4, y4)
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1,
                               gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                # tracker (re-)initialization
                if idx == frame_counter:
                    # initialize on the first frame of this pass: take the
                    # indices of the gru_seq_len frames following idx
                    idxs = list(map(lambda x, y: x + y,
                                    [idx] * gru_seq_len,
                                    list(range(0, gru_seq_len))))
                    # clamp so the indices do not run past the video
                    idxs = list(map(lambda x: min(x, video_len - 1), idxs))
                    tracker.template_idx = 0  # first frame of template init
                    for k in idxs:
                        # initialize from gru_seq_len consecutive frames
                        init_img, init_gt_bbox = video[k]
                        # init_img, init_gt_bbox = video[idxs[0]]  # use only one frame for initialization
                        # convert the 4 corners of the rotated box to a bbox
                        # in center form (cx, cy, w, h)
                        cx, cy, w, h = get_axis_aligned_bbox(
                            np.array(init_gt_bbox))
                        # convert center form to top-left form
                        init_gt_bbox = [cx - (w - 1) / 2, cy - (h - 1) / 2,
                                        w, h]
                        tracker.init_gru(init_img, init_gt_bbox, k)
                        if k == 0:
                            pred_bbox = init_gt_bbox
                            pred_bboxes.append(1)
                # continued tracking on subsequent frames
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    # only update the template when the output score is high
                    if outputs['best_score'] > 0.95:
                        tracker.init_gru(img, pred_bbox, idx)
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    # inspect the relation between detection IoU and score on
                    # the first frame after re-initialization
                    # if tracker.template_idx == 4:
                    #     print("{:3.2f}\t{:3.2f}".format(overlap, outputs['best_score']))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                # draw outputs: gt and mask as polygons, the tracked bbox as
                # a rectangle
                if args.vis and idx > frame_counter:
                    # draw the gt polygon
                    cv2.polylines(img,
                                  [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    # draw the polygon output by the siamese mask branch
                    if cfg.MASK.MASK:
                        cv2.polylines(img,
                                      [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    # draw the output rectangle
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    # annotate with frame index and loss count
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            # result path layout: ./results/VOT2018/model/baseline/ants1/ants1_001.txt
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            # pred_bboxes mixes integers (1 start, 2 lost, 0 placeholder for
            # skipped frames) with float bboxes, the actual tracking results
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        # an integer marks a start or a loss
                        f.write("{:d}\n".format(x))
                    else:
                        # only floats are bboxes
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps '
                  'Lost: {:d}'.format(v_idx + 1, video.name, toc,
                                      idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    # OPE tracking: the tracker is not re-initialized after a loss
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) /
                                   cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2],
                                   gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write(
                            "{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset,
                                          model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.
                  format(v_idx + 1, video.name, toc, idx / toc))
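# The VOT-style result files above interleave integer markers with float
# boxes. A sketch of that encoding factored into a helper (the helper
# name is illustrative; vot_float2str is the formatter already used in
# these scripts):
def write_vot_result_file(path, pred_bboxes):
    # 1 = init frame, 2 = lost, 0 = placeholder for skipped frames;
    # any non-integer entry is written as a comma-separated float bbox
    with open(path, 'w') as f:
        for x in pred_bboxes:
            if isinstance(x, int):
                f.write("{:d}\n".format(x))
            else:
                f.write(','.join([vot_float2str("%.4f", i)
                                  for i in x]) + '\n')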
def main():
    # load config
    cfg.merge_from_file(args.config)
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../test_dataset', args.dataset)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = SiamAPNTracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0] + str(cfg.TRACK.w1)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            tic = cv2.getTickCount()
            if idx == 0:
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                tracker.init(img, gt_bbox_)
                pred_bbox = gt_bbox_
                scores.append(None)
                if 'VOT2018-LT' == args.dataset:
                    pred_bboxes.append([1])
                else:
                    pred_bboxes.append(pred_bbox)
            else:
                outputs = tracker.track(img)
                pred_bbox = outputs['bbox']
                pred_bboxes.append(pred_bbox)
                scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) /
                               cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                              (gt_bbox[0] + gt_bbox[2],
                               gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0] + pred_bbox[2],
                               pred_bbox[1] + pred_bbox[3]), (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        model_path = os.path.join('results', args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
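# Every script above converts the center-form box returned by
# get_axis_aligned_bbox into the top-left [x, y, w, h] form that the
# trackers' init() expects. A minimal sketch of that conversion factored
# out (the helper name is illustrative, not part of the toolkit):
def center_to_topleft(cx, cy, w, h):
    # shift the center by half the size; the -1 keeps the integer pixel
    # convention used throughout these loops
    return [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]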
def main():
    mode = args.mode
    # load config
    cfg.merge_from_file(args.config)
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(args.dataset_dir, args.dataset)
    epsilon = args.epsilon

    # create model
    track_model = ModelBuilder()
    track_model0 = ModelBuilder()
    lr = args.lr

    # load model
    track_model = load_pretrain(track_model, args.snapshot).cuda().eval()
    track_model0 = load_pretrain(track_model0, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(track_model)
    tracker0 = build_tracker(track_model0)

    attacker = ModelAttacker().cuda().train()
    optimizer = optim.Adam(attacker.parameters(), lr=lr)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False,
                                            dataset_toolkit='oneshot',
                                            config=cfg)
    # vid.name = {'ants1', 'ants3', ...}
    # img, bbox, cls, delta, delta_weight
    # vid[0][0], vid[0][1], vid[0][2], vid[0][3], vid[0][4]
    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    n_epochs = args.epochs

    # freeze the tracker; only the attacker is trained
    for name, param in tracker.model.named_parameters():
        param.requires_grad_(False)
    # for name, param in attacker.named_parameters():
    #     if 'backbone' in name or 'neck' in name or 'rpn_head' in name:
    #         param.requires_grad_(False)
    # for name, param in tracker2.model.named_parameters():
    #     if 'backbone' in name or 'neck' in name or 'rpn_head' in name:
    #         param.requires_grad_(False)
    #     elif param.requires_grad:
    #         param.requires_grad_(True)
    #         # print(name, param.data)
    #         print('grad true ', name)
    #     else:
    #         print('grad false ', name)

    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019', 'OTB100']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            else:
                if not os.path.exists(os.path.join(args.savedir, video.name)):
                    os.mkdir(os.path.join(args.savedir, video.name))

            # set video writer parameters
            height, width, channels = video[0][0].shape
            out = cv2.VideoWriter(
                os.path.join(args.savedir, video.name + '.avi'),
                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 15,
                (width, height))

            frame_counter = 0
            frame_counter_adv = 0
            lost_number = 0
            lost_number_adv = 0
            toc = 0
            pred_bboxes = []
            pred_bboxes_adv = []
            pbar = tqdm(enumerate(video))
            for idx, (img, gt_bbox) in pbar:
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1,
                               gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                tic = cv2.getTickCount()

                # state-of-the-art tracking
                if mode == 0:
                    pred_bbox, _lost, frame_counter = stoa_track(
                        idx, frame_counter, img, gt_bbox, tracker0)

                # adversarial tracking
                if idx == 0:
                    state = {'img': img, 'gt_bbox': gt_bbox,
                             'video_name': video.name}
                else:
                    state['img'] = img
                if mode == 1:
                    optimizer, state, ad_bbox = \
                        adversarial_train(idx, frame_counter_adv, state,
                                          attacker, tracker, optimizer, pbar)
                    if idx == 0:
                        break
                toc += cv2.getTickCount() - tic
                if idx > 0 and mode == 0:
                    bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 255), 3)
                if idx > 0 and mode == 1:
                    ad_bbox = list(map(int, ad_bbox))
                    cv2.rectangle(img, (ad_bbox[0], ad_bbox[1]),
                                  (ad_bbox[0] + ad_bbox[2],
                                   ad_bbox[1] + ad_bbox[3]), (0, 0, 255), 3)
                __gt_bbox = list(map(int, gt_bbox_))
                cv2.rectangle(img, (__gt_bbox[0], __gt_bbox[1]),
                              (__gt_bbox[0] + __gt_bbox[2],
                               __gt_bbox[1] + __gt_bbox[3]), (0, 0, 0), 3)
                cv2.putText(img, str(idx), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
                out.write(img)
            toc /= cv2.getTickFrequency()

            if mode == 0:
                # save results
                if args.dataset == 'OTB100':
                    model_path = os.path.join('results', args.dataset,
                                              model_name)
                    if not os.path.isdir(model_path):
                        os.makedirs(model_path)
                    result_path = os.path.join(model_path,
                                               '{}.txt'.format(video.name))
                    with open(result_path, 'w') as f:
                        for x in pred_bboxes_adv:
                            f.write(','.join([str(i) for i in x]) + '\n')
                else:
                    video_path = os.path.join('results', args.dataset,
                                              model_name, 'baseline',
                                              video.name)
                    if not os.path.isdir(video_path):
                        os.makedirs(video_path)
                    result_path = os.path.join(
                        video_path, '{}_001.txt'.format(video.name))
                    # ii = 0
                    # with open(result_path, 'r') as f:
                    #     xs = f.readlines()
                    #     for x in xs:
                    #         if ii == 0:
                    #             pred_bboxes_adv[0] = ','.join(
                    #                 [vot_float2str("%.4f", i)
                    #                  for i in pred_bboxes_adv[0]]) + '\n'
                    #         else:
                    #             pred_bboxes_adv.append(x)
                    #         ii += 1
                    # with open(result_path, 'w') as f:
                    #     for x in pred_bboxes_adv:
                    #         f.write(x)
                    with open(result_path, 'w') as f:
                        for x in pred_bboxes_adv:
                            if isinstance(x, int):
                                f.write("{:d}\n".format(x))
                            else:
                                f.write(','.join(
                                    [vot_float2str("%.4f", i)
                                     for i in x]) + '\n')
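# The loop above freezes every tracker parameter so only the attacker's
# weights receive gradients. A minimal, self-contained sketch of the same
# freeze-then-optimize pattern (the two Linear modules are illustrative
# stand-ins for the tracker and the attacker):
import torch.nn as nn
import torch.optim as optim

victim = nn.Linear(8, 8)    # stands in for the frozen tracker model
attacker_net = nn.Linear(8, 8)  # stands in for the trainable attacker

for param in victim.parameters():
    param.requires_grad_(False)  # no gradients flow into the victim

# only the attacker's parameters are handed to the optimizer
optimizer = optim.Adam(attacker_net.parameters(), lr=1e-3)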
    raise NotImplementedError


if __name__ == '__main__':
    torch.backends.cudnn.benchmark = True
    cfg.merge_from_file(args.config)
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)
    benchmark = EAOBenchmark(dataset)
    model = ModelBuilder()
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # the resources your machine has; object_store_memory is backed by shm
    ray.init(num_gpus=1, num_cpus=8, object_store_memory=30000000000)
    tune.register_trainable('fitness', fitness)

    # define the search space
    params = {
        'penalty_k': hp.quniform('penalty_k', 0.001, 0.6, 0.001),
        'lr': hp.quniform('scale_lr', 0.1, 0.8, 0.001),
        'window_influence': hp.quniform('window_influence', 0.05, 0.65, 0.001),
        'search_region': hp.choice('search_region', [255]),
    }

    # stop condition for VOT and OTB
    if args.dataset.startswith('VOT'):
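# hp.quniform above draws quantized-uniform samples: uniform on the given
# range, rounded to the nearest multiple of the step. A self-contained
# sketch of driving such a space with hyperopt's fmin, using a toy
# objective in place of the real EAO fitness (hyperopt minimizes, so a
# tracker metric like EAO would be returned negated):
from hyperopt import fmin, tpe, hp

def toy_objective(p):
    # illustrative stand-in for the fitness function registered above
    return (p['penalty_k'] - 0.1) ** 2 + (p['lr'] - 0.4) ** 2

space = {
    'penalty_k': hp.quniform('penalty_k', 0.001, 0.6, 0.001),
    'lr': hp.quniform('scale_lr', 0.1, 0.8, 0.001),
}
best = fmin(fn=toy_objective, space=space, algo=tpe.suggest, max_evals=50)
print(best)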
def main():
    # load config
    cfg.merge_from_file(args.config)

    # hp_search
    params = getattr(cfg.HP_SEARCH, args.dataset)
    hp = {'lr': params[0], 'penalty_k': params[1], 'window_lr': params[2]}

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = SiamCARTracker(model, cfg.TRACK)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-2] + str(hp['lr']) + '_' + \
        str(hp['penalty_k']) + '_' + str(hp['window_lr'])

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            tic = cv2.getTickCount()
            if idx == 0:
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                tracker.init(img, gt_bbox_)
                pred_bbox = gt_bbox_
                pred_bboxes.append(pred_bbox)
            else:
                outputs = tracker.track(img, hp)
                pred_bbox = outputs['bbox']
                pred_bboxes.append(pred_bbox)
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) /
                               cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                if not any(map(math.isnan, gt_bbox)):
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2],
                                   gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        model_path = os.path.join('results', args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))

    # package the results after the loop; use the dataset name (the
    # flattened original interpolated the dataset object itself, which
    # stringifies to its repr)
    os.chdir(model_path)
    save_file = '../%s' % args.dataset
    shutil.make_archive(save_file, 'zip')
    print('Records saved at', save_file + '.zip')
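# shutil.make_archive archives root_dir, which defaults to the current
# working directory; that is why the script chdirs into the results
# folder before zipping. Passing root_dir explicitly avoids the chdir,
# as in this small sketch (paths are illustrative):
import shutil

# writes results/UAV123.zip containing the model's result files
shutil.make_archive('results/UAV123', 'zip',
                    root_dir='results/UAV123/mymodel')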
def main():
    # load config
    cfg.merge_from_file(args.config)
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    # create model
    model = ModelBuilder(cfg)

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1,
                               gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img,
                                  [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img,
                                      [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps '
                  'Lost: {:d}'.format(v_idx + 1, video.name, toc,
                                      idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic) /
                                   cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0] + gt_bbox[2],
                                   gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]),
                                  (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write(
                            "{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset,
                                          model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.
                  format(v_idx + 1, video.name, toc, idx / toc))
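# vot_overlap above scores a predicted region against the ground-truth
# polygon, bounded by the image size. For plain axis-aligned [x, y, w, h]
# boxes the same idea reduces to ordinary IoU; a simplified sketch, not
# the toolkit's polygon-aware implementation:
def bbox_iou(a, b):
    # intersection-over-union of two [x, y, w, h] boxes
    iw = min(a[0] + a[2], b[0] + b[2]) - max(a[0], b[0])
    ih = min(a[1] + a[3], b[1] + b[3]) - max(a[1], b[1])
    if iw <= 0 or ih <= 0:
        return 0.0
    inter = iw * ih
    union = a[2] * a[3] + b[2] * b[3] - inter
    return inter / union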
def main():
    # load config
    cfg.merge_from_file(args.config)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # model_name = args.snapshot.split('/')[-1].split('.')[0]
    # total_lost = 0
    # cur_dir = os.path.dirname(os.path.realpath(__file__))
    # dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    video_path = '/home/yuuzhao/Documents/project/pysot/testing_dataset/VOT2016'
    # lists = open('/home/lichao/tracking/LaSOT_Evaluation_Toolkit/sequence_evaluation_config/' + setfile + '.txt', 'r')
    # list_file = [line.strip() for line in lists]
    category = os.listdir(video_path)
    category.sort()

    # create dataset
    # dataset = DatasetFactory.create_dataset(name=args.dataset,
    #                                         dataset_root=dataset_root,
    #                                         load_img=False)
    template_acc = []
    template_cur = []
    init0 = []  # init0 is the reset init
    init = []
    pre = []
    gt = []

    print("Category & Video:")
    for tmp_cat in category:
        tmp_cat_path = temp_path + '/' + tmp_cat
        if not os.path.isdir(tmp_cat_path):
            os.makedirs(tmp_cat_path)
        print("Category:", tmp_cat)
        video = os.listdir(join(video_path, tmp_cat))
        video.sort()
        # video_cut = video[0:frames_of_each_video]
        frame = 0
        # for picture in video_cut:  # this loop should perhaps be removed
        #     print("Frame:", picture)
        gt_path = join(video_path, tmp_cat, 'groundtruth.txt')
        ground_truth = np.loadtxt(gt_path, delimiter=',')
        # num_frames = len(ground_truth)
        # num_frames = min(num_frames, frame_max)
        num_frames = frames_of_each_video
        # print("num_frames: ", num_frames)
        img_path = join(video_path, tmp_cat)
        # print("imgpath", img_path)
        imgFiles = [join(img_path, '%08d.jpg') % i
                    for i in range(1, num_frames + 1)]
        while frame < num_frames:
            print("frame:", frame)
            Polygon = ground_truth[frame]
            cx, cy, w, h = get_axis_aligned_bbox(Polygon)
            gt_rect = [cx, cy, w, h]
            image_file = imgFiles[frame]
            # target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
            img = cv2.imread(image_file)  # HxWxC
            if frame == 0:
                tracker.init(img, gt_rect)
            if w * h != 0:
                # image_file = imgFiles[frame]
                # target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
                # img = cv2.imread(image_file)  # HxWxC
                zf_acc = tracker.get_zf(img, gt_rect)
                output = tracker.track(img)
                pre_rect = output['bbox']
                zf_pre = tracker.get_zf(img, pre_rect)
                template_acc.append(zf_acc)
                template_cur.append(zf_pre)
                print("ACC&PRE")
                init0.append(0)
                init.append(frame)
                frame_reset = 0
                pre.append(0)
                gt.append(1)
                while frame < (num_frames - 1):
                    print("while ", frame, "<", num_frames)
                    frame = frame + 1
                    frame_reset = frame_reset + 1
                    image_file = imgFiles[frame]
                    if not image_file:
                        break
                    Polygon = ground_truth[frame]
                    cx, cy, w, h = get_axis_aligned_bbox(Polygon)
                    gt_rect = [cx, cy, w, h]
                    img = cv2.imread(image_file)  # HxWxC
                    zf_acc = tracker.get_zf(img, gt_rect)
                    output = tracker.track(img)
                    pre_rect = output['bbox']
                    zf_pre = tracker.get_zf(img, pre_rect)
                    # print("zf_pre:", zf_pre.shape)
                    # print("zf_acc:", zf_acc.shape)
                    # pdb.set_trace()
                    template_acc.append(zf_acc)
                    template_cur.append(zf_pre)
                    init0.append(frame_reset)
                    init.append(frame)
                    pre.append(1)
                    if frame == (num_frames - 1):  # last frame
                        print("if frame == num_frames-1")
                        gt.append(0)
                    else:
                        gt.append(1)
                    pre_rect_arr = np.array(pre_rect)
                    cx, cy, w, h = get_axis_aligned_bbox(pre_rect_arr)
                    target_pos, target_siz = np.array([cx, cy]), np.array([w, h])
                    res = cxy_wh_2_rect(target_pos, target_siz)
                    if reset:
                        cx, cy, w, h = get_axis_aligned_bbox(
                            ground_truth[frame])
                        gt_rect = [cx, cy, w, h]
                        gt_rect = np.array(gt_rect)
                        iou = overlap_ratio(gt_rect, res)
                        if iou <= 0:
                            break
            else:
                print("else")
                template_acc.append(torch.zeros([1, 3, 127, 127],
                                                dtype=torch.float32))
                template_cur.append(torch.zeros([1, 3, 127, 127],
                                                dtype=torch.float32))
                init0.append(0)
                init.append(frame)
                pre.append(1)
                if frame == (num_frames - 1):  # last frame
                    gt.append(0)
                else:
                    gt.append(1)
            frame = frame + 1  # skip

        # write out once per video
        # print("template_acc:", template_acc)
        # print("template_cur:", template_cur)
        # print("init:", init)
        # print("init0:", init0)
        # print("pre:", pre)
        # template_acc_con = np.concatenate(template_acc)
        # template_cur_con = np.concatenate(template_cur)
        print("write for each video")
        np.save(tmp_cat_path + '/template', template_acc)
        np.save(tmp_cat_path + '/templatei', template_cur)
        np.save(tmp_cat_path + '/init0', init0)
        np.save(tmp_cat_path + '/init', init)
        np.save(tmp_cat_path + '/pre', pre)
        np.save(tmp_cat_path + '/gt', gt)
    print("template")