def main():
    """Evaluate a SiamMask model on a VOT/VOS benchmark and log the results."""
    global args, logger, v_id
    args = parser.parse_args()
    cfg = load_config(args)

    # Logging: console always, plus an optional file sink.
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)
    logger = logging.getLogger('global')
    logger.info(args)

    # Build the network; only the 'Custom' architecture is supported here.
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    # Optionally restore pretrained weights.
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    device = torch.device('cuda' if (torch.cuda.is_available()) else 'cpu')
    model = model.to(device)

    # Load the benchmark sequences.
    dataset = load_dataset(args.dataset)

    # Mask (VOS) output is only meaningful on the segmentation datasets.
    vos_enable = args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and bool(args.mask)

    # Hyper-parameters may or may not be present in the config.
    hp = cfg['hp'] if 'hp' in cfg else None

    total_lost = 0   # VOT: accumulated lost count
    iou_lists = []   # VOS: per-video IoU curves
    speed_list = []

    for v_id, video in enumerate(dataset.keys(), start=1):
        # Optionally restrict the run to a single named video.
        if args.video != '' and video != args.video:
            continue
        if vos_enable:
            iou_list, speed = track_vos(model, dataset[video], hp,
                                        args.mask, args.refine,
                                        args.dataset in ['DAVIS2017', 'ytb_vos'],
                                        device=device)
            iou_lists.append(iou_list)
        else:
            lost, speed = track_vot(model, dataset[video], hp,
                                    args.mask, args.refine, device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if vos_enable:
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))
    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))
class SingleTracker(object):
    """Single-object SiamMask tracker: build the template once, then track frame by frame.

    Fixes over the original:
      * ``astype(np.float)`` -> ``astype(float)`` (the ``np.float`` alias was
        deprecated in NumPy 1.20 and removed in 1.24, crashing ``crop_back``).
      * version-agnostic ``cv2.findContours`` unpacking (the old
        ``cv2.__version__[-5] == '4'`` check breaks on versions such as
        "4.5.5.64").
    """

    def __init__(self, config_path, model_path):
        """Load config + weights and precompute anchors and the penalty window.

        config_path: path to the JSON config (network + hyper-parameters).
        model_path: path to the pretrained checkpoint.
        """
        args = TrackArgs()
        args.config = config_path
        args.resume = model_path
        cfg = load_config(args)
        # Only the 'Custom' architecture is supported.
        if args.arch == 'Custom':
            from custom import Custom
            self.model = Custom(anchors=cfg['anchors'])
        else:
            parser.error('invalid architecture: {}'.format(args.arch))
        # Restore pretrained weights.
        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
            self.model = load_pretrain(self.model, args.resume)
        self.model.eval()
        self.device = torch.device('cuda' if (torch.cuda.is_available() and not args.cpu) else 'cpu')
        self.model = self.model.to(self.device)

        ################# Dangerous: shared tracker state reused across calls
        self.p = TrackerConfig()
        self.p.update(cfg['hp'] if 'hp' in cfg.keys() else None, self.model.anchors)
        self.p.renew()
        self.p.scales = self.model.anchors['scales']
        self.p.ratios = self.model.anchors['ratios']
        self.p.anchor_num = self.model.anchor_num
        self.p.anchor = generate_anchor(self.model.anchors, self.p.score_size)
        # Spatial penalty window over the score map (tiled once per anchor).
        if self.p.windowing == 'cosine':
            self.window = np.outer(np.hanning(self.p.score_size), np.hanning(self.p.score_size))
        elif self.p.windowing == 'uniform':
            self.window = np.ones((self.p.score_size, self.p.score_size))
        self.window = np.tile(self.window.flatten(), self.p.anchor_num)
        ################

    def get_examplar_feature(self, img, target_pos, target_sz):
        """Crop the context-padded exemplar around the target and return its template feature.

        img: frame as an H x W x C array. target_pos: (x, y) centre.
        target_sz: (w, h). Returns the network's template feature.
        """
        avg_chans = np.mean(img, axis=(0, 1))
        # Context-padded exemplar square (SiamFC-style).
        wc_z = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_z = round(np.sqrt(wc_z * hc_z))
        # initialize the exemplar
        examplar = get_subwindow_tracking(img, target_pos, self.p.exemplar_size, s_z, avg_chans)
        z = Variable(examplar.unsqueeze(0))
        return self.model.template(z.to(self.device))

    def siamese_track(self, img, target_pos, target_sz, examplar_feature,
                      debug=False, mask_enable=True, refine_enable=True):
        """Advance the tracker by one frame.

        img: new frame. target_pos/target_sz: previous target state.
        examplar_feature: output of get_examplar_feature().
        Returns (target_pos, target_sz, score, mask); mask is [] when
        mask_enable is False, otherwise the mask scores warped to image space.
        """
        avg_chans = np.mean(img, axis=(0, 1))
        im_h = img.shape[0]
        im_w = img.shape[1]
        # Context-padded target square in the previous frame.
        wc_x = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_x = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_x = np.sqrt(wc_x * hc_x)
        # The original crop computation (exemplar scale + d_search padding)
        # algebraically reduces to these three lines.
        scale_x = self.p.exemplar_size / s_x
        s_x = self.p.instance_size / self.p.exemplar_size * s_x
        crop_box = [target_pos[0] - round(s_x) / 2,
                    target_pos[1] - round(s_x) / 2,
                    round(s_x), round(s_x)]

        # extract scaled crops for search region x at previous target position
        x_crop = Variable(get_subwindow_tracking(img, target_pos, self.p.instance_size,
                                                 round(s_x), avg_chans).unsqueeze(0))

        if mask_enable:
            score, delta, mask = self.model.track_mask(examplar_feature, x_crop.to(self.device))
        else:
            score, delta = self.model.track(examplar_feature, x_crop.to(self.device))

        # Flatten to (4, A*H*W) regressions and (A*H*W,) foreground scores.
        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
        score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0),
                          dim=1).data[:, 1].cpu().numpy()

        # Decode anchor-relative offsets into crop coordinates.
        delta[0, :] = delta[0, :] * self.p.anchor[:, 2] + self.p.anchor[:, 0]
        delta[1, :] = delta[1, :] * self.p.anchor[:, 3] + self.p.anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * self.p.anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * self.p.anchor[:, 3]

        def change(r):
            # Symmetric ratio: max(r, 1/r).
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            sz2 = (w + pad) * (h + pad)
            return np.sqrt(sz2)

        def sz_wh(wh):
            pad = (wh[0] + wh[1]) * 0.5
            sz2 = (wh[0] + pad) * (wh[1] + pad)
            return np.sqrt(sz2)

        # size penalty
        target_sz_in_crop = target_sz * scale_x
        s_c = change(sz(delta[2, :], delta[3, :]) / (sz_wh(target_sz_in_crop)))  # scale penalty
        r_c = change((target_sz_in_crop[0] / target_sz_in_crop[1]) / (delta[2, :] / delta[3, :]))  # ratio penalty

        penalty = np.exp(-(r_c * s_c - 1) * self.p.penalty_k)
        pscore = penalty * score

        # cos window (motion model)
        pscore = pscore * (1 - self.p.window_influence) + self.window * self.p.window_influence
        best_pscore_id = np.argmax(pscore)

        pred_in_crop = delta[:, best_pscore_id] / scale_x
        lr = penalty[best_pscore_id] * score[best_pscore_id] * self.p.lr  # lr for OTB

        res_x = pred_in_crop[0] + target_pos[0]
        res_y = pred_in_crop[1] + target_pos[1]
        # Exponential smoothing of the size with adaptive learning rate.
        res_w = target_sz[0] * (1 - lr) + pred_in_crop[2] * lr
        res_h = target_sz[1] * (1 - lr) + pred_in_crop[3] * lr

        target_pos = np.array([res_x, res_y])
        target_sz = np.array([res_w, res_h])

        # for Mask Branch
        if mask_enable:
            best_pscore_id_mask = np.unravel_index(best_pscore_id,
                                                   (5, self.p.score_size, self.p.score_size))
            delta_x, delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1]

            if refine_enable:
                mask = self.model.track_refine((delta_y, delta_x)).to(self.device).sigmoid().squeeze().view(
                    self.p.out_size, self.p.out_size).cpu().data.numpy()
            else:
                mask = mask[0, :, delta_y, delta_x].sigmoid(). \
                    squeeze().view(self.p.out_size, self.p.out_size).cpu().data.numpy()

            def crop_back(image, bbox, out_sz, padding=-1):
                # Affine warp mapping the bbox region onto an out_sz canvas.
                a = (out_sz[0] - 1) / bbox[2]
                b = (out_sz[1] - 1) / bbox[3]
                c = -a * bbox[0]
                d = -b * bbox[1]
                # np.float was removed in NumPy 1.20+; plain float is equivalent.
                mapping = np.array([[a, 0, c],
                                    [0, b, d]]).astype(float)
                crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]),
                                      flags=cv2.INTER_LINEAR,
                                      borderMode=cv2.BORDER_CONSTANT,
                                      borderValue=padding)
                return crop

            s = crop_box[2] / self.p.instance_size
            sub_box = [crop_box[0] + (delta_x - self.p.base_size / 2) * self.p.total_stride * s,
                       crop_box[1] + (delta_y - self.p.base_size / 2) * self.p.total_stride * s,
                       s * self.p.exemplar_size, s * self.p.exemplar_size]
            s = self.p.out_size / sub_box[2]
            back_box = [-sub_box[0] * s, -sub_box[1] * s, im_w * s, im_h * s]
            mask_in_img = crop_back(mask, back_box, (im_w, im_h))

            target_mask = (mask_in_img > self.p.seg_thr).astype(np.uint8)
            # findContours returns (contours, hierarchy) on OpenCV 4.x but
            # (image, contours, hierarchy) on 3.x; unpack by arity instead of
            # sniffing a character of the version string.
            found = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            contours = found[0] if len(found) == 2 else found[1]
            cnt_area = [cv2.contourArea(cnt) for cnt in contours]
            if len(contours) != 0 and np.max(cnt_area) > 100:
                contour = contours[np.argmax(cnt_area)]  # use max area polygon
                polygon = contour.reshape(-1, 2)
                # pbox = cv2.boundingRect(polygon)  # Min Max Rectangle
                prbox = cv2.boxPoints(cv2.minAreaRect(polygon))  # Rotated Rectangle
                # box_in_img = pbox
                rbox_in_img = prbox
            else:  # empty mask
                location = cxy_wh_2_rect(target_pos, target_sz)
                rbox_in_img = np.array([[location[0], location[1]],
                                        [location[0] + location[2], location[1]],
                                        [location[0] + location[2], location[1] + location[3]],
                                        [location[0], location[1] + location[3]]])

        # Clamp the new state to the image bounds and a minimum size.
        target_pos[0] = max(0, min(im_w, target_pos[0]))
        target_pos[1] = max(0, min(im_h, target_pos[1]))
        target_sz[0] = max(10, min(im_w, target_sz[0]))
        target_sz[1] = max(10, min(im_h, target_sz[1]))

        score = score[best_pscore_id]
        mask = mask_in_img if mask_enable else []
        return target_pos, target_sz, score, mask
def main():
    """Hyper-parameter sweep driver: try every combination on every video."""
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    # Candidate values for each hyper-parameter axis.
    params = {'penalty_k': args.penalty_k,
              'window_influence': args.window_influence,
              'lr': args.lr,
              'instance_size': args.search_region}
    num_search = 1
    for candidates in params.values():
        num_search *= len(candidates)
    print(params)
    print(num_search)

    cfg = load_config(args)
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        model = models.__dict__[args.arch](anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    model = model.to(device)

    default_hp = cfg.get('hp', {})

    # Shared job description handed to tune() for each combination.
    p = {'network': model,
         'network_name': args.arch + '_' + args.resume.split('/')[-1].split('.')[0],
         'dataset': args.dataset}

    global ims, gt, image_files
    dataset_info = load_dataset(args.dataset)
    videos = list(dataset_info.keys())
    np.random.shuffle(videos)

    for video in videos:
        print(video)
        # An external 'finish.flag' file stops the sweep cooperatively.
        if isfile('finish.flag'):
            return
        p['video'] = video
        ims = None
        image_files = dataset_info[video]['image_files']
        gt = dataset_info[video]['gt']
        # Randomise the visiting order along each axis of the grid.
        np.random.shuffle(params['penalty_k'])
        np.random.shuffle(params['window_influence'])
        np.random.shuffle(params['lr'])
        for pk in params['penalty_k']:
            for wi in params['window_influence']:
                for lr in params['lr']:
                    for isz in params['instance_size']:
                        p['hp'] = default_hp.copy()
                        p['hp'].update({'penalty_k': pk,
                                        'window_influence': wi,
                                        'lr': lr,
                                        'instance_size': isz,
                                        })
                        tune(p)
def main():
    """Evaluate a SiamMask model (variant with VOS/speed accumulation disabled).

    NOTE(review): this copy never appends to iou_lists/speed_list (both are
    commented out) and has no final reporting block — it appears to be a
    trimmed or truncated variant of the full evaluation entry point.
    """
    global args, logger, v_id
    args = parser.parse_args()
    cfg = load_config(args)
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)
    logger = logging.getLogger('global')
    logger.info(args)

    # setup model — only the 'Custom' architecture is supported.
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    # Restore pretrained weights when a checkpoint path was given.
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    # GPU unless unavailable or --cpu was requested.
    device = torch.device('cuda' if (
        torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)

    # setup dataset
    dataset = load_dataset(args.dataset)

    # VOS or VOT? Mask output only on the segmentation datasets.
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    total_lost = 0  # VOT
    # iou_lists = []  # VOS
    # speed_list = []

    for v_id, video in enumerate(dataset.keys(), start=1):
        # Optionally restrict the run to a single named video.
        if args.video != '' and video != args.video:
            continue
        if vos_enable:
            iou_list, speed = track_vos(
                model, dataset[video],
                cfg['hp'] if 'hp' in cfg.keys() else None, args.mask,
                args.refine, args.dataset in ['DAVIS2017', 'ytb_vos'],
                device=device)
            # iou_lists.append(iou_list)
        else:
            lost, speed = track_vot(model, dataset[video],
                                    cfg['hp'] if 'hp' in cfg.keys() else None,
                                    args.mask, args.refine, device=device)
            total_lost += lost
def main():
    """Validate a sequence of training snapshots (epochs 1-20) on the val set."""
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)
    print("Init logger")
    logger = logging.getLogger('global')
    print(44)  # NOTE(review): leftover debug print
    #logger.info("\n" + collect_env_info())
    print(99)  # NOTE(review): leftover debug print
    logger.info(args)
    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))
    print(2)
    # TensorBoard writer, or a no-op stand-in when no log dir was given.
    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)
    print(3)

    # NOTE(review): hard-coded, user-specific snapshot path — parameterize
    # before reusing this script elsewhere.
    path = "/usr4/alg504/cliao25/siammask/experiments/siammask_base/snapshot/checkpoint_e{}.pth"

    for epoch in range(1,21):
        # Rebuild the model from scratch for each epoch's snapshot.
        if args.arch == 'Custom':
            from custom import Custom
            model = Custom(pretrain=True, anchors=cfg['anchors'])
        else:
            exit()
        print(4)
        if args.pretrained:
            model = load_pretrain(model, args.pretrained)
        model = model.cuda()
        #model.features.unfix((epoch - 1) / 20)
        # An optimizer is built only because restore_from() takes one below.
        optimizer, lr_scheduler = build_opt_lr(model, cfg, args, epoch)
        filepath = path.format(epoch)
        assert os.path.isfile(filepath)
        model, _, _, _, _ = restore_from(model, optimizer, filepath)
        #model = load_pretrain(model, filepath)
        model = torch.nn.DataParallel(model, list(range(torch.cuda.device_count()))).cuda()
        model.train()
        device = torch.device('cuda')
        model = model.to(device)
        # Run validation for this snapshot.
        valid(val_loader, model, cfg)
    print("Done")
def main():
    """Entry point: evaluate a SiamMask model on a VOT/VOS dataset."""
    # Parse command-line arguments.
    global args, logger, v_id
    args = parser.parse_args()
    # Load the config file: network structure, hyper-parameters, etc.
    cfg = load_config(args)
    # Initialise logging; mirror log output to a file on disk when requested.
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)
    # Record the run configuration in the log.
    logger = logging.getLogger('global')
    logger.info(args)

    # setup model
    # Build the network architecture.
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    # Load the pretrained network weights.
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model = load_pretrain(model, args.resume)
    # Evaluation mode (freezes dropout/batch-norm behaviour).
    model.eval()
    # Hardware selection: GPU unless unavailable or --cpu was given.
    device = torch.device('cuda' if (
        torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)

    # Load the benchmark. setup dataset
    dataset = load_dataset(args.dataset)

    # Only these three datasets support mask output. VOS or VOT?
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    total_lost = 0  # VOT
    iou_lists = []  # VOS
    speed_list = []

    # Process each video in the dataset.
    for v_id, video in enumerate(dataset.keys(), start=1):
        if args.video != '' and video != args.video:
            continue
        # True -> segmentation evaluation via track_vos.
        if vos_enable:
            # Multi-object tracking is enabled for DAVIS2017 / ytb_vos.
            iou_list, speed = track_vos(
                model, dataset[video],
                cfg['hp'] if 'hp' in cfg.keys() else None, args.mask,
                args.refine, args.dataset in ['DAVIS2017', 'ytb_vos'],
                device=device)
            iou_lists.append(iou_list)
        # False -> bounding-box tracking via track_vot.
        else:
            lost, speed = track_vot(model, dataset[video],
                                    cfg['hp'] if 'hp' in cfg.keys() else None,
                                    args.mask, args.refine, device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if vos_enable:
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(
                thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))
    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))
test_iter = iter(test_loader) # 确定数据集长度 train_lenth = len(train_loader) test_lenth = len(test_loader) global_logger.debug( '==>>> total trainning batch number: {}'.format(train_lenth)) global_logger.debug('==>>> total testing batch number: {}'.format(test_lenth)) # 加载模型 sys.path.append(os.path.join(".", experiment_path, experiment_name)) if arch == "Custom": from custom import Custom model = Custom(cfg=cfg.model) model = model.to(device) model = torch.nn.DataParallel(model, list(range( torch.cuda.device_count()))).to(device) else: raise NotImplementedError # 建立tensorboard的实例 from tensorboardX import SummaryWriter writer = SummaryWriter(os.path.join(".", board_path, experiment_name)) # 建立优化器 optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999)) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[10, 20, 30, 40], gamma=0.5)
def main():
    """Evaluate a SiamMask model on a VOT/VOS dataset (debug variant).

    NOTE(review): contains a hard early-exit that stops after the first video.
    """
    global args, logger, v_id  # globals shared with the tracking helpers
    args = parser.parse_args()  # arguments passed to test.py on the command line
    cfg = load_config(args)  # load the JSON config and set args.arch
    print(cfg)
    init_log('global', logging.INFO)
    if args.log != "":
        # add_file_handler creates a logger and binds a file handle to it.
        add_file_handler('global', args.log, logging.INFO)
    logger = logging.getLogger('global')
    logger.info(args)

    # setup model — 'Custom' is the paper's network; models under `models`
    # are not handled here, so any other value is rejected.
    if args.arch == 'Custom':  # args.arch defaults to '' when not given
        from custom import Custom
        # cfg comes from config_vot.json, so tracking uses the model.anchors dict.
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    # If args.resume is given but is not a file, fail loudly.
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        # load_pretrain tolerates naming mismatches between checkpoint and model.
        model = load_pretrain(
            model, args.resume)
    model.eval()
    device = torch.device('cuda' if (
        torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)

    # setup dataset (a dict); load_dataset supports VOT, DAVIS and ytb_vos.
    dataset = load_dataset(
        args.dataset)
    # Only those three sources support mask output. VOS or VOT?
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    total_lost = 0  # VOT: tracking-failure count
    iou_lists = []  # VOS: segmentation IoUs
    speed_list = []

    # v_id starts at 1; video is the sequence name.
    for v_id, video in enumerate(dataset.keys(), start=1):
        if v_id == 2:
            exit()  # NOTE(review): debug early-exit — only the first video runs
        if args.video != '' and video != args.video:  # args.video defaults to ''
            continue
        # Segmentation task (segmentation and tracking are mutually exclusive).
        if vos_enable:
            iou_list, speed = track_vos(
                model, dataset[video],
                cfg['hp'] if 'hp' in cfg.keys() else None, args.mask,
                args.refine, args.dataset in ['DAVIS2017', 'ytb_vos'],
                device=device)
            iou_lists.append(iou_list)  # TODO confirm iou_list's element type
        # Tracking task.
        else:
            lost, speed = track_vot(model, dataset[video],
                                    cfg['hp'] if 'hp' in cfg.keys() else None,
                                    args.mask, args.refine, device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if vos_enable:  # segmentation metrics
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(
                thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))
    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))