def __init__(self, name):
    super(Input_Dataset, self).__init__()

    # size match
    desired_size = (cfg.TRAIN.SEARCH_SIZE - cfg.TRAIN.EXEMPLAR_SIZE) / \
        cfg.ANCHOR.STRIDE + 1 + cfg.TRAIN.BASE_SIZE
    if desired_size != cfg.TRAIN.OUTPUT_SIZE:
        raise Exception('size not match!')

    # create anchor target
    self.anchor_target = AnchorTarget()

    # create datasets
    self.dataset = []
    start_video = 0
    self.num = 0  # number of image sequences

    # create train datasets
    if name == 'train':
        train_dataset = VID_Dataset(
            name,
            cfg.DATASET.VID.ROOT,
            cfg.DATASET.VID.ANNO,
            start_video
        )
        self.dataset = train_dataset
        self.num += train_dataset.num
        train_dataset.log()

    # create val datasets
    if name == 'val':
        val_dataset = VID_Dataset(
            name,
            cfg.DATASET.VID.ROOT,
            cfg.DATASET.VID.VALANNO,
            start_video
        )
        self.dataset = val_dataset
        self.num += val_dataset.num
        val_dataset.log()

    # data augmentation
    self.template_aug = Augmentation(
        cfg.DATASET.TEMPLATE.SHIFT,
        cfg.DATASET.TEMPLATE.SCALE,
        cfg.DATASET.TEMPLATE.BLUR,
        cfg.DATASET.TEMPLATE.FLIP,
        cfg.DATASET.TEMPLATE.COLOR
    )
    self.search_aug = Augmentation(
        cfg.DATASET.SEARCH.SHIFT,
        cfg.DATASET.SEARCH.SCALE,
        cfg.DATASET.SEARCH.BLUR,
        cfg.DATASET.SEARCH.FLIP,
        cfg.DATASET.SEARCH.COLOR
    )
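# --- Worked example (added sketch, not part of the original code) ---
# The size check above encodes: output feature map size =
# (search size - exemplar size) / stride + 1 + base size.
# With the common pysot-style defaults assumed below (SEARCH_SIZE=255,
# EXEMPLAR_SIZE=127, STRIDE=8, BASE_SIZE=8) this evaluates to 25, so
# cfg.TRAIN.OUTPUT_SIZE would have to be 25 for the check to pass.
def _check_output_size(search_size=255, exemplar_size=127,
                       stride=8, base_size=8):
    return (search_size - exemplar_size) / stride + 1 + base_size


assert _check_output_size() == 25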
def __init__(self):
    super(TrkDataset, self).__init__()

    desired_size = (cfg.TRAIN.SEARCH_SIZE - cfg.TRAIN.EXEMPLAR_SIZE) / \
        cfg.ANCHOR.STRIDE + 1 + cfg.TRAIN.BASE_SIZE
    if desired_size != cfg.TRAIN.OUTPUT_SIZE:
        raise Exception('size not match!')

    # create anchor target
    self.anchor_target = AnchorTarget()

    # create sub dataset
    self.all_dataset = []
    start = 0
    self.num = 0
    for name in cfg.DATASET.NAMES:
        subdata_cfg = getattr(cfg.DATASET, name)
        sub_dataset = SubDataset(
            name,
            subdata_cfg.ROOT,
            subdata_cfg.ANNO,
            subdata_cfg.FRAME_RANGE,
            subdata_cfg.NUM_USE,
            start
        )
        start += sub_dataset.num
        self.num += sub_dataset.num_use
        sub_dataset.log()
        self.all_dataset.append(sub_dataset)

    # data augmentation
    self.template_aug = Augmentation(
        cfg.DATASET.TEMPLATE.SHIFT,
        cfg.DATASET.TEMPLATE.SCALE,
        cfg.DATASET.TEMPLATE.BLUR,
        cfg.DATASET.TEMPLATE.FLIP,
        cfg.DATASET.TEMPLATE.COLOR
    )
    self.search_aug = Augmentation(
        cfg.DATASET.SEARCH.SHIFT,
        cfg.DATASET.SEARCH.SCALE,
        cfg.DATASET.SEARCH.BLUR,
        cfg.DATASET.SEARCH.FLIP,
        cfg.DATASET.SEARCH.COLOR
    )

    videos_per_epoch = cfg.DATASET.VIDEOS_PER_EPOCH
    self.num = videos_per_epoch if videos_per_epoch > 0 else self.num
    self.num *= cfg.TRAIN.EPOCH
    self.pick = self.shuffle()
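# --- Hypothetical usage sketch (added, not part of the original code) ---
# How a pysot-style training script would typically wrap TrkDataset in a
# PyTorch DataLoader; the cfg fields used here (TRAIN.BATCH_SIZE,
# TRAIN.NUM_WORKERS) are assumptions about the config schema.
from torch.utils.data import DataLoader


def build_train_loader():
    train_dataset = TrkDataset()
    return DataLoader(train_dataset,
                      batch_size=cfg.TRAIN.BATCH_SIZE,
                      num_workers=cfg.TRAIN.NUM_WORKERS,
                      pin_memory=True)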
def __init__(self, model):
    super(SiamRPNLatentTracker, self).__init__()
    self.score_size = (cfg.TRACK.INSTANCE_SIZE - cfg.TRACK.EXEMPLAR_SIZE) // \
        cfg.ANCHOR.STRIDE + 1 + cfg.TRACK.BASE_SIZE
    self.anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
    hanning = np.hanning(self.score_size)
    window = np.outer(hanning, hanning)
    self.window = np.tile(window.flatten(), self.anchor_num)
    self.anchors = self.generate_anchor(self.score_size)
    self.model = model
    self.model.eval()

    # create anchor target
    self.anchor_target = AnchorTarget()
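# --- Added sketch: how the Hanning window built above is typically used ---
# An assumption based on common SiamRPN-style trackers, not necessarily this
# exact class: at inference the flat per-anchor score is blended with
# self.window to penalize candidates far from the previous target position.
# window_influence is a hypothetical weight (pysot reads it from
# cfg.TRACK.WINDOW_INFLUENCE).
def apply_window_penalty(score, window, window_influence=0.42):
    # score and window are flat arrays of length anchor_num * score_size ** 2
    return score * (1 - window_influence) + window * window_influence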
def __init__(self, seq_input_len=3, seq_output_len=1):
    super(SeqTrkDataset, self).__init__()

    # feature map output size: (search region size - exemplar size) / stride + 1, no padding
    desired_size = (cfg.TRAIN.SEARCH_SIZE - cfg.TRAIN.EXEMPLAR_SIZE) / \
        cfg.ANCHOR.STRIDE + 1 + cfg.TRAIN.BASE_SIZE
    if desired_size != cfg.TRAIN.OUTPUT_SIZE:
        raise Exception('size not match!')

    # create anchor target (at init it sets the anchor shapes at every point;
    # when called it decides which anchors are positive and which are negative samples)
    self.anchor_target = AnchorTarget()

    # LSTM/GRU: seq_input_len consecutive tensors are fed in as history;
    # after that, each additional input tensor produces one output state
    self.seq_input_len = seq_input_len    # length of the LSTM/GRU input sequence
    self.seq_output_len = seq_output_len  # length of the LSTM/GRU output sequence

    # create sub dataset
    self.all_dataset = []
    start = 0
    self.num = 0  # total number of video clips
    for name in cfg.DATASET.NAMES:
        subdata_cfg = getattr(cfg.DATASET, name)
        sub_dataset = SubDataset(name,
                                 subdata_cfg.ROOT,
                                 subdata_cfg.ANNO,
                                 subdata_cfg.FRAME_RANGE,
                                 subdata_cfg.NUM_USE,
                                 start)
        start += sub_dataset.num
        self.num += sub_dataset.num_use
        sub_dataset.log()
        self.all_dataset.append(sub_dataset)

    # data augmentation (template and search-region augmentation parameters differ,
    # e.g. the template shift range is 4 pixels while the search region shift range is 64 pixels)
    self.template_aug = Augmentation(cfg.DATASET.TEMPLATE.SHIFT,
                                     cfg.DATASET.TEMPLATE.SCALE,
                                     cfg.DATASET.TEMPLATE.BLUR,
                                     cfg.DATASET.TEMPLATE.FLIP,
                                     cfg.DATASET.TEMPLATE.COLOR)
    self.search_aug = Augmentation(cfg.DATASET.SEARCH.SHIFT,
                                   cfg.DATASET.SEARCH.SCALE,
                                   cfg.DATASET.SEARCH.BLUR,
                                   cfg.DATASET.SEARCH.FLIP,
                                   cfg.DATASET.SEARCH.COLOR)

    videos_per_epoch = cfg.DATASET.VIDEOS_PER_EPOCH
    self.num = videos_per_epoch if videos_per_epoch > 0 else self.num
    self.num *= cfg.TRAIN.EPOCH
    self.pick = self.shuffle()

    self.img_cnt = 0  # for debugging
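# --- Illustrative sketch (added; an assumption, not taken from SeqTrkDataset) ---
# What seq_input_len / seq_output_len imply for a sampled frame sequence:
# the first seq_input_len frames feed the LSTM/GRU as history, and the
# remaining seq_output_len frames are the ones a state is predicted for.
def split_sequence(frames, seq_input_len=3, seq_output_len=1):
    assert len(frames) >= seq_input_len + seq_output_len
    history = frames[:seq_input_len]
    targets = frames[seq_input_len:seq_input_len + seq_output_len]
    return history, targets


# e.g. split_sequence([0, 1, 2, 3]) -> ([0, 1, 2], [3])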
def __init__(self, cfg):
    self.cfg = cfg

    # augmentations: shift, scale, blur, flip, grayscale
    template_augmentors = [
        ColorJitterAugmentor(),
        ShiftScaleAugmentor(self.cfg.DATASET.TEMPLATE.PAD_RATIO,
                            self.cfg.DATASET.TEMPLATE.SHIFT,
                            self.cfg.DATASET.TEMPLATE.SCALE,
                            self.cfg.DATASET.TEMPLATE.ASPECT,
                            self.cfg.TRAIN.EXEMPLAR_SIZE,
                            self.cfg.PREPROC.PIXEL_MEAN[::-1]),
        # ResizeAugmentor(self.cfg.TRAIN.EXEMPLAR_SIZE),
    ]
    if self.cfg.DATASET.TEMPLATE.BLUR:
        template_augmentors.append(
            MotionBlurAugmentor(self.cfg.DATASET.TEMPLATE.BLUR))
    # if cfg.DATASET.GRAY:
    #     template_augmentors.append(GrayscaleAugmentor(cfg.DATASET.GRAY))
    self.template_aug = imgaug.AugmentorList(template_augmentors)

    search_augmentors = [
        ColorJitterAugmentor(),
        ShiftScaleAugmentor(self.cfg.DATASET.SEARCH.PAD_RATIO,
                            self.cfg.DATASET.SEARCH.SHIFT,
                            self.cfg.DATASET.SEARCH.SCALE,
                            self.cfg.DATASET.SEARCH.ASPECT,
                            self.cfg.TRAIN.SEARCH_SIZE,
                            self.cfg.PREPROC.PIXEL_MEAN[::-1]),
        # ResizeAugmentor(self.cfg.TRAIN.SEARCH_SIZE),
    ]
    if self.cfg.DATASET.SEARCH.BLUR:
        search_augmentors.append(
            MotionBlurAugmentor(self.cfg.DATASET.SEARCH.BLUR))
    # if cfg.DATASET.GRAY:
    #     search_augmentors.append(GrayscaleAugmentor(cfg.DATASET.GRAY))
    self.search_aug = imgaug.AugmentorList(search_augmentors)

    self.anchor_target = AnchorTarget()
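# --- Minimal sketch (added) of what an augmentor list does ---
# A simplification, not the real tensorpack-style imgaug.AugmentorList API:
# the augmentors are applied to the image in list order, e.g. color jitter,
# then the shift/scale crop, then optional motion blur. The augment(img)
# interface here is an assumed one for illustration only.
def apply_augmentors(img, augmentors):
    for aug in augmentors:
        img = aug.augment(img)
    return img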
def __init__(self, name, loader, root, frame_root):
    super(TrackingDataset, self).__init__()

    desired_size = (cfg.TRAIN.SEARCH_SIZE - cfg.TRAIN.EXEMPLAR_SIZE) / \
        cfg.ANCHOR.STRIDE + 1 + cfg.TRAIN.BASE_SIZE
    if desired_size != cfg.TRAIN.OUTPUT_SIZE:
        raise Exception('size not match!')

    # Adapt base dataset to tracking dataset loader
    sub_dataset = loader(name=name, dataset_root=root)
    tracking_sub_dataset = TrackingDatasetAdapter(sub_dataset,
                                                  frame_root=frame_root)
    self.dataset = tracking_sub_dataset

    self.anchor_target = AnchorTarget()

    self.template_aug = Augmentation(cfg.DATASET.TEMPLATE.SHIFT,
                                     cfg.DATASET.TEMPLATE.SCALE,
                                     cfg.DATASET.TEMPLATE.BLUR,
                                     cfg.DATASET.TEMPLATE.FLIP,
                                     cfg.DATASET.TEMPLATE.COLOR)
    self.search_aug = Augmentation(cfg.DATASET.SEARCH.SHIFT,
                                   cfg.DATASET.SEARCH.SCALE,
                                   cfg.DATASET.SEARCH.BLUR,
                                   cfg.DATASET.SEARCH.FLIP,
                                   cfg.DATASET.SEARCH.COLOR)
def __init__(self, name, root, video_dir, init_rect, img_names,
             gt_rect, attr, load_img=False, config=None):
    self.name = name
    self.video_dir = video_dir
    self.init_rect = init_rect
    self.gt_traj = gt_rect
    self.attr = attr
    self.pred_trajs = {}

    dataset = root.split('/')[-1]
    if dataset == 'VOT2018':
        self.img_names = [
            os.path.join(root, x.replace('color/', ''))
            for x in img_names
        ]
    elif dataset == 'OTB100':
        if video_dir == 'Jogging-1' or 'Skating2-1' in video_dir:
            self.img_names = [
                os.path.join(root, x.replace('-1', ''))
                for x in img_names
            ]
        elif video_dir == 'Jogging-2' or 'Skating2-2' in video_dir:
            self.img_names = [
                os.path.join(root, x.replace('-2', ''))
                for x in img_names
            ]
        else:
            self.img_names = [os.path.join(root, x) for x in img_names]

    self.imgs = None
    self.config = config
    self.size = None
    self.center_pos = None

    self.template_aug = Augmentation(config.DATASET.TEMPLATE.SHIFT,
                                     config.DATASET.TEMPLATE.SCALE,
                                     config.DATASET.TEMPLATE.BLUR,
                                     config.DATASET.TEMPLATE.FLIP,
                                     config.DATASET.TEMPLATE.COLOR)
    self.search_aug = Augmentation(config.DATASET.SEARCH.SHIFT,
                                   config.DATASET.SEARCH.SCALE,
                                   config.DATASET.SEARCH.BLUR,
                                   config.DATASET.SEARCH.FLIP,
                                   config.DATASET.SEARCH.COLOR)

    # create anchor target
    self.anchor_target = AnchorTarget()

    if load_img:
        self.imgs = [cv2.imread(x) for x in self.img_names]
        self.width = self.imgs[0].shape[1]
        self.height = self.imgs[0].shape[0]
    else:
        img = cv2.imread(self.img_names[0])
        assert img is not None, self.img_names[0]
        self.width = img.shape[1]
        self.height = img.shape[0]
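# --- Standalone sketch (added) of the OTB100 path fix above ---
# Jogging and Skating2 each have one image folder shared by two annotation
# tracks, so the '-1' / '-2' suffix is stripped from the frame paths.
# The helper name is illustrative only, not part of the original class.
import os


def resolve_otb_img_names(root, video_dir, img_names):
    if video_dir == 'Jogging-1' or 'Skating2-1' in video_dir:
        return [os.path.join(root, x.replace('-1', '')) for x in img_names]
    if video_dir == 'Jogging-2' or 'Skating2-2' in video_dir:
        return [os.path.join(root, x.replace('-2', '')) for x in img_names]
    return [os.path.join(root, x) for x in img_names]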