def __init__(self, data_root='.', split='train', **kargs):
    """Index ICDAR2015 Challenge4 images and their per-image GT text files.

    :param data_root: directory containing the ICDAR2015 dataset tree
    :param split: 'train' selects the training images/GT, anything else
                  selects the test images/GT
    """
    ArrayDataset.__init__(self, **kargs)
    self.split = split

    ic15_root_dir = data_root + '/ICDAR2015/Challenge4/'
    if split == 'train':
        data_dirs = [ic15_root_dir + 'ch4_training_images/']
        gt_dirs = [ic15_root_dir + 'ch4_training_localization_transcription_gt/']
    else:
        data_dirs = [ic15_root_dir + 'ch4_test_images/']
        gt_dirs = [ic15_root_dir + 'ch4_test_localization_transcription_gt/']

    self.img_paths = []
    self.gt_paths = []
    for data_dir, gt_dir in zip(data_dirs, gt_dirs):
        # collect jpg + png names in deterministic (sorted) order
        img_names = sorted(util.io.ls(data_dir, '.jpg') + util.io.ls(data_dir, '.png'))
        for img_name in img_names:
            self.img_paths.append(data_dir + img_name)
            # ICDAR2015 GT convention: gt_<image-stem>.txt
            stem = img_name.split('.')[0]
            self.gt_paths.append(gt_dir + 'gt_' + stem + '.txt')
def __init__(self, ctw_root='.', split='train', **kargs):
    """Index CTW1500 images and their end-to-end annotation text files.

    :param ctw_root: directory containing the data/ctw1500 tree
    :param split: 'train' selects the training subset, anything else the
                  test subset
    """
    ArrayDataset.__init__(self, **kargs)

    ctw_root_dir = ctw_root + '/data/ctw1500/'
    if split == 'train':
        data_dirs = [ctw_root_dir + 'train/text_image/']
        gt_dirs = [ctw_root_dir + 'train/ctw1500_e2e_train/']
    else:
        data_dirs = [ctw_root_dir + 'test/text_image/']
        gt_dirs = [ctw_root_dir + 'test/ctw1500_e2e_test/']

    self.img_paths = []
    self.gt_paths = []
    for data_dir, gt_dir in zip(data_dirs, gt_dirs):
        # jpg + png, sorted so image and GT lists stay aligned across runs
        img_names = sorted(util.io.ls(data_dir, '.jpg') + util.io.ls(data_dir, '.png'))
        for img_name in img_names:
            self.img_paths.append(data_dir + img_name)
            # CTW1500 GT convention: <image-stem>.txt (no prefix)
            self.gt_paths.append(gt_dir + img_name.split('.')[0] + '.txt')
def __init__(self, data_root='.', split='train', **kargs):
    """Index ReCTS2019 images; JSON ground truth exists only for 'train'.

    :param data_root: directory containing the ReCTS2019 tree
    :param split: 'train' selects training images with GT; anything else
                  selects the test images (no GT shipped)
    """
    ArrayDataset.__init__(self, **kargs)
    self.split = split

    root_dir = data_root + '/ReCTS2019/'
    if split == 'train':
        data_dir = root_dir + 'train/img/'
        gt_dir = root_dir + 'train/gt/'
    else:
        # the test split ships without ground truth
        data_dir = root_dir + 'ReCTS_test_part1/Task3_and_Task4/img/'
        gt_dir = ''

    self.random_scale = np.array([0.5, 0.7, 1.0, 1.2])

    # scan images (jpg + png), sorted for deterministic ordering
    img_names = sorted(util.io.ls(data_dir, '.jpg') + util.io.ls(data_dir, '.png'))
    img_paths = []
    gt_paths = []
    for img_name in img_names:
        img_paths.append(data_dir + img_name)
        if gt_dir:
            # GT convention: <image-stem>.json
            gt_paths.append(gt_dir + img_name.split('.')[0] + '.json')
    self.img_paths = img_paths
    self.gt_paths = gt_paths
def __init__(self, data_root='.', split='train', **kargs):
    """Index ArT2019 images; labels come from one shared JSON file.

    :param data_root: directory containing the ArT2019 tree
    :param split: 'train' selects training images and loads the label
                  JSON into self.gt; anything else selects test images
                  and leaves self.gt as None (no GT for test)
    """
    ArrayDataset.__init__(self, **kargs)
    self.split = split

    root_dir = data_root + '/ArT2019/'
    if split == 'train':
        data_dir = root_dir + 'train_images/'
        gt_path = root_dir + 'train_labels.json'
    else:
        data_dir = root_dir + 'test_images/'
        gt_path = ''  # no ground truth for the test split

    # scan images (jpg + png), sorted for deterministic ordering
    img_names = sorted(util.io.ls(data_dir, '.jpg') + util.io.ls(data_dir, '.png'))
    self.img_paths = [data_dir + img_name for img_name in img_names]

    self.gt = None
    if gt_path:
        with open(gt_path, 'r') as f:
            self.gt = json.load(f)
def __init__(self, datadir, name='--', exts=None):
    """Index the files under *datadir* whose name passes allowed_file().

    :param datadir: directory to scan (non-recursive)
    :param name: dataset name forwarded to ArrayDataset
    :param exts: extension filter forwarded to allowed_file()
    """
    ArrayDataset.__init__(self, name)
    self.datadir = datadir
    # sorted scan, filtered by extension
    self.list = [fn for fn in sorted(os.listdir(datadir))
                 if allowed_file(fn, exts)]
def __init__(self, dataset, filter=None, name='--'):
    """
    Split a dataset.

    :param dataset: source dataset (must expose size())
    :param filter: sampling rule, like 'low-0.5' or 'high-100'.
                   A fraction (< 1) keeps an interleaved share of every
                   `base` consecutive indices; an integer count keeps a
                   contiguous run from the start ('low') or end ('high').
                   None keeps every index.
    :param name: dataset name
    """
    ArrayDataset.__init__(self, name=name)
    self.dataset = dataset
    self.filter = filter
    total = dataset.size()
    if filter is None:
        self.ibuf = list(range(total))
    else:
        # BUG FIX: self.ibuf was never initialized on this branch (the
        # assignment was commented out), so every append below raised
        # AttributeError. The comprehensions below build it directly.
        segs = filter.split('-')
        pos = segs[0]
        num = float(segs[1])
        if num < 1:
            # fractional rule: keep num*base out of every `base` indices,
            # interleaved across the whole dataset
            base = 100
            num = int(num * base)
            if num % 10 == 0:
                # reduce e.g. 50/100 to 5/10 so the interleaving is finer
                base = 10
                num /= 10
            if pos == 'low':
                self.ibuf = [i for i in range(total) if i % base < num]
            else:
                self.ibuf = [i for i in range(total) if i % base >= base - num]
        else:
            # absolute rule: keep the first/last `num` indices
            num = int(num)
            if pos == 'low':
                self.ibuf = [i for i in range(total) if i < num]
            else:
                self.ibuf = [i for i in range(total) if i >= total - num]
def __init__(self, data_root='.', split='train', **kargs):
    """Index MLT2019 images and their task-1.3 GT text files.

    :param data_root: directory containing the MLT2019 tree
    :param split: 'train' or other; NOTE(review): both branches point at
                  the training images/GT in the original code (there is
                  no GT for the real test set) — preserved as-is.
    """
    ArrayDataset.__init__(self, **kargs)
    self.split = split

    ic15_root_dir = data_root + '/MLT2019/'
    # no GT for the test set, so the 'test' branch reuses the train data
    if split == 'train':
        data_dirs = [ic15_root_dir + 'train_images/']
        gt_dirs = [ic15_root_dir + 'train_gt_t13/']
    else:
        data_dirs = [ic15_root_dir + 'train_images/']
        gt_dirs = [ic15_root_dir + 'train_gt_t13/']

    self.img_paths = []
    self.gt_paths = []
    for data_dir, gt_dir in zip(data_dirs, gt_dirs):
        # jpg + png, sorted so image and GT lists stay aligned
        img_names = sorted(util.io.ls(data_dir, '.jpg') + util.io.ls(data_dir, '.png'))
        for img_name in img_names:
            self.img_paths.append(data_dir + img_name)
            # GT convention: <image-stem>.txt
            self.gt_paths.append(gt_dir + img_name.split('.')[0] + '.txt')
def __init__(self, rootdir, imgdir='img_align_celeba', name='CelebA'):
    """Load the CelebA annotation files into a list of per-image dicts.

    Each item in self.list has keys: 'path', 'attr', 'landmark', 'bbox',
    and optionally 'kp68' when a precomputed landmark directory exists.

    :param rootdir: CelebA root containing the list_*.txt annotation files
    :param imgdir: subdirectory holding the images
    :param name: dataset name forwarded to ArrayDataset
    """
    ArrayDataset.__init__(self, name)
    self.rootdir = rootdir
    self.attr_path = os.path.join(rootdir, 'list_attr_celeba.txt')
    self.landmarks_path = os.path.join(rootdir, 'list_landmarks_align_celeba.txt')
    self.bbox_path = os.path.join(rootdir, 'list_bbox_celeba.txt')
    self.img_dir = os.path.join(rootdir, imgdir)

    # image list
    self.img_list = os.listdir(self.img_dir)

    # parse attributes
    # BUG FIX: this section previously opened self.landmarks_path, so the
    # 'attr' fields (and attr_names) were actually landmark coordinates.
    with open(self.attr_path, 'r') as f:
        # line 1: item count; line 2: attribute names; then one row per image
        total = int(f.readline())
        item_list = [{} for _ in range(total)]
        self.attr_names = f.readline().strip().split()
        for idx, line in enumerate(f):
            fields = line.strip().split()
            item_list[idx] = {
                'path': fields[0],
                'attr': [int(x) for x in fields[1:]],
            }

    # parse landmarks (skip the two header lines, rows align with attrs)
    with open(self.landmarks_path, 'r') as f:
        f.readline()
        f.readline()
        for idx, line in enumerate(f):
            fields = line.strip().split()
            item_list[idx]['landmark'] = [int(x) for x in fields[1:]]

    # parse bounding boxes (same two-header-line layout)
    with open(self.bbox_path, 'r') as f:
        f.readline()
        f.readline()
        for idx, line in enumerate(f):
            fields = line.strip().split()
            item_list[idx]['bbox'] = [int(x) for x in fields[1:]]

    # optional precomputed 68-point landmarks: only use the directory when
    # it covers most (>80%) of the items, and drop items without a file
    ldir = os.path.join(rootdir, 'landmark')
    if os.path.exists(ldir):
        files = os.listdir(ldir)
        if len(files) > len(item_list) * 0.8:
            ok_idx = []
            for i in range(total):
                item = item_list[i]
                lpath = os.path.join(ldir, item['path']) + '.txt'
                if not os.path.exists(lpath):
                    continue
                item['kp68'] = load_landmarks(lpath)
                ok_idx.append(i)
            # keep only the items that have a kp68 file
            item_list = [item_list[idx] for idx in ok_idx]

    self.list = item_list