def generate_ann(root_path, split, image_infos):
    """Generate cropped annotations and label txt file.

    Every annotated box of every image is cropped with ``crop_img`` and
    written to ``<root_path>/dst_imgs/<split>/``. One ``<path> <word>`` line
    per crop is collected and written to the label file.

    Args:
        root_path (str): The relative path of the totaltext file
        split (str): The split of dataset. Namely: training or test
        image_infos (list[dict]): A list of dicts of the img and
            annotation information

    Raises:
        NotImplementedError: If ``split`` is neither 'training' nor 'test'.
    """
    # Resolve the label file first so that an unsupported split fails
    # before any image is read or any crop is written.
    if split == 'training':
        dst_label_file = osp.join(root_path, 'train_label.txt')
    elif split == 'test':
        dst_label_file = osp.join(root_path, 'test_label.txt')
    else:
        raise NotImplementedError(f'Unsupported split: {split!r}')

    dst_image_root = osp.join(root_path, 'dst_imgs', split)
    os.makedirs(dst_image_root, exist_ok=True)
    dst_dir_name = osp.basename(dst_image_root)

    lines = []
    for image_info in image_infos:
        src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
        image = mmcv.imread(src_img_path)
        # file_name is expected to look like '<dir>/<name>.<ext>'; the crop
        # name prefix is its second path component without the extension.
        src_img_root = osp.splitext(image_info['file_name'])[0].split('/')[1]

        # Crops are numbered from 1 within each source image.
        for index, anno in enumerate(image_info['anno_info'], start=1):
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'])

            dst_img_name = f'{src_img_root}_{index}.png'
            dst_img_path = osp.join(dst_image_root, dst_img_name)
            mmcv.imwrite(dst_img, dst_img_path)
            lines.append(f'{dst_dir_name}/{dst_img_name} ' f'{word}')

    list_to_file(dst_label_file, lines)
def generate_ann(root_path, split, image_infos, preserve_vertical, format):
    """Generate cropped annotations and label txt file.

    Valid crops are written to ``<root_path>/crops/<split>/`` and listed in
    the label file. Vertical crops of the training split that are filtered
    out are written to ``<root_path>/ignores/<split>/`` instead and are NOT
    listed in the label file.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: training or val
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``split`` or ``format`` is not supported.
    """
    # Validate the arguments before touching the file system so that a typo
    # does not surface only after all images have been cropped.
    if split == 'training':
        dst_label_file = osp.join(root_path, f'train_label.{format}')
    elif split == 'val':
        dst_label_file = osp.join(root_path, f'val_label.{format}')
    else:
        raise NotImplementedError(f'Unsupported split: {split!r}')
    if format not in ('txt', 'jsonl'):
        raise NotImplementedError(f'Unsupported format: {format!r}')

    dst_image_root = osp.join(root_path, 'crops', split)
    ignore_image_root = osp.join(root_path, 'ignores', split)
    mmcv.mkdir_or_exist(dst_image_root)
    mmcv.mkdir_or_exist(ignore_image_root)
    dst_dir_name = osp.basename(dst_image_root)

    lines = []
    for image_info in image_infos:
        src_img_path = osp.join(root_path, 'imgs', split,
                                image_info['file_name'])
        image = mmcv.imread(src_img_path)
        src_img_root = image_info['file_name'].split('.')[0]

        # The index advances for every annotation, including skipped ones,
        # so crop names stay aligned with the annotation order.
        for index, anno in enumerate(image_info['anno_info'], start=1):
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'], 0, 0)
            h, w, _ = dst_img.shape
            dst_img_name = f'{src_img_root}_{index}.png'

            # Skip invalid annotations
            if min(dst_img.shape) == 0 or len(word) == 0:
                continue

            # Filter out vertical texts (training split only): keep the crop
            # for inspection under ignores/, but do not label it, because the
            # label path would point into crops/ where no such file exists.
            if not preserve_vertical and h / w > 2 and split == 'training':
                mmcv.imwrite(dst_img,
                             osp.join(ignore_image_root, dst_img_name))
                continue

            mmcv.imwrite(dst_img, osp.join(dst_image_root, dst_img_name))

            filename = f'{dst_dir_name}/{dst_img_name}'
            if format == 'txt':
                lines.append(f'{filename} ' f'{word}')
            else:  # 'jsonl', guaranteed by the validation above
                lines.append(json.dumps({'filename': filename, 'text': word}))

    list_to_file(dst_label_file, lines)
def generate_ann(root_path, split, image_infos, preserve_vertical, format):
    """Generate cropped annotations and label txt file.

    Crops are written to ``<root_path>/dst_imgs/<split>/``. Empty crops are
    skipped, and so are vertical crops (height / width > 2) unless
    ``preserve_vertical`` is set. Skipped crops consume no index, so the
    crop names of one image are numbered consecutively from 1.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: training or test
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``split`` or ``format`` is not supported.
    """
    # Validate the arguments before any crop is written; otherwise an
    # unsupported value is only detected after the expensive work is done.
    if split == 'training':
        dst_label_file = osp.join(root_path, f'train_label.{format}')
    elif split == 'test':
        dst_label_file = osp.join(root_path, f'test_label.{format}')
    else:
        raise NotImplementedError(f'Unsupported split: {split!r}')
    if format not in ('txt', 'jsonl'):
        raise NotImplementedError(f'Unsupported format: {format!r}')

    dst_image_root = osp.join(root_path, 'dst_imgs', split)
    os.makedirs(dst_image_root, exist_ok=True)
    dst_dir_name = osp.basename(dst_image_root)

    lines = []
    for image_info in image_infos:
        index = 1
        src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
        image = mmcv.imread(src_img_path)
        src_img_root = image_info['file_name'].split('.')[0]

        for anno in image_info['anno_info']:
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'])
            h, w, _ = dst_img.shape

            # Skip invalid annotations
            if min(dst_img.shape) == 0:
                continue
            # Skip vertical texts
            if not preserve_vertical and h / w > 2:
                continue

            dst_img_name = f'{src_img_root}_{index}.png'
            index += 1
            dst_img_path = osp.join(dst_image_root, dst_img_name)
            mmcv.imwrite(dst_img, dst_img_path)

            filename = f'{dst_dir_name}/{dst_img_name}'
            if format == 'txt':
                lines.append(f'{filename} ' f'{word}')
            else:  # 'jsonl', guaranteed by the validation above
                lines.append(
                    json.dumps({
                        'filename': filename,
                        'text': word
                    },
                               ensure_ascii=False))

    list_to_file(dst_label_file, lines)
def convert_annotations(root_path, split, format):
    """Convert original annotations to mmocr format.

    The annotation format is as the following:
        word_1.png, "flying"
        word_2.png, "today"
        word_3.png, "means"

    After this module, the annotation has been changed to the format below
    (every file name is additionally prefixed with the lower-cased split
    directory, e.g. ``train/word_1.png``):
        txt:
        word_1.png flying
        word_2.png today
        word_3.png means

        jsonl:
        {'filename': 'word_1.png', 'text': 'flying'}
        {'filename': 'word_2.png', 'text': 'today'}
        {'filename': 'word_3.png', 'text': 'means'}

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: Train or Test
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``format`` is neither 'txt' nor 'jsonl'.
    """
    assert isinstance(root_path, str)
    assert isinstance(split, str)
    if format not in ('txt', 'jsonl'):
        raise NotImplementedError(f'Unsupported format: {format!r}')

    src_label_file = osp.join(root_path, 'annotations',
                              f'Challenge1_{split}_Task3_GT.txt')
    # 'utf-8-sig' transparently drops a leading byte order mark if present.
    with open(src_label_file, 'r', encoding='utf-8-sig') as f:
        annos = f.readlines()

    dst_image_root = osp.join(root_path, split.lower())
    dst_dir_name = osp.basename(dst_image_root)

    lines = []
    for anno in annos:
        # Tolerate blank lines (e.g. a trailing empty line)
        if not anno.strip():
            continue
        # text may contain comma ',' so split only at the first separator
        dst_img_name, word = anno.split(', "', 1)
        # Drop the line break and the closing quote; the last line of the
        # file may lack the line break.
        word = word.rstrip('\n')
        if word.endswith('"'):
            word = word[:-1]

        filename = f'{dst_dir_name}/{dst_img_name}'
        if format == 'txt':
            lines.append(f'{filename} ' f'{word}')
        else:  # 'jsonl', guaranteed by the validation above
            lines.append(json.dumps({'filename': filename, 'text': word}))

    list_to_file(
        osp.join(root_path, f'{split.lower()}_label.{format}'), lines)
def main():
    """Crop the word images of the svt testset and write its label file.

    Reads ``test.xml`` under ``args.root_path``, crops every tagged rectangle
    of every image into ``<root_path>/image/`` (optionally resized to
    ``args.width`` x ``args.height``) and writes one line per crop to
    ``<root_path>/test_label.txt`` in the form
    ``<crop path> <label> <lexicon size> <lexicon>``.

    Raises:
        Exception: If ``test.xml`` does not exist under the root path.
    """
    args = parse_args()
    root_path = args.root_path

    # inputs
    src_label_file = osp.join(root_path, 'test.xml')
    if not osp.exists(src_label_file):
        raise Exception(
            f'{src_label_file} not exists, please check and try again.')
    src_image_root = root_path

    # outputs
    dst_label_file = osp.join(root_path, 'test_label.txt')
    dst_image_root = osp.join(root_path, 'image')
    os.makedirs(dst_image_root, exist_ok=True)

    root = ET.parse(src_label_file).getroot()
    total_img_num = len(root)

    lines = []
    crop_idx = 1  # running crop number across all images
    for img_no, image_node in enumerate(root.findall('image'), start=1):
        image_name = image_node.find('imageName').text
        print(f'[{img_no}/{total_img_num}] Process image: {image_name}')

        lexicon = image_node.find('lex').text.lower()
        lex_size = len(lexicon.split(','))
        src_img = cv2.imread(osp.join(src_image_root, image_name))

        for rectangle in image_node.find('taggedRectangles'):
            x, y, w, h = (int(rectangle.get(key))
                          for key in ('x', 'y', 'width', 'height'))
            # Clamp negative coordinates to the image border
            top, bottom = max(0, y), max(0, y + h)
            left, right = max(0, x), max(0, x + w)
            dst_img = src_img[top:bottom, left:right]
            text_label = rectangle.find('tag').text.lower()
            if args.resize:
                dst_img = cv2.resize(dst_img, (args.width, args.height))

            dst_img_name = f'img_{crop_idx:04}' + '.jpg'
            crop_idx += 1
            cv2.imwrite(osp.join(dst_image_root, dst_img_name), dst_img)
            lines.append(f'{osp.basename(dst_image_root)}/{dst_img_name} '
                         f'{text_label} {lex_size} {lexicon}')

    list_to_file(dst_label_file, lines)
    print(f'Finish to generate svt testset, '
          f'with label file {dst_label_file}')
def convert_annotations(root_path, split, format):
    """Convert original annotations to mmocr format.

    The annotation format is as the following:
        Crops/val/11/1/1.png weighted
        Crops/val/11/1/2.png 26
        Crops/val/11/1/3.png casting
        Crops/val/11/1/4.png 28

    After this module, the annotation has been changed to the format below:
        jsonl:
        {'filename': 'Crops/val/11/1/1.png', 'text': 'weighted'}
        {'filename': 'Crops/val/11/1/2.png', 'text': '26'}
        {'filename': 'Crops/val/11/1/3.png', 'text': 'casting'}
        {'filename': 'Crops/val/11/1/4.png', 'text': '28'}

    For the 'txt' format nothing is read or written, because the source
    annotations are already in that format.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: training or test
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``format`` is neither 'txt' nor 'jsonl'.
    """
    assert isinstance(root_path, str)
    assert isinstance(split, str)

    if format == 'txt':
        # LV has already provided txt format annos
        return
    if format != 'jsonl':
        raise NotImplementedError(f'Unsupported format: {format!r}')

    # 'utf-8-sig' transparently drops a leading byte order mark if present.
    with open(
            osp.join(root_path, f'{split}_label.txt'), 'r',
            encoding='utf-8-sig') as f:
        annos = f.readlines()

    lines = []
    for anno in annos:
        # Lines keep their line break, so test the stripped text to
        # recognise blank lines.
        if not anno.strip():
            continue
        # Text may contain spaces (or even 'png '), so split only at the
        # first 'png ' which terminates the image path.
        dst_img_name, word = anno.split('png ', 1)
        word = word.strip('\n')
        lines.append(
            json.dumps({
                'filename': dst_img_name + 'png',
                'text': word
            }))

    list_to_file(osp.join(root_path, f'{split}_label.{format}'), lines)
def generate_ann(root_path, split, image_infos, format):
    """Crop every annotated text instance and write the label file.

    Crops are stored as ``<root_path>/crops/<split>/<image>_<n>.png`` where
    ``n`` counts the non-empty crops of one source image starting at 1. The
    labels go to ``<root_path>/<split>_label.<format>``.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If a crop has to be labelled and ``format`` is
            neither 'txt' nor 'jsonl'.
    """
    crop_dir = osp.join(root_path, 'crops', split)
    label_path = osp.join(root_path, f'{split}_label.{format}')
    os.makedirs(crop_dir, exist_ok=True)

    labels = []
    for info in image_infos:
        kept = 0
        img = mmcv.imread(osp.join(root_path, 'imgs', info['file_name']))
        stem = info['file_name'].split('.')[0]

        for anno in info['anno_info']:
            text = anno['word']
            crop = crop_img(img, anno['bbox'], 0, 0)

            # Skip invalid annotations
            if min(crop.shape) == 0:
                continue

            kept += 1
            crop_name = f'{stem}_{kept}.png'
            mmcv.imwrite(crop, osp.join(crop_dir, crop_name))

            if format == 'jsonl':
                entry = {
                    'filename': f'{osp.basename(crop_dir)}/{crop_name}',
                    'text': text
                }
                labels.append(json.dumps(entry))
            elif format == 'txt':
                labels.append(f'{osp.basename(crop_dir)}/{crop_name} '
                              f'{text}')
            else:
                raise NotImplementedError

    list_to_file(label_path, labels)
def convert_textocr(root_path,
                    dst_image_path,
                    dst_label_filename,
                    annotation_filename,
                    img_start_idx=0,
                    nproc=1):
    """Crop the images described by one annotation file and write labels.

    Args:
        root_path (str): Root path of the dataset; also used as the source
            image root
        dst_image_path (str): Output image directory, relative to root_path
        dst_label_filename (str): Output label file, relative to root_path
        annotation_filename (str): Annotation file, relative to root_path
        img_start_idx (int): Offset added to every image index
        nproc (int): Number of processes

    Returns:
        int: Number of images listed in the annotation file

    Raises:
        Exception: If the annotation file does not exist.
    """
    annotation_path = osp.join(root_path, annotation_filename)
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} not exists, please check and try again.')

    # outputs
    dst_label_file = osp.join(root_path, dst_label_filename)
    dst_image_root = osp.join(root_path, dst_image_path)
    os.makedirs(dst_image_root, exist_ok=True)

    annotation = mmcv.load(annotation_path)
    worker = partial(
        process_img, src_image_root=root_path, dst_image_root=dst_image_root)

    # One task per image: (global image index, image info, its annotations)
    tasks = []
    for offset, img_info in enumerate(annotation['imgs'].values()):
        anns = [
            annotation['anns'][ann_id]
            for ann_id in annotation['imgToAnns'][img_info['id']]
        ]
        tasks.append((offset + img_start_idx, img_info, anns))

    labels_per_img = mmcv.track_parallel_progress(
        worker, tasks, keep_order=True, nproc=nproc)

    # Flatten the per-image label lists into one list
    final_labels = [
        label for img_labels in labels_per_img for label in img_labels
    ]
    list_to_file(dst_label_file, final_labels)

    return len(annotation['imgs'])
def convert_cocotext(root_path,
                     split,
                     preserve_vertical,
                     format,
                     nproc,
                     img_start_idx=0):
    """Collect the annotation information and crop the images.

    The annotation format is as the following:
    {
        'anns':{
            '45346':{
                'mask': [468.9,286.7,468.9,295.2,493.0,295.8,493.0,287.2],
                'class': 'machine printed',
                'bbox': [468.9, 286.7, 24.1, 9.1], # x, y, w, h
                'image_id': 217925,
                'id': 45346,
                'language': 'english', # 'english' or 'not english'
                'area': 206.06,
                'utf8_string': 'New',
                'legibility': 'legible', # 'legible' or 'illegible'
            },
            ...
        }
        'imgs':{
            '540965':{
                'id': 540965,
                'set': 'train', # 'train' or 'val'
                'width': 640,
                'height': 360,
                'file_name': 'COCO_train2014_000000540965.jpg'
            },
            ...
        }
        'imgToAnns':{
            '540965': [],
            '260932': [63993, 63994, 63995, 63996, 63997, 63998, 63999],
            ...
        }
    }

    Args:
        root_path (str): Root path to the dataset
        split (str): Dataset split, which should be 'train' or 'val'
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'jsonl' or 'txt'
        nproc (int): Number of processes
        img_start_idx (int): Index of start image

    Returns:
        int: Number of images listed in the annotation file (all splits)

    Raises:
        Exception: If the annotation file does not exist.
    """
    annotation_path = osp.join(root_path, 'annotations/cocotext.v2.json')
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} not exists, please check and try again.')
    annotation = mmcv.load(annotation_path)

    # outputs
    dst_label_file = osp.join(root_path, f'{split}_label.{format}')
    dst_image_root = osp.join(root_path, 'crops', split)
    ignore_image_root = osp.join(root_path, 'ignores', split)
    src_image_root = osp.join(root_path, 'imgs')
    for out_dir in (dst_image_root, ignore_image_root):
        mmcv.mkdir_or_exist(out_dir)

    worker = partial(
        process_img,
        src_image_root=src_image_root,
        dst_image_root=dst_image_root,
        ignore_image_root=ignore_image_root,
        preserve_vertical=preserve_vertical,
        split=split,
        format=format)

    tasks = []
    for img_idx, img_info in enumerate(annotation['imgs'].values()):
        # Images of other splits are skipped but still consume an index.
        if img_info['set'] != split:
            continue
        # The json keys are strings while the stored ids are not.
        ann_ids = annotation['imgToAnns'][str(img_info['id'])]
        anns = [annotation['anns'][str(ann_id)] for ann_id in ann_ids]
        tasks.append((img_idx + img_start_idx, img_info, anns))

    labels_per_img = mmcv.track_parallel_progress(
        worker, tasks, keep_order=True, nproc=nproc)

    # Flatten the per-image label lists into one list
    final_labels = [
        label for img_labels in labels_per_img for label in img_labels
    ]
    list_to_file(dst_label_file, final_labels)

    return len(annotation['imgs'])
def convert_lsvt(root_path,
                 split,
                 ratio,
                 preserve_vertical,
                 format,
                 nproc,
                 img_start_idx=0):
    """Collect the annotation information and crop the images.

    The annotation format is as the following:
    [
        {'gt_1234': # 'gt_1234' is file name
            [
                {
                    'transcription': '一站式购物中心',
                    'points': [[45, 272], [215, 273], [212, 296], [45, 290]]
                    'illegibility': False
                }, ...
            ]
        }
    ]

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: train or val
        ratio (float): Split ratio for val set, at most 1. With a positive
            ratio every ``floor(1 / ratio)``-th image goes to the val set;
            otherwise all images are used for training
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, whether be txt or jsonl
        nproc (int): The number of process to collect annotations
        img_start_idx (int): Index of start image

    Returns:
        int: Number of images of the split that exist on disk and were
        processed

    Raises:
        ValueError: If ``ratio`` is greater than 1.
        NotImplementedError: If ``split`` is neither 'train' nor 'val'.
        Exception: If the annotation file does not exist.
    """
    # floor(1 / ratio) is 0 for ratio > 1, which would end in a modulo by
    # zero below; reject such values with a clear message instead.
    if ratio > 1:
        raise ValueError(f'ratio must not exceed 1, but got {ratio}')
    if split not in ('train', 'val'):
        raise NotImplementedError(f'Unsupported split: {split!r}')

    annotation_path = osp.join(root_path,
                               'annotations/train_full_labels.json')
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} not exists, please check and try again.')

    annotation = mmcv.load(annotation_path)
    # outputs
    dst_label_file = osp.join(root_path, f'{split}_label.{format}')
    dst_image_root = osp.join(root_path, 'crops', split)
    ignore_image_root = osp.join(root_path, 'ignores', split)
    src_image_root = osp.join(root_path, 'imgs')
    mmcv.mkdir_or_exist(dst_image_root)
    mmcv.mkdir_or_exist(ignore_image_root)

    # No src_image_root is bound here: each task carries the full image
    # path in its img_info instead.
    process_img_with_path = partial(
        process_img,
        dst_image_root=dst_image_root,
        ignore_image_root=ignore_image_root,
        preserve_vertical=preserve_vertical,
        split=split,
        format=format)

    img_prefixes = annotation.keys()
    trn_files, val_files = [], []
    if ratio > 0:
        step = math.floor(1 / ratio)
        for i, prefix in enumerate(img_prefixes):
            if i % step:
                trn_files.append(prefix)
            else:
                val_files.append(prefix)
    else:
        trn_files, val_files = img_prefixes, []
    print(f'training #{len(trn_files)}, val #{len(val_files)}')

    img_prefixes = trn_files if split == 'train' else val_files

    tasks = []
    for img_idx, prefix in enumerate(img_prefixes):
        img_file = osp.join(src_image_root, prefix + '.jpg')
        img_info = {'file_name': img_file}
        # Skip not exist images (they still consume an index)
        if not osp.exists(img_file):
            continue
        tasks.append((img_idx + img_start_idx, img_info, annotation[prefix]))

    labels_list = mmcv.track_parallel_progress(
        process_img_with_path, tasks, keep_order=True, nproc=nproc)

    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    list_to_file(dst_label_file, final_labels)

    return len(tasks)
def convert_hiertext(
    root_path,
    split,
    level,
    preserve_vertical,
    format,
    nproc,
):
    """Collect the annotation information and crop the images.

    The annotation format is as the following:
    {
        "info": {
            "date": "release date",
            "version": "current version"
        },
        "annotations": [  // List of dictionaries, one for each image.
            {
            "image_id": "the filename of corresponding image.",
            "image_width": image_width,  // (int) The image width.
            "image_height": image_height, // (int) The image height.
            "paragraphs": [  // List of paragraphs.
                {
                "vertices": [[x1, y1], [x2, y2],...,[xn, yn]]
                "legible": true
                "lines": [
                    {
                    "vertices": [[x1, y1], [x2, y2],...,[x4, y4]]
                    "text": "the text content of this line",
                    "legible": true,
                    "handwritten": false
                    "vertical": false,
                    "words": [
                        {
                        "vertices": [[x1, y1], [x2, y2],...,[xm, ym]]
                        "text": "the text content of this word",
                        "legible": true
                        "handwritten": false,
                        "vertical": false,
                        }, ...
                    ]
                    }, ...
                ]
                }, ...
            ]
            }, ...
        ]
    }

    Args:
        root_path (str): Root path to the dataset
        split (str): Dataset split, which should be 'train' or 'val'
        level (str): Crop word or line level instances
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'jsonl' or 'txt'
        nproc (int): Number of processes

    Returns:
        None. The crops and the label file are written to disk.

    Raises:
        Exception: If the annotation file does not exist.
    """
    annotation_path = osp.join(root_path, 'annotations/' + split + '.jsonl')
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} not exists, please check and try again.')

    # Despite the '.jsonl' suffix the file is parsed as one JSON document.
    # The context manager closes the handle as soon as it has been read.
    with open(annotation_path, 'r') as f:
        annotation = json.load(f)['annotations']

    # outputs
    dst_label_file = osp.join(root_path, f'{split}_label.{format}')
    dst_image_root = osp.join(root_path, 'crops', split)
    ignore_image_root = osp.join(root_path, 'ignores', split)
    src_image_root = osp.join(root_path, 'imgs', split)
    mmcv.mkdir_or_exist(dst_image_root)
    mmcv.mkdir_or_exist(ignore_image_root)

    process_img_with_path = partial(
        process_img,
        src_image_root=src_image_root,
        dst_image_root=dst_image_root,
        ignore_image_root=ignore_image_root,
        level=level,
        preserve_vertical=preserve_vertical,
        split=split,
        format=format)

    # One task per image: (image index, image annotation dict)
    tasks = list(enumerate(annotation))

    labels_list = mmcv.track_parallel_progress(
        process_img_with_path, tasks, keep_order=True, nproc=nproc)

    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    list_to_file(dst_label_file, final_labels)
def generate_ann(root_path, image_infos, preserve_vertical, val_ratio, format):
    """Generate cropped annotations and label txt file.

    With a non-zero ``val_ratio`` the images are divided into a 'training'
    and a 'val' split by ``split_train_val_list``; otherwise everything is
    treated as 'training'. For each split, valid crops are written to
    ``<root_path>/crops/<split>/`` and listed in
    ``<root_path>/<split>_label.<format>``. Vertical crops of the training
    split that are filtered out are written to
    ``<root_path>/ignores/<split>/`` and are NOT listed in the label file.

    Args:
        root_path (str): The root path of the dataset
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        preserve_vertical (bool): Whether to preserve vertical texts
        val_ratio (float): Split ratio for val set
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``format`` is neither 'txt' nor 'jsonl'.
    """
    assert val_ratio <= 1.
    # Reject an unsupported format before any crop is written.
    if format not in ('txt', 'jsonl'):
        raise NotImplementedError(f'Unsupported format: {format!r}')

    if val_ratio:
        image_infos = split_train_val_list(image_infos, val_ratio)
        splits = ['training', 'val']
    else:
        image_infos = [image_infos]
        splits = ['training']

    for i, split in enumerate(splits):
        dst_image_root = osp.join(root_path, 'crops', split)
        ignore_image_root = osp.join(root_path, 'ignores', split)
        dst_label_file = osp.join(root_path, f'{split}_label.{format}')
        os.makedirs(dst_image_root, exist_ok=True)
        dst_dir_name = osp.basename(dst_image_root)

        lines = []
        for image_info in image_infos[i]:
            src_img_path = osp.join(root_path, 'imgs',
                                    image_info['file_name'])
            image = mmcv.imread(src_img_path)
            src_img_root = image_info['file_name'].split('.')[0]

            # The index advances for every annotation, including skipped
            # ones, so crop names stay aligned with the annotation order.
            for index, anno in enumerate(image_info['anno_info'], start=1):
                word = anno['word']
                dst_img = crop_img(image, anno['bbox'], 0, 0)
                h, w, _ = dst_img.shape
                dst_img_name = f'{src_img_root}_{index}.png'

                # Skip invalid annotations
                if min(dst_img.shape) == 0:
                    continue

                # Skip vertical texts (training split only): keep the crop
                # under ignores/ but do not label it, because the label path
                # would point into crops/ where no such file exists.
                if (not preserve_vertical and h / w > 2
                        and split == 'training'):
                    mmcv.imwrite(dst_img,
                                 osp.join(ignore_image_root, dst_img_name))
                    continue

                mmcv.imwrite(dst_img, osp.join(dst_image_root, dst_img_name))

                filename = f'{dst_dir_name}/{dst_img_name}'
                if format == 'txt':
                    lines.append(f'{filename} ' f'{word}')
                else:  # 'jsonl', guaranteed by the validation above
                    lines.append(
                        json.dumps({
                            'filename': filename,
                            'text': word
                        },
                                   ensure_ascii=False))

        list_to_file(dst_label_file, lines)