Exemple #1
0
def generate_ann(root_path, split, image_infos):
    """Generate cropped annotations and label txt file.

    Every annotation of every image is cropped with ``crop_img`` and saved as
    ``<root_path>/dst_imgs/<split>/<image name>_<k>.png``, where ``k`` counts
    the annotations of that image starting from 1. One label line
    ``<crop dir name>/<crop file name> <word>`` is collected per crop and all
    lines are handed to ``list_to_file`` at the end.

    Args:
        root_path (str): The relative path of the totaltext file
        split (str): The split of dataset. Namely: training or test
        image_infos (list[dict]): A list of dicts of the img and
            annotation information. Each dict is read for ``file_name`` and
            ``anno_info``; each annotation for ``word`` and ``bbox``.

    Raises:
        NotImplementedError: If ``split`` is neither 'training' nor 'test'.
    """
    label_file_names = {
        'training': 'train_label.txt',
        'test': 'test_label.txt',
    }
    # Reject unknown splits before touching the disk. Without this check the
    # label path stays unbound and the failure only surfaces after every
    # image has already been cropped and written.
    if split not in label_file_names:
        raise NotImplementedError(f'Unsupported split: {split!r}')

    dst_image_root = osp.join(root_path, 'dst_imgs', split)
    dst_label_file = osp.join(root_path, label_file_names[split])
    os.makedirs(dst_image_root, exist_ok=True)

    lines = []
    for image_info in image_infos:
        src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
        image = mmcv.imread(src_img_path)
        # The crop name is built from the second '/'-separated component of
        # the extension-less file name, so ``file_name`` must contain a '/'.
        src_img_root = osp.splitext(image_info['file_name'])[0].split('/')[1]

        for index, anno in enumerate(image_info['anno_info'], start=1):
            dst_img = crop_img(image, anno['bbox'])
            dst_img_name = f'{src_img_root}_{index}.png'
            mmcv.imwrite(dst_img, osp.join(dst_image_root, dst_img_name))
            lines.append(f'{osp.basename(dst_image_root)}/{dst_img_name} '
                         f'{anno["word"]}')
    list_to_file(dst_label_file, lines)
Exemple #2
0
def generate_ann(root_path, split, image_infos, preserve_vertical, format):
    """Generate cropped annotations and label txt file.

    Crops are written to ``<root_path>/crops/<split>``. When
    ``preserve_vertical`` is false, training crops whose height is more than
    twice their width are written to ``<root_path>/ignores/<split>`` instead
    and get no label entry.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: training or val
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``split`` is not 'training'/'val' or
            ``format`` is not 'txt'/'jsonl'.
    """
    label_stems = {'training': 'train', 'val': 'val'}
    # Validate before any directory or image is created; an unknown split
    # would otherwise leave the label path unbound until the very end.
    if split not in label_stems:
        raise NotImplementedError(f'Unsupported split: {split!r}')
    if format not in ('txt', 'jsonl'):
        raise NotImplementedError(f'Unsupported format: {format!r}')

    dst_image_root = osp.join(root_path, 'crops', split)
    ignore_image_root = osp.join(root_path, 'ignores', split)
    dst_label_file = osp.join(root_path,
                              f'{label_stems[split]}_label.{format}')
    mmcv.mkdir_or_exist(dst_image_root)
    mmcv.mkdir_or_exist(ignore_image_root)

    lines = []
    for image_info in image_infos:
        src_img_path = osp.join(root_path, 'imgs', split,
                                image_info['file_name'])
        image = mmcv.imread(src_img_path)
        src_img_root = image_info['file_name'].split('.')[0]

        # The index counts every annotation, including the skipped ones.
        for index, anno in enumerate(image_info['anno_info'], start=1):
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'], 0, 0)
            h, w, _ = dst_img.shape
            dst_img_name = f'{src_img_root}_{index}.png'

            # Skip invalid annotations
            if min(dst_img.shape) == 0 or len(word) == 0:
                continue
            # Filter out vertical texts: keep the crop for inspection, but do
            # not label it, because the label path would point into the crop
            # directory where this image does not exist.
            if not preserve_vertical and h / w > 2 and split == 'training':
                mmcv.imwrite(dst_img,
                             osp.join(ignore_image_root, dst_img_name))
                continue

            mmcv.imwrite(dst_img, osp.join(dst_image_root, dst_img_name))
            filename = f'{osp.basename(dst_image_root)}/{dst_img_name}'
            if format == 'txt':
                lines.append(f'{filename} {word}')
            else:
                lines.append(json.dumps({'filename': filename, 'text': word}))

    list_to_file(dst_label_file, lines)
Exemple #3
0
def generate_ann(root_path, split, image_infos, preserve_vertical, format):
    """Generate cropped annotations and label txt file.

    Crops are written to ``<root_path>/dst_imgs/<split>`` and numbered per
    image, counting only the crops that are actually kept. Empty crops are
    skipped, and so are crops whose height is more than twice their width
    unless ``preserve_vertical`` is set.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: training or test
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``split`` is not 'training'/'test' or
            ``format`` is not 'txt'/'jsonl'.
    """
    label_stems = {'training': 'train', 'test': 'test'}
    # Fail before anything is written: an unknown split would otherwise leave
    # the label path unbound, and an unknown format was only detected after
    # the first crop had already been saved.
    if split not in label_stems:
        raise NotImplementedError(f'Unsupported split: {split!r}')
    if format not in ('txt', 'jsonl'):
        raise NotImplementedError(f'Unsupported format: {format!r}')

    dst_image_root = osp.join(root_path, 'dst_imgs', split)
    dst_label_file = osp.join(root_path,
                              f'{label_stems[split]}_label.{format}')
    os.makedirs(dst_image_root, exist_ok=True)

    lines = []
    for image_info in image_infos:
        index = 1
        src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
        image = mmcv.imread(src_img_path)
        src_img_root = image_info['file_name'].split('.')[0]

        for anno in image_info['anno_info']:
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'])
            h, w, _ = dst_img.shape

            # Skip invalid annotations (checked first so that w is non-zero
            # in the aspect-ratio test below)
            if min(dst_img.shape) == 0:
                continue
            # Skip vertical texts
            if not preserve_vertical and h / w > 2:
                continue

            dst_img_name = f'{src_img_root}_{index}.png'
            index += 1
            mmcv.imwrite(dst_img, osp.join(dst_image_root, dst_img_name))

            filename = f'{osp.basename(dst_image_root)}/{dst_img_name}'
            if format == 'txt':
                lines.append(f'{filename} {word}')
            else:
                lines.append(
                    json.dumps({
                        'filename': filename,
                        'text': word
                    },
                               ensure_ascii=False))

    list_to_file(dst_label_file, lines)
Exemple #4
0
def convert_annotations(root_path, split, format):
    """Convert original annotations to mmocr format.

    The annotation format is as the following:
        word_1.png, "flying"
        word_2.png, "today"
        word_3.png, "means"
    After this module, the annotation has been changed to the format below
    (file names are prefixed with the lower-cased split):
        txt:
        train/word_1.png flying
        train/word_2.png today
        train/word_3.png means

        jsonl:
        {"filename": "train/word_1.png", "text": "flying"}
        {"filename": "train/word_2.png", "text": "today"}
        {"filename": "train/word_3.png", "text": "means"}

    The source is read from
    ``<root_path>/annotations/Challenge1_<split>_Task3_GT.txt`` and the result
    is written to ``<root_path>/<split lower-cased>_label.<format>``.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: Train or Test
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``format`` is neither 'txt' nor 'jsonl'.
    """
    assert isinstance(root_path, str)
    assert isinstance(split, str)
    # Reject unsupported formats before reading anything.
    if format not in ('txt', 'jsonl'):
        raise NotImplementedError(f'Unsupported format: {format!r}')

    gt_file = osp.join(root_path, 'annotations',
                       f'Challenge1_{split}_Task3_GT.txt')
    # 'utf-8-sig' drops a leading BOM if the file has one.
    with open(gt_file, 'r', encoding='utf-8-sig') as f:
        annos = f.readlines()

    dst_image_root = osp.join(root_path, split.lower())
    image_dir = osp.basename(dst_image_root)

    lines = []
    for anno in annos:
        anno = anno.rstrip('\n')
        # Tolerate blank lines (e.g. a trailing empty line)
        if not anno.strip():
            continue
        # Text may contain commas or quotes, so split only on the first
        # separator and keep the rest as the transcription.
        dst_img_name, word = anno.split(', "', 1)
        # Drop the closing quote; also works for a last line that has no
        # trailing newline.
        if word.endswith('"'):
            word = word[:-1]

        filename = f'{image_dir}/{dst_img_name}'
        if format == 'txt':
            lines.append(f'{filename} {word}')
        else:
            lines.append(json.dumps({'filename': filename, 'text': word}))

    list_to_file(osp.join(root_path, f'{split.lower()}_label.{format}'), lines)
Exemple #5
0
def main():
    """Crop the tagged word rectangles listed in ``test.xml``.

    Reads ``<root_path>/test.xml``, crops every tagged rectangle of every
    ``image`` node from the referenced image, optionally resizes the crop to
    ``(args.width, args.height)``, and saves it as
    ``<root_path>/image/img_NNNN.jpg`` with a running index over all crops.
    One label line ``image/<crop name> <text> <lexicon size> <lexicon>`` is
    collected per crop and written to ``<root_path>/test_label.txt``.

    Raises:
        Exception: If ``test.xml`` does not exist under the root path.
        OSError: If an image that has tagged rectangles cannot be read.
    """
    args = parse_args()
    root_path = args.root_path

    # inputs
    src_label_file = osp.join(root_path, 'test.xml')
    if not osp.exists(src_label_file):
        raise Exception(
            f'{src_label_file} not exists, please check and try again.')
    src_image_root = root_path

    # outputs
    dst_label_file = osp.join(root_path, 'test_label.txt')
    dst_image_root = osp.join(root_path, 'image')
    os.makedirs(dst_image_root, exist_ok=True)

    tree = ET.parse(src_label_file)
    root = tree.getroot()

    # Count only the nodes that are iterated, so the progress total cannot be
    # inflated by other children of the root element.
    image_nodes = root.findall('image')
    total_img_num = len(image_nodes)

    index = 1
    lines = []
    for i, image_node in enumerate(image_nodes, start=1):
        image_name = image_node.find('imageName').text
        print(f'[{i}/{total_img_num}] Process image: {image_name}')
        lexicon = image_node.find('lex').text.lower()
        lex_size = len(lexicon.split(','))
        src_img_path = osp.join(src_image_root, image_name)
        src_img = cv2.imread(src_img_path)
        for rectangle in image_node.find('taggedRectangles'):
            if src_img is None:
                # cv2.imread signals failure by returning None instead of
                # raising; report the path rather than failing on the slice.
                raise OSError(f'Failed to read image {src_img_path}')
            x = int(rectangle.get('x'))
            y = int(rectangle.get('y'))
            w = int(rectangle.get('width'))
            h = int(rectangle.get('height'))
            # Clamp to zero so that negative coordinates are not interpreted
            # as indices counted from the end of the array.
            row_begin, row_end = max(0, y), max(0, y + h)
            col_begin, col_end = max(0, x), max(0, x + w)
            dst_img = src_img[row_begin:row_end, col_begin:col_end]
            text_label = rectangle.find('tag').text.lower()
            if args.resize:
                dst_img = cv2.resize(dst_img, (args.width, args.height))
            dst_img_name = f'img_{index:04}.jpg'
            index += 1
            dst_img_path = osp.join(dst_image_root, dst_img_name)
            cv2.imwrite(dst_img_path, dst_img)
            lines.append(f'{osp.basename(dst_image_root)}/{dst_img_name} '
                         f'{text_label} {lex_size} {lexicon}')
    list_to_file(dst_label_file, lines)
    print(f'Finish to generate svt testset, '
          f'with label file {dst_label_file}')
Exemple #6
0
def convert_annotations(root_path, split, format):
    """Convert original annotations to mmocr format.

    The annotation format is as the following:
        Crops/val/11/1/1.png weighted
        Crops/val/11/1/2.png 26
        Crops/val/11/1/3.png casting
        Crops/val/11/1/4.png 28
    After this module, the annotation has been changed to the format below:
        jsonl:
        {"filename": "Crops/val/11/1/1.png", "text": "weighted"}
        {"filename": "Crops/val/11/1/2.png", "text": "26"}
        {"filename": "Crops/val/11/1/3.png", "text": "casting"}
        {"filename": "Crops/val/11/1/4.png", "text": "28"}

    The source is read from ``<root_path>/<split>_label.txt`` and the result
    is written to ``<root_path>/<split>_label.jsonl``. Nothing is read or
    written for the 'txt' format.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: training or test
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``format`` is neither 'txt' nor 'jsonl'.
    """
    assert isinstance(root_path, str)
    assert isinstance(split, str)

    if format == 'txt':  # LV has already provided txt format annos
        return
    if format != 'jsonl':
        raise NotImplementedError

    # 'utf-8-sig' drops a leading BOM if the file has one.
    with open(osp.join(root_path, f'{split}_label.txt'),
              'r',
              encoding='utf-8-sig') as f:
        annos = f.readlines()

    lines = []
    for anno in annos:
        anno = anno.strip('\n')
        # Lines from readlines() keep their newline and are never empty
        # before stripping, so test for blank lines explicitly.
        if not anno.strip():
            continue
        # Text may contain spaces (or even 'png '), so split only once, right
        # after the file extension.
        dst_img_name, word = anno.split('png ', 1)
        lines.append(
            json.dumps({
                'filename': dst_img_name + 'png',
                'text': word
            }))

    list_to_file(osp.join(root_path, f'{split}_label.{format}'), lines)
Exemple #7
0
def generate_ann(root_path, split, image_infos, format):
    """Generate cropped annotations and label file.

    Every annotation is cropped with ``crop_img`` and saved under
    ``<root_path>/crops/<split>`` as ``<image name>_<k>.png``, where ``k``
    counts the kept crops of that image starting from 1. Empty crops are
    skipped. The labels are written to ``<root_path>/<split>_label.<format>``.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset, used as directory and label file
            name
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``format`` is neither 'txt' nor 'jsonl'.
    """
    # Reject unsupported formats before creating directories or crops; the
    # check used to run only after the first crop had been written.
    if format not in ('txt', 'jsonl'):
        raise NotImplementedError(f'Unsupported format: {format!r}')

    dst_image_root = osp.join(root_path, 'crops', split)
    dst_label_file = osp.join(root_path, f'{split}_label.{format}')
    os.makedirs(dst_image_root, exist_ok=True)

    lines = []
    for image_info in image_infos:
        index = 1
        src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
        image = mmcv.imread(src_img_path)
        src_img_root = image_info['file_name'].split('.')[0]

        for anno in image_info['anno_info']:
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'], 0, 0)

            # Skip invalid annotations
            if min(dst_img.shape) == 0:
                continue

            dst_img_name = f'{src_img_root}_{index}.png'
            index += 1
            mmcv.imwrite(dst_img, osp.join(dst_image_root, dst_img_name))

            filename = f'{osp.basename(dst_image_root)}/{dst_img_name}'
            if format == 'txt':
                lines.append(f'{filename} {word}')
            else:
                lines.append(json.dumps({'filename': filename, 'text': word}))

    list_to_file(dst_label_file, lines)
Exemple #8
0
def convert_textocr(root_path,
                    dst_image_path,
                    dst_label_filename,
                    annotation_filename,
                    img_start_idx=0,
                    nproc=1):
    """Process every image of an annotation file and write one label file.

    Args:
        root_path (str): Root path of the dataset; also passed to
            ``process_img`` as the source image root
        dst_image_path (str): Output image directory, relative to root_path
        dst_label_filename (str): Label file name, relative to root_path
        annotation_filename (str): Annotation file name, relative to
            root_path
        img_start_idx (int): Offset added to the running image index
        nproc (int): Number of processes

    Returns:
        int: The number of entries in ``annotation['imgs']``.

    Raises:
        Exception: If the annotation file does not exist.
    """
    src_json = osp.join(root_path, annotation_filename)
    if not osp.exists(src_json):
        raise Exception(
            f'{src_json} not exists, please check and try again.')

    # Output locations
    label_path = osp.join(root_path, dst_label_filename)
    crop_dir = osp.join(root_path, dst_image_path)
    os.makedirs(crop_dir, exist_ok=True)

    annotation = mmcv.load(src_json)

    worker = partial(process_img,
                     src_image_root=root_path,
                     dst_image_root=crop_dir)

    # One task per image: (global index, image record, its annotations)
    tasks = []
    for position, info in enumerate(annotation['imgs'].values()):
        anns = [
            annotation['anns'][ann_id]
            for ann_id in annotation['imgToAnns'][info['id']]
        ]
        tasks.append((position + img_start_idx, info, anns))

    per_image_labels = mmcv.track_parallel_progress(worker,
                                                    tasks,
                                                    keep_order=True,
                                                    nproc=nproc)
    # Flatten the per-image label lists into a single list of lines
    merged = [label for labels in per_image_labels for label in labels]
    list_to_file(label_path, merged)
    return len(annotation['imgs'])
def convert_cocotext(root_path,
                     split,
                     preserve_vertical,
                     format,
                     nproc,
                     img_start_idx=0):
    """Process the images of one split and write its label file.

    The annotation file ``annotations/cocotext.v2.json`` is structured as
    follows:
    {
        'anns':{
            '45346':{
                'mask': [468.9,286.7,468.9,295.2,493.0,295.8,493.0,287.2],
                'class': 'machine printed',
                'bbox': [468.9, 286.7, 24.1, 9.1], # x, y, w, h
                'image_id': 217925,
                'id': 45346,
                'language': 'english', # 'english' or 'not english'
                'area': 206.06,
                'utf8_string': 'New',
                'legibility': 'legible', # 'legible' or 'illegible'
            },
            ...
        }
        'imgs':{
            '540965':{
                'id': 540965,
                'set': 'train', # 'train' or 'val'
                'width': 640,
                'height': 360,
                'file_name': 'COCO_train2014_000000540965.jpg'
            },
            ...
        }
        'imgToAnns':{
            '540965': [],
            '260932': [63993, 63994, 63995, 63996, 63997, 63998, 63999],
            ...
        }
    }

    Args:
        root_path (str): Root path to the dataset
        split (str): Dataset split, which should be 'train' or 'val'
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'jsonl' or 'txt'
        nproc (int): Number of processes
        img_start_idx (int): Index of start image

    Returns:
        int: The number of entries in ``annotation['imgs']`` (all splits,
        not only the requested one).

    Raises:
        Exception: If the annotation file does not exist.
    """
    cocotext_json = osp.join(root_path, 'annotations/cocotext.v2.json')
    if not osp.exists(cocotext_json):
        raise Exception(
            f'{cocotext_json} not exists, please check and try again.')

    annotation = mmcv.load(cocotext_json)

    # Output locations
    label_path = osp.join(root_path, f'{split}_label.{format}')
    crop_dir = osp.join(root_path, 'crops', split)
    ignore_dir = osp.join(root_path, 'ignores', split)
    for out_dir in (crop_dir, ignore_dir):
        mmcv.mkdir_or_exist(out_dir)

    worker = partial(process_img,
                     src_image_root=osp.join(root_path, 'imgs'),
                     dst_image_root=crop_dir,
                     ignore_image_root=ignore_dir,
                     preserve_vertical=preserve_vertical,
                     split=split,
                     format=format)

    # One task per image of the requested split. The index still runs over
    # all images, so images of other splits leave gaps in the numbering.
    tasks = []
    for position, info in enumerate(annotation['imgs'].values()):
        if info['set'] != split:
            continue
        # Ids are stored as ints in the records but used as string keys
        ann_ids = annotation['imgToAnns'][str(info['id'])]
        anns = [annotation['anns'][str(ann_id)] for ann_id in ann_ids]
        tasks.append((position + img_start_idx, info, anns))

    per_image_labels = mmcv.track_parallel_progress(worker,
                                                    tasks,
                                                    keep_order=True,
                                                    nproc=nproc)
    # Flatten the per-image label lists into a single list of lines
    merged = [label for labels in per_image_labels for label in labels]
    list_to_file(label_path, merged)

    return len(annotation['imgs'])
Exemple #10
0
def convert_lsvt(root_path,
                 split,
                 ratio,
                 preserve_vertical,
                 format,
                 nproc,
                 img_start_idx=0):
    """Collect the annotation information and crop the images.

    The annotation format is as the following:
    [
        {'gt_1234': # 'gt_1234' is file name
            [
                {
                    'transcription': '一站式购物中心',
                    'points': [[45, 272], [215, 273], [212, 296], [45, 290]]
                    'illegibility': False
                }, ...
            ]
        }
    ]

    When ``ratio`` is positive, every ``floor(1 / ratio)``-th image (starting
    with the first one) goes to the val set and the rest to the training set;
    otherwise all images are used for training.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: train or val
        ratio (float): Split ratio for val set, at most 1
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, whether be txt or jsonl
        nproc (int): The number of process to collect annotations
        img_start_idx (int): Index of start image

    Returns:
        int: The number of images of the split that exist on disk and were
        processed.

    Raises:
        NotImplementedError: If ``split`` is neither 'train' nor 'val'.
        ValueError: If ``ratio`` is greater than 1.
        Exception: If the annotation file does not exist.
    """
    # Validate arguments before loading the annotations or creating any
    # directory.
    if split not in ('train', 'val'):
        raise NotImplementedError
    if ratio > 1:
        # floor(1 / ratio) would be 0 and the modulo below would divide by 0
        raise ValueError(f'ratio must not exceed 1, got {ratio}')

    annotation_path = osp.join(root_path, 'annotations/train_full_labels.json')
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} not exists, please check and try again.')

    annotation = mmcv.load(annotation_path)
    # outputs
    dst_label_file = osp.join(root_path, f'{split}_label.{format}')
    dst_image_root = osp.join(root_path, 'crops', split)
    ignore_image_root = osp.join(root_path, 'ignores', split)
    src_image_root = osp.join(root_path, 'imgs')
    mmcv.mkdir_or_exist(dst_image_root)
    mmcv.mkdir_or_exist(ignore_image_root)

    # No src_image_root here: each task carries the full image path.
    process_img_with_path = partial(
        process_img,
        dst_image_root=dst_image_root,
        ignore_image_root=ignore_image_root,
        preserve_vertical=preserve_vertical,
        split=split,
        format=format)

    img_prefixes = annotation.keys()

    trn_files, val_files = [], []
    if ratio > 0:
        step = math.floor(1 / ratio)
        for i, file in enumerate(img_prefixes):
            if i % step:
                trn_files.append(file)
            else:
                val_files.append(file)
    else:
        trn_files, val_files = img_prefixes, []
    print(f'training #{len(trn_files)}, val #{len(val_files)}')

    img_prefixes = trn_files if split == 'train' else val_files

    # NOTE(review): the task index counts skipped (missing) images too, while
    # the return value counts only existing ones. Confirm that callers which
    # chain the return value into img_start_idx cannot get overlapping
    # indices.
    tasks = []
    for img_idx, prefix in enumerate(img_prefixes):
        img_file = osp.join(src_image_root, prefix + '.jpg')
        # Skip not exist images
        if not osp.exists(img_file):
            continue
        img_info = {'file_name': img_file}
        tasks.append((img_idx + img_start_idx, img_info, annotation[prefix]))

    labels_list = mmcv.track_parallel_progress(
        process_img_with_path, tasks, keep_order=True, nproc=nproc)
    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    list_to_file(dst_label_file, final_labels)

    return len(tasks)
def convert_hiertext(
    root_path,
    split,
    level,
    preserve_vertical,
    format,
    nproc,
):
    """Collect the annotation information and crop the images.

    The annotation format is as the following:
    {
        "info": {
            "date": "release date",
            "version": "current version"
        },
        "annotations": [  // List of dictionaries, one for each image.
            {
            "image_id": "the filename of corresponding image.",
            "image_width": image_width,  // (int) The image width.
            "image_height": image_height, // (int) The image height.
            "paragraphs": [  // List of paragraphs.
                {
                "vertices": [[x1, y1], [x2, y2],...,[xn, yn]]
                "legible": true
                "lines": [
                    {
                    "vertices": [[x1, y1], [x2, y2],...,[x4, y4]]
                    "text": L
                    "legible": true,
                    "handwritten": false
                    "vertical": false,
                    "words": [
                        {
                        "vertices": [[x1, y1], [x2, y2],...,[xm, ym]]
                        "text": "the text content of this word",
                        "legible": true
                        "handwritten": false,
                        "vertical": false,
                        }, ...
                    ]
                    }, ...
                ]
                }, ...
            ]
            }, ...
        ]
        }

    The cropping itself is delegated to ``process_img``; the label lines it
    returns per image are concatenated and written to
    ``<root_path>/<split>_label.<format>``.

    Args:
        root_path (str): Root path to the dataset
        split (str): Dataset split, which should be 'train' or 'val'
        level (str): Crop word or line level instances
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'jsonl' or 'txt'
        nproc (int): Number of processes

    Returns:
        None

    Raises:
        Exception: If the annotation file does not exist.
    """

    annotation_path = osp.join(root_path, 'annotations/' + split + '.jsonl')
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} not exists, please check and try again.')

    # Despite the '.jsonl' suffix the file is parsed as one JSON document.
    # Use a context manager so the handle is closed, and decode as UTF-8
    # instead of relying on the locale default.
    with open(annotation_path, 'r', encoding='utf-8') as f:
        annotation = json.load(f)['annotations']
    # outputs
    dst_label_file = osp.join(root_path, f'{split}_label.{format}')
    dst_image_root = osp.join(root_path, 'crops', split)
    ignore_image_root = osp.join(root_path, 'ignores', split)
    src_image_root = osp.join(root_path, 'imgs', split)
    mmcv.mkdir_or_exist(dst_image_root)
    mmcv.mkdir_or_exist(ignore_image_root)

    process_img_with_path = partial(process_img,
                                    src_image_root=src_image_root,
                                    dst_image_root=dst_image_root,
                                    ignore_image_root=ignore_image_root,
                                    level=level,
                                    preserve_vertical=preserve_vertical,
                                    split=split,
                                    format=format)
    # One task per image: (index, image record)
    tasks = list(enumerate(annotation))
    labels_list = mmcv.track_parallel_progress(process_img_with_path,
                                               tasks,
                                               keep_order=True,
                                               nproc=nproc)

    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    list_to_file(dst_label_file, final_labels)
Exemple #12
0
def generate_ann(root_path, image_infos, preserve_vertical, val_ratio, format):
    """Generate cropped annotations and label txt file.

    When ``val_ratio`` is non-zero the images are first divided with
    ``split_train_val_list`` into a 'training' and a 'val' part; otherwise
    everything is treated as 'training'. For each part, crops are written to
    ``<root_path>/crops/<split>`` and the labels to
    ``<root_path>/<split>_label.<format>``. When ``preserve_vertical`` is
    false, training crops whose height is more than twice their width are
    written to ``<root_path>/ignores/<split>`` instead and get no label entry.

    Args:
        root_path (str): The root path of the dataset
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        preserve_vertical (bool): Whether to preserve vertical texts
        val_ratio (float): Split ratio for val set
        format (str): Annotation format, should be either 'txt' or 'jsonl'

    Raises:
        NotImplementedError: If ``format`` is neither 'txt' nor 'jsonl'.
    """

    assert val_ratio <= 1.
    # Reject unsupported formats before any directory or crop is created.
    if format not in ('txt', 'jsonl'):
        raise NotImplementedError(f'Unsupported format: {format!r}')

    if val_ratio:
        image_infos = split_train_val_list(image_infos, val_ratio)
        splits = ['training', 'val']

    else:
        image_infos = [image_infos]
        splits = ['training']

    for i, split in enumerate(splits):
        dst_image_root = osp.join(root_path, 'crops', split)
        ignore_image_root = osp.join(root_path, 'ignores', split)
        dst_label_file = osp.join(root_path, f'{split}_label.{format}')
        os.makedirs(dst_image_root, exist_ok=True)

        lines = []
        for image_info in image_infos[i]:
            src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
            image = mmcv.imread(src_img_path)
            src_img_root = image_info['file_name'].split('.')[0]

            # The index counts every annotation, including the skipped ones.
            for index, anno in enumerate(image_info['anno_info'], start=1):
                word = anno['word']
                dst_img = crop_img(image, anno['bbox'], 0, 0)
                h, w, _ = dst_img.shape
                dst_img_name = f'{src_img_root}_{index}.png'

                # Skip invalid annotations
                if min(dst_img.shape) == 0:
                    continue
                # Skip vertical texts: keep the crop for inspection, but do
                # not label it, because the label path would point into the
                # crop directory where this image does not exist.
                if not preserve_vertical and h / w > 2 and split == 'training':
                    mmcv.imwrite(dst_img,
                                 osp.join(ignore_image_root, dst_img_name))
                    continue

                mmcv.imwrite(dst_img, osp.join(dst_image_root, dst_img_name))
                filename = f'{osp.basename(dst_image_root)}/{dst_img_name}'
                if format == 'txt':
                    lines.append(f'{filename} {word}')
                else:
                    lines.append(
                        json.dumps({
                            'filename': filename,
                            'text': word
                        },
                                   ensure_ascii=False))
        list_to_file(dst_label_file, lines)