Example 1
# Imports needed to run this test; MMOCR 0.x is assumed to export crop_img
# from mmocr.utils (adjust the import path if your version differs).
import math

import numpy as np
import pytest

from mmocr.utils import crop_img


def test_min_rect_crop():
    dummy_img = np.ones((600, 600, 3), dtype=np.uint8)
    dummy_box = [20, 20, 120, 20, 120, 40, 20, 40]

    cropped_img = crop_img(dummy_img, dummy_box)

    with pytest.raises(AssertionError):
        crop_img(dummy_img, [])
    with pytest.raises(AssertionError):
        crop_img(dummy_img, [20, 40, 40, 20])

    assert math.isclose(cropped_img.shape[0], 20)
    assert math.isclose(cropped_img.shape[1], 100)
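
Throughout these examples, the box passed to crop_img is a flat list of eight coordinates (x1, y1, ..., x4, y4) tracing the text quadrilateral clockwise from the top-left corner, and the crop covers its axis-aligned bounding rectangle. A quick sketch of the geometry the test above asserts:

# Quad spanning x in [20, 120] and y in [20, 40], as in the test above.
box = [20, 20, 120, 20, 120, 40, 20, 40]
xs, ys = box[0::2], box[1::2]
# The crop is the bounding rectangle of the quad, so its shape is
# (max(ys) - min(ys)) x (max(xs) - min(xs)) = 20 x 100 pixels.
print(max(ys) - min(ys), max(xs) - min(xs))  # 20 100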
Example 2
def generate_ann(root_path, split, image_infos):
    """Generate cropped annotations and label txt file.

    Args:
        root_path (str): The root path of the Totaltext dataset
        split (str): The split of dataset. Namely: training or test
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
    """

    dst_image_root = osp.join(root_path, 'dst_imgs', split)
    if split == 'training':
        dst_label_file = osp.join(root_path, 'train_label.txt')
    elif split == 'test':
        dst_label_file = osp.join(root_path, 'test_label.txt')
    os.makedirs(dst_image_root, exist_ok=True)

    lines = []
    for image_info in image_infos:
        index = 1
        src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
        image = mmcv.imread(src_img_path)
        src_img_root = osp.splitext(image_info['file_name'])[0].split('/')[1]

        for anno in image_info['anno_info']:
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'])
            dst_img_name = f'{src_img_root}_{index}.png'
            index += 1
            dst_img_path = osp.join(dst_image_root, dst_img_name)
            mmcv.imwrite(dst_img, dst_img_path)
            lines.append(f'{osp.basename(dst_image_root)}/{dst_img_name} '
                         f'{word}')
    list_to_file(dst_label_file, lines)
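
The structure of image_infos is not shown in these snippets; inferred from the keys accessed above ('file_name', 'anno_info', 'word', 'bbox'), a hypothetical entry might look like this:

# Hypothetical image_infos entry, inferred from the keys used above.
image_infos = [{
    'file_name': 'Train/img1001.jpg',  # relative to <root_path>/imgs
    'anno_info': [{
        'word': 'HELLO',
        # Eight coordinates of the text quadrilateral, clockwise.
        'bbox': [20, 20, 120, 20, 120, 40, 20, 40]
    }]
}]
# generate_ann('data/totaltext', 'training', image_infos)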
Example 3
def generate_ann(root_path, split, image_infos, preserve_vertical, format):
    """Generate cropped annotations and label txt file.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: training or val
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'txt' or 'jsonl'
    """

    dst_image_root = osp.join(root_path, 'crops', split)
    ignore_image_root = osp.join(root_path, 'ignores', split)
    if split == 'training':
        dst_label_file = osp.join(root_path, f'train_label.{format}')
    elif split == 'val':
        dst_label_file = osp.join(root_path, f'val_label.{format}')
    mmcv.mkdir_or_exist(dst_image_root)
    mmcv.mkdir_or_exist(ignore_image_root)

    lines = []
    for image_info in image_infos:
        index = 1
        src_img_path = osp.join(root_path, 'imgs', split,
                                image_info['file_name'])
        image = mmcv.imread(src_img_path)
        src_img_root = image_info['file_name'].split('.')[0]

        for anno in image_info['anno_info']:
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'], 0, 0)
            h, w, _ = dst_img.shape

            dst_img_name = f'{src_img_root}_{index}.png'
            index += 1
            # Skip invalid annotations
            if min(dst_img.shape) == 0 or len(word) == 0:
                continue
            # Filter out vertical texts
            if not preserve_vertical and h / w > 2 and split == 'training':
                dst_img_path = osp.join(ignore_image_root, dst_img_name)
            else:
                dst_img_path = osp.join(dst_image_root, dst_img_name)
            mmcv.imwrite(dst_img, dst_img_path)

            if format == 'txt':
                lines.append(f'{osp.basename(dst_image_root)}/{dst_img_name} '
                             f'{word}')
            elif format == 'jsonl':
                lines.append(
                    json.dumps({
                        'filename':
                        f'{osp.basename(dst_image_root)}/{dst_img_name}',
                        'text': word
                    }))
            else:
                raise NotImplementedError

    list_to_file(dst_label_file, lines)
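
The h / w > 2 check above treats crops more than twice as tall as they are wide as vertical text and, during training, writes them under ignores/ instead of crops/ so they stay available for inspection. A quick numeric illustration:

h, w = 90, 30
print(h / w > 2)  # True: routed to the ignores directory during training
h, w = 30, 90
print(h / w > 2)  # False: kept as a normal training crop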
Example 4
def det_and_recog_inference(args, det_model, recog_model):
    image_path = args.img
    end2end_res = {'filename': image_path}
    end2end_res['result'] = []

    image = mmcv.imread(image_path)
    det_result = model_inference(det_model, image)
    bboxes = det_result['boundary_result']

    for bbox in bboxes:
        box_res = {}
        box_res['box'] = [round(x) for x in bbox[:-1]]
        box_res['box_score'] = float(bbox[-1])
        box = bbox[:8]
        if len(bbox) > 9:
            min_x = min(bbox[0:-1:2])
            min_y = min(bbox[1:-1:2])
            max_x = max(bbox[0:-1:2])
            max_y = max(bbox[1:-1:2])
            box = [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]
        box_img = crop_img(image, box)

        recog_result = model_inference(recog_model, box_img)

        text = recog_result['text']
        text_score = recog_result['score']
        if isinstance(text_score, list):
            text_score = sum(text_score) / max(1, len(text))
        box_res['text'] = text
        box_res['text_score'] = text_score
        end2end_res['result'].append(box_res)

    return end2end_res
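
The len(bbox) > 9 branch above collapses a polygon boundary (2N coordinates followed by a confidence score) into its axis-aligned bounding quad before cropping. The same logic, factored into a standalone helper for clarity (a sketch, not an MMOCR API):

def poly_to_quad(bbox):
    """Collapse [x1, y1, ..., xn, yn, score] into the axis-aligned quad
    [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]."""
    xs = bbox[0:-1:2]  # every other value, skipping the trailing score
    ys = bbox[1:-1:2]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)
    return [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]

With it, the loop body reduces to box = poly_to_quad(bbox) if len(bbox) > 9 else bbox[:8].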
Example 5
def generate_ann(root_path, split, image_infos, preserve_vertical, format):
    """Generate cropped annotations and label txt file.

    Args:
        root_path (str): The root path of the dataset
        split (str): The split of dataset. Namely: training or test
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'txt' or 'jsonl'
    """

    dst_image_root = osp.join(root_path, 'dst_imgs', split)
    if split == 'training':
        dst_label_file = osp.join(root_path, f'train_label.{format}')
    elif split == 'test':
        dst_label_file = osp.join(root_path, f'test_label.{format}')
    os.makedirs(dst_image_root, exist_ok=True)

    lines = []
    for image_info in image_infos:
        index = 1
        src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
        image = mmcv.imread(src_img_path)
        src_img_root = image_info['file_name'].split('.')[0]

        for anno in image_info['anno_info']:
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'])
            h, w, _ = dst_img.shape

            # Skip invalid annotations
            if min(dst_img.shape) == 0:
                continue
            # Skip vertical texts
            if not preserve_vertical and h / w > 2:
                continue

            dst_img_name = f'{src_img_root}_{index}.png'
            index += 1
            dst_img_path = osp.join(dst_image_root, dst_img_name)
            mmcv.imwrite(dst_img, dst_img_path)
            if format == 'txt':
                lines.append(f'{osp.basename(dst_image_root)}/{dst_img_name} '
                             f'{word}')
            elif format == 'jsonl':
                lines.append(
                    json.dumps(
                        {
                            'filename':
                            f'{osp.basename(dst_image_root)}/{dst_img_name}',
                            'text': word
                        },
                        ensure_ascii=False))
            else:
                raise NotImplementedError

    list_to_file(dst_label_file, lines)
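
Note the ensure_ascii=False here, which Example 3 omits: without it, json.dumps escapes every non-ASCII character in the word into a \uXXXX sequence. A quick illustration:

import json

print(json.dumps({'text': 'café'}))                      # {"text": "caf\u00e9"}
print(json.dumps({'text': 'café'}, ensure_ascii=False))  # {"text": "café"}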
Example 6
def det_and_recog_inference(args, det_model, recog_model):
    image_path = args.img
    end2end_res = {'filename': image_path}
    end2end_res['result'] = []

    image = mmcv.imread(image_path)
    det_result = model_inference(det_model, image)
    bboxes = det_result['boundary_result']

    box_imgs = []
    for bbox in bboxes:
        box_res = {}
        box_res['box'] = [round(x) for x in bbox[:-1]]
        box_res['box_score'] = float(bbox[-1])
        box = bbox[:8]
        if len(bbox) > 9:
            min_x = min(bbox[0:-1:2])
            min_y = min(bbox[1:-1:2])
            max_x = max(bbox[0:-1:2])
            max_y = max(bbox[1:-1:2])
            box = [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]
        box_img = crop_img(image, box)
        if args.batch_mode:
            box_imgs.append(box_img)
        else:
            recog_result = model_inference(recog_model, box_img)
            text = recog_result['text']
            text_score = recog_result['score']
            if isinstance(text_score, list):
                text_score = sum(text_score) / max(1, len(text))
            box_res['text'] = text
            box_res['text_score'] = text_score

        end2end_res['result'].append(box_res)

    if args.batch_mode:
        batch_size = args.batch_size
        for chunk_idx in range(len(box_imgs) // batch_size + 1):
            start_idx = chunk_idx * batch_size
            end_idx = (chunk_idx + 1) * batch_size
            chunk_box_imgs = box_imgs[start_idx:end_idx]
            if len(chunk_box_imgs) == 0:
                continue
            recog_results = model_inference(recog_model,
                                            chunk_box_imgs,
                                            batch_mode=True)
            for i, recog_result in enumerate(recog_results):
                text = recog_result['text']
                text_score = recog_result['score']
                if isinstance(text_score, list):
                    text_score = sum(text_score) / max(1, len(text))
                end2end_res['result'][start_idx + i]['text'] = text
                end2end_res['result'][start_idx + i]['text_score'] = text_score

    return end2end_res
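
The batch loop above walks box_imgs in fixed-size chunks; the extra iteration produced by len(box_imgs) // batch_size + 1 is harmless because empty chunks are skipped. The same pattern, as a small generic sketch:

def chunks(items, size):
    """Yield (start_index, chunk) pairs; the last chunk may be shorter."""
    for start in range(0, len(items), size):
        yield start, items[start:start + size]

# for start_idx, chunk_box_imgs in chunks(box_imgs, batch_size):
#     recog_results = model_inference(recog_model, chunk_box_imgs,
#                                     batch_mode=True)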
Example 7
def generate_ann(root_path, split, image_infos, format):

    dst_image_root = osp.join(root_path, 'crops', split)
    dst_label_file = osp.join(root_path, f'{split}_label.{format}')
    os.makedirs(dst_image_root, exist_ok=True)

    lines = []
    for image_info in image_infos:
        index = 1
        src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
        image = mmcv.imread(src_img_path)
        src_img_root = image_info['file_name'].split('.')[0]

        for anno in image_info['anno_info']:
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'], 0, 0)

            # Skip invalid annotations
            if min(dst_img.shape) == 0:
                continue

            dst_img_name = f'{src_img_root}_{index}.png'
            index += 1
            dst_img_path = osp.join(dst_image_root, dst_img_name)
            mmcv.imwrite(dst_img, dst_img_path)

            if format == 'txt':
                lines.append(f'{osp.basename(dst_image_root)}/{dst_img_name} '
                             f'{word}')
            elif format == 'jsonl':
                lines.append(
                    json.dumps({
                        'filename':
                        f'{osp.basename(dst_image_root)}/{dst_img_name}',
                        'text': word
                    }))
            else:
                raise NotImplementedError

    list_to_file(dst_label_file, lines)
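
For a crop saved as training/img1_1.png containing the word 'HELLO' (hypothetical values), the two supported formats produce these label lines:

import json

filename, word = 'training/img1_1.png', 'HELLO'
print(f'{filename} {word}')                              # txt format
print(json.dumps({'filename': filename, 'text': word}))  # jsonl format
# training/img1_1.png HELLO
# {"filename": "training/img1_1.png", "text": "HELLO"}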
Example 8
def parse_labelme_json(json_file,
                       img_dir,
                       out_dir,
                       tasks,
                       ignore_marker='###',
                       recog_format='jsonl',
                       warp_flag=False):
    invalid_res = [[], [], []]

    json_obj = mmcv.load(json_file)

    img_file = osp.basename(json_obj['imagePath'])
    img_full_path = osp.join(img_dir, img_file)

    img_width = json_obj['imageWidth']
    img_height = json_obj['imageHeight']
    if 'recog' in tasks:
        src_img = mmcv.imread(img_full_path)
        img_basename = osp.splitext(img_file)[0]
        sub_dir = osp.join(out_dir, 'crops', img_basename)
        mmcv.mkdir_or_exist(sub_dir)

    det_line_json_list = []
    recog_crop_line_str_list = []
    recog_warp_line_str_list = []

    shape_info = json_obj['shapes']
    idx = 0
    annos = []
    for box_info in shape_info:
        shape = box_info['shape_type']
        if shape not in ['rectangle', 'polygon']:
            msg = 'Only \'rectangle\' and \'polygon\' boxes are supported. '
            msg += f'Boxes of type {shape} will be discarded.'
            warnings.warn(msg)
            return invalid_res
        poly = []
        box_points = box_info['points']
        for point in box_points:
            poly.extend([int(x) for x in point])
        x_list = poly[0::2]
        y_list = poly[1::2]
        quad = []
        if shape == 'rectangle':
            warp_flag = False
            quad = [
                poly[0], poly[1], poly[2], poly[1], poly[2], poly[3], poly[0],
                poly[3]
            ]
        else:
            if len(poly) < 8 or len(poly) % 2 != 0:
                msg = f'Invalid polygon {poly}. '
                msg += 'The polygon is expected to have an even '
                msg += 'number of coordinates, and at least 8, in MMOCR.'
                warnings.warn(msg)
                return invalid_res
            if len(poly) == 8:
                quad = poly
            else:
                warp_flag = False
                x_min, x_max, y_min, y_max = min(x_list), max(x_list), min(
                    y_list), max(y_list)
                quad = [x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max]
        text_label = box_info['label']
        # for textdet
        anno = {}
        anno['iscrowd'] = 0 if text_label != ignore_marker else 1
        anno['category_id'] = 1
        w = max(x_list) - min(x_list)
        h = max(y_list) - min(y_list)
        anno['bbox'] = [min(x_list), min(y_list), w, h]
        if shape == 'rectangle':
            anno['segmentation'] = [quad]
        else:
            anno['segmentation'] = [poly]
        anno['text'] = text_label
        annos.append(anno)
        # for textrecog
        if 'recog' in tasks:
            if text_label == ignore_marker or len(text_label) == 0:
                continue
            cropped_img = crop_img(src_img, quad)
            img_path_cropped_img = osp.join(sub_dir, f'crop_{idx}.jpg')
            mmcv.imwrite(cropped_img, img_path_cropped_img)
            if recog_format == 'txt':
                recog_crop_line_str_list.append(
                    f'{img_path_cropped_img} {text_label}')
            elif recog_format == 'jsonl':
                recog_crop_line_str_list.append(
                    json.dumps({
                        'filename': img_path_cropped_img,
                        'text': text_label
                    }))
            else:
                raise NotImplementedError
            if warp_flag:
                warpped_img = warp_img(src_img, quad)
                img_path_warpped_img = osp.join(sub_dir, f'warp_{idx}.jpg')
                mmcv.imwrite(warpped_img, img_path_warpped_img)
                if recog_format == 'txt':
                    recog_warp_line_str_list.append(
                        f'{img_path_warpped_img} {text_label}')
                elif recog_format == 'jsonl':
                    recog_warp_line_str_list.append(
                        json.dumps({
                            'filename': img_path_warpped_img,
                            'text': text_label
                        }))
        idx += 1

    line_json = {
        'file_name': img_file,
        'height': img_height,
        'width': img_width,
        'annotations': annos
    }
    det_line_json_list.append(json.dumps(line_json, ensure_ascii=False))

    return [
        det_line_json_list, recog_crop_line_str_list, recog_warp_line_str_list
    ]
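
A hedged usage sketch with hypothetical paths: the three returned lists map to the detection labels and the cropped/warped recognition labels, and list_to_file (used in the other examples) is assumed to write one entry per line:

# Hypothetical driver; paths are placeholders.
det_lines, crop_lines, warp_lines = parse_labelme_json(
    'annotations/img1.json', 'imgs', 'out',
    tasks=['det', 'recog'], warp_flag=True)
list_to_file('out/det_label.jsonl', det_lines)
list_to_file('out/recog_crop_label.jsonl', crop_lines)
list_to_file('out/recog_warp_label.jsonl', warp_lines)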
Example 9
    def det_recog_kie_inference(self, det_model, recog_model, kie_model=None):
        end2end_res = []
        # Find bounding boxes in the images (text detection)
        det_result = self.single_inference(det_model, self.args.arrays,
                                           self.args.batch_mode,
                                           self.args.det_batch_size)
        bboxes_list = [res['boundary_result'] for res in det_result]

        if kie_model:
            kie_dataset = KIEDataset(
                dict_file=kie_model.cfg.data.test.dict_file)

        # For each bounding box, the image is cropped and
        # sent to the recognition model either one by one
        # or all together depending on the batch_mode
        for filename, arr, bboxes, out_file in zip(self.args.filenames,
                                                   self.args.arrays,
                                                   bboxes_list,
                                                   self.args.output):
            img_e2e_res = {}
            img_e2e_res['filename'] = filename
            img_e2e_res['result'] = []
            box_imgs = []
            for bbox in bboxes:
                box_res = {}
                box_res['box'] = [round(x) for x in bbox[:-1]]
                box_res['box_score'] = float(bbox[-1])
                box = bbox[:8]
                if len(bbox) > 9:
                    min_x = min(bbox[0:-1:2])
                    min_y = min(bbox[1:-1:2])
                    max_x = max(bbox[0:-1:2])
                    max_y = max(bbox[1:-1:2])
                    box = [
                        min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y
                    ]
                box_img = crop_img(arr, box)
                if self.args.batch_mode:
                    box_imgs.append(box_img)
                else:
                    if recog_model == 'Tesseract_recog':
                        recog_result = self.single_inference(recog_model,
                                                             box_img,
                                                             batch_mode=True)
                    else:
                        recog_result = model_inference(recog_model, box_img)
                    text = recog_result['text']
                    text_score = recog_result['score']
                    if isinstance(text_score, list):
                        text_score = sum(text_score) / max(1, len(text))
                    box_res['text'] = text
                    box_res['text_score'] = text_score
                img_e2e_res['result'].append(box_res)

            if self.args.batch_mode:
                recog_results = self.single_inference(
                    recog_model, box_imgs, True, self.args.recog_batch_size)
                for i, recog_result in enumerate(recog_results):
                    text = recog_result['text']
                    text_score = recog_result['score']
                    if isinstance(text_score, (list, tuple)):
                        text_score = sum(text_score) / max(1, len(text))
                    img_e2e_res['result'][i]['text'] = text
                    img_e2e_res['result'][i]['text_score'] = text_score

            if self.args.merge:
                img_e2e_res['result'] = stitch_boxes_into_lines(
                    img_e2e_res['result'], self.args.merge_xdist, 0.5)

            if kie_model:
                annotations = copy.deepcopy(img_e2e_res['result'])
                # Customized for kie_dataset, which
                # assumes that boxes are represented by only 4 points
                for i, ann in enumerate(annotations):
                    min_x = min(ann['box'][::2])
                    min_y = min(ann['box'][1::2])
                    max_x = max(ann['box'][::2])
                    max_y = max(ann['box'][1::2])
                    annotations[i]['box'] = [
                        min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y
                    ]
                ann_info = kie_dataset._parse_anno_info(annotations)
                ann_info['ori_bboxes'] = ann_info.get('ori_bboxes',
                                                      ann_info['bboxes'])
                ann_info['gt_bboxes'] = ann_info.get('gt_bboxes',
                                                     ann_info['bboxes'])
                kie_result, data = model_inference(
                    kie_model,
                    arr,
                    ann=ann_info,
                    return_data=True,
                    batch_mode=self.args.batch_mode)
                # visualize KIE results
                self.visualize_kie_output(kie_model,
                                          data,
                                          kie_result,
                                          out_file=out_file,
                                          show=self.args.imshow)
                gt_bboxes = data['gt_bboxes'].data.numpy().tolist()
                labels = self.generate_kie_labels(kie_result, gt_bboxes,
                                                  kie_model.class_list)
                for i in range(len(gt_bboxes)):
                    img_e2e_res['result'][i]['label'] = labels[i][0]
                    img_e2e_res['result'][i]['label_score'] = labels[i][1]

            end2end_res.append(img_e2e_res)
        return end2end_res
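
When merge is set, stitch_boxes_into_lines groups box-level results into text lines; judging from the call above, it takes the result list, a maximum horizontal gap (merge_xdist), and a minimum vertical overlap ratio. A hedged sketch of the expected input and output:

# Two fragments on the same text line (hypothetical values):
results = [
    {'box': [0, 0, 40, 0, 40, 10, 0, 10], 'text': 'hello'},
    {'box': [45, 0, 90, 0, 90, 10, 45, 10], 'text': 'world'},
]
# merged = stitch_boxes_into_lines(results, max_x_dist=10,
#                                  min_y_overlap_ratio=0.5)
# Expected: one entry whose 'text' is 'hello world' and whose 'box'
# covers both fragments.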
Example 10
def generate_ann(root_path, image_infos, preserve_vertical, val_ratio, format):
    """Generate cropped annotations and label txt file.

    Args:
        root_path (str): The root path of the dataset
        image_infos (list[dict]): A list of dicts of the img and
            annotation information
        preserve_vertical (bool): Whether to preserve vertical texts
        val_ratio (float): Split ratio for val set
        format (str): Annotation format, should be either 'txt' or 'jsonl'
    """

    assert val_ratio <= 1.

    if val_ratio:
        image_infos = split_train_val_list(image_infos, val_ratio)
        splits = ['training', 'val']

    else:
        image_infos = [image_infos]
        splits = ['training']

    for i, split in enumerate(splits):
        dst_image_root = osp.join(root_path, 'crops', split)
        ignore_image_root = osp.join(root_path, 'ignores', split)
        dst_label_file = osp.join(root_path, f'{split}_label.{format}')
        os.makedirs(dst_image_root, exist_ok=True)

        lines = []
        for image_info in image_infos[i]:
            index = 1
            src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
            image = mmcv.imread(src_img_path)
            src_img_root = image_info['file_name'].split('.')[0]

            for anno in image_info['anno_info']:
                word = anno['word']
                dst_img = crop_img(image, anno['bbox'], 0, 0)
                h, w, _ = dst_img.shape

                dst_img_name = f'{src_img_root}_{index}.png'
                index += 1
                # Skip invalid annotations
                if min(dst_img.shape) == 0:
                    continue
                # Skip vertical texts
                if not preserve_vertical and h / w > 2 and split == 'training':
                    dst_img_path = osp.join(ignore_image_root, dst_img_name)
                else:
                    dst_img_path = osp.join(dst_image_root, dst_img_name)
                mmcv.imwrite(dst_img, dst_img_path)
                filename = f'{osp.basename(dst_image_root)}/{dst_img_name}'
                if format == 'txt':
                    lines.append(f'{filename} {word}')
                elif format == 'jsonl':
                    lines.append(
                        json.dumps({
                            'filename': filename,
                            'text': word
                        },
                                   ensure_ascii=False))
                else:
                    raise NotImplementedError
        list_to_file(dst_label_file, lines)
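
split_train_val_list is not defined in these snippets; a minimal sketch consistent with how it is used above (returning a [train, val] pair of lists for a given validation ratio) might look like this:

def split_train_val_list(full_list, val_ratio):
    """Hypothetical sketch: split a list into [train, val] by val_ratio."""
    n_val = int(len(full_list) * val_ratio)
    return [full_list[n_val:], full_list[:n_val]]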