예제 #1
0
def cli(format_type, model, repolygonize, files):
    """
    A small script extracting rectified line polygons as defined in either ALTO or
    PageXML files or run a model to do the same.

    For every extracted line an image ``<stem>.<idx>.jpg`` is written. When the
    segmentation comes from XML files, the line's text is additionally written
    to ``<stem>.<idx>.gt.txt``.

    Args:
        format_type: Passed through to ``xml.preparse_xml_data``; only used
                     when no model is given.
        model: Path of a segmentation model loaded with
               ``vgsl.TorchVGSLModel.load_model``, or ``None`` to take the
               lines from the XML files in ``files``.
        repolygonize: Passed through to ``xml.preparse_xml_data``; only used
                      when no model is given.
        files: Input files: XML documents when ``model`` is ``None``, images
               otherwise. When empty, the command help is printed and the
               command exits.
    """
    if not files:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image
    from os.path import splitext
    from kraken import blla
    from kraken.lib import segmentation, vgsl, xml

    if model is None:
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            data = xml.preparse_xml_data([doc], format_type, repolygonize=repolygonize)
            if not data:
                continue
            # All output files are placed next to the image referenced by the
            # first parsed line.
            image_path = data[0]['image']
            stem = splitext(image_path)[0]
            bounds = {'type': 'baselines',
                      'lines': [{'boundary': t['boundary'],
                                 'baseline': t['baseline'],
                                 'text': t['text']} for t in data]}
            # The context manager releases the page image's file handle once
            # all of its lines have been extracted.
            with Image.open(image_path) as page:
                for idx, (im, box) in enumerate(segmentation.extract_polygons(page, bounds)):
                    click.echo('.', nl=False)
                    im.save(f'{stem}.{idx}.jpg')
                    # Explicit UTF-8 so the transcription does not depend on
                    # the platform's default encoding.
                    with open(f'{stem}.{idx}.gt.txt', 'w', encoding='utf-8') as fp:
                        fp.write(box['text'])
    else:
        net = vgsl.TorchVGSLModel.load_model(model)
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            stem = splitext(doc)[0]
            with Image.open(doc) as full_im:
                bounds = blla.segment(full_im, model=net)
                for idx, (im, box) in enumerate(segmentation.extract_polygons(full_im, bounds)):
                    click.echo('.', nl=False)
                    im.save(f'{stem}.{idx}.jpg')
예제 #2
0
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segments a single page image and dumps the result as JSON into ``output``.

    Args:
        legacy: When truthy ``pageseg.segment`` is used, otherwise
                ``blla.segment``.
        model: Forwarded to ``blla.segment``.
        text_direction: Forwarded to both segmenters.
        scale, maxcolseps, black_colseps, remove_hlines, pad: Forwarded to
                ``pageseg.segment`` only.
        mask: Optional path of a mask image; when truthy it is opened and the
              resulting image is handed to the segmenter.
        device: Forwarded to ``blla.segment``.
        input: Path of the page image. On the first process of a context
               whose input format is not ``'image'`` it is first run through
               the matching input parser, whose ``'image'`` entry replaces it.
        output: Destination handed to ``open_file``.

    Raises:
        click.BadParameter: If the page image or the mask cannot be opened.
        Exception: Anything raised by the segmenter is re-raised after a
                   failure marker has been printed.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    meta = click.get_current_context().meta

    if meta['first_process']:
        input_format = meta['input_format_type']
        if input_format != 'image':
            parse = get_input_parser(input_format)
            input = parse(input)['image']
        # Cleared so the parsing step above is skipped on later invocations
        # sharing this context.
        meta['first_process'] = False

    # Remember the image path, unless one has already been recorded.
    meta.setdefault('base_image', input)

    try:
        im = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    if mask:
        try:
            mask = Image.open(mask)
        except IOError as err:
            raise click.BadParameter(str(err))

    message('Segmenting\t', nl=False)
    try:
        if not legacy:
            res = blla.segment(im, text_direction, mask=mask, model=model,
                               device=device)
        else:
            res = pageseg.segment(im, text_direction, scale, maxcolseps,
                                  black_colseps, no_hlines=remove_hlines,
                                  pad=pad, mask=mask)
    except Exception:
        message('\u2717', fg='red')
        raise

    with open_file(output, 'w') as fp:
        fp = cast(IO[Any], fp)
        json.dump(res, fp)
    message('\u2713', fg='green')
예제 #3
0
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segments a single page image and writes the result into ``output``.

    The result is dumped as JSON unless this is the last process of the
    context and its output mode is something other than ``'native'``; in that
    case it is rendered through ``serialization.serialize`` with the output
    mode as template.

    Args:
        legacy: When truthy ``pageseg.segment`` is used, otherwise
                ``blla.segment``.
        model: Forwarded to ``blla.segment``.
        text_direction: Forwarded to both segmenters.
        scale, maxcolseps, black_colseps, remove_hlines, pad: Forwarded to
                ``pageseg.segment`` only.
        mask: Optional path of a mask image; when truthy it is opened and the
              resulting image is handed to the segmenter.
        device: Forwarded to ``blla.segment``.
        input: Path of the page image. On the first process of a context
               whose input format is not ``'image'`` it is first run through
               the matching input parser, whose ``'image'`` entry replaces it.
        output: Destination handed to ``open_file``.

    Raises:
        click.BadParameter: If the page image or the mask cannot be opened.
        Exception: Anything raised by the segmenter is re-raised after a
                   failure marker has been printed.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    ctx = click.get_current_context()
    meta = ctx.meta

    if meta['first_process']:
        input_format = meta['input_format_type']
        if input_format != 'image':
            parse = get_input_parser(input_format)
            input = parse(input)['image']
        # Cleared so the parsing step above is skipped on later invocations
        # sharing this context.
        meta['first_process'] = False

    # Remember the image path, unless one has already been recorded.
    meta.setdefault('base_image', input)

    try:
        im = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    if mask:
        try:
            mask = Image.open(mask)
        except IOError as err:
            raise click.BadParameter(str(err))

    message('Segmenting\t', nl=False)
    try:
        if not legacy:
            res = blla.segment(im, text_direction, mask=mask, model=model,
                               device=device)
        else:
            res = pageseg.segment(im, text_direction, scale, maxcolseps,
                                  black_colseps, no_hlines=remove_hlines,
                                  pad=pad, mask=mask)
    except Exception:
        message('\u2717', fg='red')
        raise

    # The output mode is only consulted for the last process of the context.
    dump_as_json = not meta['last_process'] or meta['output_mode'] == 'native'
    if dump_as_json:
        with open_file(output, 'w') as fp:
            fp = cast(IO[Any], fp)
            json.dump(res, fp)
    else:
        with open_file(output, 'w', encoding='utf-8') as fp:
            fp = cast(IO[Any], fp)
            logger.info('Serializing as {} into {}'.format(
                meta['output_mode'], output))
            from kraken import serialization
            from kraken.rpred import ocr_record

            if 'type' in res and res['type'] == 'baselines':
                records = [ocr_record('', '', '', bl) for bl in res['lines']]
            else:
                records = []
                for box in res['boxes']:
                    # Even indices are treated as x values, odd ones as y
                    # values; the record gets the enclosing rectangle.
                    xs, ys = box[::2], box[1::2]
                    left, right = min(xs), max(xs)
                    top, bottom = min(ys), max(ys)
                    corners = [[left, top], [left, bottom],
                               [right, bottom], [right, top]]
                    records.append(ocr_record('', [], [], corners))

            regions = res['regions'] if 'regions' in res else None
            rendered = serialization.serialize(
                records,
                image_name=meta['base_image'],
                image_size=im.size,
                regions=regions,
                template=meta['output_mode'])
            fp.write(rendered)
    message('\u2713', fg='green')
예제 #4
0
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segments a single page image and writes the result into ``output``.

    The result is dumped as JSON unless this is the last process of the
    context and its output mode is something other than ``'native'``; in that
    case it is rendered through ``serialization.serialize_segmentation`` with
    the output mode as template.

    Args:
        legacy: When truthy ``pageseg.segment`` is used, otherwise
                ``blla.segment``.
        model: Forwarded to ``blla.segment``.
        text_direction: Forwarded to both segmenters.
        scale, maxcolseps, black_colseps, remove_hlines, pad: Forwarded to
                ``pageseg.segment`` only.
        mask: Optional path of a mask image; when truthy it is opened and the
              resulting image is handed to the segmenter.
        device: Forwarded to ``blla.segment``.
        input: Path of the page image. On the first process of a context
               whose input format is not ``'image'`` it is first run through
               the matching input parser, whose ``'image'`` entry replaces it.
        output: Destination handed to ``click.open_file``.

    Raises:
        click.BadParameter: If the page image or the mask cannot be opened;
                            the original ``IOError`` is attached as cause.
        Exception: A segmenter failure is re-raised when the context's
                   ``raise_failed`` flag is set; otherwise a failure marker is
                   printed and the context exits with status 1.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    ctx = click.get_current_context()

    if ctx.meta['first_process']:
        if ctx.meta['input_format_type'] != 'image':
            input = get_input_parser(
                ctx.meta['input_format_type'])(input)['image']
        # Cleared so the parsing step above is skipped on later invocations
        # sharing this context.
        ctx.meta['first_process'] = False

    if 'base_image' not in ctx.meta:
        ctx.meta['base_image'] = input

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e)) from e
    if mask:
        try:
            mask = Image.open(mask)
        except IOError as e:
            raise click.BadParameter(str(e)) from e
    message('Segmenting\t', nl=False)
    try:
        if legacy:
            res = pageseg.segment(im,
                                  text_direction,
                                  scale,
                                  maxcolseps,
                                  black_colseps,
                                  no_hlines=remove_hlines,
                                  pad=pad,
                                  mask=mask)
        else:
            res = blla.segment(im,
                               text_direction,
                               mask=mask,
                               model=model,
                               device=device)
    except Exception:
        if ctx.meta['raise_failed']:
            raise
        message('\u2717', fg='red')
        ctx.exit(1)
    if ctx.meta['last_process'] and ctx.meta['output_mode'] != 'native':
        with click.open_file(output, 'w', encoding='utf-8') as fp:
            fp = cast(IO[Any], fp)
            logger.info('Serializing as {} into {}'.format(
                ctx.meta['output_mode'], output))
            # Imported here so the serializer is only loaded when a
            # non-native output is actually requested.
            from kraken import serialization
            fp.write(
                serialization.serialize_segmentation(
                    res,
                    image_name=ctx.meta['base_image'],
                    image_size=im.size,
                    template=ctx.meta['output_mode']))
    else:
        with click.open_file(output, 'w') as fp:
            fp = cast(IO[Any], fp)
            json.dump(res, fp)
    message('\u2713', fg='green')
예제 #5
0
def cli(format_type, model, repolygonize, files):
    """
    A script producing overlays of lines and regions from either ALTO or
    PageXML files or run a model to do the same.

    For each input document one PNG per line group
    (``high_<basename>_lines_<script>.png``) and one PNG per region type
    (``high_<basename>_regions_<type>.png``) is written to the current
    working directory.

    Args:
        format_type: Selects the XML parser when no model is given: ``'xml'``
                     uses ``xml.parse_xml``, ``'alto'`` uses
                     ``xml.parse_alto`` and anything else ``xml.parse_page``.
        model: Path of a segmentation model loaded with
               ``vgsl.TorchVGSLModel.load_model``, or ``None`` to take the
               segmentation from the XML files in ``files``.
        repolygonize: Not used by this function.
        files: Input files: XML documents when ``model`` is ``None``, images
               otherwise. When empty, the command help is printed and the
               command exits.
    """
    if not files:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image

    from kraken.lib import vgsl, xml
    from kraken import blla

    if model is None:
        if format_type == 'xml':
            fn = xml.parse_xml
        elif format_type == 'alto':
            fn = xml.parse_alto
        else:
            fn = xml.parse_page
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            data = fn(doc)
            im = Image.open(data['image']).convert('RGBA')
            _save_overlays(im, data['lines'], data['regions'], doc)
            click.secho('\u2713', fg='green')
    else:
        net = vgsl.TorchVGSLModel.load_model(model)
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            im = Image.open(doc)
            res = blla.segment(im, model=net)
            _save_overlays(im.convert('RGBA'), res['lines'], res['regions'], doc)
            click.secho('\u2713', fg='green')


def _save_overlays(im, lines, regions, doc):
    """
    Draws line and region overlays on top of ``im`` and saves them as PNGs.

    Args:
        im: RGBA page image the overlays are composited onto.
        lines: Iterable of line dicts with ``'script'``, ``'boundary'`` and
               ``'baseline'`` entries; lines are grouped by ``'script'`` and
               one overlay is saved per group.
        regions: Mapping of region type to a list of polygons; one overlay is
                 saved per type.
        doc: Input document path; its basename is used in the output names.
    """
    from PIL import Image, ImageDraw

    # reorder lines by type
    grouped = defaultdict(list)
    for line in lines:
        grouped[line['script']].append(line)

    prefix = f'high_{os.path.basename(doc)}'

    for script, script_lines in grouped.items():
        layer = Image.new('RGBA', im.size, (0, 0, 0, 0))
        draw = ImageDraw.Draw(layer)
        for idx, line in enumerate(script_lines):
            # A colour is consumed for every line, even one without a
            # boundary, so colour assignment does not depend on which parts
            # of a line are present.
            c = next(cmap)
            if line['boundary']:
                draw.polygon([tuple(x) for x in line['boundary']],
                             fill=c,
                             outline=c[:3])
            if line['baseline']:
                draw.line([tuple(x) for x in line['baseline']],
                          fill=bmap,
                          width=2,
                          joint='curve')
                # The index label is anchored at the first baseline point, so
                # it can only be drawn when a baseline exists.
                draw.text(tuple(line['baseline'][0]),
                          str(idx),
                          fill=(0, 0, 0, 255))
        Image.alpha_composite(im, layer).save(f'{prefix}_lines_{script}.png')

    for region_type, polygons in regions.items():
        layer = Image.new('RGBA', im.size, (0, 0, 0, 0))
        draw = ImageDraw.Draw(layer)
        for polygon in polygons:
            c = next(cmap)
            draw.polygon([tuple(x) for x in polygon],
                         fill=c,
                         outline=c[:3])
        Image.alpha_composite(im, layer).save(
            f'{prefix}_regions_{region_type}.png')