Example #1
0
def vec_lines(heatmap: torch.Tensor,
              cls_map: Dict,
              scale: float,
              text_direction: str = 'horizontal-lr',
              reading_order_fn: Callable = polygonal_reading_order,
              regions: Dict = None,
              scal_im = None,
              **kwargs):
    """
    Turns a stack of heatmaps into typed, polygonized, reading-ordered lines.

    Args:
        heatmap: Stack of heatmaps; indexed on its first axis with the
                 separator and baseline class indices from `cls_map`.
        cls_map: Class mapping. `cls_map['aux']` must contain the keys
                 `_start_separator` and `_end_separator`, and
                 `cls_map['baselines']` maps a baseline type to its index.
        scale: Scaling factor handed to `scale_polygonal_lines`.
        text_direction: Only the last two characters are forwarded to
                        `reading_order_fn`.
        reading_order_fn: Callable invoked with the keyword arguments
                          `lines`, `regions` and `text_direction`.
        regions: Forwarded unchanged to `reading_order_fn`.
        scal_im: Forwarded as first argument to
                 `calculate_polygonal_environment`.
        **kwargs: Accepted and ignored.

    Returns:
        A list of dicts with the keys `script`, `baseline` and `boundary`,
        in the order produced by `reading_order_fn`.
    """
    aux = cls_map['aux']
    st_sep = aux['_start_separator']
    end_sep = aux['_end_separator']

    logger.info('Vectorizing baselines')
    typed_baselines = []
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        for vec in vectorize_lines(heatmap[(st_sep, end_sep, idx), :, :]):
            typed_baselines.append((bl_type, vec))

    logger.debug('Polygonizing lines')
    bl_types = [entry[0] for entry in typed_baselines]
    bl_coords = [entry[1] for entry in typed_baselines]
    # the polygonizer receives its own list object, separate from the one
    # that is zipped with the results below
    envs = calculate_polygonal_environment(scal_im, list(bl_coords))
    # lines for which no polygon could be computed are dropped
    kept = [(kind, coords, env)
            for kind, coords, env in zip(bl_types, bl_coords, envs)
            if env is not None]

    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([entry[1:] for entry in kept], scale)
    lines = [(entry[0], pair[0], pair[1]) for entry, pair in zip(kept, sc)]

    logger.debug('Reordering baselines')
    ordered = reading_order_fn(lines=lines,
                               regions=regions,
                               text_direction=text_direction[-2:])

    result = []
    for bl_type, bl, pl in ordered:
        result.append({'script': bl_type, 'baseline': bl, 'boundary': pl})
    return result
Example #2
0
def vec_lines(heatmap: torch.Tensor,
              cls_map: Dict,
              scale: float,
              text_direction: str = 'horizontal-lr',
              reading_order_fn: Callable = polygonal_reading_order,
              regions: Dict = None,
              scal_im=None,
              suppl_obj=None,
              topline=False,
              **kwargs):
    """
    Computes lines from a stack of heatmaps, a class mapping, and scaling
    factor.

    Each baseline is polygonized on its own, using all other baselines and
    every region containing the baseline's midpoint as boundaries.

    Args:
        heatmap: Stack of heatmaps; indexed on its first axis with the
                 separator and baseline class indices from `cls_map`.
        cls_map: Class mapping. `cls_map['aux']` must contain the keys
                 `_start_separator` and `_end_separator`, and
                 `cls_map['baselines']` maps a baseline type to its index.
        scale: Scaling factor handed to `scale_polygonal_lines`.
        text_direction: Only the last two characters are forwarded to
                        `reading_order_fn`.
        reading_order_fn: Callable invoked with the keyword arguments
                          `lines`, `regions` and `text_direction`.
        regions: Region polygons. Despite the `Dict` annotation the value
                 is iterated and indexed positionally, i.e. it is treated
                 as a sequence of polygons. `None` (the default) means no
                 regions.
        scal_im: Image the edge features are computed from (`sobel`
                 followed by `gaussian_filter`).
        suppl_obj: Accepted for interface compatibility. NOTE(review): the
                   value passed in is never read; the boundaries are
                   always rebuilt per baseline inside the loop.
        topline: Forwarded to `calculate_polygonal_environment`.
        **kwargs: Accepted and ignored.

    Returns:
        A list of dicts with the keys `script`, `baseline` and `boundary`,
        in the order produced by `reading_order_fn`.
    """
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type, x)
                          for x in vectorize_lines(heatmap[(st_sep, end_sep,
                                                            idx), :, :])])
    logger.debug('Polygonizing lines')

    im_feats = gaussian_filter(sobel(scal_im), 0.5)

    # `regions` defaults to None; iterate over an empty list in that case
    # instead of raising a TypeError. The original value is still what gets
    # forwarded to `reading_order_fn` below.
    region_list = [] if regions is None else regions
    reg_pols = [geom.Polygon(x) for x in region_list]

    lines = []
    for bl_idx, (line_type, bl) in enumerate(baselines):
        mid_point = geom.LineString(bl).interpolate(0.5, normalized=True)

        # all other baselines act as boundaries for the current one ...
        boundaries = [x[1] for x in baselines[:bl_idx] + baselines[bl_idx + 1:]]
        # ... as does every region containing the baseline's midpoint
        for reg_idx, reg_pol in enumerate(reg_pols):
            if reg_pol.contains(mid_point):
                boundaries.append(region_list[reg_idx])

        pol = calculate_polygonal_environment(baselines=[bl],
                                              im_feats=im_feats,
                                              suppl_obj=boundaries,
                                              topline=topline)
        # baselines without a computable polygon are dropped
        if pol[0] is not None:
            lines.append((line_type, bl, pol[0]))

    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(
        zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))
    logger.debug('Reordering baselines')
    lines = reading_order_fn(lines=lines,
                             regions=regions,
                             text_direction=text_direction[-2:])
    return [{
        'script': bl_type,
        'baseline': bl,
        'boundary': pl
    } for bl_type, bl, pl in lines]
Example #3
0
def _repolygonize(im: Image.Image, lines):
    """
    Helper function taking an output of the lib.xml parse_* functions and
    recalculating the contained polygonization.

    Args:
        im (Image.Image): Input image. An already opened image is used
                          directly; anything else is handed to
                          `Image.open`.
        lines (list): List of dicts [{'boundary': [[x0, y0], ...], 'baseline': [[x0, y0], ...], 'text': 'abcvsd', 'script': ...}, {...]

    Returns:
        A data structure `lines` with a changed polygonization. Each entry
        carries the keys `boundary`, `baseline`, `text` and `script`.
    """
    if isinstance(im, Image.Image):
        # matches the annotation: an open image cannot go through Image.open
        im = im.convert('L')
    else:
        # close the underlying file once the greyscale copy exists
        with Image.open(im) as src:
            im = src.convert('L')
    polygons = calculate_polygonal_environment(im, [x['baseline'] for x in lines])
    return [{'boundary': polygon,
             'baseline': orig['baseline'],
             'text': orig['text'],
             'script': orig['script']} for orig, polygon in zip(lines, polygons)]
Example #4
0
def cli(format_type, topline, files):
    """
    A small script repolygonizing line boundaries in ALTO or PageXML files.
    """
    # NOTE(review): the docstring above is left untouched on purpose; it is
    # presumably rendered by click as the command's help text.
    #
    # Arguments as used below:
    #   format_type: 'page' selects the PageXML parser/writer, anything else
    #                the ALTO ones.
    #   topline: one of 'topline', 'baseline' or 'centerline'.
    #   files: input XML files; for each one a sibling `<stem>_rewrite.xml`
    #          is written.
    if len(files) == 0:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from os.path import splitext

    from lxml import etree
    from PIL import Image

    from kraken.lib import xml
    from kraken.lib.segmentation import calculate_polygonal_environment

    def _rewrite(fname, polygons, coords_path, attr_name, serialize):
        # Replaces the polygon attribute of every TextLine that has a
        # coordinate element and writes the result next to the input file.
        with open(fname, 'rb') as src:
            doc = etree.parse(src)
        idx = 0
        for line in doc.findall('.//{*}TextLine'):
            pol = line.find(coords_path)
            if pol is None:
                continue
            polygon = polygons[idx]
            idx += 1
            # no polygon could be computed: keep the existing coordinates
            if polygon is not None:
                pol.attrib[attr_name] = serialize(polygon)
        with open(splitext(fname)[0] + '_rewrite.xml', 'wb') as dst:
            doc.write(dst, encoding='UTF-8', xml_declaration=True)

    def _repl_alto(fname, polygons):
        # ALTO stores all coordinates as one flat, space separated list
        _rewrite(fname, polygons, './{*}Shape/{*}Polygon', 'POINTS',
                 lambda polygon: ' '.join(str(coord) for pt in polygon for coord in pt))

    def _repl_page(fname, polygons):
        # PageXML stores space separated `x,y` pairs
        _rewrite(fname, polygons, './{*}Coords', 'points',
                 lambda polygon: ' '.join(','.join(str(x) for x in pt) for pt in polygon))

    if format_type == 'page':
        parse_fn = xml.parse_page
        repl_fn = _repl_page
    else:
        parse_fn = xml.parse_alto
        repl_fn = _repl_alto

    topline = {'topline': True,
               'baseline': False,
               'centerline': None}[topline]

    for doc in files:
        click.echo(f'Processing {doc} ', nl=False)
        seg = parse_fn(doc)
        with Image.open(seg['image']) as src:
            im = src.convert('L')
        # lines without a baseline get a placeholder so indices stay aligned
        baselines = [x['baseline'] if x['baseline'] is not None else [0, 0]
                     for x in seg['lines']]
        polygons = calculate_polygonal_environment(im, baselines, scale=(1800, 0), topline=topline)
        repl_fn(doc, polygons)
        # terminate the progress line started above
        click.secho('\u2713', fg='green')
Example #5
0
def segment(im,
            text_direction: str = 'horizontal-lr',
            mask: Optional[np.array] = None,
            reading_order_fn: Callable = polygonal_reading_order,
            model=None,
            device: str = 'cpu'):
    """
    Segments a page into text lines using the baseline segmenter.

    Segments a page into text lines and returns the polyline formed by each
    baseline and their estimated environment.

    Args:
        im (PIL.Image): Input image.
        text_direction (str): Returned unchanged in the result; its last
                              two characters are handed to
                              `reading_order_fn`.
        mask (PIL.Image): A bi-level mask image of the same size as `im`.
                          It is validated and converted to an array.
                          NOTE(review): the converted mask is not used
                          anywhere else in this function.
        reading_order_fn (function): Function to determine the reading order.
                                     Called with the keyword arguments
                                     `lines`, `regions` and
                                     `text_direction`.
        model (vgsl.TorchVGSLModel): A TorchVGSLModel containing a segmentation
                                     model. If none is given a default model
                                     will be loaded.
        device (str or torch.Device): The target device to run the neural
                                      network on.

    Returns:
        {'text_direction': '$dir',
         'type': 'baselines',
         'lines': [
            {'script': '$type', 'baseline': [[x0, y0], [x1, y1], ..., [x_n, y_n]], 'boundary': [[x0, y0, x1, y1], ... [x_m, y_m]]},
            {'script': '$type', 'baseline': [[x0, ...]], 'boundary': [[x0, ...]]}
          ]
          'regions': {'$region_type': [...], ...},
          'script_detection': True or False
        }: A dictionary containing the text direction and under the key 'lines'
        a list of reading order sorted baselines (polylines) and their
        respective polygonal boundaries. 'regions' maps each region type to
        its scaled regions and 'script_detection' is set when the model
        defines more than one baseline class.

    Raises:
        KrakenInputException if the mask is not bitonal or its size differs
        from the size of the input image.
    """
    im_str = get_im_str(im)
    logger.info(f'Segmenting {im_str}')

    if model is None:
        logger.info('No segmentation model given. Loading default model.')
        model = vgsl.TorchVGSLModel.load_model(pkg_resources.resource_filename(__name__, 'blla.mlmodel'))

    if model.one_channel_mode == '1' and not is_bitonal(im):
        logger.warning('Running binary model on non-binary input image '
                       '(mode {}). This will result in severely degraded '
                       'performance'.format(im.mode))

    model.eval()
    model.to(device)

    if mask is not None:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
        if mask.size != im.size:
            # the sizes are interpolated into the message
            size_msg = f"Mask size {mask.size} doesn't match image size {im.size}"
            logger.error(size_msg)
            raise KrakenInputException(size_msg)
        logger.info('Masking enabled in segmenter.')
        mask = pil2array(mask)

    batch, channels, height, width = model.input
    transforms = dataset.generate_input_transforms(batch, height, width, channels, 0, valid_norm=False)
    # only the first three transforms are applied to obtain the scaled image
    res_tf = tf.Compose(transforms.transforms[:3])
    scal_im = res_tf(im).convert('L')

    with torch.no_grad():
        logger.debug('Running network forward pass')
        o = model.nn(transforms(im).unsqueeze(0).to(device))
    logger.debug('Upsampling network output')
    o = F.interpolate(o, size=scal_im.size[::-1])
    o = o.squeeze().cpu().numpy()
    # im.size is (width, height), so the spatial axes of `o` are reversed
    scale = np.divide(im.size, o.shape[:0:-1])
    # postprocessing
    cls_map = model.user_metadata['class_mapping']
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    regions = {}
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type, x) for x in vectorize_lines(o[(st_sep, end_sep, idx), :, :])])
    logger.info('Vectorizing regions')
    for region_type, idx in cls_map['regions'].items():
        # log the region type being processed
        logger.debug(f'Vectorizing regions of type {region_type}')
        regions[region_type] = vectorize_regions(o[idx])
    logger.debug('Polygonizing lines')
    # lines for which no polygon could be computed are dropped
    lines = list(filter(lambda x: x[2] is not None, zip([x[0] for x in baselines],
                                                        [x[1] for x in baselines],
                                                        calculate_polygonal_environment(scal_im, [x[1] for x in baselines]))))
    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))
    logger.debug('Scaling vectorized regions')
    for reg_id, regs in regions.items():
        regions[reg_id] = scale_regions(regs, scale)
    logger.debug('Reordering baselines')
    # the reading order function receives all regions as one flat list
    order_regs = []
    for regs in regions.values():
        order_regs.extend(regs)
    lines = reading_order_fn(lines=lines, regions=order_regs, text_direction=text_direction[-2:])

    # `cls_map` was read from the model metadata above, so the key exists
    script_detection = len(cls_map['baselines']) > 1

    return {'text_direction': text_direction,
            'type': 'baselines',
            'lines': [{'script': bl_type, 'baseline': bl, 'boundary': pl} for bl_type, bl, pl in lines],
            'regions': regions,
            'script_detection': script_detection}
Example #6
0
def vec_lines(heatmap: torch.Tensor,
              cls_map: Dict[str, Dict[str, int]],
              scale: float,
              text_direction: str = 'horizontal-lr',
              reading_order_fn: Callable = polygonal_reading_order,
              regions: List[np.ndarray] = None,
              scal_im: np.ndarray = None,
              suppl_obj: List[np.ndarray] = None,
              topline: Optional[bool] = False,
              **kwargs) -> List[Dict[str, Any]]:
    r"""
    Computes lines from a stack of heatmaps, a class mapping, and scaling
    factor.

    Args:
        heatmap: A stack of heatmaps of shape `NxHxW` output from the network.
        cls_map: Dictionary mapping string identifiers to indices on the stack
                 of heatmaps.
        scale: Scaling factor between heatmap and unscaled input image.
        text_direction: Text directions used as hints in the reading order
                        algorithm.
        reading_order_fn: Reading order calculation function.
        regions: Regions to be used as boundaries during polygonization and
                 atomic blocks during reading order determination for lines
                 contained within. `None` (the default) means no regions.
        scal_im: A numpy array containing the scaled input image.
        suppl_obj: Supplementary objects which are used as boundaries during
                   polygonization. NOTE(review): the value passed in is
                   never read; the boundaries are always rebuilt per
                   baseline from the other baselines and the containing
                   regions.
        topline: True for a topline, False for baseline, or None for a
                 centerline.

    Returns:
        A list of dictionaries containing the baselines, bounding polygons, and
        line type in reading order:

        .. code-block::
           :force:

            [{'tags': {'type': '$baseline_type'}, 'baseline': [[x0, y0], [x1, y1], ..., [x_n, y_n]], 'boundary': [[x0, y0, x1, y1], ... [x_m, y_m]]},
             {'tags': {'type': '$baseline_type'}, 'baseline': [[x0, ...]], 'boundary': [[x0, ...]]},
             {'tags': {'type': '$baseline_type'}, 'baseline': [[x0, ...]], 'boundary': [[x0, ...]]},
             ...
            ]
    """
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type, x)
                          for x in vectorize_lines(heatmap[(st_sep, end_sep,
                                                            idx), :, :])])
    logger.debug('Polygonizing lines')

    im_feats = gaussian_filter(sobel(scal_im), 0.5)

    # `regions` defaults to None; iterate over an empty list in that case
    # instead of raising a TypeError. The original value is still what gets
    # forwarded to `reading_order_fn` below.
    region_list = [] if regions is None else regions
    reg_pols = [geom.Polygon(x) for x in region_list]

    lines = []
    for bl_idx, (line_type, bl) in enumerate(baselines):
        mid_point = geom.LineString(bl).interpolate(0.5, normalized=True)

        # all other baselines act as boundaries for the current one ...
        boundaries = [x[1] for x in baselines[:bl_idx] + baselines[bl_idx + 1:]]
        # ... as does every region containing the baseline's midpoint
        for reg_idx, reg_pol in enumerate(reg_pols):
            if reg_pol.contains(mid_point):
                boundaries.append(region_list[reg_idx])

        pol = calculate_polygonal_environment(baselines=[bl],
                                              im_feats=im_feats,
                                              suppl_obj=boundaries,
                                              topline=topline)
        # baselines without a computable polygon are dropped
        if pol[0] is not None:
            lines.append((line_type, bl, pol[0]))

    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(
        zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))
    logger.debug('Reordering baselines')
    lines = reading_order_fn(lines=lines,
                             regions=regions,
                             text_direction=text_direction[-2:])
    return [{
        'tags': {
            'type': bl_type
        },
        'baseline': bl,
        'boundary': pl
    } for bl_type, bl, pl in lines]
Example #7
0
def cli(format_type, model, repolygonize, files):
    """
    A script producing overlays of lines and regions from either ALTO or
    PageXML files or run a model to do the same.
    """
    # NOTE(review): the docstring above is left untouched on purpose; it is
    # presumably rendered by click as the command's help text.
    #
    # Arguments as used below:
    #   format_type: 'xml', 'alto' or anything else (PageXML); selects the
    #                parser when no model is given.
    #   model: path of a segmentation model or None. With a model the files
    #          are treated as images and segmented; without one they are
    #          parsed as XML.
    #   repolygonize: recompute line boundaries from the baselines before
    #                 drawing (XML mode only).
    #   files: inputs; per line/region type one PNG named
    #          `high_<basename>_lines_<type>.png` or
    #          `high_<basename>_regions_<type>.png` is saved.
    if len(files) == 0:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image, ImageDraw

    from kraken.lib import vgsl, xml, segmentation
    from kraken import blla

    def _has_points(coords):
        # True for a non-empty coordinate sequence; safe for None
        return coords is not None and len(coords) > 0

    def _as_points(coords):
        # nested points become tuples, flat coordinate lists pass through
        return [tuple(pt) if hasattr(pt, '__iter__') else pt for pt in coords]

    def _save_overlays(doc, im, lines, regions):
        # Draws one overlay image per line type and one per region type on
        # top of the RGBA image `im` and saves them to the working directory.
        by_type = defaultdict(list)
        for line in lines:
            by_type[line['script']].append(line)

        for t, ls in by_type.items():
            tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(tmp)
            for idx, line in enumerate(ls):
                # a colour is consumed for every line, drawn or not
                c = next(cmap)
                if _has_points(line['boundary']):
                    draw.polygon(_as_points(line['boundary']),
                                 fill=c,
                                 outline=c[:3])
                if _has_points(line['baseline']):
                    draw.line(_as_points(line['baseline']),
                              fill=bmap,
                              width=2,
                              joint='curve')
                    # the index label is anchored at the first baseline
                    # point, so it can only be drawn when a baseline exists
                    draw.text(line['baseline'][0],
                              str(idx),
                              fill=(0, 0, 0, 255))
            base_image = Image.alpha_composite(im, tmp)
            base_image.save(
                f'high_{os.path.basename(doc)}_lines_{slugify(t)}.png')

        for t, regs in regions.items():
            tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(tmp)
            for reg in regs:
                c = next(cmap)
                try:
                    draw.polygon(_as_points(reg), fill=c, outline=c[:3])
                except Exception:
                    # deliberate best effort: a region that cannot be drawn
                    # is skipped instead of aborting the whole overlay
                    pass
            base_image = Image.alpha_composite(im, tmp)
            base_image.save(
                f'high_{os.path.basename(doc)}_regions_{slugify(t)}.png')

    if model is None:
        if format_type == 'xml':
            fn = xml.parse_xml
        elif format_type == 'alto':
            fn = xml.parse_alto
        else:
            fn = xml.parse_page
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            data = fn(doc)
            if repolygonize:
                im = Image.open(data['image']).convert('L')
                lines = data['lines']
                polygons = segmentation.calculate_polygonal_environment(
                    im, [x['baseline'] for x in lines], scale=(1200, 0))
                data['lines'] = [{
                    'boundary': polygon,
                    'baseline': orig['baseline'],
                    'text': orig['text'],
                    'script': orig['script']
                } for orig, polygon in zip(lines, polygons)]
            im = Image.open(data['image']).convert('RGBA')
            _save_overlays(doc, im, data['lines'], data['regions'])
            click.secho('\u2713', fg='green')
    else:
        net = vgsl.TorchVGSLModel.load_model(model)
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            im = Image.open(doc)
            res = blla.segment(im, model=net)
            im = im.convert('RGBA')
            _save_overlays(doc, im, res['lines'], res['regions'])
            click.secho('\u2713', fg='green')