Esempio n. 1
0
def vec_lines(heatmap: torch.Tensor,
              cls_map: Dict,
              scale: float,
              text_direction: str = 'horizontal-lr',
              reading_order_fn: Callable = polygonal_reading_order,
              regions: Dict = None,
              scal_im = None,
              **kwargs):
    """
    Computes lines from a stack of heatmaps, a class mapping, and scaling
    factor.
    """
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type,x) for x in vectorize_lines(heatmap[(st_sep, end_sep, idx), :, :])])
    logger.debug('Polygonizing lines')
    lines = list(filter(lambda x: x[2] is not None, zip([x[0] for x in baselines],
                                                        [x[1] for x in baselines],
                                                        calculate_polygonal_environment(scal_im, [x[1] for x in baselines]))))
    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))
    logger.debug('Reordering baselines')
    lines = reading_order_fn(lines=lines, regions=regions, text_direction=text_direction[-2:])
    return [{'script': bl_type, 'baseline': bl, 'boundary': pl} for bl_type, bl, pl in lines]
Esempio n. 2
0
def vec_lines(heatmap: torch.Tensor,
              cls_map: Dict,
              scale: float,
              text_direction: str = 'horizontal-lr',
              reading_order_fn: Callable = polygonal_reading_order,
              regions: Dict = None,
              scal_im=None,
              suppl_obj=None,
              topline=False,
              **kwargs):
    """
    Computes lines from a stack of heatmaps, a class mapping, and scaling
    factor.
    """
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type, x)
                          for x in vectorize_lines(heatmap[(st_sep, end_sep,
                                                            idx), :, :])])
    logger.debug('Polygonizing lines')

    im_feats = gaussian_filter(sobel(scal_im), 0.5)

    lines = []
    reg_pols = [geom.Polygon(x) for x in regions]
    for bl_idx in range(len(baselines)):
        bl = baselines[bl_idx]
        mid_point = geom.LineString(bl[1]).interpolate(0.5, normalized=True)

        suppl_obj = [x[1] for x in baselines[:bl_idx] + baselines[bl_idx + 1:]]
        for reg_idx, reg_pol in enumerate(reg_pols):
            if reg_pol.contains(mid_point):
                suppl_obj.append(regions[reg_idx])

        pol = calculate_polygonal_environment(baselines=[bl[1]],
                                              im_feats=im_feats,
                                              suppl_obj=suppl_obj,
                                              topline=topline)
        if pol[0] is not None:
            lines.append((bl[0], bl[1], pol[0]))

    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(
        zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))
    logger.debug('Reordering baselines')
    lines = reading_order_fn(lines=lines,
                             regions=regions,
                             text_direction=text_direction[-2:])
    return [{
        'script': bl_type,
        'baseline': bl,
        'boundary': pl
    } for bl_type, bl, pl in lines]
Esempio n. 3
0
def segment(im,
            text_direction: str = 'horizontal-lr',
            mask: Optional[np.array] = None,
            reading_order_fn: Callable = polygonal_reading_order,
            model=None,
            device: str = 'cpu'):
    """
    Segments a page into text lines using the baseline segmenter.

    Segments a page into text lines and returns the polyline formed by each
    baseline and their estimated environment.

    Args:
        im (PIL.Image): An RGB image.
        text_direction (str): Ignored by the segmenter but kept for
                              serialization.
        mask (PIL.Image): A bi-level mask image of the same size as `im` where
                          0-valued regions are ignored for segmentation
                          purposes. Disables column detection.
        reading_order_fn (function): Function to determine the reading order.
                                     Has to accept a list of tuples (baselines,
                                     polygon) and a text direction (`lr` or
                                     `rl`).
        model (vgsl.TorchVGSLModel): A TorchVGSLModel containing a segmentation
                                     model. If none is given a default model
                                     will be loaded.
        device (str or torch.Device): The target device to run the neural
                                      network on.

    Returns:
        {'text_direction': '$dir',
         'type': 'baseline',
         'lines': [
            {'baseline': [[x0, y0], [x1, y1], ..., [x_n, y_n]], 'boundary': [[x0, y0, x1, y1], ... [x_m, y_m]]},
            {'baseline': [[x0, ...]], 'boundary': [[x0, ...]]}
          ]
          'regions': [
            {'region': [[x0, y0], [x1, y1], ..., [x_n, y_n]], 'type': 'image'},
            {'region': [[x0, ...]], 'type': 'text'}
          ]
        }: A dictionary containing the text direction and under the key 'lines'
        a list of reading order sorted baselines (polylines) and their
        respective polygonal boundaries. The last and first point of each
        boundary polygon is connected.

    Raises:
        KrakenInputException if the input image is not binarized or the text
        direction is invalid.
    """
    im_str = get_im_str(im)
    logger.info(f'Segmenting {im_str}')

    if model is None:
        logger.info('No segmentation model given. Loading default model.')
        model = vgsl.TorchVGSLModel.load_model(pkg_resources.resource_filename(__name__, 'blla.mlmodel'))

    if model.one_channel_mode == '1' and not is_bitonal(im):
        logger.warning('Running binary model on non-binary input image '
                       '(mode {}). This will result in severely degraded '
                       'performance'.format(im.mode))

    model.eval()
    model.to(device)

    if mask:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
        if mask.size != im.size:
            logger.error('Mask size {mask.size} doesn\'t match image size {im.size}')
            raise KrakenInputException('Mask size {mask.size} doesn\'t match image size {im.size}')
        logger.info('Masking enabled in segmenter.')
        mask = pil2array(mask)

    batch, channels, height, width = model.input
    transforms = dataset.generate_input_transforms(batch, height, width, channels, 0, valid_norm=False)
    res_tf = tf.Compose(transforms.transforms[:3])
    scal_im = res_tf(im).convert('L')

    with torch.no_grad():
        logger.debug('Running network forward pass')
        o = model.nn(transforms(im).unsqueeze(0).to(device))
    logger.debug('Upsampling network output')
    o = F.interpolate(o, size=scal_im.size[::-1])
    o = o.squeeze().cpu().numpy()
    scale = np.divide(im.size, o.shape[:0:-1])
    # postprocessing
    cls_map = model.user_metadata['class_mapping']
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    regions = {}
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type,x) for x in vectorize_lines(o[(st_sep, end_sep, idx), :, :])])
    logger.info('Vectorizing regions')
    for region_type, idx in cls_map['regions'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        regions[region_type] = vectorize_regions(o[idx])
    logger.debug('Polygonizing lines')
    lines = list(filter(lambda x: x[2] is not None, zip([x[0] for x in baselines],
                                                        [x[1] for x in baselines],
                                                        calculate_polygonal_environment(scal_im, [x[1] for x in baselines]))))
    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))
    logger.debug('Scaling vectorized regions')
    for reg_id, regs in regions.items():
        regions[reg_id] = scale_regions(regs, scale)
    logger.debug('Reordering baselines')
    order_regs = []
    for regs in regions.values():
        order_regs.extend(regs)
    lines = reading_order_fn(lines=lines, regions=order_regs, text_direction=text_direction[-2:])

    if 'class_mapping' in model.user_metadata and len(model.user_metadata['class_mapping']['baselines']) > 1:
        script_detection = True
    else:
        script_detection = False

    return {'text_direction': text_direction,
            'type': 'baselines',
            'lines': [{'script': bl_type, 'baseline': bl, 'boundary': pl} for bl_type, bl, pl in lines],
            'regions': regions,
            'script_detection': script_detection}
Esempio n. 4
0
def vec_lines(heatmap: torch.Tensor,
              cls_map: Dict[str, Dict[str, int]],
              scale: float,
              text_direction: str = 'horizontal-lr',
              reading_order_fn: Callable = polygonal_reading_order,
              regions: List[np.ndarray] = None,
              scal_im: np.ndarray = None,
              suppl_obj: List[np.ndarray] = None,
              topline: Optional[bool] = False,
              **kwargs) -> List[Dict[str, Any]]:
    r"""
    Computes lines from a stack of heatmaps, a class mapping, and scaling
    factor.

    Args:
        heatmap: A stack of heatmaps of shape `NxHxW` output from the network.
        cls_map: Dictionary mapping string identifiers to indices on the stack
                 of heatmaps.
        scale: Scaling factor between heatmap and unscaled input image.
        text_direction: Text directions used as hints in the reading order
                        algorithm.
        reading_order_fn: Reading order calculation function.
        regions: Regions to be used as boundaries during polygonization and
                 atomic blocks during reading order determination for lines
                 contained within.
        scal_im: A numpy array containing the scaled input image.
        suppl_obj: Supplementary objects which are used as boundaries during
                   polygonization.
        topline: True for a topline, False for baseline, or None for a
                 centerline.

    Returns:
        A list of dictionaries containing the baselines, bounding polygons, and
        line type in reading order:

        .. code-block::
           :force:

            [{'script': '$baseline_type', baseline': [[x0, y0], [x1, y1], ..., [x_n, y_n]], 'boundary': [[x0, y0, x1, y1], ... [x_m, y_m]]},
             {'script': '$baseline_type', baseline': [[x0, ...]], 'boundary': [[x0, ...]]},
             {'script': '$baseline_type', baseline': [[x0, ...]], 'boundary': [[x0, ...]]},
             ...
            ]
    """
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type, x)
                          for x in vectorize_lines(heatmap[(st_sep, end_sep,
                                                            idx), :, :])])
    logger.debug('Polygonizing lines')

    im_feats = gaussian_filter(sobel(scal_im), 0.5)

    lines = []
    reg_pols = [geom.Polygon(x) for x in regions]
    for bl_idx in range(len(baselines)):
        bl = baselines[bl_idx]
        mid_point = geom.LineString(bl[1]).interpolate(0.5, normalized=True)

        suppl_obj = [x[1] for x in baselines[:bl_idx] + baselines[bl_idx + 1:]]
        for reg_idx, reg_pol in enumerate(reg_pols):
            if reg_pol.contains(mid_point):
                suppl_obj.append(regions[reg_idx])

        pol = calculate_polygonal_environment(baselines=[bl[1]],
                                              im_feats=im_feats,
                                              suppl_obj=suppl_obj,
                                              topline=topline)
        if pol[0] is not None:
            lines.append((bl[0], bl[1], pol[0]))

    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(
        zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))
    logger.debug('Reordering baselines')
    lines = reading_order_fn(lines=lines,
                             regions=regions,
                             text_direction=text_direction[-2:])
    return [{
        'tags': {
            'type': bl_type
        },
        'baseline': bl,
        'boundary': pl
    } for bl_type, bl, pl in lines]