Example #1
0
def binary_objects(binary: np.ndarray) -> list:
    """
    Labels features in an array and segments them into objects.

    Args:
        binary: Array handed to ``morph.label`` for connected component
                labelling.

    Returns:
        The result of ``morph.find_objects`` on the label image --
        presumably one tuple of bounding slices per labelled component, as
        with ``scipy.ndimage.find_objects`` (TODO confirm against ``morph``).
    """
    # Only the label image is needed; the component count is discarded.
    labels, _ = morph.label(binary)
    return morph.find_objects(labels)
Example #2
0
def compute_line_seeds(binary, bottom, top, colseps, scale, threshold=0.2):
    """
    Based on gradient maps, computes candidates for baselines and xheights.
    Then, it marks the regions between the two as a line seed.

    Args:
        binary: Page array; only its shape is used, to size the seed map.
        bottom: Gradient map from which baseline candidates are picked
                (positions equal to their vertical ``maximum_filter`` value
                and above the threshold).
        top: Gradient map from which x-height candidates are picked the same
             way, with half the threshold of ``bottom``.
        colseps: Column separator map; candidates and seeds are multiplied by
                 ``1 - colseps`` so separator positions are suppressed.
        scale: Controls the vertical search window, the seed height above a
               baseline (``max(3, int(scale/2))``), the largest accepted
               baseline/x-height gap (``5*scale``) and the horizontal
               dilation of the seeds.
        threshold: Relative cut-off applied to the maxima of ``bottom`` and
                   ``top``.

    Returns:
        The label image produced by ``morph.label`` on the seed map.
    """
    vrange = int(scale)
    # NOTE(review): ``threshold`` is multiplied in twice below, so the
    # effective cut-off is threshold**2 (and threshold**2 / 2 for ``top``).
    # Kept as is because changing it alters every segmentation result --
    # confirm whether this is intended.
    bmarked = maximum_filter(bottom == maximum_filter(bottom, (vrange, 0)),
                             (2, 2))
    bmarked = bmarked * (bottom > threshold*np.amax(bottom)*threshold)*(1-colseps)
    tmarked = maximum_filter(top == maximum_filter(top, (vrange, 0)), (2, 2))
    tmarked = tmarked * (top > threshold*np.amax(top)*threshold/2)*(1-colseps)
    # widen the x-height candidates horizontally
    tmarked = maximum_filter(tmarked, (1, 20))
    seeds = np.zeros(binary.shape, 'i')
    delta = max(3, int(scale/2))
    for x in range(bmarked.shape[1]):
        # Candidates of this column ordered bottom-to-top; flag 1 marks a
        # baseline candidate, flag 0 an x-height candidate.
        transitions = sorted([(y, 1) for y in find(bmarked[:, x])] +
                             [(y, 0) for y in find(tmarked[:, x])])[::-1]
        # sentinel so that the last real candidate also has a successor
        transitions += [(0, 0)]
        for (y0, s0), (y1, s1) in zip(transitions, transitions[1:]):
            if s0 == 0:
                continue
            # Clamp the start: a negative start index would wrap around to
            # the end of the column and select (almost always) nothing for
            # baselines closer than ``delta`` rows to the top edge.
            seeds[max(0, y0-delta):y0, x] = 1
            # Fill up to the next x-height candidate if it is close enough.
            if s1 == 0 and (y0-y1) < 5*scale:
                seeds[y1:y0, x] = 1
    seeds = maximum_filter(seeds, (1, int(1+scale)))
    seeds = seeds * (1-colseps)
    seeds, _ = morph.label(seeds)
    return seeds
Example #3
0
def compute_line_seeds(binary, bottom, top, colseps, scale, threshold=0.2):
    """
    Based on gradient maps, computes candidates for baselines and xheights.
    Then, it marks the regions between the two as a line seed.

    Args:
        binary: Page array; only ``binary.shape`` is read, to allocate the
                seed map.
        bottom: Gradient map used to find baseline candidates: positions
                that equal their vertical ``maximum_filter`` value and exceed
                the threshold.
        top: Gradient map used to find x-height candidates in the same way,
             with half the threshold of ``bottom``.
        colseps: Column separator map; everything is multiplied by
                 ``1 - colseps`` so separator positions never become seeds.
        scale: Drives the vertical search window, the seed height above a
               baseline (``max(3, int(scale / 2))``), the maximum accepted
               baseline/x-height distance (``5 * scale``) and the horizontal
               dilation of the seeds.
        threshold: Relative cut-off applied to the maxima of ``bottom`` and
                   ``top``.

    Returns:
        The label image produced by ``morph.label`` on the seed map.
    """
    vrange = int(scale)
    # NOTE(review): ``threshold`` appears twice in each product below, i.e.
    # the effective cut-off is threshold**2. Left untouched since it changes
    # all results -- confirm whether that is intended.
    bmarked = maximum_filter(bottom == maximum_filter(bottom, (vrange, 0)),
                             (2, 2))
    bmarked = bmarked * (bottom > threshold * np.amax(bottom) * threshold) * (
        1 - colseps)
    tmarked = maximum_filter(top == maximum_filter(top, (vrange, 0)), (2, 2))
    tmarked = tmarked * (top > threshold * np.amax(top) * threshold / 2) * (
        1 - colseps)
    # spread the x-height candidates horizontally
    tmarked = maximum_filter(tmarked, (1, 20))
    seeds = np.zeros(binary.shape, 'i')
    delta = max(3, int(scale / 2))
    for x in range(bmarked.shape[1]):
        # Per-column candidates, sorted from the bottom of the page upwards;
        # flag 1 = baseline candidate, flag 0 = x-height candidate.
        transitions = sorted([(y, 1) for y in find(bmarked[:, x])] +
                             [(y, 0) for y in find(tmarked[:, x])])[::-1]
        # sentinel: gives the topmost candidate a successor to pair with
        transitions += [(0, 0)]
        for (y0, s0), (y1, s1) in zip(transitions, transitions[1:]):
            if s0 == 0:
                continue
            # ``y0 - delta`` may be negative near the top edge; a negative
            # slice start wraps around and would silently mark nothing, so
            # clamp it to the first row.
            seeds[max(0, y0 - delta):y0, x] = 1
            # Extend the seed up to the following x-height candidate when it
            # is within 5 * scale rows.
            if s1 == 0 and (y0 - y1) < 5 * scale:
                seeds[y1:y0, x] = 1
    seeds = maximum_filter(seeds, (1, int(1 + scale)))
    seeds = seeds * (1 - colseps)
    seeds, _ = morph.label(seeds)
    return seeds
Example #4
0
def binary_objects(binary: np.ndarray) -> list:
    """
    Labels features in an array and segments them into objects.

    Args:
        binary: Array passed to ``morph.label`` to label its connected
                components.

    Returns:
        Whatever ``morph.find_objects`` yields for the label image --
        presumably a list holding one tuple of slices per component, like
        ``scipy.ndimage.find_objects`` (TODO confirm against ``morph``).
    """
    # The second element of the label result (the count) is not needed here.
    labels, _ = morph.label(binary)
    return morph.find_objects(labels)
Example #5
0
def remove_hlines(binary, scale, maxsize=10):
    """
    Erases wide connected components (e.g. horizontal rules) from an image.

    Every component whose bounding box is wider than ``maxsize * scale`` is
    cleared; everything else is kept.

        Args:
            binary (numpy.array): image to filter
            scale (float): factor the width limit is multiplied with
            maxsize (int): width limit in multiples of ``scale``; wider
                           components are removed

        Returns:
            numpy.array of unsigned bytes that is 1 wherever a component
            survived and 0 elsewhere.

    """
    labelled, _ = morph.label(binary)
    boxes = morph.find_objects(labelled)
    # label ids start at 1, position ``k`` in ``boxes`` belongs to id ``k+1``
    for label_id, bounds in enumerate(boxes, start=1):
        if sl.width(bounds) > maxsize * scale:
            # ``window`` is indexed with the component's bounds; only pixels
            # of this very component are reset, neighbours inside the same
            # bounding box stay untouched
            window = labelled[bounds]
            window[window == label_id] = 0
    return np.array(labelled != 0, 'B')
Example #6
0
def remove_hlines(binary, scale, maxsize=10):
    """
    Filters out connected components that are too wide to be text.

    A component is dropped when the width of its bounding box exceeds
    ``maxsize*scale``.

        Args:
            binary (numpy.array): image whose components are inspected
            scale (float): multiplier for the width limit
            maxsize (int): widest allowed component, in multiples of
                           ``scale``

        Returns:
            numpy.array (unsigned byte) with 1 for every remaining
            foreground position and 0 otherwise.

    """
    components, _ = morph.label(binary)
    for idx, box in enumerate(morph.find_objects(components)):
        if sl.width(box) > maxsize*scale:
            # component ids are 1-based while ``idx`` is 0-based
            region = components[box]
            region[region == idx+1] = 0
    remaining = components != 0
    return np.array(remaining, 'B')
Example #7
0
def remove_hlines(binary: np.ndarray,
                  scale: float,
                  maxsize: int = 10) -> np.ndarray:
    """
    Clears connected components wider than ``maxsize * scale``.

    Intended to get rid of horizontal black lines that only interfere with
    page segmentation.

        Args:
            binary: image whose connected components are examined
            scale: multiplier applied to ``maxsize`` to get the width limit
            maxsize: width limit in multiples of ``scale``; any component
                     with a wider bounding box is removed

        Returns:
            numpy.ndarray of unsigned bytes, 1 where a component was kept and
            0 everywhere else.

    """
    logger.debug('Filtering horizontal lines')
    label_map, _ = morph.label(binary)
    bounding_boxes = morph.find_objects(label_map)
    # enumerate from 1 so the counter equals the component's label id
    for component_id, box in enumerate(bounding_boxes, start=1):
        if sl.width(box) > maxsize * scale:
            patch = label_map[box]
            # reset only this component, not others overlapping its box
            patch[patch == component_id] = 0
    return np.array(label_map != 0, 'B')
Example #8
0
def binary_objects(binary):
    """
    Labels ``binary`` with ``morph.label`` and returns the result of
    ``morph.find_objects`` for the label image.
    """
    # the component count returned alongside the label image is not used
    label_image, _count = morph.label(binary)
    return morph.find_objects(label_image)
Example #9
0
def binary_objects(binary):
    """
    Runs ``morph.label`` on ``binary`` and hands the label image to
    ``morph.find_objects``, whose result is returned unchanged.
    """
    labelled, _unused_count = morph.label(binary)
    found = morph.find_objects(labelled)
    return found
Example #10
0
def segment(im,
            text_direction: str = 'horizontal-lr',
            scale: Optional[float] = None,
            maxcolseps: float = 2,
            black_colseps: bool = False,
            no_hlines: bool = True,
            pad: Union[int, Tuple[int, int]] = 0,
            mask: Optional[np.ndarray] = None,
            reading_order_fn: Callable = reading_order) -> Dict[str, Any]:
    """
    Segments a page into text lines.

    Segments a page into text lines and returns the absolute coordinates of
    each line in reading order.

    Args:
        im: A bi-level page of mode '1' or 'L'
        text_direction: Principal direction of the text
                        (horizontal-lr/rl/vertical-lr/rl)
        scale: Scale of the image. Will be auto-determined if set to `None`.
        maxcolseps: Maximum number of whitespace column separators
        black_colseps: Whether column separators are assumed to be vertical
                       black lines or not
        no_hlines: Switch for small horizontal line removal.
        pad: Padding to add to line bounding boxes. If int the same padding is
             used both left and right. If a 2-tuple, uses (padding_left,
             padding_right).
        mask: A bi-level mask image of the same size as `im` where 0-valued
              regions are ignored for segmentation purposes. Disables column
              detection. It has to be given in the same orientation as `im`;
              it is rotated together with the page for vertical text.
              NOTE(review): annotated as `np.ndarray` but accessed through
              an image interface (`.mode`, `.convert`, `.size`, `.rotate`)
              -- confirm the intended type.
        reading_order_fn: Function to call to order line output. Callable
                          accepting a list of slices (y, x) and a text
                          direction in (`rl`, `lr`).

    Returns:
        A dictionary containing the text direction, a list of reading order
        sorted bounding boxes under the key 'boxes' and a 'script_detection'
        flag that is always `False`:

        .. code-block::

            {'text_direction': '$dir',
             'boxes': [(x1, y1, x2, y2),...],
             'script_detection': False}

        'boxes' is empty when the page has too many connected components or
        the column finder fails (probably an empty image).

    Raises:
        KrakenInputException: if the input image is not binarized, the text
                              direction is invalid, or the mask is not
                              bitonal or does not match the image size.
    """
    im_str = get_im_str(im)
    logger.info(f'Segmenting {im_str}')

    if im.mode != '1' and not is_bitonal(im):
        logger.error(f'Image {im_str} is not bi-level')
        raise KrakenInputException(f'Image {im_str} is not bi-level')

    # rotate input image for vertical lines
    if text_direction.startswith('horizontal'):
        angle = 0
        offset = (0, 0)
    elif text_direction == 'vertical-lr':
        angle = 270
        offset = (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle = 90
        offset = (im.size[0], 0)
    else:
        logger.error(f'Invalid text direction \'{text_direction}\'')
        raise KrakenInputException(f'Invalid text direction {text_direction}')

    # Remember the size before rotating: the mask is supplied in the input
    # orientation, so this is what its size has to be checked against.
    input_size = im.size

    logger.debug(f'Rotating input image by {angle} degrees')
    im = im.rotate(angle, expand=True)

    a = pil2array(im)
    # threshold halfway between darkest and brightest value, then invert
    binary = np.array(a > 0.5 * (np.amin(a) + np.amax(a)), 'i')
    binary = 1 - binary

    _, ccs = morph.label(1 - binary)
    # np.dot(*im.size) is width * height; allow one component per 30x30 area
    if ccs > np.dot(*im.size) / (30 * 30):
        logger.warning(
            f'Too many connected components for a page image: {ccs}')
        return {
            'text_direction': text_direction,
            'boxes': [],
            'script_detection': False
        }

    if not scale:
        scale = estimate_scale(binary)

    if no_hlines:
        binary = remove_hlines(binary, scale)
    # emptyish images will cause exceptions here.

    try:
        # Explicit None test: the truth value of a multi-element array is
        # ambiguous and raises ValueError, which the handler below would
        # silently turn into an empty segmentation.
        if mask is not None:
            if mask.mode != '1' and not is_bitonal(mask):
                logger.error('Mask is not bitonal')
                raise KrakenInputException('Mask is not bitonal')
            mask = mask.convert('1')
            # Compare against the unrotated page size; `im` has already been
            # rotated (and its dimensions swapped for vertical text) here.
            if mask.size != input_size:
                logger.error(
                    f'Mask size {mask.size} doesn\'t match image size {input_size}'
                )
                raise KrakenInputException(
                    f'Mask size {mask.size} doesn\'t match image size {input_size}'
                )
            logger.info(
                'Masking enabled in segmenter. Disabling column detection.')
            mask = mask.rotate(angle, expand=True)
            colseps = pil2array(mask)
        elif black_colseps:
            colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
        else:
            colseps = compute_white_colseps(binary, scale, maxcolseps)
    except ValueError:
        logger.warning(
            f'Exception in column finder (probably empty image) for {im_str}')
        return {
            'text_direction': text_direction,
            'boxes': [],
            'script_detection': False
        }

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    # fall back to the spread seed labels where propagation assigned none
    llabels = np.where(llabels > 0, llabels, spread * binary)
    segmentation = llabels * binary

    lines = compute_lines(segmentation, scale)
    # only the trailing 'lr'/'rl' of the text direction is passed on
    order = reading_order_fn([line.bounds for line in lines],
                             text_direction[-2:])
    lsort = topsort(order)
    lines = [lines[i].bounds for i in lsort]
    # (y-slice, x-slice) -> (x1, y1, x2, y2)
    lines = [(s2.start, s1.start, s2.stop, s1.stop) for s1, s2 in lines]

    if isinstance(pad, int):
        pad = (pad, pad)
    # pad horizontally, clamped to the (rotated) image width
    lines = [(max(x[0] - pad[0], 0), x[1], min(x[2] + pad[1],
                                               im.size[0]), x[3])
             for x in lines]

    return {
        'text_direction': text_direction,
        # rotate the boxes back into the coordinate system of the input
        'boxes': rotate_lines(lines, 360 - angle, offset).tolist(),
        'script_detection': False
    }