コード例 #1
0
ファイル: pageseg.py プロジェクト: david-leon/kraken
def segment(im, scale=None, black_colseps=False):
    """
    Splits a binarized page image into text lines.

    The page is thresholded and inverted, passed through the column separator
    and line seed helpers, and the resulting labelled regions are turned into
    bounding boxes sorted by reading order.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        scale (float): Scale of the image. When falsy it is estimated from
                       the thresholded page.
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not

    Returns:
        [(x1, y1, x2, y2),...]: A list of tuples containing the bounding boxes
                                of the segmented lines in reading order.

    Raises:
        KrakenInputException if the input image is not binarized
    """
    if not (im.mode == '1' or is_bitonal(im)):
        raise KrakenInputException('Image is not bi-level')

    # NOTE: pil2array is used on purpose. According to the original author a
    # plain np.array(im, 'i') does not produce an array the algorithm works
    # correctly on, for reasons that were never pinned down.
    pixels = pil2array(im)
    midpoint = 0.5 * (np.amin(pixels) + np.amax(pixels))
    # Pixels above the midpoint become 0, everything else becomes 1.
    binary = 1 - np.array(pixels > midpoint, 'i')

    scale = scale or estimate_scale(binary)

    binary = remove_hlines(binary, scale)
    if not black_colseps:
        colseps = compute_white_colseps(binary, scale)
    else:
        # The black separator finder also returns an updated page array.
        colseps, binary = compute_black_colseps(binary, scale)

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    propagated = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    # Where propagation assigned no label fall back to the spread labels,
    # restricted to pixels set in `binary`.
    labels = np.where(propagated > 0, propagated, spread * binary)
    segmentation = labels * binary

    found = compute_lines(segmentation, scale)
    ordering = topsort(reading_order([ln.bounds for ln in found]))

    boxes = []
    for idx in ordering:
        # bounds is a (row slice, column slice) pair; emit (x1, y1, x2, y2).
        rows, cols = found[idx].bounds
        boxes.append((cols.start, rows.start, cols.stop, rows.stop))
    return boxes
コード例 #2
0
ファイル: pageseg.py プロジェクト: tianyaqu/kraken
def segment(im, scale=None, black_colseps=False):
    """
    Finds the text lines of a binarized page.

    Returns the absolute bounding box of every detected line, sorted by
    reading order.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        scale (float): Scale of the image. Estimated from the page when falsy.
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not

    Returns:
        [(x1, y1, x2, y2),...]: A list of tuples containing the bounding boxes
                                of the segmented lines in reading order.

    Raises:
        KrakenInputException if the input image is not binarized
    """
    # A non-'1' image is only accepted if exactly 254 histogram bins are
    # empty.
    bilevel = im.mode == '1' or im.histogram().count(0) == 254
    if not bilevel:
        raise KrakenInputException('Image is not bi-level')

    # NOTE: the conversion goes through pil2array deliberately; the original
    # author observed that np.array(im, 'i') yields an array on which the
    # algorithm does not work correctly.
    raw = pil2array(im)
    lo, hi = np.amin(raw), np.amax(raw)
    above = np.array(raw > 0.5*(lo + hi), 'i')
    # Invert: values above the midpoint end up as 0, the rest as 1.
    binary = 1 - above

    if not scale:
        scale = estimate_scale(binary)

    binary = remove_hlines(binary, scale)
    if black_colseps:
        # This helper hands back a modified page array alongside the
        # separators.
        colseps, binary = compute_black_colseps(binary, scale)
    else:
        colseps = compute_white_colseps(binary, scale)

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    grown = morph.propagate_labels(boxmap, seeds, conflict=0)
    fallback = morph.spread_labels(seeds, maxdist=scale)*binary
    # Keep propagated labels where present, otherwise use the spread ones.
    segmentation = np.where(grown > 0, grown, fallback)*binary

    detected = compute_lines(segmentation, scale)
    bounds = [line.bounds for line in detected]
    result = []
    for position in topsort(reading_order(bounds)):
        # Each bounds entry is unpacked as (row slice, column slice).
        ys, xs = bounds[position]
        result.append((xs.start, ys.start, xs.stop, ys.stop))
    return result
コード例 #3
0
ファイル: pageseg.py プロジェクト: ucodai/kraken
def segment(im,
            text_direction: str = 'horizontal-lr',
            scale: Optional[float] = None,
            maxcolseps: float = 2,
            black_colseps: bool = False,
            no_hlines: bool = True,
            pad: int = 0,
            mask: Optional[Any] = None,
            reading_order_fn: Callable = reading_order) -> Dict[str, Any]:
    """
    Segments a page into text lines.

    Segments a page into text lines and returns the absolute coordinates of
    each line in reading order.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        text_direction (str): Principal direction of the text
                              (horizontal-lr/rl/vertical-lr/rl)
        scale (float): Scale of the image. Estimated from the page when falsy.
        maxcolseps (int): Maximum number of whitespace column separators
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not
        no_hlines (bool): Switch for horizontal line removal
        pad (int or tuple): Padding to add to line bounding boxes. If int the
                            same padding is used both left and right. If a
                            2-tuple, uses (padding_left, padding_right).
        mask (PIL.Image): A bi-level mask image of the same size as `im` where
                          0-valued regions are ignored for segmentation
                          purposes. Disables column detection. `None`
                          disables masking.
        reading_order_fn (Callable): Function to call to order line output.
                                     Callable accepting a list of slices (y, x)
                                     and a text direction in (`rl`, `lr`).

    Returns:
        {'text_direction': '$dir', 'boxes': [(x1, y1, x2, y2),...]}: A
        dictionary containing the text direction and a list of reading order
        sorted bounding boxes under the key 'boxes'. If the column finder
        fails with a ValueError (probably an empty image) 'boxes' is empty.

    Raises:
        KrakenInputException if the input image or the mask is not binarized,
        the mask size differs from the image size, or the text direction is
        invalid.
    """
    im_str = get_im_str(im)
    logger.info(f'Segmenting {im_str}')

    if im.mode != '1' and not is_bitonal(im):
        logger.error(f'Image {im_str} is not bi-level')
        raise KrakenInputException(f'Image {im_str} is not bi-level')

    # rotate input image for vertical lines
    if text_direction.startswith('horizontal'):
        angle = 0
        offset = (0, 0)
    elif text_direction == 'vertical-lr':
        angle = 270
        offset = (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle = 90
        offset = (im.size[0], 0)
    else:
        logger.error(f'Invalid text direction \'{text_direction}\'')
        raise KrakenInputException(f'Invalid text direction {text_direction}')

    # Validate the mask against the *unrotated* page. After the rotation
    # below `im.size` has swapped width and height for vertical text, so a
    # correctly sized mask would be rejected on non-square pages.
    # `is not None` instead of truthiness: the truth value of an array-like
    # mask raises ValueError, which the column finder handler further down
    # would silently turn into an empty result.
    if mask is not None:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
        if mask.size != im.size:
            logger.error(
                f'Mask size {mask.size} doesn\'t match image size {im.size}'
            )
            raise KrakenInputException(
                f'Mask size {mask.size} doesn\'t match image size {im.size}'
            )

    logger.debug(f'Rotating input image by {angle} degrees')
    im = im.rotate(angle, expand=True)

    a = pil2array(im)
    # Threshold at the midpoint of the value range, then invert so that
    # values above the midpoint become 0 and the rest 1.
    binary = np.array(a > 0.5 * (np.amin(a) + np.amax(a)), 'i')
    binary = 1 - binary

    if not scale:
        scale = estimate_scale(binary)

    if no_hlines:
        binary = remove_hlines(binary, scale)

    # emptyish images will cause exceptions here.
    try:
        if mask is not None:
            logger.info(
                'Masking enabled in segmenter. Disabling column detection.')
            # The mask has to follow the page into the rotated frame.
            colseps = pil2array(mask.rotate(angle, expand=True))
        elif black_colseps:
            colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
        else:
            colseps = compute_white_colseps(binary, scale, maxcolseps)
    except ValueError:
        logger.warning(
            f'Exception in column finder (probably empty image) for {im_str}')
        return {'text_direction': text_direction, 'boxes': []}

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    # Use the spread labels wherever propagation assigned none.
    llabels = np.where(llabels > 0, llabels, spread * binary)
    segmentation = llabels * binary

    lines = compute_lines(segmentation, scale)
    # Only the trailing 'lr'/'rl' part of the direction is passed on.
    order = reading_order_fn([l.bounds for l in lines], text_direction[-2:])
    lsort = topsort(order)
    lines = [lines[i].bounds for i in lsort]
    # bounds are (row slice, column slice); convert to (x1, y1, x2, y2).
    lines = [(s2.start, s1.start, s2.stop, s1.stop) for s1, s2 in lines]

    if isinstance(pad, int):
        pad = (pad, pad)
    # Padding is applied horizontally in the rotated frame and clamped to
    # the rotated image width.
    lines = [(max(x[0] - pad[0], 0), x[1], min(x[2] + pad[1],
                                               im.size[0]), x[3])
             for x in lines]

    return {
        'text_direction': text_direction,
        # Map the boxes back into the coordinate frame of the input page.
        'boxes': rotate_lines(lines, 360 - angle, offset).tolist(),
        'script_detection': False
    }
コード例 #4
0
ファイル: pageseg.py プロジェクト: asgundogdu/kraken
def segment(im,
            text_direction='horizontal-lr',
            scale=None,
            maxcolseps=2,
            black_colseps=False):
    """
    Splits a binarized page into text lines for a given text direction.

    Vertical pages are rotated before processing. The detected line boxes
    are sorted by reading order and passed through `rotate_lines` with the
    inverse angle before being returned.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        text_direction (str): Principal direction of the text
                              (horizontal-lr/rl/vertical-lr/rl)
        scale (float): Scale of the image. Estimated from the page when falsy.
        maxcolseps (int): Maximum number of whitespace column separators
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not

    Returns:
        {'text_direction': '$dir', 'boxes': [(x1, y1, x2, y2),...]}: A
        dictionary containing the text direction and a list of reading order
        sorted bounding boxes under the key 'boxes'. When the column finder
        raises a ValueError the list of boxes is empty.

    Raises:
        KrakenInputException if the input image is not binarized or the text
        direction is invalid.
    """
    if not (im.mode == '1' or is_bitonal(im)):
        raise KrakenInputException('Image is not bi-level')

    # Pick the rotation for the text direction together with the offset
    # handed to rotate_lines at the end.
    if text_direction.startswith('horizontal'):
        angle, offset = 0, (0, 0)
    elif text_direction == 'vertical-lr':
        angle, offset = 270, (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle, offset = 90, (im.size[0], 0)
    else:
        raise KrakenInputException('Invalid text direction')

    im = im.rotate(angle, expand=True)

    # NOTE: pil2array is used on purpose; per the original author a plain
    # np.array(im, 'i') does not give an array the algorithm works on.
    pixels = pil2array(im)
    midpoint = 0.5 * (np.amin(pixels) + np.amax(pixels))
    # Values above the midpoint become 0, all others 1.
    binary = 1 - np.array(pixels > midpoint, 'i')

    scale = scale or estimate_scale(binary)

    binary = remove_hlines(binary, scale)
    # Nearly empty images make the column finders raise ValueError; report
    # an empty segmentation in that case.
    try:
        if not black_colseps:
            colseps = compute_white_colseps(binary, scale, maxcolseps)
        else:
            colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
    except ValueError:
        return {'text_direction': text_direction, 'boxes': []}

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    propagated = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    # Fall back to the spread labels where propagation assigned none.
    labels = np.where(propagated > 0, propagated, spread * binary)
    segmentation = labels * binary

    found = compute_lines(segmentation, scale)
    # Only the trailing 'lr'/'rl' part of the direction is passed on.
    ranking = reading_order([ln.bounds for ln in found], text_direction[-2:])
    boxes = []
    for idx in topsort(ranking):
        # bounds is unpacked as (row slice, column slice).
        rows, cols = found[idx].bounds
        boxes.append((cols.start, rows.start, cols.stop, rows.stop))

    rotated = rotate_lines(boxes, 360 - angle, offset).tolist()
    return {
        'text_direction': text_direction,
        'boxes': rotated,
        'script_detection': False,
    }
コード例 #5
0
ファイル: pageseg.py プロジェクト: mittagessen/kraken
def segment(im, text_direction='horizontal-lr', scale=None, maxcolseps=2,
            black_colseps=False, no_hlines=True, pad=0, mask=None):
    """
    Segments a page into text lines.

    Segments a page into text lines and returns the absolute coordinates of
    each line in reading order.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        text_direction (str): Principal direction of the text
                              (horizontal-lr/rl/vertical-lr/rl)
        scale (float): Scale of the image. Estimated from the page when falsy.
        maxcolseps (int): Maximum number of whitespace column separators
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not
        no_hlines (bool): Switch for horizontal line removal
        pad (int or tuple): Padding to add to line bounding boxes. If int the
                            same padding is used both left and right. If a
                            2-tuple, uses (padding_left, padding_right).
        mask (PIL.Image): A bi-level mask image of the same size as `im` where
                          0-valued regions are ignored for segmentation
                          purposes. Disables column detection. `None`
                          disables masking.

    Returns:
        {'text_direction': '$dir', 'boxes': [(x1, y1, x2, y2),...]}: A
        dictionary containing the text direction and a list of reading order
        sorted bounding boxes under the key 'boxes'. If the column finder
        fails with a ValueError (probably an empty image) 'boxes' is empty.

    Raises:
        KrakenInputException if the input image or the mask is not binarized,
        the mask size differs from the image size, or the text direction is
        invalid.
    """
    im_str = get_im_str(im)
    logger.info('Segmenting {}'.format(im_str))

    if im.mode != '1' and not is_bitonal(im):
        logger.error('Image {} is not bi-level'.format(im_str))
        raise KrakenInputException('Image {} is not bi-level'.format(im_str))

    # rotate input image for vertical lines
    if text_direction.startswith('horizontal'):
        angle = 0
        offset = (0, 0)
    elif text_direction == 'vertical-lr':
        angle = 270
        offset = (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle = 90
        offset = (im.size[0], 0)
    else:
        logger.error('Invalid text direction \'{}\''.format(text_direction))
        raise KrakenInputException('Invalid text direction {}'.format(text_direction))

    # Validate the mask against the *unrotated* page. After the rotation
    # below `im.size` has swapped width and height for vertical text, so a
    # correctly sized mask would be rejected on non-square pages.
    # `is not None` instead of truthiness: the truth value of an array-like
    # mask raises ValueError, which the column finder handler further down
    # would silently turn into an empty result.
    if mask is not None:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
        if mask.size != im.size:
            logger.error('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
            raise KrakenInputException('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))

    logger.debug('Rotating input image by {} degrees'.format(angle))
    im = im.rotate(angle, expand=True)

    # NOTE: pil2array is used on purpose. According to the original author a
    # plain np.array(im, 'i') should suffice in theory, but the conversion
    # done in pil2array alters the array in a way the algorithm needs to work
    # correctly.
    a = pil2array(im)
    # Threshold at the midpoint of the value range, then invert so that
    # values above the midpoint become 0 and the rest 1.
    binary = np.array(a > 0.5*(np.amin(a) + np.amax(a)), 'i')
    binary = 1 - binary

    if not scale:
        scale = estimate_scale(binary)

    if no_hlines:
        binary = remove_hlines(binary, scale)

    # emptyish images will cause exceptions here.
    try:
        if mask is not None:
            logger.info('Masking enabled in segmenter. Disabling column detection.')
            # The mask has to follow the page into the rotated frame.
            colseps = pil2array(mask.rotate(angle, expand=True))
        elif black_colseps:
            colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
        else:
            colseps = compute_white_colseps(binary, scale, maxcolseps)
    except ValueError:
        logger.warning('Exception in column finder (probably empty image) for {}.'.format(im_str))
        return {'text_direction': text_direction, 'boxes':  []}

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    # Use the spread labels wherever propagation assigned none.
    llabels = np.where(llabels > 0, llabels, spread*binary)
    segmentation = llabels*binary

    lines = compute_lines(segmentation, scale)
    # Only the trailing 'lr'/'rl' part of the direction is passed on.
    order = reading_order([l.bounds for l in lines], text_direction[-2:])
    lsort = topsort(order)
    lines = [lines[i].bounds for i in lsort]
    # bounds are (row slice, column slice); convert to (x1, y1, x2, y2).
    lines = [(s2.start, s1.start, s2.stop, s1.stop) for s1, s2 in lines]

    if isinstance(pad, int):
        pad = (pad, pad)
    # Padding is applied horizontally in the rotated frame and clamped to
    # the rotated image width.
    lines = [(max(x[0]-pad[0], 0), x[1], min(x[2]+pad[1], im.size[0]), x[3]) for x in lines]

    # rotate_lines maps the boxes back into the frame of the input page.
    return {'text_direction': text_direction, 'boxes':  rotate_lines(lines, 360-angle, offset).tolist(), 'script_detection': False}