def segment(im, scale=None, black_colseps=False):
    """
    Splits a bi-level page image into text lines.

    Runs the line segmenter on the page and reports the absolute bounding
    box of every detected line, sorted in reading order.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        scale (float): Scale of the image. Estimated from the page with
                       `estimate_scale` when omitted or otherwise falsy.
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not

    Returns:
        [(x1, y1, x2, y2),...]: A list of tuples containing the bounding boxes
        of the segmented lines in reading order.

    Raises:
        KrakenInputException if the input image is not binarized
    """
    is_bilevel = im.mode == '1' or is_bitonal(im)
    if not is_bilevel:
        raise KrakenInputException('Image is not bi-level')

    # NOTE: a plain np.array(im, 'i') should in theory be enough here, but
    # the tostring/fromstring round trip inside pil2array changes the array
    # in a way the algorithm needs to work correctly. The exact reason is
    # unknown, so keep going through pil2array.
    pixels = pil2array(im)
    midpoint = 0.5 * (np.amin(pixels) + np.amax(pixels))
    # threshold at the midpoint of the value range, then invert
    binary = 1 - np.array(pixels > midpoint, 'i')

    scale = scale or estimate_scale(binary)
    binary = remove_hlines(binary, scale)

    if black_colseps:
        # the black separator finder also hands back a modified binary image
        colseps, binary = compute_black_colseps(binary, scale)
    else:
        colseps = compute_white_colseps(binary, scale)

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)

    # prefer propagated labels; where none was assigned fall back to the
    # labels spread from the seeds, restricted to set pixels of `binary`
    propagated = morph.propagate_labels(boxmap, seeds, conflict=0)
    fallback = morph.spread_labels(seeds, maxdist=scale) * binary
    labels = np.where(propagated > 0, propagated, fallback)

    found = compute_lines(labels * binary, scale)
    bounds = [line.bounds for line in found]

    # bounds are (y slice, x slice) pairs; emit them as (x1, y1, x2, y2)
    boxes = []
    for idx in topsort(reading_order(bounds)):
        rows, cols = bounds[idx]
        boxes.append((cols.start, rows.start, cols.stop, rows.stop))
    return boxes
def segment(im, scale=None, black_colseps=False):
    """
    Finds the text lines of a bi-level page.

    The page is segmented into text lines and the absolute coordinates of
    each line are returned, ordered by reading order.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        scale (float): Scale of the image. Estimated from the page with
                       `estimate_scale` when omitted or otherwise falsy.
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not

    Returns:
        [(x1, y1, x2, y2),...]: A list of tuples containing the bounding boxes
        of the segmented lines in reading order.

    Raises:
        KrakenInputException if the input image is not binarized
    """
    if im.mode != '1':
        # 254 empty bins leave exactly two populated ones in a 256-bin
        # histogram; anything else is rejected as not bi-level
        empty_bins = im.histogram().count(0)
        if empty_bins != 254:
            raise KrakenInputException('Image is not bi-level')

    # NOTE: a plain np.array(im, 'i') should in theory be enough here, but
    # the tostring/fromstring round trip inside pil2array changes the array
    # in a way the algorithm needs to work correctly. The exact reason is
    # unknown, so keep going through pil2array.
    raw = pil2array(im)
    lo, hi = np.amin(raw), np.amax(raw)
    thresholded = np.array(raw > 0.5*(lo + hi), 'i')
    binary = 1 - thresholded

    if not scale:
        scale = estimate_scale(binary)
    binary = remove_hlines(binary, scale)

    if not black_colseps:
        colseps = compute_white_colseps(binary, scale)
    else:
        # the black separator finder also hands back a modified binary image
        colseps, binary = compute_black_colseps(binary, scale)

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)

    # keep propagated labels where present, otherwise use the labels spread
    # from the seeds, restricted to set pixels of `binary`
    line_labels = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread_labels = morph.spread_labels(seeds, maxdist=scale)
    line_labels = np.where(line_labels > 0, line_labels, spread_labels*binary)

    records = compute_lines(line_labels*binary, scale)
    ordering = topsort(reading_order([rec.bounds for rec in records]))

    # each bounds value is a (y slice, x slice) pair
    result = []
    for position in ordering:
        y_span, x_span = records[position].bounds
        result.append((x_span.start, y_span.start, x_span.stop, y_span.stop))
    return result
def segment(im,
            text_direction: str = 'horizontal-lr',
            scale: Optional[float] = None,
            maxcolseps: float = 2,
            black_colseps: bool = False,
            no_hlines: bool = True,
            pad: int = 0,
            mask: Optional[np.array] = None,
            reading_order_fn: Callable = reading_order) -> Dict[str, Any]:
    """
    Segments a page into text lines.

    Segments a page into text lines and returns the absolute coordinates of
    each line in reading order.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        text_direction (str): Principal direction of the text
                              (horizontal-lr/rl/vertical-lr/rl)
        scale (float): Scale of the image. Estimated from the page when
                       omitted or otherwise falsy.
        maxcolseps (int): Maximum number of whitespace column separators
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not
        no_hlines (bool): Switch for horizontal line removal
        pad (int or tuple): Padding to add to line bounding boxes. If int the
                            same padding is used both left and right. If a
                            2-tuple, uses (padding_left, padding_right).
        mask (PIL.Image): A bi-level mask image of the same size as `im`
                          (before any rotation applied for vertical text)
                          where 0-valued regions are ignored for segmentation
                          purposes. Disables column detection. It is used as
                          a PIL image (`mode`, `size`, `convert`, `rotate`).
        reading_order_fn (Callable): Function to call to order line output.
                                     Callable accepting a list of slices (y, x)
                                     and a text direction in (`rl`, `lr`).

    Returns:
        {'text_direction': '$dir', 'boxes': [(x1, y1, x2, y2),...],
        'script_detection': False}: A dictionary containing the text
        direction and a list of reading order sorted bounding boxes under the
        key 'boxes'. 'boxes' is empty when the column finder fails, which
        probably means the page is empty.

    Raises:
        KrakenInputException if the input image or the mask is not binarized,
        the mask size does not match the image size, or the text direction is
        invalid.
    """
    im_str = get_im_str(im)
    logger.info(f'Segmenting {im_str}')

    if im.mode != '1' and not is_bitonal(im):
        logger.error(f'Image {im_str} is not bi-level')
        raise KrakenInputException(f'Image {im_str} is not bi-level')

    # rotate input image for vertical lines
    if text_direction.startswith('horizontal'):
        angle = 0
        offset = (0, 0)
    elif text_direction == 'vertical-lr':
        angle = 270
        offset = (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle = 90
        offset = (im.size[0], 0)
    else:
        logger.error(f'Invalid text direction \'{text_direction}\'')
        raise KrakenInputException(f'Invalid text direction {text_direction}')

    # Remember the size of the page as supplied by the caller: the mask is
    # validated against it, not against the rotated working image.
    page_size = im.size

    logger.debug(f'Rotating input image by {angle} degrees')
    im = im.rotate(angle, expand=True)

    a = pil2array(im)
    binary = np.array(a > 0.5 * (np.amin(a) + np.amax(a)), 'i')
    binary = 1 - binary

    if not scale:
        scale = estimate_scale(binary)

    if no_hlines:
        binary = remove_hlines(binary, scale)

    # emptyish images will cause exceptions here.
    try:
        # Explicit None check: relying on truthiness would raise an
        # ambiguous-truth ValueError for array-like masks, which the handler
        # below would then misreport as an empty image.
        if mask is not None:
            if mask.mode != '1' and not is_bitonal(mask):
                logger.error('Mask is not bitonal')
                raise KrakenInputException('Mask is not bitonal')
            mask = mask.convert('1')
            # Compare against the unrotated page size; `im` has already been
            # rotated, so its size is swapped for vertical text.
            if mask.size != page_size:
                msg = f'Mask size {mask.size} doesn\'t match image size {page_size}'
                logger.error(msg)
                raise KrakenInputException(msg)
            logger.info('Masking enabled in segmenter. Disabling column detection.')
            # bring the mask into the same orientation as the working image
            mask = mask.rotate(angle, expand=True)
            colseps = pil2array(mask)
        elif black_colseps:
            colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
        else:
            colseps = compute_white_colseps(binary, scale, maxcolseps)
    except ValueError:
        logger.warning(f'Exception in column finder (probably empty image) for {im_str}')
        # same keys as the regular return value so callers can rely on them
        return {'text_direction': text_direction,
                'boxes': [],
                'script_detection': False}

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    # fall back to the spread labels wherever propagation assigned none
    llabels = np.where(llabels > 0, llabels, spread * binary)
    segmentation = llabels * binary

    lines = compute_lines(segmentation, scale)
    # the ordering function only receives the trailing 'lr'/'rl' part
    order = reading_order_fn([line.bounds for line in lines], text_direction[-2:])
    lsort = topsort(order)
    lines = [lines[i].bounds for i in lsort]
    # bounds are (y slice, x slice); convert to (x1, y1, x2, y2)
    lines = [(s2.start, s1.start, s2.stop, s1.stop) for s1, s2 in lines]

    if isinstance(pad, int):
        pad = (pad, pad)
    # pad horizontally in the rotated frame, clamped to the image width
    lines = [(max(x[0] - pad[0], 0), x[1], min(x[2] + pad[1], im.size[0]), x[3])
             for x in lines]

    # rotate the boxes back into the coordinate system of the input page
    return {'text_direction': text_direction,
            'boxes': rotate_lines(lines, 360 - angle, offset).tolist(),
            'script_detection': False}
def segment(im, text_direction='horizontal-lr', scale=None, maxcolseps=2,
            black_colseps=False):
    """
    Segments a page into text lines.

    Segments a page into text lines and returns the absolute coordinates of
    each line in reading order.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        text_direction (str): Principal direction of the text
                              (horizontal-lr/rl/vertical-lr/rl)
        scale (float): Scale of the image. Estimated from the page when
                       omitted or otherwise falsy.
        maxcolseps (int): Maximum number of whitespace column separators
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not

    Returns:
        {'text_direction': '$dir', 'boxes': [(x1, y1, x2, y2),...],
        'script_detection': False}: A dictionary containing the text
        direction and a list of reading order sorted bounding boxes under the
        key 'boxes'. 'boxes' is empty when the column finder raises a
        ValueError, which emptyish images are expected to cause.

    Raises:
        KrakenInputException if the input image is not binarized or the text
        direction is invalid.
    """
    if im.mode != '1' and not is_bitonal(im):
        raise KrakenInputException('Image is not bi-level')

    # rotate input image for vertical lines
    if text_direction.startswith('horizontal'):
        angle = 0
        offset = (0, 0)
    elif text_direction == 'vertical-lr':
        angle = 270
        offset = (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle = 90
        offset = (im.size[0], 0)
    else:
        raise KrakenInputException('Invalid text direction')

    im = im.rotate(angle, expand=True)

    # NOTE: a plain np.array(im, 'i') should in theory be enough here, but
    # the tostring/fromstring round trip inside pil2array changes the array
    # in a way the algorithm needs to work correctly. The exact reason is
    # unknown, so keep going through pil2array.
    a = pil2array(im)
    binary = np.array(a > 0.5 * (np.amin(a) + np.amax(a)), 'i')
    binary = 1 - binary

    if not scale:
        scale = estimate_scale(binary)

    binary = remove_hlines(binary, scale)

    # emptyish images will cause exceptions here.
    try:
        if black_colseps:
            colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
        else:
            colseps = compute_white_colseps(binary, scale, maxcolseps)
    except ValueError:
        # same keys as the regular return value so callers can rely on them
        return {'text_direction': text_direction,
                'boxes': [],
                'script_detection': False}

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    # fall back to the spread labels wherever propagation assigned none
    llabels = np.where(llabels > 0, llabels, spread * binary)
    segmentation = llabels * binary

    lines = compute_lines(segmentation, scale)
    # reading_order only receives the trailing 'lr'/'rl' part
    order = reading_order([line.bounds for line in lines], text_direction[-2:])
    lsort = topsort(order)
    lines = [lines[i].bounds for i in lsort]
    # bounds are (y slice, x slice); convert to (x1, y1, x2, y2)
    lines = [(s2.start, s1.start, s2.stop, s1.stop) for s1, s2 in lines]

    # rotate the boxes back into the coordinate system of the input page
    return {'text_direction': text_direction,
            'boxes': rotate_lines(lines, 360 - angle, offset).tolist(),
            'script_detection': False}
def segment(im, text_direction='horizontal-lr', scale=None, maxcolseps=2,
            black_colseps=False, no_hlines=True, pad=0, mask=None):
    """
    Segments a page into text lines.

    Segments a page into text lines and returns the absolute coordinates of
    each line in reading order.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        text_direction (str): Principal direction of the text
                              (horizontal-lr/rl/vertical-lr/rl)
        scale (float): Scale of the image. Estimated from the page when
                       omitted or otherwise falsy.
        maxcolseps (int): Maximum number of whitespace column separators
        black_colseps (bool): Whether column separators are assumed to be
                              vertical black lines or not
        no_hlines (bool): Switch for horizontal line removal
        pad (int or tuple): Padding to add to line bounding boxes. If int the
                            same padding is used both left and right. If a
                            2-tuple, uses (padding_left, padding_right).
        mask (PIL.Image): A bi-level mask image of the same size as `im`
                          (before any rotation applied for vertical text)
                          where 0-valued regions are ignored for segmentation
                          purposes. Disables column detection.

    Returns:
        {'text_direction': '$dir', 'boxes': [(x1, y1, x2, y2),...],
        'script_detection': False}: A dictionary containing the text
        direction and a list of reading order sorted bounding boxes under the
        key 'boxes'. 'boxes' is empty when the column finder fails, which
        probably means the page is empty.

    Raises:
        KrakenInputException if the input image or the mask is not binarized,
        the mask size does not match the image size, or the text direction is
        invalid.
    """
    im_str = get_im_str(im)
    logger.info('Segmenting {}'.format(im_str))

    if im.mode != '1' and not is_bitonal(im):
        logger.error('Image {} is not bi-level'.format(im_str))
        raise KrakenInputException('Image {} is not bi-level'.format(im_str))

    # rotate input image for vertical lines
    if text_direction.startswith('horizontal'):
        angle = 0
        offset = (0, 0)
    elif text_direction == 'vertical-lr':
        angle = 270
        offset = (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle = 90
        offset = (im.size[0], 0)
    else:
        logger.error('Invalid text direction \'{}\''.format(text_direction))
        raise KrakenInputException('Invalid text direction {}'.format(text_direction))

    # Remember the size of the page as supplied by the caller: the mask is
    # validated against it, not against the rotated working image.
    page_size = im.size

    logger.debug('Rotating input image by {} degrees'.format(angle))
    im = im.rotate(angle, expand=True)

    # NOTE: a plain np.array(im, 'i') should in theory be enough here, but
    # the tostring/fromstring round trip inside pil2array changes the array
    # in a way the algorithm needs to work correctly. The exact reason is
    # unknown, so keep going through pil2array.
    a = pil2array(im)
    binary = np.array(a > 0.5*(np.amin(a) + np.amax(a)), 'i')
    binary = 1 - binary

    if not scale:
        scale = estimate_scale(binary)

    if no_hlines:
        binary = remove_hlines(binary, scale)

    # emptyish images will cause exceptions here.
    try:
        # Explicit None check: relying on truthiness would raise an
        # ambiguous-truth ValueError for array-like masks, which the handler
        # below would then misreport as an empty image.
        if mask is not None:
            if mask.mode != '1' and not is_bitonal(mask):
                logger.error('Mask is not bitonal')
                raise KrakenInputException('Mask is not bitonal')
            mask = mask.convert('1')
            # Compare against the unrotated page size; `im` has already been
            # rotated, so its size is swapped for vertical text.
            if mask.size != page_size:
                msg = 'Mask size {} doesn\'t match image size {}'.format(mask.size, page_size)
                logger.error(msg)
                raise KrakenInputException(msg)
            logger.info('Masking enabled in segmenter. Disabling column detection.')
            # bring the mask into the same orientation as the working image
            mask = mask.rotate(angle, expand=True)
            colseps = pil2array(mask)
        elif black_colseps:
            colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
        else:
            colseps = compute_white_colseps(binary, scale, maxcolseps)
    except ValueError:
        logger.warning('Exception in column finder (probably empty image) for {}.'.format(im_str))
        # same keys as the regular return value so callers can rely on them
        return {'text_direction': text_direction,
                'boxes': [],
                'script_detection': False}

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    # fall back to the spread labels wherever propagation assigned none
    llabels = np.where(llabels > 0, llabels, spread*binary)
    segmentation = llabels*binary

    lines = compute_lines(segmentation, scale)
    # reading_order only receives the trailing 'lr'/'rl' part
    order = reading_order([line.bounds for line in lines], text_direction[-2:])
    lsort = topsort(order)
    lines = [lines[i].bounds for i in lsort]
    # bounds are (y slice, x slice); convert to (x1, y1, x2, y2)
    lines = [(s2.start, s1.start, s2.stop, s1.stop) for s1, s2 in lines]

    if isinstance(pad, int):
        pad = (pad, pad)
    # pad horizontally in the rotated frame, clamped to the image width
    lines = [(max(x[0]-pad[0], 0), x[1], min(x[2]+pad[1], im.size[0]), x[3])
             for x in lines]

    # rotate the boxes back into the coordinate system of the input page
    return {'text_direction': text_direction,
            'boxes': rotate_lines(lines, 360-angle, offset).tolist(),
            'script_detection': False}