def vec_lines(heatmap: torch.Tensor,
              cls_map: Dict,
              scale: float,
              text_direction: str = 'horizontal-lr',
              reading_order_fn: Callable = polygonal_reading_order,
              regions: Dict = None,
              scal_im=None,
              **kwargs):
    """
    Turns a stack of heatmaps into polygonized lines sorted in reading order.

    Args:
        heatmap: Stack of heatmaps; it is indexed along its first axis with
                 the separator and baseline indices found in `cls_map`.
        cls_map: Class mapping. `cls_map['aux']` has to contain the keys
                 `_start_separator` and `_end_separator`,
                 `cls_map['baselines']` maps line types to heatmap indices.
        scale: Scaling factor handed to `scale_polygonal_lines`.
        text_direction: Text direction; only its last two characters are
                        forwarded to `reading_order_fn`.
        reading_order_fn: Callable invoked with the keyword arguments
                          `lines`, `regions`, and `text_direction`.
        regions: Forwarded unchanged to `reading_order_fn`.
        scal_im: Forwarded to `calculate_polygonal_environment`.
        **kwargs: Accepted and ignored.

    Returns:
        A list of dicts with the keys `script`, `baseline`, and `boundary`,
        in the order produced by `reading_order_fn`.
    """
    aux = cls_map['aux']
    separators = (aux['_start_separator'], aux['_end_separator'])

    logger.info('Vectorizing baselines')
    typed_baselines = []
    for line_type, channel in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {line_type}')
        for vec in vectorize_lines(heatmap[separators + (channel,), :, :]):
            typed_baselines.append((line_type, vec))

    logger.debug('Polygonizing lines')
    polygons = calculate_polygonal_environment(scal_im,
                                               [vec for _, vec in typed_baselines])
    # lines whose polygonization yielded None are dropped
    polygonized = []
    for (line_type, vec), polygon in zip(typed_baselines, polygons):
        if polygon is not None:
            polygonized.append((line_type, vec, polygon))

    logger.debug('Scaling vectorized lines')
    scaled = scale_polygonal_lines([entry[1:] for entry in polygonized], scale)
    merged = [(entry[0], pair[0], pair[1])
              for entry, pair in zip(polygonized, scaled)]

    logger.debug('Reordering baselines')
    ordered = reading_order_fn(lines=merged,
                               regions=regions,
                               text_direction=text_direction[-2:])

    return [{'script': line_type, 'baseline': baseline, 'boundary': boundary}
            for line_type, baseline, boundary in ordered]
def vec_lines(heatmap: torch.Tensor,
              cls_map: Dict,
              scale: float,
              text_direction: str = 'horizontal-lr',
              reading_order_fn: Callable = polygonal_reading_order,
              regions: Dict = None,
              scal_im=None,
              suppl_obj=None,
              topline=False,
              **kwargs):
    """
    Computes lines from a stack of heatmaps, a class mapping, and scaling
    factor.

    Each baseline is polygonized on its own; all other baselines and every
    region containing the midpoint of the baseline are passed as
    supplementary objects to `calculate_polygonal_environment`.

    Args:
        heatmap: Stack of heatmaps; it is indexed along its first axis with
                 the separator and baseline indices found in `cls_map`.
        cls_map: Class mapping. `cls_map['aux']` has to contain the keys
                 `_start_separator` and `_end_separator`,
                 `cls_map['baselines']` maps line types to heatmap indices.
        scale: Scaling factor handed to `scale_polygonal_lines`.
        text_direction: Text direction; only its last two characters are
                        forwarded to `reading_order_fn`.
        reading_order_fn: Callable invoked with the keyword arguments
                          `lines`, `regions`, and `text_direction`.
        regions: Sequence of region polygons (accessed by position and
                 converted with `geom.Polygon`). `None` is treated as "no
                 regions" for polygonization and forwarded unchanged to
                 `reading_order_fn`.
        scal_im: Image data used to compute the edge features; it is passed
                 to `sobel` unconditionally, so it has to be provided.
        suppl_obj: Not consulted by this function; the supplementary objects
                   are rebuilt for every line.
        topline: Forwarded to `calculate_polygonal_environment`.
        **kwargs: Accepted and ignored.

    Returns:
        A list of dicts with the keys `script`, `baseline`, and `boundary`,
        in the order produced by `reading_order_fn`.
    """
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type, x) for x in vectorize_lines(heatmap[(st_sep, end_sep, idx), :, :])])

    logger.debug('Polygonizing lines')
    im_feats = gaussian_filter(sobel(scal_im), 0.5)

    # the default `regions=None` must not break the iteration below
    region_list = [] if regions is None else regions
    reg_pols = [geom.Polygon(x) for x in region_list]

    lines = []
    for bl_idx, (line_type, bl) in enumerate(baselines):
        mid_point = geom.LineString(bl).interpolate(0.5, normalized=True)

        # every other baseline bounds the polygon of the current one ...
        line_suppl = [x[1] for x in baselines[:bl_idx] + baselines[bl_idx + 1:]]
        # ... as does every region containing the midpoint of the line
        line_suppl.extend(reg for reg, reg_pol in zip(region_list, reg_pols)
                          if reg_pol.contains(mid_point))

        pol = calculate_polygonal_environment(baselines=[bl],
                                              im_feats=im_feats,
                                              suppl_obj=line_suppl,
                                              topline=topline)
        if pol[0] is not None:
            lines.append((line_type, bl, pol[0]))

    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))

    logger.debug('Reordering baselines')
    lines = reading_order_fn(lines=lines, regions=regions, text_direction=text_direction[-2:])

    return [{'script': bl_type, 'baseline': bl, 'boundary': pl} for bl_type, bl, pl in lines]
def _repolygonize(im: Image.Image, lines):
    """
    Helper function taking an output of the lib.xml parse_* functions and
    recalculating the contained polygonization.

    Args:
        im (Image.Image): Input image. Anything that is not already an
                          `Image.Image` (e.g. a path or an open file) is
                          loaded with `Image.open`.
        lines (list): List of dicts [{'boundary': [[x0, y0], ...],
                      'baseline': [[x0, y0], ...], 'text': 'abcvsd',
                      'script': ...}, {...}]. The keys `baseline`, `text`,
                      and `script` are required.

    Returns:
        A data structure `lines` with a changed polygonization.
    """
    if isinstance(im, Image.Image):
        # already loaded, as promised by the signature
        im = im.convert('L')
    else:
        # convert() returns an independent copy so the file can be closed
        with Image.open(im) as src:
            im = src.convert('L')

    polygons = calculate_polygonal_environment(im, [x['baseline'] for x in lines])
    return [{'boundary': polygon,
             'baseline': orig['baseline'],
             'text': orig['text'],
             'script': orig['script']} for orig, polygon in zip(lines, polygons)]
def cli(format_type, topline, files):
    """
    A small script repolygonizing line boundaries in ALTO or PageXML files.
    """
    # NOTE: the docstring above doubles as the help text printed below, so
    # implementation notes live in comments instead.
    #
    # format_type: 'page' selects the PageXML parser/writer, anything else
    #              the ALTO ones.
    # topline:     one of 'topline', 'baseline', 'centerline'; translated to
    #              True/False/None for calculate_polygonal_environment.
    # files:       input XML files; for each one a sibling file with the
    #              suffix `_rewrite.xml` is written.
    if not files:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from os.path import splitext

    from lxml import etree
    from PIL import Image

    from kraken.lib import xml
    from kraken.lib.segmentation import calculate_polygonal_environment

    def _rewrite(fname, polygons, elem_path, attr_name, serialize):
        # Replaces the coordinate attribute of each text line with the
        # serialized new polygon and writes the result next to `fname`.
        with open(fname, 'rb') as fp:
            tree = etree.parse(fp)
        idx = 0
        for line in tree.findall('.//{*}TextLine'):
            pol = line.find(elem_path)
            if pol is None:
                continue
            # a failed polygonization yields None; keep the existing
            # coordinates in that case instead of crashing
            if polygons[idx] is not None:
                pol.attrib[attr_name] = serialize(polygons[idx])
            idx += 1
        with open(splitext(fname)[0] + '_rewrite.xml', 'wb') as fp:
            tree.write(fp, encoding='UTF-8', xml_declaration=True)

    def _repl_alto(fname, polygons):
        # ALTO: flat, space separated list "x0 y0 x1 y1 ..."
        _rewrite(fname,
                 polygons,
                 './{*}Shape/{*}Polygon',
                 'POINTS',
                 lambda pol: ' '.join([str(coord) for pt in pol for coord in pt]))

    def _repl_page(fname, polygons):
        # PageXML: space separated list of "x,y" pairs
        _rewrite(fname,
                 polygons,
                 './{*}Coords',
                 'points',
                 lambda pol: ' '.join([','.join([str(x) for x in pt]) for pt in pol]))

    if format_type == 'page':
        parse_fn = xml.parse_page
        repl_fn = _repl_page
    else:
        parse_fn = xml.parse_alto
        repl_fn = _repl_alto

    topline = {'topline': True,
               'baseline': False,
               'centerline': None}[topline]

    for doc in files:
        click.echo(f'Processing {doc} ', nl=False)
        seg = parse_fn(doc)
        im = Image.open(seg['image']).convert('L')
        # lines without a baseline get a placeholder so that the polygon
        # list stays aligned with the lines of the document
        baselines = [x['baseline'] if x['baseline'] is not None else [0, 0]
                     for x in seg['lines']]
        polygons = calculate_polygonal_environment(im, baselines, scale=(1800, 0), topline=topline)
        repl_fn(doc, polygons)
        # terminate the progress line opened with nl=False above
        click.secho('\u2713', fg='green')
def segment(im,
            text_direction: str = 'horizontal-lr',
            mask: Optional[np.array] = None,
            reading_order_fn: Callable = polygonal_reading_order,
            model=None,
            device: str = 'cpu'):
    """
    Segments a page into text lines using the baseline segmenter.

    Segments a page into text lines and returns the polyline formed by each
    baseline and their estimated environment.

    Args:
        im (PIL.Image): Input image.
        text_direction (str): Text direction. Its last two characters are
                              handed to `reading_order_fn`; the full value is
                              returned unchanged in the output.
        mask (PIL.Image): A mask image of the same size as `im`. It is
                          validated (bitonal, matching size) and converted
                          to an array.
        reading_order_fn (function): Function to determine the reading order.
                                     Called with the keyword arguments
                                     `lines`, `regions`, and
                                     `text_direction`.
        model (vgsl.TorchVGSLModel): A TorchVGSLModel containing a
                                     segmentation model. If none is given a
                                     default model will be loaded.
        device (str or torch.Device): The target device to run the neural
                                      network on.

    Returns:
        {'text_direction': '$dir',
         'type': 'baselines',
         'lines': [
            {'script': '$baseline_type',
             'baseline': [[x0, y0], [x1, y1], ..., [x_n, y_n]],
             'boundary': [[x0, y0], [x1, y1], ..., [x_m, y_m]]},
            ...
         ],
         'regions': {'$region_type': [...], ...},
         'script_detection': True/False
        }: A dictionary containing the text direction and under the key
        'lines' a list of reading order sorted baselines (polylines) and
        their respective polygonal boundaries. 'regions' maps each region
        type of the model to the output of `vectorize_regions` after
        rescaling with `scale_regions`. 'script_detection' is set when the
        model defines more than one baseline type.

    Raises:
        KrakenInputException if the mask is not bitonal or its size differs
        from the size of the input image.
    """
    im_str = get_im_str(im)
    logger.info(f'Segmenting {im_str}')

    if model is None:
        logger.info('No segmentation model given. Loading default model.')
        model = vgsl.TorchVGSLModel.load_model(pkg_resources.resource_filename(__name__, 'blla.mlmodel'))

    if model.one_channel_mode == '1' and not is_bitonal(im):
        logger.warning('Running binary model on non-binary input image '
                       '(mode {}). This will result in severely degraded '
                       'performance'.format(im.mode))

    model.eval()
    model.to(device)

    if mask is not None:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
        if mask.size != im.size:
            msg = f'Mask size {mask.size} doesn\'t match image size {im.size}'
            logger.error(msg)
            raise KrakenInputException(msg)
        logger.info('Masking enabled in segmenter.')
        # NOTE(review): the converted mask is not used anywhere below --
        # confirm whether masking of the network output is still pending.
        mask = pil2array(mask)

    batch, channels, height, width = model.input
    transforms = dataset.generate_input_transforms(batch, height, width, channels, 0, valid_norm=False)
    # the first three transforms produce the rescaled image that is used for
    # polygonization
    res_tf = tf.Compose(transforms.transforms[:3])
    scal_im = res_tf(im).convert('L')

    with torch.no_grad():
        logger.debug('Running network forward pass')
        o = model.nn(transforms(im).unsqueeze(0).to(device))
    logger.debug('Upsampling network output')
    # PIL sizes are (width, height), tensors are (..., height, width)
    o = F.interpolate(o, size=scal_im.size[::-1])
    o = o.squeeze().cpu().numpy()
    # (width, height) of the input divided by (width, height) of the output
    scale = np.divide(im.size, o.shape[:0:-1])

    # postprocessing
    cls_map = model.user_metadata['class_mapping']
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type, x) for x in vectorize_lines(o[(st_sep, end_sep, idx), :, :])])

    logger.info('Vectorizing regions')
    regions = {}
    for region_type, idx in cls_map['regions'].items():
        logger.debug(f'Vectorizing regions of type {region_type}')
        regions[region_type] = vectorize_regions(o[idx])

    logger.debug('Polygonizing lines')
    polygons = calculate_polygonal_environment(scal_im, [x[1] for x in baselines])
    # drop lines whose polygonization failed
    lines = [(bl_type, bl, pol)
             for (bl_type, bl), pol in zip(baselines, polygons)
             if pol is not None]

    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))

    logger.debug('Scaling vectorized regions')
    for reg_id, regs in regions.items():
        regions[reg_id] = scale_regions(regs, scale)

    logger.debug('Reordering baselines')
    # the reading order function receives the regions of all types as one
    # flat list
    order_regs = []
    for regs in regions.values():
        order_regs.extend(regs)
    lines = reading_order_fn(lines=lines, regions=order_regs, text_direction=text_direction[-2:])

    # the class mapping is known to exist at this point
    script_detection = len(cls_map['baselines']) > 1

    return {'text_direction': text_direction,
            'type': 'baselines',
            'lines': [{'script': bl_type, 'baseline': bl, 'boundary': pl} for bl_type, bl, pl in lines],
            'regions': regions,
            'script_detection': script_detection}
def vec_lines(heatmap: torch.Tensor,
              cls_map: Dict[str, Dict[str, int]],
              scale: float,
              text_direction: str = 'horizontal-lr',
              reading_order_fn: Callable = polygonal_reading_order,
              regions: List[np.ndarray] = None,
              scal_im: np.ndarray = None,
              suppl_obj: List[np.ndarray] = None,
              topline: Optional[bool] = False,
              **kwargs) -> List[Dict[str, Any]]:
    r"""
    Computes lines from a stack of heatmaps, a class mapping, and scaling
    factor.

    Args:
        heatmap: A stack of heatmaps of shape `NxHxW` output from the network.
        cls_map: Dictionary mapping string identifiers to indices on the stack
                 of heatmaps.
        scale: Scaling factor between heatmap and unscaled input image.
        text_direction: Text directions used as hints in the reading order
                        algorithm. Only the last two characters are passed on.
        reading_order_fn: Reading order calculation function.
        regions: Regions to be used as boundaries during polygonization and
                 atomic blocks during reading order determination for lines
                 contained within. `None` is treated as "no regions" during
                 polygonization and passed on unchanged to
                 `reading_order_fn`.
        scal_im: A numpy array containing the scaled input image. It is
                 required as the edge features are always computed from it.
        suppl_obj: Supplementary objects which are used as boundaries during
                   polygonization. NOTE(review): the value passed in is
                   currently not consulted; the supplementary objects of each
                   line are rebuilt from the other baselines and the regions
                   containing the line.
        topline: True for a topline, False for baseline, or None for a
                 centerline.

    Returns:
        A list of dictionaries containing the baselines, bounding polygons,
        and line type in reading order:

        .. code-block::
           :force:

            [{'tags': {'type': '$baseline_type'}, 'baseline': [[x0, y0], [x1, y1], ..., [x_n, y_n]], 'boundary': [[x0, y0], [x1, y1], ..., [x_m, y_m]]},
             {'tags': {'type': '$baseline_type'}, 'baseline': [[x0, ...]], 'boundary': [[x0, ...]]},
             {'tags': {'type': '$baseline_type'}, 'baseline': [[x0, ...]], 'boundary': [[x0, ...]]},
             ...
            ]
    """
    st_sep = cls_map['aux']['_start_separator']
    end_sep = cls_map['aux']['_end_separator']

    logger.info('Vectorizing baselines')
    baselines = []
    for bl_type, idx in cls_map['baselines'].items():
        logger.debug(f'Vectorizing lines of type {bl_type}')
        baselines.extend([(bl_type, x) for x in vectorize_lines(heatmap[(st_sep, end_sep, idx), :, :])])

    logger.debug('Polygonizing lines')
    im_feats = gaussian_filter(sobel(scal_im), 0.5)

    # the default `regions=None` must not break the iteration below
    region_list = [] if regions is None else regions
    reg_pols = [geom.Polygon(x) for x in region_list]

    lines = []
    for bl_idx, (line_type, bl) in enumerate(baselines):
        mid_point = geom.LineString(bl).interpolate(0.5, normalized=True)

        # every other baseline bounds the polygon of the current one ...
        line_suppl = [x[1] for x in baselines[:bl_idx] + baselines[bl_idx + 1:]]
        # ... as does every region containing the midpoint of the line
        line_suppl.extend(reg for reg, reg_pol in zip(region_list, reg_pols)
                          if reg_pol.contains(mid_point))

        pol = calculate_polygonal_environment(baselines=[bl],
                                              im_feats=im_feats,
                                              suppl_obj=line_suppl,
                                              topline=topline)
        if pol[0] is not None:
            lines.append((line_type, bl, pol[0]))

    logger.debug('Scaling vectorized lines')
    sc = scale_polygonal_lines([x[1:] for x in lines], scale)
    lines = list(zip([x[0] for x in lines], [x[0] for x in sc], [x[1] for x in sc]))

    logger.debug('Reordering baselines')
    lines = reading_order_fn(lines=lines, regions=regions, text_direction=text_direction[-2:])

    return [{'tags': {'type': bl_type},
             'baseline': bl,
             'boundary': pl} for bl_type, bl, pl in lines]
def cli(format_type, model, repolygonize, files):
    """
    A script producing overlays of lines and regions from either ALTO or
    PageXML files or run a model to do the same.
    """
    # NOTE: the docstring above doubles as the help text printed below, so
    # implementation notes live in comments instead.
    #
    # format_type:  'xml', 'alto', or anything else for PageXML; only used
    #               when no model is given.
    # model:        path of a segmentation model or None to read the
    #               segmentation from the XML files.
    # repolygonize: recompute the line polygons from the baselines (XML mode
    #               only).
    # files:        XML files (no model) or image files (with model).
    #
    # One PNG per line type and one per region type is written to the
    # current working directory for each input file.
    if not files:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image, ImageDraw

    from kraken import blla
    from kraken.lib import segmentation, vgsl, xml

    def _has_points(seq):
        # True for a non-empty coordinate sequence (also safe for arrays)
        return seq is not None and len(seq) > 0

    def _as_xy(points):
        # ImageDraw accepts flat coordinate lists or sequences of tuples but
        # not lists of lists, so convert the individual points
        return [tuple(pt) if hasattr(pt, '__iter__') else pt for pt in points]

    def _save_overlay(im, layer, doc, kind, label):
        # Composites the drawn layer onto the page and writes the PNG.
        base_image = Image.alpha_composite(im, layer)
        base_image.save(f'high_{os.path.basename(doc)}_{kind}_{slugify(label)}.png')

    def _render_lines(im, doc, line_records):
        # Draws boundary, baseline, and running index of all lines, one
        # output image per line type.
        by_type = defaultdict(list)
        for line in line_records:
            by_type[line['script']].append(line)
        for t, ls in by_type.items():
            tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(tmp)
            for idx, line in enumerate(ls):
                c = next(cmap)
                if _has_points(line['boundary']):
                    draw.polygon(_as_xy(line['boundary']), fill=c, outline=c[:3])
                if _has_points(line['baseline']):
                    draw.line(_as_xy(line['baseline']), fill=bmap, width=2, joint='curve')
                    # the label is anchored on the first baseline point and
                    # thus needs a baseline as well
                    draw.text(line['baseline'][0], str(idx), fill=(0, 0, 0, 255))
            _save_overlay(im, tmp, doc, 'lines', t)

    def _render_regions(im, doc, regions):
        # Draws all regions, one output image per region type.
        for t, regs in regions.items():
            tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(tmp)
            for reg in regs:
                c = next(cmap)
                try:
                    draw.polygon(_as_xy(reg), fill=c, outline=c[:3])
                except Exception as e:
                    # drawing is best effort, but do not hide the failure
                    click.echo(f'Unable to draw region of type {t}: {e}', err=True)
            _save_overlay(im, tmp, doc, 'regions', t)

    if model is None:
        if format_type == 'xml':
            fn = xml.parse_xml
        elif format_type == 'alto':
            fn = xml.parse_alto
        else:
            fn = xml.parse_page
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            data = fn(doc)
            if repolygonize:
                im = Image.open(data['image']).convert('L')
                lines = data['lines']
                polygons = segmentation.calculate_polygonal_environment(im,
                                                                        [x['baseline'] for x in lines],
                                                                        scale=(1200, 0))
                data['lines'] = [{'boundary': polygon,
                                  'baseline': orig['baseline'],
                                  'text': orig['text'],
                                  'script': orig['script']} for orig, polygon in zip(lines, polygons)]
            im = Image.open(data['image']).convert('RGBA')
            _render_lines(im, doc, data['lines'])
            _render_regions(im, doc, data['regions'])
            click.secho('\u2713', fg='green')
    else:
        net = vgsl.TorchVGSLModel.load_model(model)
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            im = Image.open(doc)
            res = blla.segment(im, model=net)
            im = im.convert('RGBA')
            _render_lines(im, doc, res['lines'])
            _render_regions(im, doc, res['regions'])
            click.secho('\u2713', fg='green')