def cli(format_type, model, repolygonize, files):
    """
    A small script extracting rectified line polygons as defined in either ALTO or
    PageXML files or run a model to do the same.

    Without ``model`` every document in ``files`` is parsed with
    ``xml.preparse_xml_data``. For each line a ``<image stem>.<idx>.jpg`` crop
    and a ``<image stem>.<idx>.gt.txt`` transcription are written, where the
    stem is derived from the image path referenced by the document. With
    ``model`` every entry of ``files`` is opened as an image, segmented with
    ``blla.segment`` and only the ``<stem>.<idx>.jpg`` crops are written.

    Args:
        format_type: Passed through to ``xml.preparse_xml_data``.
        model: Argument for ``vgsl.TorchVGSLModel.load_model``, or ``None`` to
               take the line definitions from the XML files.
        repolygonize: Passed through to ``xml.preparse_xml_data``.
        files: Sequence of input paths; the command help is printed and the
               command exits when it is empty.
    """
    if not files:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image
    from os.path import splitext

    from kraken import blla
    from kraken.lib import segmentation, vgsl, xml

    if model is None:
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            data = xml.preparse_xml_data([doc], format_type, repolygonize=repolygonize)
            if not data:
                continue
            # all entries of one document are treated as lines of the image
            # referenced by the first entry
            image_path = data[0]['image']
            stem = splitext(image_path)[0]
            bounds = {'type': 'baselines',
                      'lines': [{'boundary': t['boundary'],
                                 'baseline': t['baseline'],
                                 'text': t['text']} for t in data]}
            # keep the page open only while its lines are being extracted
            with Image.open(image_path) as page:
                for idx, (im, box) in enumerate(segmentation.extract_polygons(page, bounds)):
                    click.echo('.', nl=False)
                    im.save(f'{stem}.{idx}.jpg')
                    # explicit encoding: transcriptions are arbitrary Unicode
                    # text and must not depend on the locale's default codec
                    with open(f'{stem}.{idx}.gt.txt', 'w', encoding='utf-8') as fp:
                        fp.write(box['text'])
    else:
        net = vgsl.TorchVGSLModel.load_model(model)
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            stem = splitext(doc)[0]
            with Image.open(doc) as full_im:
                bounds = blla.segment(full_im, model=net)
                for idx, (im, _) in enumerate(segmentation.extract_polygons(full_im, bounds)):
                    click.echo('.', nl=False)
                    im.save(f'{stem}.{idx}.jpg')
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segment a single input and dump the raw result as JSON into ``output``.

    On the first processing step (``ctx.meta['first_process']``) a non-image
    input is resolved to its image through ``get_input_parser``. The image
    path is remembered in ``ctx.meta['base_image']`` unless one is already
    stored there. ``pageseg.segment`` is used when ``legacy`` is truthy,
    ``blla.segment`` otherwise.

    Raises:
        click.BadParameter: if the input image or the mask cannot be opened.
        Exception: anything raised by the segmenter is re-raised after a red
                   cross has been printed.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    ctx = click.get_current_context()
    meta = ctx.meta

    if meta['first_process']:
        if meta['input_format_type'] != 'image':
            parser = get_input_parser(meta['input_format_type'])
            input = parser(input)['image']
        meta['first_process'] = False

    # only the very first image seen becomes the base image
    meta.setdefault('base_image', input)

    try:
        page = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    if mask:
        try:
            mask = Image.open(mask)
        except IOError as err:
            raise click.BadParameter(str(err))

    message('Segmenting\t', nl=False)
    try:
        if not legacy:
            result = blla.segment(page, text_direction, mask=mask,
                                  model=model, device=device)
        else:
            result = pageseg.segment(page, text_direction, scale, maxcolseps,
                                     black_colseps, no_hlines=remove_hlines,
                                     pad=pad, mask=mask)
    except Exception:
        message('\u2717', fg='red')
        raise

    with open_file(output, 'w') as fp:
        fp = cast(IO[Any], fp)
        json.dump(result, fp)
    message('\u2713', fg='green')
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segment a single input and write the result to ``output``.

    ``pageseg.segment`` is used when ``legacy`` is truthy, ``blla.segment``
    otherwise. When ``ctx.meta['last_process']`` is set and
    ``ctx.meta['output_mode']`` is not ``'native'`` the segmentation is turned
    into text-less ``ocr_record`` objects and rendered with
    ``serialization.serialize`` using the output mode as template. In every
    other case the raw segmentation is dumped as JSON.

    Raises:
        click.BadParameter: if the input image or the mask cannot be opened.
        Exception: anything raised by the segmenter is re-raised after a red
                   cross has been printed.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    def bounding_polygon(coords):
        # coords is read as alternating x/y values; returns the four corners
        # of the enclosing axis-aligned rectangle
        xs, ys = coords[::2], coords[1::2]
        left, right = min(xs), max(xs)
        top, bottom = min(ys), max(ys)
        return [[left, top], [left, bottom], [right, bottom], [right, top]]

    ctx = click.get_current_context()
    meta = ctx.meta

    if meta['first_process']:
        if meta['input_format_type'] != 'image':
            parser = get_input_parser(meta['input_format_type'])
            input = parser(input)['image']
        meta['first_process'] = False

    # only the very first image seen becomes the base image
    meta.setdefault('base_image', input)

    try:
        page = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    if mask:
        try:
            mask = Image.open(mask)
        except IOError as err:
            raise click.BadParameter(str(err))

    message('Segmenting\t', nl=False)
    try:
        if not legacy:
            seg = blla.segment(page, text_direction, mask=mask,
                               model=model, device=device)
        else:
            seg = pageseg.segment(page, text_direction, scale, maxcolseps,
                                  black_colseps, no_hlines=remove_hlines,
                                  pad=pad, mask=mask)
    except Exception:
        message('\u2717', fg='red')
        raise

    wants_serialization = meta['last_process'] and meta['output_mode'] != 'native'
    if not wants_serialization:
        with open_file(output, 'w') as fp:
            fp = cast(IO[Any], fp)
            json.dump(seg, fp)
    else:
        with open_file(output, 'w', encoding='utf-8') as fp:
            fp = cast(IO[Any], fp)
            logger.info('Serializing as {} into {}'.format(meta['output_mode'], output))
            from kraken import serialization
            from kraken.rpred import ocr_record

            if seg.get('type') == 'baselines':
                records = [ocr_record('', '', '', bl) for bl in seg['lines']]
            else:
                records = [ocr_record('', [], [], bounding_polygon(box))
                           for box in seg['boxes']]
            fp.write(serialization.serialize(records,
                                             image_name=meta['base_image'],
                                             image_size=page.size,
                                             regions=seg.get('regions'),
                                             template=meta['output_mode']))
    message('\u2713', fg='green')
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segment a single input and write the result to ``output``.

    On the first processing step (``ctx.meta['first_process']``) a non-image
    input is resolved to its image through ``get_input_parser``. The image
    path is remembered in ``ctx.meta['base_image']`` unless one is already
    stored there. ``pageseg.segment`` is used when ``legacy`` is truthy,
    ``blla.segment`` otherwise.

    When ``ctx.meta['last_process']`` is set and ``ctx.meta['output_mode']``
    is not ``'native'`` the result is rendered with
    ``serialization.serialize_segmentation`` using the output mode as
    template; otherwise it is dumped as JSON. The output payload is built
    completely before the output file is opened, so a failing serialization
    does not leave a truncated file behind.

    Raises:
        click.BadParameter: if the input image or the mask cannot be opened.
        Exception: a segmenter failure is re-raised when
                   ``ctx.meta['raise_failed']`` is truthy; otherwise a red
                   cross is printed and the context exits with status 1.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    ctx = click.get_current_context()

    if ctx.meta['first_process']:
        if ctx.meta['input_format_type'] != 'image':
            input = get_input_parser(ctx.meta['input_format_type'])(input)['image']
        ctx.meta['first_process'] = False

    if 'base_image' not in ctx.meta:
        ctx.meta['base_image'] = input

    try:
        im = Image.open(input)
    except IOError as e:
        # keep the original I/O error attached as the explicit cause
        raise click.BadParameter(str(e)) from e

    if mask:
        try:
            mask = Image.open(mask)
        except IOError as e:
            raise click.BadParameter(str(e)) from e

    message('Segmenting\t', nl=False)
    try:
        if legacy:
            res = pageseg.segment(im, text_direction, scale, maxcolseps,
                                  black_colseps, no_hlines=remove_hlines,
                                  pad=pad, mask=mask)
        else:
            res = blla.segment(im, text_direction, mask=mask, model=model,
                               device=device)
    except Exception:
        if ctx.meta['raise_failed']:
            raise
        message('\u2717', fg='red')
        ctx.exit(1)

    if ctx.meta['last_process'] and ctx.meta['output_mode'] != 'native':
        from kraken import serialization

        logger.info('Serializing as %s into %s', ctx.meta['output_mode'], output)
        # render first: opening the file in 'w' mode truncates it immediately
        payload = serialization.serialize_segmentation(res,
                                                       image_name=ctx.meta['base_image'],
                                                       image_size=im.size,
                                                       template=ctx.meta['output_mode'])
        with click.open_file(output, 'w', encoding='utf-8') as fp:
            fp = cast(IO[Any], fp)
            fp.write(payload)
    else:
        payload = json.dumps(res)
        with click.open_file(output, 'w') as fp:
            fp = cast(IO[Any], fp)
            fp.write(payload)
    message('\u2713', fg='green')
def cli(format_type, model, repolygonize, files):
    """
    A script producing overlays of lines and regions from either ALTO or
    PageXML files or run a model to do the same.

    For every document one ``high_<basename>_lines_<script>.png`` is written
    per value of the lines' ``script`` key and one
    ``high_<basename>_regions_<type>.png`` per key of the region dictionary.
    All files are written into the current working directory.

    Args:
        format_type: ``'xml'`` selects ``xml.parse_xml``, ``'alto'`` selects
                     ``xml.parse_alto``, anything else ``xml.parse_page``.
                     Only used when ``model`` is ``None``.
        model: Argument for ``vgsl.TorchVGSLModel.load_model``, or ``None`` to
               take lines and regions from the XML files.
        repolygonize: Accepted but not used by this command.
        files: Sequence of input paths; the command help is printed and the
               command exits when it is empty.
    """
    if not files:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image, ImageDraw

    from kraken.lib import vgsl, xml
    from kraken import blla

    def as_points(poly):
        # Convert point pairs to tuples for ImageDraw; a flat numeric
        # sequence ([x, y, x, y, ...]) is passed through unchanged.
        return [pt if isinstance(pt, (int, float)) else tuple(pt) for pt in poly]

    def save_overlays(page, doc, line_dicts, regions):
        # Draw the line and region overlays of one document on top of the
        # RGBA image `page` and save them.
        stem = os.path.basename(doc)

        # reorder lines by type
        by_script = defaultdict(list)
        for line in line_dicts:
            by_script[line['script']].append(line)

        for script, script_lines in by_script.items():
            layer = Image.new('RGBA', page.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(layer)
            for idx, line in enumerate(script_lines):
                # a colour is consumed for every line, drawn or not
                c = next(cmap)
                if line['boundary']:
                    draw.polygon([tuple(x) for x in line['boundary']], fill=c, outline=c[:3])
                if line['baseline']:
                    draw.line([tuple(x) for x in line['baseline']], fill=bmap, width=2, joint='curve')
                    # the index label is anchored on the first baseline point,
                    # so it can only be placed when a baseline exists
                    draw.text(line['baseline'][0], str(idx), fill=(0, 0, 0, 255))
            Image.alpha_composite(page, layer).save(f'high_{stem}_lines_{script}.png')

        for rtype, polys in regions.items():
            layer = Image.new('RGBA', page.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(layer)
            for poly in polys:
                c = next(cmap)
                draw.polygon(as_points(poly), fill=c, outline=c[:3])
            Image.alpha_composite(page, layer).save(f'high_{stem}_regions_{rtype}.png')

    if model is None:
        if format_type == 'xml':
            fn = xml.parse_xml
        elif format_type == 'alto':
            # was `xml.parse_palto`, which is not a function of kraken.lib.xml
            fn = xml.parse_alto
        else:
            fn = xml.parse_page

        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            data = fn(doc)
            # convert() returns an independent image, so the source file can
            # be closed right away
            with Image.open(data['image']) as src:
                page = src.convert('RGBA')
            save_overlays(page, doc, data['lines'], data['regions'])
            click.secho('\u2713', fg='green')
    else:
        net = vgsl.TorchVGSLModel.load_model(model)
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            with Image.open(doc) as src:
                res = blla.segment(src, model=net)
                page = src.convert('RGBA')
            save_overlays(page, doc, res['lines'], res['regions'])
            click.secho('\u2713', fg='green')