def test_rpred_outbounds(self):
    """
    Runs recognition on a line box with invalid coordinates.

    The toy model is loaded from the test resources and the first result
    is requested from ``rpred`` for the box (-1, -1, 10000, 10000).
    """
    model_path = os.path.join(resources, 'toy.clstm')
    network = load_any(model_path)
    segmentation = {
        'boxes': [[-1, -1, 10000, 10000]],
        'text_direction': 'horizontal',
    }
    records = rpred(network, self.im, segmentation, True)
    next(records)
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images):
    """
    Collects page images into a transcription interface and writes it out.

    Each image is binarized if it is not bitonal, segmented, optionally
    pre-filled with recognition results from the ``prefill`` model, and
    added to a ``transcribe.TranscriptionInterface`` which is finally
    written to ``output``. Progress is reported as timestamped lines when
    ``ctx.meta['verbose']`` is positive and with a spinner otherwise.
    """
    started = time.time()
    verbose = ctx.meta['verbose']

    def _stamp(template, *args):
        # Print a progress line prefixed with the elapsed time.
        click.echo(template.format(time.time() - started, *args))

    def _tick():
        # Replace the spinner by a green check mark and restore the cursor.
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    interface = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        if verbose > 0:
            _stamp(u'[{:2.4f}] Loading model {}', prefill)
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        if not verbose:
            _tick()

    for fp in images:
        if verbose > 0:
            _stamp(u'[{:2.4f}] Reading {}', fp.name)
        else:
            spin('Reading images')
        page = Image.open(fp)
        if not binarization.is_bitonal(page):
            if verbose > 0:
                _stamp(u'[{:2.4f}] Binarizing page')
            page = binarization.nlbin(page)
        if verbose > 0:
            _stamp(u'[{:2.4f}] Segmenting page')
        segmentation = pageseg.segment(page, text_direction, scale,
                                       maxcolseps, black_colseps)
        if not prefill:
            interface.add_page(page, segmentation)
        else:
            records = []
            for record in rpred.rpred(prefill, page, segmentation):
                if verbose > 0:
                    _stamp(u'[{:2.4f}] {}', record.prediction)
                else:
                    spin('Recognizing')
                records.append(record)
            if verbose > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - started))
            else:
                _tick()
            interface.add_page(page, segmentation, records=records)
        fp.close()

    if not verbose:
        _tick()
    if verbose > 0:
        _stamp(u'[{:2.4f}] Writing transcription to {}', output.name)
    else:
        spin('Writing output')
    interface.write(output)
    if not verbose:
        _tick()
def detect_scripts(im, bounds, model=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of
    scripts/writing systems.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-tb/vertical-lr/rl'.
        model (str): Location of the script classification model or None for
                     default.

    Returns:
        {'text_direction': '$dir', 'boxes': [[(script, (x1, y1, x2, y2)),...]]}:
        A dictionary containing the text direction and a list of lists of
        reading order sorted bounding boxes under the key 'boxes' with each
        list containing the script segmentation of a single line. Script is a
        ISO15924 4 character identifier.

    Raises:
        KrakenInputException if the input image is not binarized or the text
        direction is invalid.
        KrakenInvalidModelException if no clstm module is available.
    """
    # Labels are private-use code points 0xF0000 + ISO 15924 numeric code.
    # 994 (0x3E2) is Zinh (inherited) and 998 (0x3E6) is Zyyy (common); both
    # carry no script of their own and take over the neighbouring run. The
    # list previously contained 0x3E6 twice, so inherited labels were never
    # substituted.
    neutral = [u'\U000f03e2', u'\U000f03e6']

    def subs(m, s):
        # Replace every label in `m` by the label preceding it; labels at the
        # very start of `s` have no predecessor and are kept.
        p = u''
        for c in s:
            if c in m and p:
                p += p[-1]
            else:
                p += c
        return p

    if not model:
        model = pkg_resources.resource_filename(__name__, 'script.clstm')
    rnn = models.load_clstm(model)
    # load numerical to 4 char identifier map
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    it = rpred(rnn, im, bounds)
    preds = []
    for pred in it:
        # substitute inherited scripts with neighboring runs
        p = subs(neutral, pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(reversed(subs(neutral, reversed(p))))
        # group by grapheme
        t = []
        for k, g in groupby(pred, key=lambda x: x[0]):
            # convert to ISO15924 numerical identifier
            k = ord(k) - 0xF0000
            b = max_bbox(x[1] for x in g)
            t.append((n2s[str(k)], b))
        preds.append(t)
    return {'boxes': preds, 'text_direction': bounds['text_direction']}
def test_rpred_outbounds(self):
    """
    Runs recognition on a line box with invalid coordinates.

    No model is passed (``None``); only the first result for the box
    (-1, -1, 10000, 10000) is requested.
    """
    segmentation = {
        'boxes': [[-1, -1, 10000, 10000]],
        'text_direction': 'horizontal',
    }
    records = rpred(None, self.im, segmentation, True)
    next(records)
def test_rpred_outbounds(self):
    """
    Requests the first recognition result for an invalid line box.

    Uses the ``toy.clstm`` model from the test resources and the box
    (-1, -1, 10000, 10000).
    """
    network = load_any(os.path.join(resources, 'toy.clstm'))
    out_of_bounds = [[-1, -1, 10000, 10000]]
    segmentation = {'boxes': out_of_bounds, 'text_direction': 'horizontal'}
    result_iter = rpred(network, self.im, segmentation, True)
    next(result_iter)
def test_rpred_bbox_outbounds(self):
    """
    Checks that a bounding box with invalid coordinates is rejected.

    Requesting the first record for the box (-1, -1, 10000, 10000) is
    expected to raise KrakenInputException.
    """
    segmentation = {
        'boxes': [[-1, -1, 10000, 10000]],
        'text_direction': 'horizontal',
    }
    with raises(KrakenInputException):
        records = rpred(self.model, self.im, segmentation, True)
        next(records)
def test_simple_bl_rpred(self):
    """
    Recognizes a single line and compares the text with the expected string.
    """
    expected = 'ܡ ܘܡ ܗ ܡܕܐ ܐ ܐܐ ܡ ܗܗܐܐܐܕ'
    segmentation = {
        'boxes': [[0, 0, 2544, 156]],
        'text_direction': 'horizontal',
    }
    records = rpred(self.model, self.overfit_line, segmentation, True)
    first = next(records)
    self.assertEqual(first.prediction, expected)
def transcription(ctx, font, font_style, prefill, output, images):
    """
    Collects page images into a transcription interface and writes it out.

    Each image is binarized if it is not bitonal and segmented with the
    default segmenter settings. When a ``prefill`` model is given its
    recognition results are added as records, otherwise the segmentation
    is added. Progress is reported as timestamped lines when
    ``ctx.meta['verbose']`` is positive and with a spinner otherwise.
    """
    started = time.time()
    verbose = ctx.meta['verbose']

    def _stamp(template, *args):
        # Print a progress line prefixed with the elapsed time.
        click.echo(template.format(time.time() - started, *args))

    def _tick():
        # Replace the spinner by a green check mark and restore the cursor.
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    interface = transcrib.TranscriptionInterface(font, font_style)

    if prefill:
        if verbose > 0:
            _stamp(u'[{:2.4f}] Loading model {}', prefill)
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill)
        if not verbose:
            _tick()

    for fp in images:
        if verbose > 0:
            _stamp(u'[{:2.4f}] Reading {}', fp.name)
        else:
            spin('Reading images')
        page = Image.open(fp)
        if not binarization.is_bitonal(page):
            if verbose > 0:
                _stamp(u'[{:2.4f}] Binarizing page')
            page = binarization.nlbin(page)
        if verbose > 0:
            _stamp(u'[{:2.4f}] Segmenting page')
        segmentation = pageseg.segment(page)
        if not prefill:
            interface.add_page(page, segmentation)
        else:
            records = []
            for record in rpred.rpred(prefill, page, segmentation):
                if verbose > 0:
                    _stamp(u'[{:2.4f}] {}', record.prediction)
                else:
                    spin('Recognizing')
                records.append(record)
            if verbose > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - started))
            else:
                _tick()
            interface.add_page(page, records=records)

    if not verbose:
        _tick()
    if verbose > 0:
        _stamp(u'[{:2.4f}] Writing transcription to {}', output.name)
    else:
        spin('Writing output')
    interface.write(output)
    if not verbose:
        _tick()
def recognizer(model, pad, bidi_reordering, base_image, input, output, lines):
    """
    Recognizes the lines of ``base_image`` and writes the result to ``output``.

    Args:
        model: Mapping of recognition models; ``model['default']`` is used
               for mono-script recognition, the whole mapping is handed to
               ``rpred.mm_rpred`` for multi-script recognition.
        pad: Padding value forwarded to the recognizer.
        bidi_reordering: Forwarded to the recognizer as ``bidi_reordering``.
        base_image: Image to recognize; opened with ``Image.open``.
        input: Fallback location of the JSON segmentation when ``lines`` is
               empty.
        output: Destination of the serialized recognition results.
        lines: Location of the JSON segmentation file.

    Raises:
        click.BadParameter: if ``base_image`` cannot be opened.
    """
    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()
    verbose = ctx.meta['verbose']

    scripts = None

    if not lines:
        lines = input
    with open_file(lines, 'r') as fp:
        bounds = json.load(fp)

    # The start time has to exist before the first verbose message below;
    # it used to be assigned only after the branch, which raised
    # UnboundLocalError in verbose mode.
    st_time = time.time()

    # script detection
    if bounds['script_detection']:
        scripts = set()
        for l in bounds['boxes']:
            for t in l:
                scripts.add(t[0])
        if verbose > 0:
            click.echo(u'[{:2.4f}] Executing multi-script recognition'.format(time.time() - st_time))
        it = rpred.mm_rpred(model, im, bounds, pad, bidi_reordering=bidi_reordering)
    else:
        if verbose > 0:
            click.echo(u'[{:2.4f}] Executing mono-script recognition'.format(time.time() - st_time))
        it = rpred.rpred(model['default'], im, bounds, pad, bidi_reordering=bidi_reordering)

    preds = []
    for pred in it:
        if verbose > 0:
            click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
        else:
            spin('Processing')
        preds.append(pred)
    if verbose > 0:
        click.echo(u'Execution time: {}s'.format(time.time() - st_time))
    else:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    with open_file(output, 'w', encoding='utf-8') as fp:
        click.echo('Writing recognition results for {}\t'.format(base_image), nl=False)
        if ctx.meta['mode'] != 'text':
            # im is the already opened base image; no need to open it again
            # just to read its size.
            fp.write(serialization.serialize(preds, base_image,
                                             im.size,
                                             ctx.meta['text_direction'],
                                             scripts,
                                             ctx.meta['mode']))
        else:
            fp.write(u'\n'.join(s.prediction for s in preds))
        if not verbose:
            click.secho(u'\u2713', fg='green')
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images, segment_page):
    """
    Collects page images into a transcription interface and writes it out.

    Each image is binarized if it is not bitonal. With ``segment_page`` set
    the page is segmented, otherwise a single box covering the whole image
    is used. When a ``prefill`` model is given its recognition results are
    attached to the page. The interface is finally written to ``output``.
    """
    def _tick():
        # Replace the spinner by a green check mark and restore the cursor.
        message(u'\b\u2713', fg='green', nl=False)
        message('\033[?25h\n', nl=False)

    interface = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        _tick()

    for fp in images:
        logger.info('Reading {}'.format(fp.name))
        spin('Reading images')
        page = Image.open(fp)
        if not binarization.is_bitonal(page):
            logger.info(u'Binarizing page')
            page = binarization.nlbin(page)

        if not segment_page:
            segmentation = {
                'text_direction': 'horizontal-tb',
                'boxes': [(0, 0) + page.size],
            }
        else:
            logger.info(u'Segmenting page')
            segmentation = pageseg.segment(page, text_direction, scale,
                                           maxcolseps, black_colseps)

        if not prefill:
            interface.add_page(page, segmentation)
        else:
            records = []
            for record in rpred.rpred(prefill, page, segmentation):
                logger.info('{}'.format(record.prediction))
                spin('Recognizing')
                records.append(record)
            _tick()
            interface.add_page(page, segmentation, records=records)
        fp.close()

    _tick()
    logger.info(u'Writing transcription to {}'.format(output.name))
    spin('Writing output')
    interface.write(output)
    _tick()
def recognizer(model, pad, base_image, input, output, lines):
    """
    Recognizes the lines of ``base_image`` and writes the result to ``output``.

    Line boxes are read as CSV rows ``x1,y1,x2,y2`` from ``lines`` or, when
    that is empty, from ``input``. Results are serialized according to
    ``ctx.meta['mode']``; mode ``'text'`` writes one prediction per line.

    Raises:
        click.BadParameter: if ``base_image`` cannot be opened.
    """
    try:
        page = Image.open(base_image)
    except IOError as err:
        raise click.BadParameter(str(err))

    ctx = click.get_current_context()
    verbose = ctx.meta['verbose']

    segmentation_path = lines if lines else input
    with open_file(segmentation_path, 'r') as fp:
        bounds = []
        for x1, y1, x2, y2 in csv.reader(fp):
            bounds.append((int(x1), int(y1), int(x2), int(y2)))
    it = rpred.rpred(model, page, bounds, pad)

    preds = []
    started = time.time()
    for record in it:
        if verbose > 0:
            click.echo(u'[{:2.4f}] {}'.format(time.time() - started, record.prediction))
        else:
            spin('Processing')
        preds.append(record)

    if verbose > 0:
        click.echo(u'Execution time: {}s'.format(time.time() - started))
    else:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    with open_file(output, 'w', encoding='utf-8') as fp:
        click.echo('Writing recognition results for {}\t'.format(base_image), nl=False)
        if ctx.meta['mode'] == 'text':
            fp.write(u'\n'.join(s.prediction for s in preds))
        else:
            page_size = Image.open(base_image).size
            fp.write(serialization.serialize(preds, base_image, page_size,
                                             ctx.meta['mode']))
        if not verbose:
            click.secho(u'\u2713', fg='green')
def recognizer(input_image, model, pad, no_segmentation, bidi_reordering,
               script_ignore, mode, text_direction, segments) -> None:
    """
    Recognizes the lines of an in-memory segmentation and prints the text.

    Args:
        input_image: Image handed to the recognizer.
        model: Mapping of recognition models; ``model['default']`` is used
               for mono-script recognition, the whole mapping is handed to
               ``rpred.mm_rpred`` for multi-script recognition.
        pad: Padding value forwarded to the recognizer.
        no_segmentation: Not used by this function.
        bidi_reordering: Forwarded to the recognizer as ``bidi_reordering``.
        script_ignore: Forwarded to ``rpred.mm_rpred`` as ``script_ignore``.
        mode: Serialization mode; only read by the disabled block below.
        text_direction: Only read by the disabled block below.
        segments (dict): Segmentation with the keys 'script_detection' and
                         'boxes'.
    """
    bounds = segments

    # Scripts found in the segmentation. This set was previously never
    # created, so the script detection branch failed with a NameError.
    scripts = set()

    # Script detection.
    if bounds['script_detection']:
        for l in bounds['boxes']:
            for t in l:
                scripts.add(t[0])
        it = rpred.mm_rpred(model, input_image, bounds, pad,
                            bidi_reordering=bidi_reordering,
                            script_ignore=script_ignore)
    else:
        it = rpred.rpred(model['default'], input_image, bounds, pad,
                         bidi_reordering=bidi_reordering)

    preds = []

    with log.progressbar(it, label='Processing', length=len(bounds['boxes'])) as bar:
        for pred in bar:
            preds.append(pred)

    #--------------------
    print('Recognition results = {}.'.format('\n'.join(s.prediction for s in preds)))

    # NOTE(review): serialization to a file is switched off on purpose. The
    # block refers to `output` and `base_image`, which are not parameters of
    # this function, so it must not be enabled as is.
    if False:
        with open_file(output, 'w', encoding='utf-8') as fp:
            print('Serializing as {} into {}'.format(mode, output))
            if mode != 'text':
                from kraken import serialization
                fp.write(serialization.serialize(preds, base_image,
                                                 Image.open(base_image).size,
                                                 text_direction,
                                                 scripts,
                                                 mode))
            else:
                fp.write('\n'.join(s.prediction for s in preds))
def test_rpred_bl_outbounds(self):
    """
    Checks that a baseline line with invalid coordinates is rejected.

    Requesting the first record for a line whose boundary polygon spans
    (-1, -1) to (10000, 10000) is expected to raise KrakenInputException.
    """
    invalid_line = {
        'tags': {'type': 'default'},
        'baseline': [[0, 0], [10000, 0]],
        'boundary': [[-1, -1], [-1, 10000], [10000, 10000], [10000, -1]],
    }
    segmentation = {
        'lines': [invalid_line],
        'text_direction': 'horizontal',
        'type': 'baselines',
    }
    with raises(KrakenInputException):
        records = rpred(self.model, self.im, segmentation, True)
        next(records)
def recognizer(model, pad, base_image, input, output, lines):
    """
    Runs line recognition on ``base_image`` and stores the result.

    The line boxes come from a CSV file with rows ``x1,y1,x2,y2``; its
    location is ``lines`` or, if that is empty, ``input``. The output
    format is selected by ``ctx.meta['mode']``, with ``'text'`` producing
    the bare predictions joined by newlines.

    Raises:
        click.BadParameter: if ``base_image`` cannot be opened.
    """
    try:
        im = Image.open(base_image)
    except IOError as exc:
        raise click.BadParameter(str(exc))

    ctx = click.get_current_context()
    be_verbose = ctx.meta['verbose']

    if not lines:
        lines = input
    with open_file(lines, 'r') as fp:
        rows = csv.reader(fp)
        bounds = [(int(row_x1), int(row_y1), int(row_x2), int(row_y2))
                  for row_x1, row_y1, row_x2, row_y2 in rows]
    results = rpred.rpred(model, im, bounds, pad)

    preds = []
    t0 = time.time()
    for result in results:
        if be_verbose > 0:
            click.echo(u'[{:2.4f}] {}'.format(time.time() - t0, result.prediction))
        else:
            spin('Processing')
        preds.append(result)

    if be_verbose > 0:
        click.echo(u'Execution time: {}s'.format(time.time() - t0))
    else:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    with open_file(output, 'w', encoding='utf-8') as fp:
        click.echo('Writing recognition results for {}\t'.format(base_image), nl=False)
        if ctx.meta['mode'] != 'text':
            serialized = serialization.serialize(preds, base_image,
                                                 Image.open(base_image).size,
                                                 ctx.meta['mode'])
            fp.write(serialized)
        else:
            fp.write(u'\n'.join(s.prediction for s in preds))
        if not be_verbose:
            click.secho(u'\u2713', fg='green')
def forced_align(doc, model):
    """
    Aligns the text of each line with the recognition model's output.

    Args:
        doc (dict): Parsed document; the keys 'image' and 'lines' are read,
                    and each line provides 'text', 'baseline' and 'boundary'.
        model (kraken.lib.model.TorchSeqRecognizer): Recognition model to use
                                                     for alignment.

    Returns:
        A list of records, one per line, built with rpred.ocr_record and
        passed through rpred.bidi_record.
    """
    page = Image.open(doc['image'])
    predictor = rpred.rpred(model, page, doc)

    aligned = []
    for line in doc['lines']:
        display_text = get_display(line['text'])
        gt_fst = fst_from_text(display_text, model.codec)
        # Advance the predictor by one line; model.outputs is read right
        # after, presumably holding the activations of that line.
        next(predictor)
        lattice = fst_from_lattice(model.outputs)
        best_path = _shortest_path(_compose(lattice, gt_fst))

        chars, polygons, confidences = [], [], []
        last_index = model.outputs.shape[2] - 1
        for act, label in _generate_line_record(best_path, last_index):
            extent = predictor.box.size[0]
            start = predictor._scale_val(act[0], 0, extent)
            end = predictor._scale_val(act[1], 0, extent)
            polygons.append(compute_polygon_section(line['baseline'],
                                                    line['boundary'],
                                                    start,
                                                    end))
            confidences.append(1.0)
            chars.append(model.codec.decode([(label, 0, 0, 0)])[0][0])

        record = rpred.ocr_record(chars, polygons, confidences, line)
        aligned.append(rpred.bidi_record(record))
    return aligned
def recognizer(model, pad, no_segmentation, bidi_reordering, script_ignore,
               base_image, input, output, lines) -> None:
    """
    Recognizes the lines of ``base_image`` and writes the result to ``output``.

    Args:
        model: Mapping of recognition models; ``model['default']`` is used
               for mono-script recognition, the whole mapping is handed to
               ``rpred.mm_rpred`` for multi-script recognition.
        pad: Padding value forwarded to the recognizer.
        no_segmentation: Treat the whole image as a single line when no
                         segmentation is available.
        bidi_reordering: Forwarded to the recognizer as ``bidi_reordering``.
        script_ignore: Forwarded to ``rpred.mm_rpred`` as ``script_ignore``.
        base_image: Image to recognize; opened with ``Image.open``.
        input: Segmentation file when it differs from ``base_image`` and
               ``lines`` is empty.
        output: Destination of the serialized recognition results.
        lines: Location of the JSON segmentation file.

    Raises:
        click.BadParameter: if ``base_image`` cannot be opened.
        click.UsageError: if no segmentation is available or the
                          segmentation file is not valid JSON.
    """
    import json
    import tempfile

    from kraken import rpred

    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()

    # input may either be output from the segmenter then it is a JSON file or
    # be an image file when running the OCR subcommand alone. might still come
    # from some other subcommand though.
    scripts = set()
    # Path of the segmentation file created here, if any. `lines` is rebound
    # to the path string below, so it cannot be used to decide whether a
    # temporary file has to be removed.
    temp_segmentation = None
    if not lines and base_image != input:
        lines = input
    if not lines:
        if no_segmentation:
            lines = tempfile.NamedTemporaryFile(mode='w', delete=False)
            logger.info('Running in no_segmentation mode. Creating temporary segmentation {}.'.format(lines.name))
            json.dump({'script_detection': False,
                       'text_direction': 'horizontal-lr',
                       'boxes': [(0, 0) + im.size]}, lines)
            lines.close()
            lines = lines.name
            temp_segmentation = lines
        else:
            raise click.UsageError('No line segmentation given. Add one with `-l` or run `segment` first.')
    elif no_segmentation:
        logger.warning('no_segmentation mode enabled but segmentation defined. Ignoring --no-segmentation option.')

    try:
        with open_file(lines, 'r') as fp:
            try:
                fp = cast(IO[Any], fp)
                bounds = json.load(fp)
            except ValueError as e:
                raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
    finally:
        # The segmentation is fully loaded (or unusable) at this point, so
        # the temporary file can go in either case.
        if temp_segmentation is not None:
            logger.debug('Removing temporary segmentation file.')
            os.unlink(temp_segmentation)

    # script detection
    if bounds['script_detection']:
        for l in bounds['boxes']:
            for t in l:
                scripts.add(t[0])
        it = rpred.mm_rpred(model, im, bounds, pad,
                            bidi_reordering=bidi_reordering,
                            script_ignore=script_ignore)
    else:
        it = rpred.rpred(model['default'], im, bounds, pad,
                         bidi_reordering=bidi_reordering)

    preds = []

    with log.progressbar(it, label='Processing', length=len(bounds['boxes'])) as bar:
        for pred in bar:
            preds.append(pred)

    with open_file(output, 'w', encoding='utf-8') as fp:
        fp = cast(IO[Any], fp)
        message('Writing recognition results for {}\t'.format(base_image), nl=False)
        logger.info('Serializing as {} into {}'.format(ctx.meta['mode'], output))
        if ctx.meta['mode'] != 'text':
            from kraken import serialization
            fp.write(serialization.serialize(preds, base_image,
                                             Image.open(base_image).size,
                                             ctx.meta['text_direction'],
                                             scripts,
                                             ctx.meta['mode']))
        else:
            fp.write('\n'.join(s.prediction for s in preds))
        message('\u2713', fg='green')
def test_rpred_outbounds(self):
    """
    Requests the first recognition result for an invalid line box.

    The segmentation is a plain list holding the single box
    (-1, -1, 10000, 10000); no model is passed.
    """
    boxes = [(-1, -1, 10000, 10000)]
    records = rpred(None, self.im, boxes)
    next(records)
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Recognizes the lines of a segmented page with kraken and writes TEI.

    Args:
        doc: Pair of storage tuples; ``doc[0]`` locates the TEI segmentation
             and ``doc[1]`` the page image.
        method (unicode): The suffix string appended to the output file name
        model (unicode): Identifier of the model, looked up in the
                         'kraken_models' and 'ocropus_models' configuration

    Returns:
        (unicode, unicode): Storage tuple for the output file

    Raises:
        NidabaInvalidParameterException: if the model identifier is in
                                         neither configuration section.
    """
    image_tuple = doc[1]
    input_path = storage.get_abs_path(*image_tuple)
    suffixed = storage.insert_suffix(image_tuple[1], method, model)
    output_path = (image_tuple[0], os.path.splitext(suffixed)[0] + '.xml')

    logger.debug('Searching for model {}'.format(model))
    if model in nidaba_cfg['kraken_models']:
        model = storage.get_abs_path(*(nidaba_cfg['kraken_models'][model]))
    elif model in nidaba_cfg['ocropus_models']:
        model = storage.get_abs_path(*(nidaba_cfg['ocropus_models'][model]))
    else:
        raise NidabaInvalidParameterException('Model not defined in '
                                              'configuration')

    img = Image.open(input_path)
    logger.debug('Reading TEI segmentation from {}'.format(doc[1]))
    tei = OCRRecord()
    with storage.StorageFile(*doc[0]) as seg:
        tei.load_tei(seg)

    logger.debug('Clearing out word/grapheme boxes')
    # kraken only produces lines, so word and grapheme level data is dropped
    tei.clear_graphemes()
    tei.clear_segments()
    # everything added from here on is attributed to kraken
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    logger.debug('Loading model {}'.format(model))
    rnn = models.load_any(model)
    i = 0
    logger.debug('Start recognizing characters')
    line_boxes = [x['bbox'] for x in lines.itervalues()]
    for line_id, rec in zip(lines, rpred.rpred(rnn, img, line_boxes)):
        # make the line current so that new segments and graphemes land in it
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)
        i += 1

        # the capture group keeps the whitespace runs, so words sit at even
        # and whitespace at odd indices
        splits = regex.split(u'(\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                end = line_offset + len(segment)
                seg_bbox = max_bbox(rec.cuts[line_offset:end])
                logger.debug('Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(rec.prediction[line_offset:end]))
                tei.add_graphemes([{'grapheme': x[0],
                                    'bbox': x[1],
                                    'confidence': int(x[2] * 100)} for x in rec[line_offset:end]])
                line_offset = end
            if whitespace:
                end = line_offset + len(whitespace)
                logger.debug('Adding graphemes (whitespace): {}'.format(rec.prediction[line_offset:end]))
                seg_bbox = max_bbox(rec.cuts[line_offset:end])
                tei.add_segment(seg_bbox)
                tei.add_graphemes([{'grapheme': x[0],
                                    'bbox': x[1],
                                    'confidence': int(x[2] * 100)} for x in rec[line_offset:end]])
                line_offset = end

    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write_tei(fp)
    return output_path
#! /usr/bin/env python
# Recognizes every PNG in a directory as one single line and stores the text
# next to the image as `<image>.rec.txt`.
#
# Usage: <script> MODEL [DIRECTORY]   (DIRECTORY defaults to '.')
from kraken.lib import models
from kraken import rpred
from PIL import Image
from glob import glob
import sys

model = sys.argv[1]
gt = sys.argv[2] if len(sys.argv) > 2 else '.'

rnn = models.load_any(model)

ims = glob(gt + '/*.png')
for f in ims:
    print(f)
    im = Image.open(f)
    # a single box covering the whole image
    it = rpred.rpred(rnn, im, [(0, 0) + im.size])
    # next() works on Python 2 and 3, unlike the Python 2 only it.next().
    # Recognition runs before the output file is opened so that a failure
    # does not leave an empty file behind.
    text = next(it).prediction.encode('utf-8')
    with open(f + '.rec.txt', 'wb') as fp:
        fp.write(text)
# Draws semi-transparent colored boxes over the character cuts found by the
# recognizer and saves the result as `high_<image name>`.
#
# Usage: <script> MODEL IMAGE [IMAGE ...]
import os
import sys

from itertools import cycle

# os, sys, Image and ImageDraw are used below but were not imported in this
# script.
from PIL import Image, ImageDraw

from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from kraken.lib import models

# RGBA fill colors, reused in a cycle
cmap = cycle([(230, 25, 75, 127),
              (60, 180, 75, 127),
              (255, 225, 25, 127),
              (0, 130, 200, 127),
              (245, 130, 48, 127),
              (145, 30, 180, 127),
              (70, 240, 240, 127)])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    # boxes are drawn on a transparent overlay and composited afterwards
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    im.save('high_{}'.format(os.path.basename(fname)))
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string append to all output files
        model (unicode): Identifier for the font model to use

    Returns:
        (unicode, unicode): Storage tuple for the output file

    Raises:
        NidabaInvalidParameterException: if the model cannot be loaded or
                                         the page image is not bitonal.
    """
    output_path = (doc[0], os.path.splitext(storage.insert_suffix(doc[1], method, model))[0] + '.xml')

    logger.debug('Loading model {}'.format(model))
    # The model is loaded exactly once, here, where a failure is turned into
    # a parameter error. It used to be loaded a second time further down,
    # outside of this error handling.
    try:
        rnn = models.load_any(mod_db[model])
    except Exception as e:
        raise NidabaInvalidParameterException(str(e))

    logger.debug('Reading TEI segmentation from {}'.format(doc))
    tei = OCRRecord()
    with storage.StorageFile(*doc) as seg:
        tei.load_tei(seg)

    img = Image.open(storage.get_abs_path(*storage.get_storage_path_url(tei.img)))
    if is_bitonal(img):
        img = img.convert('1')
    else:
        raise NidabaInvalidParameterException('Input image is not bitonal')

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    logger.debug('Start recognizing characters')
    for line_id, rec in izip(lines, rpred.rpred(rnn, img, {'text_direction': 'horizontal-tb',
                                                           'boxes': [list(x['bbox']) for x in lines.itervalues()]})):
        # scope the current line and add all graphemes recognized by kraken to
        # it.
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)

        # the capture group keeps the whitespace runs, so words sit at even
        # and whitespace at odd indices
        splits = regex.split(u'(\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset + len(segment)])
                logger.debug('Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(rec.prediction[line_offset:line_offset + len(segment)]))
                tei.add_graphemes([{'grapheme': x[0],
                                    'bbox': x[1],
                                    'confidence': int(x[2] * 100)} for x in rec[line_offset:line_offset + len(segment)]])
                line_offset += len(segment)
            if whitespace:
                logger.debug('Adding graphemes (whitespace): {}'.format(rec.prediction[line_offset:line_offset + len(whitespace)]))
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset + len(whitespace)])
                tei.add_segment(seg_bbox)
                tei.add_graphemes([{'grapheme': x[0],
                                    'bbox': x[1],
                                    'confidence': int(x[2] * 100)} for x in rec[line_offset:line_offset + len(whitespace)]])
                line_offset += len(whitespace)

    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write_tei(fp)
    return output_path
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Recognizes the lines of a segmented page with kraken and writes TEI.

    Args:
        doc: Pair of storage tuples; ``doc[0]`` locates the TEI segmentation
             and ``doc[1]`` the page image.
        method (unicode): The suffix string appended to the output file name
        model (unicode): Identifier of the model, looked up in the
                         'kraken_models' and 'ocropus_models' configuration

    Returns:
        (unicode, unicode): Storage tuple for the output file

    Raises:
        NidabaInvalidParameterException: if the model identifier is in
                                         neither configuration section.
    """
    image_tuple = doc[1]
    input_path = storage.get_abs_path(*image_tuple)
    suffixed = storage.insert_suffix(image_tuple[1], method, model)
    output_path = (image_tuple[0], os.path.splitext(suffixed)[0] + '.xml')

    logger.debug('Searching for model {}'.format(model))
    if model in nidaba_cfg['kraken_models']:
        model = storage.get_abs_path(*(nidaba_cfg['kraken_models'][model]))
    elif model in nidaba_cfg['ocropus_models']:
        model = storage.get_abs_path(*(nidaba_cfg['ocropus_models'][model]))
    else:
        raise NidabaInvalidParameterException('Model not defined in '
                                              'configuration')

    img = Image.open(input_path)
    logger.debug('Reading TEI segmentation from {}'.format(doc[1]))
    tei = TEIFacsimile()
    with storage.StorageFile(*doc[0]) as seg:
        tei.read(seg)

    logger.debug('Clearing out word/grapheme boxes')
    # kraken only produces lines, so word and grapheme level data is dropped
    tei.clear_graphemes()
    tei.clear_segments()
    # everything added from here on is attributed to kraken
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    logger.debug('Loading model {}'.format(model))
    rnn = models.load_any(model)
    logger.debug('Start recognizing characters')
    # the first four fields of a line are its box, the fifth its identifier
    line_boxes = [(int(x[0]), int(x[1]), int(x[2]), int(x[3])) for x in lines]
    for idx, rec in enumerate(rpred.rpred(rnn, img, line_boxes)):
        # make the line current so that new segments and graphemes land in it
        line_id = lines[idx][4]
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)

        # the capture group keeps the whitespace runs, so words sit at even
        # and whitespace at odd indices
        splits = regex.split(u'(\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                end = line_offset + len(segment)
                seg_bbox = max_bbox(rec.cuts[line_offset:end])
                logger.debug('Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(rec.prediction[line_offset:end]))
                tei.add_graphemes([(x[0], x[1], int(x[2] * 100)) for x in rec[line_offset:end]])
                line_offset = end
            if whitespace:
                end = line_offset + len(whitespace)
                logger.debug('Adding graphemes (whitespace): {}'.format(rec.prediction[line_offset:end]))
                seg_bbox = max_bbox(rec.cuts[line_offset:end])
                tei.add_segment(seg_bbox)
                tei.add_graphemes([(x[0], x[1], int(x[2] * 100)) for x in rec[line_offset:end]])
                line_offset = end

    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write(fp)
    return output_path
def read(self, page):
    """Performs OCR with Kraken.

    Reads the "clean" stage of `page`, recognizes every line of every block
    and stores the results in the stages "line", "word" and "char", which
    are then written with `page.write`.

    Returns None without doing anything when the page has no "clean" stage.
    """

    stages = page.stages
    scan = stages.get("clean", None)
    if scan is None:
        return None

    nonLetter = self.nonLetter

    model = self.ensureLoaded()

    blocks = page.blocks
    ocrChars = []
    ocrWords = []
    ocrLines = []
    stages["char"] = ocrChars
    stages["word"] = ocrWords
    stages["line"] = ocrLines
    binary = pil2array(nlbin(array2pil(scan)))

    for ((stripe, block), data) in blocks.items():
        (left, top, right, bottom) = data["inner"]
        thisBinary = binary[top:bottom, left:right]
        lines = data["bands"]["main"]["lines"]
        for (ln, (up, lo)) in enumerate(lines):
            # line numbers are stored 1-based
            lln = ln + 1
            roi = thisBinary[up : lo + 1]
            (b, e, roi) = removeMargins(roi, keep=16)
            ocrLines.append((stripe, block, lln, left + b, top + up, left + e, top + lo))
            (roiH, roiW) = roi.shape[0:2]
            roi = array2pil(roi)
            bounds = dict(boxes=([0, 0, roiW, roiH],), text_direction=RL)

            # adapt the boxes, because they corresponds to peaks of recognition,
            # not to character extends
            #
            # See https://github.com/mittagessen/kraken/issues/184

            adaptedPreds = []
            for (c, (le, to, ri, bo), conf) in chain.from_iterable(
                rpred(model, roi, bounds, pad=0, bidi_reordering=True)
            ):
                if adaptedPreds:
                    prevPred = adaptedPreds[-1]
                    prevEdge = prevPred[1][0]
                else:
                    prevEdge = roiW
                # move the shared edge to the middle between the previous
                # box's left edge and this box's right edge
                correction = int(round((prevEdge - ri) / 2))
                thisRi = ri + correction
                if adaptedPreds:
                    adaptedPreds[-1][1][0] -= correction
                adaptedPreds.append([c, [le, to, thisRi, bo], conf])
            if adaptedPreds:
                adaptedPreds[-1][1][0] = 0

            # divide into words, not only on spaces, but also on punctuation
            # curWord[0] collects the letters of the word, curWord[1] the
            # non-letters and the space that follow them

            curWord = [[], []]
            inWord = True

            for (c, (le, to, ri, bo), conf) in adaptedPreds:
                offsetW = left + b
                offsetH = top + up
                pos = (le + offsetW, to + offsetH, ri + offsetW, bo + offsetH)
                conf = int(round(conf * 100))
                ocrChars.append((stripe, block, lln, *pos, conf, c))

                spaceSeen = c == " "
                changeWord = not inWord and c not in nonLetter
                element = (c, pos, conf)

                if spaceSeen:
                    curWord[1].append(element)
                if spaceSeen or changeWord:
                    if curWord[0] or curWord[1]:
                        ocrWords.append((stripe, block, lln, *addWord(curWord)))
                        curWord = [[], []]
                        inWord = True
                    if spaceSeen:
                        # the space has been stored with the finished word
                        continue
                    # A letter directly after punctuation starts the next
                    # word. It used to be skipped here together with the
                    # space case, so it never ended up in any word.

                if inWord:
                    if c in nonLetter:
                        inWord = False

                dest = 0 if inWord else 1
                curWord[dest].append(element)

            if curWord[0] or curWord[1]:
                ocrWords.append((stripe, block, lln, *addWord(curWord)))

    page.write(stage="line,word,char")
def detect_scripts(im, bounds, model=pkg_resources.resource_filename(__name__, 'script.mlmodel'), valid_scripts=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of
    scripts/writing systems.

    NOTE: the function is currently disabled. Its first statement raises
    NotImplementedError unconditionally, so none of the code after it is
    executed.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-lr/rl/vertical-lr/rl'.
        model (str): Location of the script classification model. The default
                     is the packaged 'script.mlmodel', resolved once when the
                     function is defined.
        valid_scripts (list): List of valid scripts.

    Returns:
        {'script_detection': True, 'text_direction': '$dir', 'boxes':
        [[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
        direction and a list of lists of reading order sorted bounding boxes
        under the key 'boxes' with each list containing the script segmentation
        of a single line. Script is a ISO15924 4 character identifier.

    Raises:
        NotImplementedError: always, see the note above.
        KrakenInvalidModelException if no clstm module is available.
    """
    raise NotImplementedError('Temporarily unavailable. Please open a github ticket if you want this fixed sooner.')
    # Everything below is unreachable until the raise above is removed.
    im_str = get_im_str(im)
    logger.info(u'Detecting scripts with {} in {} lines on {}'.format(model, len(bounds['boxes']), im_str))
    logger.debug(u'Loading detection model {}'.format(model))
    rnn = models.load_any(model)
    # load numerical to 4 char identifier map
    logger.debug(u'Loading label to identifier map')
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    # convert allowed scripts to labels
    # (a label is the code point 0xF0000 + numerical script identifier)
    val_scripts = []
    if valid_scripts:
        logger.debug(u'Converting allowed scripts list {}'.format(valid_scripts))
        for k, v in n2s.items():
            if v in valid_scripts:
                val_scripts.append(chr(int(k) + 0xF0000))
    else:
        valid_scripts = []
    it = rpred(rnn, im, bounds, bidi_reordering=False)
    preds = []
    logger.debug(u'Running detection')
    for pred, bbox in zip(it, bounds['boxes']):
        # substitute inherited scripts with neighboring runs
        # With r=False labels contained in m are replaced by the preceding
        # label, with r=True labels NOT contained in m are replaced instead.
        def _subs(m, s, r=False):
            p = u''
            for c in s:
                if c in m and p and not r:
                    p += p[-1]
                elif c not in m and p and r:
                    p += p[-1]
                else:
                    p += c
            return p

        logger.debug(u'Substituting scripts')
        p = _subs([u'\U000f03e2', u'\U000f03e6'], pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(reversed(_subs([u'\U000f03e2', u'\U000f03e6'], reversed(p))))
        # group by valid scripts. two steps: 1. substitute common confusions
        # (Latin->Fraktur and Syriac->Arabic) if given in script list.
        if 'Arab' in valid_scripts and 'Syrc' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f0087', u'\U000f00a0')
        if 'Latn' in valid_scripts and 'Latf' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f00d9', u'\U000f00d7')
        # next merge adjacent scripts
        if val_scripts:
            pred.prediction = _subs(val_scripts, pred.prediction, r=True)

        # group by grapheme
        t = []
        logger.debug(u'Merging detections')
        # if line contains only a single script return whole line bounding box
        if len(set(pred.prediction)) == 1:
            logger.debug('Only one script on line. Emitting whole line bbox')
            k = ord(pred.prediction[0]) - 0xF0000
            t.append((n2s[str(k)], bbox))
        else:
            for k, g in groupby(pred, key=lambda x: x[0]):
                # convert to ISO15924 numerical identifier
                k = ord(k) - 0xF0000
                b = max_bbox(x[1] for x in g)
                t.append((n2s[str(k)], b))
        preds.append(t)
    return {'boxes': preds, 'text_direction': bounds['text_direction'], 'script_detection': True}
def recognizer(model, pad, no_segmentation, bidi_reordering, script_ignore, base_image, input, output, lines) -> None:
    """
    Recognizes the text lines of ``base_image`` and writes the result to
    ``output``.

    The line segmentation is read from ``lines``. If ``lines`` is unset and
    ``input`` differs from ``base_image``, ``input`` is used as the
    segmentation file instead. If there is still no segmentation and
    ``no_segmentation`` is set, a temporary segmentation covering the whole
    image as a single line is written, used, and removed again.

    Args:
        model: Mapping of recognition models. ``model['default']`` is used
               when the segmentation carries no script detection, otherwise
               the whole mapping is handed to ``rpred.mm_rpred``.
        pad: Passed through to the recognizer.
        no_segmentation: Treat the whole image as one line when no
                         segmentation is available.
        bidi_reordering: Passed through to the recognizer.
        script_ignore: Passed through to ``rpred.mm_rpred``.
        base_image: Path of the image to recognize.
        input: Path of the input of this processing step (image or JSON
               segmentation).
        output: Path the recognition result is written to.
        lines: Path of a JSON line segmentation or a false value.

    Raises:
        click.BadParameter: if ``base_image`` cannot be opened.
        click.UsageError: if no segmentation is available or the segmentation
                          file is not valid JSON.
    """
    import json
    import tempfile

    from kraken import rpred

    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()

    # input may either be output from the segmenter then it is a JSON file or
    # be an image file when running the OCR subcommand alone. might still come
    # from some other subcommand though.
    scripts = set()
    # name of the temporary segmentation file, if one had to be created
    temp_segmentation = None
    if not lines and base_image != input:
        lines = input
    if not lines:
        if no_segmentation:
            seg_file = tempfile.NamedTemporaryFile(mode='w', delete=False)
            logger.info('Running in no_segmentation mode. Creating temporary segmentation {}.'.format(seg_file.name))
            json.dump({'script_detection': False,
                       'text_direction': 'horizontal-lr',
                       'boxes': [(0, 0) + im.size]}, seg_file)
            seg_file.close()
            lines = temp_segmentation = seg_file.name
        else:
            raise click.UsageError('No line segmentation given. Add one with `-l` or run `segment` first.')
    elif no_segmentation:
        logger.warning('no_segmentation mode enabled but segmentation defined. Ignoring --no-segmentation option.')

    try:
        with open_file(lines, 'r') as fp:
            try:
                fp = cast(IO[Any], fp)
                bounds = json.load(fp)
            except ValueError as e:
                raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
    finally:
        # The segmentation is fully loaded into memory (or loading failed), so
        # the temporary file is no longer needed. Remove it only after the
        # handle above has been closed.
        if temp_segmentation is not None:
            logger.debug('Removing temporary segmentation file.')
            os.unlink(temp_segmentation)

    # script detection
    if bounds.get('script_detection'):
        for l in bounds['boxes']:
            for t in l:
                scripts.add(t[0])
        it = rpred.mm_rpred(model, im, bounds, pad,
                            bidi_reordering=bidi_reordering,
                            script_ignore=script_ignore)
    else:
        it = rpred.rpred(model['default'], im, bounds, pad,
                         bidi_reordering=bidi_reordering)

    preds = []

    with log.progressbar(it, label='Processing', length=len(bounds['boxes'])) as bar:
        for pred in bar:
            preds.append(pred)

    with open_file(output, 'w', encoding='utf-8') as fp:
        fp = cast(IO[Any], fp)
        message('Writing recognition results for {}\t'.format(base_image), nl=False)
        logger.info('Serializing as {} into {}'.format(ctx.meta['mode'], output))
        if ctx.meta['mode'] != 'text':
            from kraken import serialization
            # reuse the already opened image instead of opening the file a
            # second time just to read its dimensions
            fp.write(serialization.serialize(preds, base_image,
                                             im.size,
                                             ctx.meta['text_direction'],
                                             scripts,
                                             ctx.meta['mode']))
        else:
            fp.write('\n'.join(s.prediction for s in preds))
        message('\u2713', fg='green')
def recognizer(model, pad, bidi_reordering, script_ignore, base_image, input, output, lines):
    """
    Recognizes the text lines of ``base_image`` and writes the result to
    ``output``.

    The line segmentation is read as JSON from ``lines``, falling back to
    ``input`` when ``lines`` is unset.

    Args:
        model: Mapping of recognition models. ``model['default']`` is used
               when the segmentation carries no script detection, otherwise
               the whole mapping is handed to ``rpred.mm_rpred``.
        pad: Passed through to the recognizer.
        bidi_reordering: Passed through to the recognizer.
        script_ignore: Passed through to ``rpred.mm_rpred``.
        base_image: Path of the image to recognize.
        input: Path of the segmentation used when ``lines`` is unset.
        output: Path the recognition result is written to.
        lines: Path of a JSON line segmentation or a false value.

    Raises:
        click.BadParameter: if ``base_image`` cannot be opened.
    """
    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()

    if not lines:
        lines = input
    with open_file(lines, 'r') as fp:
        bounds = json.load(fp)

    # script detection; ``scripts`` stays None when the segmentation does not
    # carry script information
    scripts = None
    if bounds.get('script_detection'):
        scripts = {t[0] for l in bounds['boxes'] for t in l}
        it = rpred.mm_rpred(model, im, bounds, pad,
                            bidi_reordering=bidi_reordering,
                            script_ignore=script_ignore)
    else:
        it = rpred.rpred(model['default'], im, bounds, pad,
                         bidi_reordering=bidi_reordering)

    preds = []

    for pred in it:
        spin('Processing')
        preds.append(pred)
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)

    with open_file(output, 'w', encoding='utf-8') as fp:
        message(u'Writing recognition results for {}\t'.format(base_image), nl=False)
        if PY2:
            output = output.decode('utf-8')
        logger.info(u'Serializing as {} into {}'.format(ctx.meta['mode'], output))
        if ctx.meta['mode'] != 'text':
            # reuse the already opened image instead of opening the file a
            # second time just to read its dimensions
            fp.write(serialization.serialize(preds, base_image,
                                             im.size,
                                             ctx.meta['text_direction'],
                                             scripts,
                                             ctx.meta['mode']))
        else:
            fp.write(u'\n'.join(s.prediction for s in preds))
        if not ctx.meta['verbose']:
            message(u'\u2713', fg='green')
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps, font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.

    Every image is binarized, segmented (or paired with the segmentation in
    ``lines``), optionally pre-filled with recognition results, and added as
    a page to a transcription interface that is finally written to ``output``.

    Args:
        ctx: Click context (not used inside this function).
        text_direction: Passed to ``pageseg.segment``.
        scale: Passed to ``pageseg.segment``.
        bw: Not used inside this function.
        maxcolseps: Passed to ``pageseg.segment``.
        black_colseps: Passed to ``pageseg.segment``.
        font: Passed to ``transcribe.TranscriptionInterface``.
        font_style: Passed to ``transcribe.TranscriptionInterface``.
        prefill: Location of a recognition model used to pre-fill the
                 transcription, or a false value to leave lines empty.
        pad: Passed to ``pageseg.segment``.
        lines: Path of a JSON segmentation to use instead of running the
               segmenter. Only valid with a single image.
        output: Open file object the transcription is written to.
        images: Open image file objects.

    Raises:
        click.UsageError: if ``lines`` is combined with more than one image
                          or the segmentation file is not valid JSON.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError('--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            try:
                logger.info('Reading {}'.format(fp.name))
                im = Image.open(fp)
                if im.mode not in ['1', 'L', 'P', 'RGB']:
                    logger.warning('Input {} is in {} color mode. Converting to RGB'.format(fp.name, im.mode))
                    im = im.convert('RGB')
                logger.info('Binarizing page')
                im_bin = binarization.nlbin(im)
                im_bin = im_bin.convert('1')
                logger.info('Segmenting page')
                if not lines:
                    res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
                else:
                    # A distinct name is required here: rebinding ``fp`` would
                    # make the close() below hit the segmentation file and
                    # leave the image file open.
                    with open_file(lines, 'r') as seg_fp:
                        try:
                            seg_fp = cast(IO[Any], seg_fp)
                            res = json.load(seg_fp)
                        except ValueError as e:
                            raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
                if prefill:
                    it = rpred.rpred(prefill, im_bin, res)
                    preds = []
                    logger.info('Recognizing')
                    for pred in it:
                        logger.debug('{}'.format(pred.prediction))
                        preds.append(pred)
                    ti.add_page(im, res, records=preds)
                else:
                    ti.add_page(im, res)
            finally:
                # always release the image file, also on errors
                fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
# Draws the character cuts returned by the recognizer as translucent colored
# rectangles on top of each binarized input page.
#
# Usage: <script> MODEL IMAGE [IMAGE ...]
# The result for every image is saved as ``high_<image file name>`` in the
# current working directory.
import os
import sys
from itertools import cycle

from PIL import Image, ImageDraw

from kraken.binarization import nlbin
from kraken.lib import models
from kraken.pageseg import segment
from kraken.rpred import rpred

# RGBA fill colors, cycled through one per drawn box
cmap = cycle([(230, 25, 75, 127),
              (60, 180, 75, 127),
              (255, 225, 25, 127),
              (0, 130, 200, 127),
              (245, 130, 48, 127),
              (145, 30, 180, 127),
              (70, 240, 240, 127)])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    # boxes are drawn on a fully transparent layer that is composited onto
    # the page afterwards
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    # ``os`` was used here without being imported
    im.save('high_{}'.format(os.path.basename(fname)))
def recognizer(model, pad, no_segmentation, bidi_reordering, script_ignore, input, output) -> None:
    """
    Recognizes the text lines of the current base image and writes the result
    to ``output``.

    The segmentation comes from one of three places, checked in this order:
    a structured input document parsed with ``get_input_parser`` (first
    processing step with a non-image input format), a JSON segmentation in
    ``input`` (when ``input`` is not the base image), or a single line
    covering the whole image when ``no_segmentation`` is set.

    Args:
        model: Mapping of recognition models. ``model['default']`` is used
               when the segmentation carries no script detection, otherwise
               the whole mapping is handed to ``rpred.mm_rpred``.
        pad: Passed through to the recognizer.
        no_segmentation: Treat the whole image as one line when no
                         segmentation is available.
        bidi_reordering: Passed through to the recognizer.
        script_ignore: Passed through to ``rpred.mm_rpred``.
        input: Path of the input of this processing step.
        output: Path the recognition result is written to.

    Raises:
        click.BadParameter: if the base image cannot be opened.
        click.UsageError: if no segmentation is available or the segmentation
                          file is not valid JSON.
    """
    import json

    from kraken import rpred

    ctx = click.get_current_context()

    bounds = None
    if 'base_image' not in ctx.meta:
        ctx.meta['base_image'] = input

    if ctx.meta['first_process'] and ctx.meta['input_format_type'] != 'image':
        doc = get_input_parser(ctx.meta['input_format_type'])(input)
        ctx.meta['base_image'] = doc['image']
        doc['text_direction'] = 'horizontal-lr'
        bounds = doc
    try:
        im = Image.open(ctx.meta['base_image'])
    except IOError as e:
        raise click.BadParameter(str(e))

    if not bounds and ctx.meta['base_image'] != input:
        with open_file(input, 'r') as fp:
            try:
                fp = cast(IO[Any], fp)
                bounds = json.load(fp)
            except ValueError as e:
                raise click.UsageError(f'{input} invalid segmentation: {str(e)}')
    elif not bounds:
        if no_segmentation:
            bounds = {'script_detection': False,
                      'text_direction': 'horizontal-lr',
                      'boxes': [(0, 0) + im.size]}
        else:
            raise click.UsageError('No line segmentation given. Add one with the input or run `segment` first.')
    elif no_segmentation:
        logger.warning('no_segmentation mode enabled but segmentation defined. Ignoring --no-segmentation option.')

    # NOTE(review): this set is handed to the serializer but never filled in
    # this function - confirm whether that is intended.
    scripts = set()
    # script detection
    if 'script_detection' in bounds and bounds['script_detection']:
        it = rpred.mm_rpred(model, im, bounds, pad,
                            bidi_reordering=bidi_reordering,
                            script_ignore=script_ignore)
    else:
        it = rpred.rpred(model['default'], im, bounds, pad,
                         bidi_reordering=bidi_reordering)

    preds = []

    with log.progressbar(it, label='Processing') as bar:
        for pred in bar:
            preds.append(pred)

    with open_file(output, 'w', encoding='utf-8') as fp:
        fp = cast(IO[Any], fp)
        message(f'Writing recognition results for {ctx.meta["orig_file"]}\t', nl=False)
        logger.info('Serializing as {} into {}'.format(ctx.meta['output_mode'], output))
        if ctx.meta['output_mode'] != 'native':
            from kraken import serialization
            # reuse the already opened image instead of opening the file a
            # second time just to read its dimensions
            fp.write(serialization.serialize(preds, ctx.meta['base_image'],
                                             im.size,
                                             ctx.meta['text_direction'],
                                             scripts,
                                             bounds['regions'] if 'regions' in bounds else None,
                                             ctx.meta['output_mode']))
        else:
            fp.write('\n'.join(s.prediction for s in preds))
        message('\u2713', fg='green')
x, y, w, h = one_box ## Different color for each row r = random.randint(0, 255) g = random.randint(0, 255) b = random.randint(0, 255) # Drawing box cv2.rectangle(dummy_image, (x, y), (x + w, y + h), (b, g, r), 2) ######################################################################################### ## Kraken Text Extraction cord = [x, y, x + w, y + h] bound = {'boxes': [tuple(cord)], 'text_direction': 'horizontal-lr'} ## Using Kraken API generator = rpred.rpred(network=model, im=genrator_image, bounds=bound) nxt_gen = next(generator) box_text = nxt_gen.prediction print("Box_Text = {} | Y = {}".format(box_text, y)) ########################################################################################## ## Kraken bash script # small = img[y:y + h, x:x + w] # cv2.imwrite("images/temp.jpg", small) # box_text = " " # try: # call(["kraken", "-i", "images/temp.jpg", "image.txt", "binarize", "segment", "ocr"]) # box_text = open("image.txt", "r").read() # except Exception as e: # pass
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps, font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.

    Each image is binarized and segmented, or paired with the segmentation
    stored in ``lines``. When ``prefill`` is given the lines are recognized
    and the results are stored with the page. All pages are collected in one
    transcription interface which is written to ``output`` at the end.

    Args:
        ctx: Click context (not used inside this function).
        text_direction: Passed to ``pageseg.segment``.
        scale: Passed to ``pageseg.segment``.
        bw: Not used inside this function.
        maxcolseps: Passed to ``pageseg.segment``.
        black_colseps: Passed to ``pageseg.segment``.
        font: Passed to ``transcribe.TranscriptionInterface``.
        font_style: Passed to ``transcribe.TranscriptionInterface``.
        prefill: Location of a recognition model used to pre-fill the
                 transcription, or a false value to leave lines empty.
        pad: Passed to ``pageseg.segment``.
        lines: Path of a JSON segmentation to use instead of running the
               segmenter. Only valid with a single image.
        output: Open file object the transcription is written to.
        images: Open image file objects.

    Raises:
        click.UsageError: if ``lines`` is combined with more than one image
                          or the segmentation file is not valid JSON.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError('--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for image_fp in bar:
            try:
                logger.info('Reading {}'.format(image_fp.name))
                im = Image.open(image_fp)
                if im.mode not in ['1', 'L', 'P', 'RGB']:
                    logger.warning('Input {} is in {} color mode. Converting to RGB'.format(image_fp.name, im.mode))
                    im = im.convert('RGB')
                logger.info('Binarizing page')
                im_bin = binarization.nlbin(im)
                im_bin = im_bin.convert('1')
                logger.info('Segmenting page')
                if not lines:
                    res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
                else:
                    # The segmentation handle must not reuse the name of the
                    # image handle, otherwise the image file is never closed.
                    with open_file(lines, 'r') as lines_fp:
                        try:
                            lines_fp = cast(IO[Any], lines_fp)
                            res = json.load(lines_fp)
                        except ValueError as e:
                            raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
                if prefill:
                    it = rpred.rpred(prefill, im_bin, res)
                    preds = []
                    logger.info('Recognizing')
                    for pred in it:
                        logger.debug('{}'.format(pred.prediction))
                        preds.append(pred)
                    ti.add_page(im, res, records=preds)
                else:
                    ti.add_page(im, res)
            finally:
                # release the image file even if processing the page failed
                image_fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
def detect_scripts(im, bounds, model=pkg_resources.resource_filename(__name__, 'script.mlmodel'), valid_scripts=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of
    scripts/writing systems.

    NOTE: The function is currently disabled. It raises NotImplementedError
    as its first statement, so the implementation below is unreachable and
    only kept for reference.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-lr/rl/vertical-lr/rl'.
        model (str): Location of the script classification model. Defaults to
                     the 'script.mlmodel' package resource; the default is
                     resolved once, when the function is defined.
        valid_scripts (list): List of valid scripts.

    Returns:
        {'script_detection': True, 'text_direction': '$dir', 'boxes':
        [[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
        direction and a list of lists of reading order sorted bounding boxes
        under the key 'boxes' with each list containing the script segmentation
        of a single line. Script is a ISO15924 4 character identifier.

    Raises:
        NotImplementedError: always, as long as the function is disabled.
    """
    raise NotImplementedError(
        'Temporarily unavailable. Please open a github ticket if you want this fixed sooner.'
    )
    # ------------------------------------------------------------------
    # Everything below is unreachable because of the raise above.
    # ------------------------------------------------------------------
    im_str = get_im_str(im)
    logger.info(u'Detecting scripts with {} in {} lines on {}'.format(model, len(bounds['boxes']), im_str))
    logger.debug(u'Loading detection model {}'.format(model))
    rnn = models.load_any(model)
    # load numerical to 4 char identifier map
    logger.debug(u'Loading label to identifier map')
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    # convert allowed scripts to labels: the label of a script is the code
    # point 0xF0000 + its numerical identifier
    val_scripts = []
    if valid_scripts:
        logger.debug(u'Converting allowed scripts list {}'.format(valid_scripts))
        for k, v in n2s.items():
            if v in valid_scripts:
                val_scripts.append(chr(int(k) + 0xF0000))
    else:
        # normalize None to an empty list so the membership tests below work
        valid_scripts = []
    it = rpred(rnn, im, bounds, bidi_reordering=False)
    preds = []
    logger.debug(u'Running detection')
    for pred, bbox in zip(it, bounds['boxes']):
        # substitute inherited scripts with neighboring runs
        def _subs(m, s, r=False):
            # Rewrites the label sequence s: with r=False every label that is
            # in m is replaced by the previously emitted label, with r=True
            # every label that is NOT in m is replaced. A label without a
            # predecessor is always kept as is.
            p = u''
            for c in s:
                if c in m and p and not r:
                    p += p[-1]
                elif c not in m and p and r:
                    p += p[-1]
                else:
                    p += c
            return p

        logger.debug(u'Substituting scripts')
        p = _subs([u'\U000f03e2', u'\U000f03e6'], pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(reversed(_subs([u'\U000f03e2', u'\U000f03e6'], reversed(p))))
        # group by valid scripts. two steps: 1. substitute common confusions
        # (Latin->Fraktur and Syriac->Arabic) if given in script list.
        if 'Arab' in valid_scripts and 'Syrc' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f0087', u'\U000f00a0')
        if 'Latn' in valid_scripts and 'Latf' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f00d9', u'\U000f00d7')
        # next merge adjacent scripts
        if val_scripts:
            pred.prediction = _subs(val_scripts, pred.prediction, r=True)

        # group by grapheme
        t = []
        logger.debug(u'Merging detections')
        # if line contains only a single script return whole line bounding box
        if len(set(pred.prediction)) == 1:
            logger.debug('Only one script on line. Emitting whole line bbox')
            k = ord(pred.prediction[0]) - 0xF0000
            t.append((n2s[str(k)], bbox))
        else:
            # NOTE(review): this iterates the record itself and presumably
            # yields (label, bbox) pairs - confirm against the record type
            # returned by rpred.
            for k, g in groupby(pred, key=lambda x: x[0]):
                # convert to ISO15924 numerical identifier
                k = ord(k) - 0xF0000
                b = max_bbox(x[1] for x in g)
                t.append((n2s[str(k)], b))
        preds.append(t)
    return {
        'boxes': preds,
        'text_direction': bounds['text_direction'],
        'script_detection': True
    }