Beispiel #1
0
 def test_rpred_outbounds(self):
     """
     Tests correct handling of invalid line coordinates.
     """
     nn = load_any(os.path.join(resources, 'toy.clstm'))
     pred = rpred(nn, self.im, {'boxes': [[-1, -1, 10000, 10000]], 'text_direction': 'horizontal'}, True)
     next(pred)
Beispiel #2
0
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images):
    st_time = time.time()
    ti = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        if not ctx.meta['verbose']:
            click.secho(u'\b\u2713', fg='green', nl=False)
            click.echo('\033[?25h\n', nl=False)

    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            for pred in it: 
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                click.secho(u'\b\u2713', fg='green', nl=False)
                click.echo('\033[?25h\n', nl=False)
            ti.add_page(im, res, records=preds)
        else:
            ti.add_page(im, res)
        fp.close()
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
Beispiel #3
0
def detect_scripts(im, bounds, model=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of scripts/writing systems.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-tb/vertical-lr/rl'.
        model (str): Location of the script classification model or None for default.

    Returns:
        {'text_direction': '$dir', 'boxes': [[(script, (x1, y1, x2, y2)),...]]}: A
        dictionary containing the text direction and a list of lists of reading
        order sorted bounding boxes under the key 'boxes' with each list
        containing the script segmentation of a single line. Script is a
        ISO15924 4 character identifier.

    Raises:
        KrakenInputException if the input image is not binarized or the text
        direction is invalid.
        KrakenInvalidModelException if no clstm module is available.
    """
    if not model:
        model = pkg_resources.resource_filename(__name__, 'script.clstm')
    rnn = models.load_clstm(model)
    # load numerical to 4 char identifier map
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    it = rpred(rnn, im, bounds)
    preds = []
    for pred in it:
        # substitute inherited scripts with neighboring runs
        def subs(m, s):
            p = u''
            for c in s:
                if c in m and p:
                    p += p[-1]
                else:
                    p += c
            return p
        p = subs([u'\U000f03e6', u'\U000f03e6'], pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(reversed(subs([u'\U000f03e6', u'\U000f03e6'], reversed(p))))
        # group by grapheme
        t = []
        for k, g in groupby(pred, key=lambda x: x[0]):
            # convert to ISO15924 numerical identifier
            k = ord(k) - 0xF0000
            b = max_bbox(x[1] for x in g)
            t.append((n2s[str(k)], b))
        preds.append(t)

    return {'boxes': preds, 'text_direction': bounds['text_direction']}
Beispiel #4
0
 def test_rpred_outbounds(self):
     """
     Tests correct handling of invalid line coordinates.
     """
     pred = rpred(None, self.im, {
         'boxes': [[-1, -1, 10000, 10000]],
         'text_direction': 'horizontal'
     }, True)
     next(pred)
Beispiel #5
0
 def test_rpred_outbounds(self):
     """
     Tests correct handling of invalid line coordinates.
     """
     nn = load_any(os.path.join(resources, 'toy.clstm'))
     pred = rpred(nn, self.im, {
         'boxes': [[-1, -1, 10000, 10000]],
         'text_direction': 'horizontal'
     }, True)
     next(pred)
Beispiel #6
0
 def test_rpred_bbox_outbounds(self):
     """
     Tests correct handling of invalid bbox line coordinates.
     """
     with raises(KrakenInputException):
         pred = rpred(self.model, self.im, {
             'boxes': [[-1, -1, 10000, 10000]],
             'text_direction': 'horizontal'
         }, True)
         next(pred)
Beispiel #7
0
 def test_simple_bl_rpred(self):
     """
     Tests simple recognition without tags.
     """
     pred = rpred(self.model, self.overfit_line, {
         'boxes': [[0, 0, 2544, 156]],
         'text_direction': 'horizontal'
     }, True)
     record = next(pred)
     self.assertEqual(record.prediction, 'ܡ ܘܡ ܗ ܡܕܐ ܐ ܐܐ ܡ ܗܗܐܐܐܕ')
Beispiel #8
0
def transcription(ctx, font, font_style, prefill, output, images):
    st_time = time.time()
    ti = transcrib.TranscriptionInterface(font, font_style)

    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill)
        if not ctx.meta['verbose']:
            click.secho(u'\b\u2713', fg='green', nl=False)
            click.echo('\033[?25h\n', nl=False)

    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im)
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            for pred in it: 
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                click.secho(u'\b\u2713', fg='green', nl=False)
                click.echo('\033[?25h\n', nl=False)
            ti.add_page(im, records=preds)
        else:
            ti.add_page(im, res)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
Beispiel #9
0
def recognizer(model, pad, bidi_reordering, base_image, input, output, lines):
    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()

    scripts = None
    if not lines:
        lines = input
    with open_file(lines, 'r') as fp:
        bounds = json.load(fp)
        # script detection
        if bounds['script_detection']:
            scripts = set()
            for l in bounds['boxes']:
                for t in l:
                    scripts.add(t[0])
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Executing multi-script recognition'.format(time.time() - st_time))
            it = rpred.mm_rpred(model, im, bounds, pad, bidi_reordering=bidi_reordering)
        else:
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Executing mono-script recognition'.format(time.time() - st_time))
            it = rpred.rpred(model['default'], im, bounds, pad, bidi_reordering=bidi_reordering)

    preds = []

    st_time = time.time()
    for pred in it:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
        else:
            spin('Processing')
        preds.append(pred)
    if ctx.meta['verbose'] > 0:
        click.echo(u'Execution time: {}s'.format(time.time() - st_time))
    else:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    ctx = click.get_current_context()
    with open_file(output, 'w', encoding='utf-8') as fp:
        click.echo('Writing recognition results for {}\t'.format(base_image), nl=False)
        if ctx.meta['mode'] != 'text':
            fp.write(serialization.serialize(preds, base_image,
                     Image.open(base_image).size, ctx.meta['text_direction'],
                     scripts, ctx.meta['mode']))
        else:
            fp.write(u'\n'.join(s.prediction for s in preds))
        if not ctx.meta['verbose']:
            click.secho(u'\u2713', fg='green')
Beispiel #10
0
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images, segment_page):
    ti = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        message(u'\b\u2713', fg='green', nl=False)
        message('\033[?25h\n', nl=False)

    for fp in images:
        logger.info('Reading {}'.format(fp.name))
        spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            logger.info(u'Binarizing page')
            im = binarization.nlbin(im)
        if segment_page:
            logger.info(u'Segmenting page')
            res = pageseg.segment(im, text_direction, scale, maxcolseps,
                                  black_colseps)
        else:
            res = {
                'text_direction': 'horizontal-tb',
                'boxes': [(0, 0) + im.size]
            }
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            for pred in it:
                logger.info('{}'.format(pred.prediction))
                spin('Recognizing')
                preds.append(pred)
            message(u'\b\u2713', fg='green', nl=False)
            message('\033[?25h\n', nl=False)
            ti.add_page(im, res, records=preds)
        else:
            ti.add_page(im, res)
        fp.close()
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
    logger.info(u'Writing transcription to {}'.format(output.name))
    spin('Writing output')
    ti.write(output)
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
Beispiel #11
0
def recognizer(model, pad, base_image, input, output, lines):
    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()

    if not lines:
        lines = input
    with open_file(lines, 'r') as fp:
        bounds = [(int(x1), int(y1), int(x2), int(y2))
                  for x1, y1, x2, y2 in csv.reader(fp)]
        it = rpred.rpred(model, im, bounds, pad)
    preds = []

    st_time = time.time()
    for pred in it:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time,
                                              pred.prediction))
        else:
            spin('Processing')
        preds.append(pred)
    if ctx.meta['verbose'] > 0:
        click.echo(u'Execution time: {}s'.format(time.time() - st_time))
    else:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    ctx = click.get_current_context()
    with open_file(output, 'w', encoding='utf-8') as fp:
        click.echo('Writing recognition results for {}\t'.format(base_image),
                   nl=False)
        if ctx.meta['mode'] != 'text':

            fp.write(
                serialization.serialize(preds, base_image,
                                        Image.open(base_image).size,
                                        ctx.meta['mode']))
        else:
            fp.write(u'\n'.join(s.prediction for s in preds))
        if not ctx.meta['verbose']:
            click.secho(u'\u2713', fg='green')
Beispiel #12
0
def recognizer(input_image, model, pad, no_segmentation, bidi_reordering,
               script_ignore, mode, text_direction, segments) -> None:
    bounds = segments

    # Script detection.
    if bounds['script_detection']:
        for l in bounds['boxes']:
            for t in l:
                scripts.add(t[0])
        it = rpred.mm_rpred(model,
                            input_image,
                            bounds,
                            pad,
                            bidi_reordering=bidi_reordering,
                            script_ignore=script_ignore)
    else:
        it = rpred.rpred(model['default'],
                         input_image,
                         bounds,
                         pad,
                         bidi_reordering=bidi_reordering)

    preds = []
    with log.progressbar(it, label='Processing',
                         length=len(bounds['boxes'])) as bar:
        for pred in bar:
            preds.append(pred)

    #--------------------
    print('Recognition results = {}.'.format('\n'.join(s.prediction
                                                       for s in preds)))

    if False:
        with open_file(output, 'w', encoding='utf-8') as fp:
            print('Serializing as {} into {}'.format(mode, output))
            if mode != 'text':
                from kraken import serialization
                fp.write(
                    serialization.serialize(preds, base_image,
                                            Image.open(base_image).size,
                                            text_direction, scripts, mode))
            else:
                fp.write('\n'.join(s.prediction for s in preds))
Beispiel #13
0
 def test_rpred_bl_outbounds(self):
     """
     Tests correct handling of invalid baseline coordinates.
     """
     with raises(KrakenInputException):
         pred = rpred(
             self.model, self.im, {
                 'lines': [{
                     'tags': {
                         'type': 'default'
                     },
                     'baseline': [[0, 0], [10000, 0]],
                     'boundary': [[-1, -1], [-1, 10000], [10000, 10000],
                                  [10000, -1]]
                 }],
                 'text_direction':
                 'horizontal',
                 'type':
                 'baselines'
             }, True)
         next(pred)
Beispiel #14
0
def recognizer(model, pad, base_image, input, output, lines):
    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()

    if not lines:
        lines = input
    with open_file(lines, 'r') as fp:
        bounds = [(int(x1), int(y1), int(x2), int(y2)) for x1, y1, x2, y2
                  in csv.reader(fp)]
        it = rpred.rpred(model, im, bounds, pad)
    preds = []

    st_time = time.time()
    for pred in it:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
        else:
            spin('Processing')
        preds.append(pred)
    if ctx.meta['verbose'] > 0:
        click.echo(u'Execution time: {}s'.format(time.time() - st_time))
    else:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
    
    ctx = click.get_current_context()
    with open_file(output, 'w', encoding='utf-8') as fp:
        click.echo('Writing recognition results for {}\t'.format(base_image), nl=False)
        if ctx.meta['mode'] != 'text':

            fp.write(serialization.serialize(preds, base_image, Image.open(base_image).size, ctx.meta['mode']))
        else:
            fp.write(u'\n'.join(s.prediction for s in preds))
        if not ctx.meta['verbose']:
            click.secho(u'\u2713', fg='green')
Beispiel #15
0
def forced_align(doc, model):
    """
    Performs a forced character alignment of text with recognition model
    output activations.

    Argument:
        doc (dict): Parsed document.
        model (kraken.lib.model.TorchSeqRecognizer): Recognition model to use for alignment.

    Returns:
        A list of kraken.rpred.ocr_record.
    """
    im = Image.open(doc['image'])
    predictor = rpred.rpred(model, im, doc)
    records = []
    for line in doc['lines']:
        bidi_text = get_display(line['text'])
        gt_fst = fst_from_text(bidi_text, model.codec)
        next(predictor)
        lat_fst = fst_from_lattice(model.outputs)
        composed_graph = _compose(lat_fst, gt_fst)
        short_graph = _shortest_path(composed_graph)
        pred = []
        pos = []
        conf = []
        for act, label in _generate_line_record(short_graph,
                                                model.outputs.shape[2] - 1):
            pos.append(
                compute_polygon_section(
                    line['baseline'], line['boundary'],
                    predictor._scale_val(act[0], 0, predictor.box.size[0]),
                    predictor._scale_val(act[1], 0, predictor.box.size[0])))
            conf.append(1.0)
            pred.append(model.codec.decode([(label, 0, 0, 0)])[0][0])
        records.append(
            rpred.bidi_record(rpred.ocr_record(pred, pos, conf, line)))
    return records
Beispiel #16
0
def recognizer(model, pad, no_segmentation, bidi_reordering, script_ignore,
               base_image, input, output, lines) -> None:

    import json
    import tempfile

    from kraken import rpred

    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()

    # input may either be output from the segmenter then it is a JSON file or
    # be an image file when running the OCR subcommand alone. might still come
    # from some other subcommand though.
    scripts = set()
    if not lines and base_image != input:
        lines = input
    if not lines:
        if no_segmentation:
            lines = tempfile.NamedTemporaryFile(mode='w', delete=False)
            logger.info(
                'Running in no_segmentation mode. Creating temporary segmentation {}.'
                .format(lines.name))
            json.dump(
                {
                    'script_detection': False,
                    'text_direction': 'horizontal-lr',
                    'boxes': [(0, 0) + im.size]
                }, lines)
            lines.close()
            lines = lines.name
        else:
            raise click.UsageError(
                'No line segmentation given. Add one with `-l` or run `segment` first.'
            )
    elif no_segmentation:
        logger.warning(
            'no_segmentation mode enabled but segmentation defined. Ignoring --no-segmentation option.'
        )

    with open_file(lines, 'r') as fp:
        try:
            fp = cast(IO[Any], fp)
            bounds = json.load(fp)
        except ValueError as e:
            raise click.UsageError('{} invalid segmentation: {}'.format(
                lines, str(e)))
        # script detection
        if bounds['script_detection']:
            for l in bounds['boxes']:
                for t in l:
                    scripts.add(t[0])
            it = rpred.mm_rpred(model,
                                im,
                                bounds,
                                pad,
                                bidi_reordering=bidi_reordering,
                                script_ignore=script_ignore)
        else:
            it = rpred.rpred(model['default'],
                             im,
                             bounds,
                             pad,
                             bidi_reordering=bidi_reordering)

    if not lines and no_segmentation:
        logger.debug('Removing temporary segmentation file.')
        os.unlink(lines.name)

    preds = []

    with log.progressbar(it, label='Processing',
                         length=len(bounds['boxes'])) as bar:
        for pred in bar:
            preds.append(pred)

    ctx = click.get_current_context()
    with open_file(output, 'w', encoding='utf-8') as fp:
        fp = cast(IO[Any], fp)
        message('Writing recognition results for {}\t'.format(base_image),
                nl=False)
        logger.info('Serializing as {} into {}'.format(ctx.meta['mode'],
                                                       output))
        if ctx.meta['mode'] != 'text':
            from kraken import serialization
            fp.write(
                serialization.serialize(preds, base_image,
                                        Image.open(base_image).size,
                                        ctx.meta['text_direction'], scripts,
                                        ctx.meta['mode']))
        else:
            fp.write('\n'.join(s.prediction for s in preds))
        message('\u2713', fg='green')
Beispiel #17
0
 def test_rpred_outbounds(self):
     """
     Tests correct handling of invalid line coordinates.
     """
     pred = rpred(None, self.im, [(-1, -1, 10000, 10000)])
     next(pred)
Beispiel #18
0
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string append to all output files
        model (unicode): Identifier for the font model to use

    Returns:
        (unicode, unicode): Storage tuple for the output file
    """
    input_path = storage.get_abs_path(*doc[1])
    output_path = (doc[1][0], os.path.splitext(storage.insert_suffix(doc[1][1],
                                                                     method,
                                                                     model))[0]
                   + '.xml')
    logger.debug('Searching for model {}'.format(model))
    if model in nidaba_cfg['kraken_models']:
        model = storage.get_abs_path(*(nidaba_cfg['kraken_models'][model]))
    elif model in nidaba_cfg['ocropus_models']:
        model = storage.get_abs_path(*(nidaba_cfg['ocropus_models'][model]))
    else:
        raise NidabaInvalidParameterException('Model not defined in '
                                              'configuration')
    img = Image.open(input_path)
    logger.debug('Reading TEI segmentation from {}'.format(doc[1]))
    tei = OCRRecord()
    with storage.StorageFile(*doc[0]) as seg:
        tei.load_tei(seg)

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    logger.debug('Loading model {}'.format(model))
    rnn = models.load_any(model)
    i = 0
    logger.debug('Start recognizing characters')
    for line_id, rec in zip(lines, rpred.rpred(rnn, img, [x['bbox'] for x in lines.itervalues()])):
        # scope the current line and add all graphemes recognized by kraken to
        # it.
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)
        i += 1

        splits = regex.split(u'(\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset + len(segment)])
                logger.debug('Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(rec.prediction[line_offset:line_offset+len(segment)]))
                tei.add_graphemes([{'grapheme': x[0], 
                                    'bbox': x[1],
                                    'confidence': int(x[2] * 100)} for x in rec[line_offset:line_offset+len(segment)]])
                line_offset += len(segment)
            if whitespace:
                logger.debug('Adding graphemes (whitespace): {}'.format(rec.prediction[line_offset:line_offset+len(whitespace)]))
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset + len(whitespace)])
                tei.add_segment(seg_bbox)
                tei.add_graphemes([{'grapheme': x[0], 
                                    'bbox': x[1],
                                    'confidence': int(x[2] * 100)} for x in rec[line_offset:line_offset+len(whitespace)]])
                line_offset += len(whitespace)
    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write_tei(fp)
    return output_path
Beispiel #19
0
#! /usr/bin/env python

from kraken.lib import models
from kraken import rpred
from PIL import Image
from glob import glob
import sys

model = sys.argv[1]
gt = sys.argv[2] if len(sys.argv) > 2 else '.'

rnn = models.load_any(model)
ims = glob(gt + '/*.png')

for f in ims:
    print(f)
    im = Image.open(f)
    it = rpred.rpred(rnn, im, [(0, 0) + im.size])
    with open(f + '.rec.txt', 'wb') as fp:
        fp.write(it.next().prediction.encode('utf-8'))
Beispiel #20
0
from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from itertools import cycle
from kraken.lib import models

cmap = cycle([(230, 25, 75, 127),
              (60, 180, 75, 127),
              (255, 225, 25, 127),
              (0, 130, 200, 127),
              (245, 130, 48, 127),
              (145, 30, 180, 127),
              (70, 240, 240, 127)])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    im.save('high_{}'.format(os.path.basename(fname)))
Beispiel #21
0
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string append to all output files
        model (unicode): Identifier for the font model to use

    Returns:
        (unicode, unicode): Storage tuple for the output file
    """
    output_path = (
        doc[0],
        os.path.splitext(storage.insert_suffix(doc[1], method, model))[0] +
        '.xml')
    logger.debug('Loading model {}'.format(model))
    try:
        rnn = models.load_any(mod_db[model])
    except Exception as e:
        raise NidabaInvalidParameterException(str(e))
    logger.debug('Reading TEI segmentation from {}'.format(doc))
    tei = OCRRecord()
    with storage.StorageFile(*doc) as seg:
        tei.load_tei(seg)

    img = Image.open(
        storage.get_abs_path(*storage.get_storage_path_url(tei.img)))
    if is_bitonal(img):
        img = img.convert('1')
    else:
        raise NidabaInvalidParameterException('Input image is not bitonal')

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    i = 0
    rnn = models.load_any(mod_db[model])
    logger.debug('Start recognizing characters')
    for line_id, rec in izip(
            lines,
            rpred.rpred(
                rnn, img, {
                    'text_direction': 'horizontal-tb',
                    'boxes': [list(x['bbox']) for x in lines.itervalues()]
                })):
        # scope the current line and add all graphemes recognized by kraken to
        # it.
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)
        i += 1

        splits = regex.split(u'(\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset +
                                             len(segment)])
                logger.debug(
                    'Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(
                    rec.prediction[line_offset:line_offset + len(segment)]))
                tei.add_graphemes([{
                    'grapheme': x[0],
                    'bbox': x[1],
                    'confidence': int(x[2] * 100)
                } for x in rec[line_offset:line_offset + len(segment)]])
                line_offset += len(segment)
            if whitespace:
                logger.debug('Adding graphemes (whitespace): {}'.format(
                    rec.prediction[line_offset:line_offset + len(whitespace)]))
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset +
                                             len(whitespace)])
                tei.add_segment(seg_bbox)
                tei.add_graphemes([{
                    'grapheme': x[0],
                    'bbox': x[1],
                    'confidence': int(x[2] * 100)
                } for x in rec[line_offset:line_offset + len(whitespace)]])
                line_offset += len(whitespace)
    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write_tei(fp)
    return output_path
Beispiel #22
0
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string append to all output files
        model (unicode): Identifier for the font model to use

    Returns:
        (unicode, unicode): Storage tuple for the output file
    """
    input_path = storage.get_abs_path(*doc[1])
    output_path = (
        doc[1][0],
        os.path.splitext(storage.insert_suffix(doc[1][1], method, model))[0] +
        '.xml')
    logger.debug('Searching for model {}'.format(model))
    if model in nidaba_cfg['kraken_models']:
        model = storage.get_abs_path(*(nidaba_cfg['kraken_models'][model]))
    elif model in nidaba_cfg['ocropus_models']:
        model = storage.get_abs_path(*(nidaba_cfg['ocropus_models'][model]))
    else:
        raise NidabaInvalidParameterException('Model not defined in '
                                              'configuration')
    img = Image.open(input_path)
    logger.debug('Reading TEI segmentation from {}'.format(doc[1]))
    tei = TEIFacsimile()
    with storage.StorageFile(*doc[0]) as seg:
        tei.read(seg)

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    logger.debug('Loading model {}'.format(model))
    rnn = models.load_any(model)
    i = 0
    logger.debug('Start recognizing characters')
    for rec in rpred.rpred(rnn, img,
                           [(int(x[0]), int(x[1]), int(x[2]), int(x[3]))
                            for x in lines]):
        # scope the current line and add all graphemes recognized by kraken to
        # it.
        logger.debug('Scoping line {}'.format(lines[i][4]))
        tei.scope_line(lines[i][4])
        i += 1

        splits = regex.split(u'(\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset +
                                             len(segment)])
                logger.debug(
                    'Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(
                    rec.prediction[line_offset:line_offset + len(segment)]))
                tei.add_graphemes([
                    (x[0], x[1], int(x[2] * 100))
                    for x in rec[line_offset:line_offset + len(segment)]
                ])
                line_offset += len(segment)
            if whitespace:
                logger.debug('Adding graphemes (whitespace): {}'.format(
                    rec.prediction[line_offset:line_offset + len(whitespace)]))
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset +
                                             len(whitespace)])
                tei.add_segment(seg_bbox)
                tei.add_graphemes([
                    (x[0], x[1], int(x[2] * 100))
                    for x in rec[line_offset:line_offset + len(whitespace)]
                ])
                line_offset += len(whitespace)
    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write(fp)
    return output_path
Beispiel #23
0
    def read(self, page):
        """Perfoms OCR with Kraken."""

        stages = page.stages
        scan = stages.get("clean", None)
        if scan is None:
            return None

        nonLetter = self.nonLetter

        model = self.ensureLoaded()

        blocks = page.blocks
        ocrChars = []
        ocrWords = []
        ocrLines = []
        stages["char"] = ocrChars
        stages["word"] = ocrWords
        stages["line"] = ocrLines
        binary = pil2array(nlbin(array2pil(scan)))

        for ((stripe, block), data) in blocks.items():
            (left, top, right, bottom) = data["inner"]
            thisBinary = binary[top:bottom, left:right]
            lines = data["bands"]["main"]["lines"]
            for (ln, (up, lo)) in enumerate(lines):
                lln = ln + 1
                roi = thisBinary[up : lo + 1]
                (b, e, roi) = removeMargins(roi, keep=16)
                ocrLines.append((stripe, block, lln, left + b, top + up, left + e, top + lo))
                (roiH, roiW) = roi.shape[0:2]
                roi = array2pil(roi)
                bounds = dict(boxes=([0, 0, roiW, roiH],), text_direction=RL)

                # adapt the boxes, because they corresponds to peaks of recognition,
                # not to character extends
                #
                # See https://github.com/mittagessen/kraken/issues/184

                adaptedPreds = []
                for (c, (le, to, ri, bo), conf) in chain.from_iterable(
                    rpred(model, roi, bounds, pad=0, bidi_reordering=True)
                ):
                    if adaptedPreds:
                        prevPred = adaptedPreds[-1]
                        prevEdge = prevPred[1][0]
                    else:
                        prevEdge = roiW
                    correction = int(round((prevEdge - ri) / 2))
                    thisRi = ri + correction
                    if adaptedPreds:
                        adaptedPreds[-1][1][0] -= correction
                    adaptedPreds.append([c, [le, to, thisRi, bo], conf])
                if adaptedPreds:
                    adaptedPreds[-1][1][0] = 0

                # divide into words, not only on spaces, but also on punctuation

                curWord = [[], []]
                inWord = True

                for (c, (le, to, ri, bo), conf) in adaptedPreds:
                    offsetW = left + b
                    offsetH = top + up
                    pos = (le + offsetW, to + offsetH, ri + offsetW, bo + offsetH)
                    conf = int(round(conf * 100))
                    ocrChars.append((stripe, block, lln, *pos, conf, c))

                    spaceSeen = c == " "
                    changeWord = not inWord and c not in nonLetter
                    element = (c, pos, conf)

                    if spaceSeen:
                        curWord[1].append(element)
                    if spaceSeen or changeWord:
                        if curWord[0] or curWord[1]:
                            ocrWords.append((stripe, block, lln, *addWord(curWord)))
                            curWord = [[], []]
                            inWord = True
                            continue

                    if inWord:
                        if c in nonLetter:
                            inWord = False
                    dest = 0 if inWord else 1
                    curWord[dest].append(element)
                if curWord[0] or curWord[1]:
                    ocrWords.append((stripe, block, lln, *addWord(curWord)))

        page.write(stage="line,word,char")
Beispiel #24
0
def detect_scripts(im, bounds, model=pkg_resources.resource_filename(__name__, 'script.mlmodel'), valid_scripts=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of scripts/writing systems.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-lr/rl/vertical-lr/rl'.
        model (str): Location of the script classification model or None for default.
        valid_scripts (list): List of valid scripts.

    Returns:
        {'script_detection': True, 'text_direction': '$dir', 'boxes':
        [[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
        direction and a list of lists of reading order sorted bounding boxes
        under the key 'boxes' with each list containing the script segmentation
        of a single line. Script is a ISO15924 4 character identifier.

    Raises:
        KrakenInvalidModelException if no clstm module is available.
    """
    raise NotImplementedError('Temporarily unavailable. Please open a github ticket if you want this fixed sooner.')
    im_str = get_im_str(im)
    logger.info(u'Detecting scripts with {} in {} lines on {}'.format(model, len(bounds['boxes']), im_str))
    logger.debug(u'Loading detection model {}'.format(model))
    rnn = models.load_any(model)
    # load numerical to 4 char identifier map
    logger.debug(u'Loading label to identifier map')
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    # convert allowed scripts to labels
    val_scripts = []
    if valid_scripts:
        logger.debug(u'Converting allowed scripts list {}'.format(valid_scripts))
        for k, v in n2s.items():
            if v in valid_scripts:
                val_scripts.append(chr(int(k) + 0xF0000))
    else:
        valid_scripts = []
    it = rpred(rnn, im, bounds, bidi_reordering=False)
    preds = []
    logger.debug(u'Running detection')
    for pred, bbox in zip(it, bounds['boxes']):
        # substitute inherited scripts with neighboring runs
        def _subs(m, s, r=False):
            p = u''
            for c in s:
                if c in m and p and not r:
                    p += p[-1]
                elif c not in m and p and r:
                    p += p[-1]
                else:
                    p += c
            return p

        logger.debug(u'Substituting scripts')
        p = _subs([u'\U000f03e2', u'\U000f03e6'], pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(reversed(_subs([u'\U000f03e2', u'\U000f03e6'], reversed(p))))
        # group by valid scripts. two steps: 1. substitute common confusions
        # (Latin->Fraktur and Syriac->Arabic) if given in script list.
        if 'Arab' in valid_scripts and 'Syrc' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f0087', u'\U000f00a0')
        if 'Latn' in valid_scripts and 'Latf' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f00d9', u'\U000f00d7')
        # next merge adjacent scripts
        if val_scripts:
            pred.prediction = _subs(val_scripts, pred.prediction, r=True)

        # group by grapheme
        t = []
        logger.debug(u'Merging detections')
        # if line contains only a single script return whole line bounding box
        if len(set(pred.prediction)) == 1:
            logger.debug('Only one script on line. Emitting whole line bbox')
            k = ord(pred.prediction[0]) - 0xF0000
            t.append((n2s[str(k)], bbox))
        else:
            for k, g in groupby(pred, key=lambda x: x[0]):
                # convert to ISO15924 numerical identifier
                k = ord(k) - 0xF0000
                b = max_bbox(x[1] for x in g)
                t.append((n2s[str(k)], b))
        preds.append(t)
    return {'boxes': preds, 'text_direction': bounds['text_direction'], 'script_detection': True}
Beispiel #25
0
 def test_rpred_outbounds(self):
     """
     Tests correct handling of invalid line coordinates.
     """
     pred = rpred(None, self.im, [(-1, -1, 10000, 10000)])
     next(pred)
Beispiel #26
0
def recognizer(model, pad, no_segmentation, bidi_reordering, script_ignore, base_image, input, output, lines) -> None:

    import json
    import tempfile

    from kraken import rpred

    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()

    # input may either be output from the segmenter then it is a JSON file or
    # be an image file when running the OCR subcommand alone. might still come
    # from some other subcommand though.
    scripts = set()
    if not lines and base_image != input:
        lines = input
    if not lines:
        if no_segmentation:
            lines = tempfile.NamedTemporaryFile(mode='w', delete=False)
            logger.info('Running in no_segmentation mode. Creating temporary segmentation {}.'.format(lines.name))
            json.dump({'script_detection': False,
                       'text_direction': 'horizontal-lr',
                       'boxes': [(0, 0) + im.size]}, lines)
            lines.close()
            lines = lines.name
        else:
            raise click.UsageError('No line segmentation given. Add one with `-l` or run `segment` first.')
    elif no_segmentation:
        logger.warning('no_segmentation mode enabled but segmentation defined. Ignoring --no-segmentation option.')

    with open_file(lines, 'r') as fp:
        try:
            fp = cast(IO[Any], fp)
            bounds = json.load(fp)
        except ValueError as e:
            raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
        # script detection
        if bounds['script_detection']:
            for l in bounds['boxes']:
                for t in l:
                    scripts.add(t[0])
            it = rpred.mm_rpred(model, im, bounds, pad,
                                bidi_reordering=bidi_reordering,
                                script_ignore=script_ignore)
        else:
            it = rpred.rpred(model['default'], im, bounds, pad,
                             bidi_reordering=bidi_reordering)

    if not lines and no_segmentation:
        logger.debug('Removing temporary segmentation file.')
        os.unlink(lines.name)

    preds = []

    with log.progressbar(it, label='Processing', length=len(bounds['boxes'])) as bar:
        for pred in bar:
            preds.append(pred)

    ctx = click.get_current_context()
    with open_file(output, 'w', encoding='utf-8') as fp:
        fp = cast(IO[Any], fp)
        message('Writing recognition results for {}\t'.format(base_image), nl=False)
        logger.info('Serializing as {} into {}'.format(ctx.meta['mode'], output))
        if ctx.meta['mode'] != 'text':
            from kraken import serialization
            fp.write(serialization.serialize(preds, base_image,
                                             Image.open(base_image).size,
                                             ctx.meta['text_direction'],
                                             scripts,
                                             ctx.meta['mode']))
        else:
            fp.write('\n'.join(s.prediction for s in preds))
        message('\u2713', fg='green')
Beispiel #27
0
def recognizer(model, pad, bidi_reordering, script_ignore, base_image, input,
               output, lines):
    try:
        im = Image.open(base_image)
    except IOError as e:
        raise click.BadParameter(str(e))

    ctx = click.get_current_context()

    scripts = None

    st_time = time.time()

    if not lines:
        lines = input
    with open_file(lines, 'r') as fp:
        bounds = json.load(fp)
        # script detection
        if bounds['script_detection']:
            scripts = set()
            for l in bounds['boxes']:
                for t in l:
                    scripts.add(t[0])
            it = rpred.mm_rpred(model,
                                im,
                                bounds,
                                pad,
                                bidi_reordering=bidi_reordering,
                                script_ignore=script_ignore)
        else:
            it = rpred.rpred(model['default'],
                             im,
                             bounds,
                             pad,
                             bidi_reordering=bidi_reordering)

    preds = []

    for pred in it:
        spin('Processing')
        preds.append(pred)
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)

    ctx = click.get_current_context()
    with open_file(output, 'w', encoding='utf-8') as fp:
        message(u'Writing recognition results for {}\t'.format(base_image),
                nl=False)
        if PY2:
            output = output.decode('utf-8')
        logger.info(u'Serializing as {} into {}'.format(
            ctx.meta['mode'], output))
        if ctx.meta['mode'] != 'text':
            fp.write(
                serialization.serialize(preds, base_image,
                                        Image.open(base_image).size,
                                        ctx.meta['text_direction'], scripts,
                                        ctx.meta['mode']))
        else:
            fp.write(u'\n'.join(s.prediction for s in preds))
        if not ctx.meta['verbose']:
            message(u'\u2713', fg='green')
Beispiel #28
0
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps,
                  font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError(
            '--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning(
                    'Input {} is in {} color mode. Converting to RGB'.format(
                        fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin,
                                      text_direction,
                                      scale,
                                      maxcolseps,
                                      black_colseps,
                                      pad=pad)
            else:
                with open_file(lines, 'r') as fp:
                    try:
                        fp = cast(IO[Any], fp)
                        res = json.load(fp)
                    except ValueError as e:
                        raise click.UsageError(
                            '{} invalid segmentation: {}'.format(
                                lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
Beispiel #29
0
import sys

from PIL import Image, ImageDraw

from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from itertools import cycle
from kraken.lib import models

cmap = cycle([(230, 25, 75, 127), (60, 180, 75, 127), (255, 225, 25, 127),
              (0, 130, 200, 127), (245, 130, 48, 127), (145, 30, 180, 127),
              (70, 240, 240, 127)])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    im.save('high_{}'.format(os.path.basename(fname)))
Beispiel #30
0
def recognizer(model, pad, no_segmentation, bidi_reordering, script_ignore,
               input, output) -> None:

    import json

    from kraken import rpred

    ctx = click.get_current_context()

    bounds = None
    if 'base_image' not in ctx.meta:
        ctx.meta['base_image'] = input

    if ctx.meta['first_process']:
        if ctx.meta['input_format_type'] != 'image':
            doc = get_input_parser(ctx.meta['input_format_type'])(input)
            ctx.meta['base_image'] = doc['image']
            doc['text_direction'] = 'horizontal-lr'
            bounds = doc
    try:
        im = Image.open(ctx.meta['base_image'])
    except IOError as e:
        raise click.BadParameter(str(e))

    if not bounds and ctx.meta['base_image'] != input:
        with open_file(input, 'r') as fp:
            try:
                fp = cast(IO[Any], fp)
                bounds = json.load(fp)
            except ValueError as e:
                raise click.UsageError(
                    f'{input} invalid segmentation: {str(e)}')
    elif not bounds:
        if no_segmentation:
            bounds = {
                'script_detection': False,
                'text_direction': 'horizontal-lr',
                'boxes': [(0, 0) + im.size]
            }
        else:
            raise click.UsageError(
                'No line segmentation given. Add one with the input or run `segment` first.'
            )
    elif no_segmentation:
        logger.warning(
            'no_segmentation mode enabled but segmentation defined. Ignoring --no-segmentation option.'
        )

    scripts = set()
    # script detection
    if 'script_detection' in bounds and bounds['script_detection']:
        it = rpred.mm_rpred(model,
                            im,
                            bounds,
                            pad,
                            bidi_reordering=bidi_reordering,
                            script_ignore=script_ignore)
    else:
        it = rpred.rpred(model['default'],
                         im,
                         bounds,
                         pad,
                         bidi_reordering=bidi_reordering)

    preds = []

    with log.progressbar(it, label='Processing') as bar:
        for pred in bar:
            preds.append(pred)

    ctx = click.get_current_context()
    with open_file(output, 'w', encoding='utf-8') as fp:
        fp = cast(IO[Any], fp)
        message(f'Writing recognition results for {ctx.meta["orig_file"]}\t',
                nl=False)
        logger.info('Serializing as {} into {}'.format(ctx.meta['output_mode'],
                                                       output))
        if ctx.meta['output_mode'] != 'native':
            from kraken import serialization
            fp.write(
                serialization.serialize(
                    preds, ctx.meta['base_image'],
                    Image.open(ctx.meta['base_image']).size,
                    ctx.meta['text_direction'], scripts,
                    bounds['regions'] if 'regions' in bounds else None,
                    ctx.meta['output_mode']))
        else:
            fp.write('\n'.join(s.prediction for s in preds))
        message('\u2713', fg='green')
            x, y, w, h = one_box
            ## Different color for each row
            r = random.randint(0, 255)
            g = random.randint(0, 255)
            b = random.randint(0, 255)

            # Drawing box
            cv2.rectangle(dummy_image, (x, y), (x + w, y + h), (b, g, r), 2)
            #########################################################################################

            ## Kraken Text Extraction
            cord = [x, y, x + w, y + h]
            bound = {'boxes': [tuple(cord)], 'text_direction': 'horizontal-lr'}

            ## Using Kraken API
            generator = rpred.rpred(network=model, im=genrator_image, bounds=bound)
            nxt_gen = next(generator)
            box_text = nxt_gen.prediction
            print("Box_Text = {} | Y = {}".format(box_text, y))

            ##########################################################################################

            ## Kraken bash script
            # small = img[y:y + h, x:x + w]
            # cv2.imwrite("images/temp.jpg", small)
            # box_text = " "
            # try:
            #     call(["kraken", "-i", "images/temp.jpg", "image.txt", "binarize", "segment", "ocr"])
            #     box_text = open("image.txt", "r").read()
            # except Exception as e:
            #     pass
Beispiel #32
0
def transcription(ctx, text_direction, scale, bw, maxcolseps,
                  black_colseps, font, font_style, prefill, pad, lines, output,
                  images):
    """
    Creates transcription environments for ground truth generation.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError('--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning('Input {} is in {} color mode. Converting to RGB'.format(fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
            else:
                with open_file(lines, 'r') as fp:
                    try:
                        fp = cast(IO[Any], fp)
                        res = json.load(fp)
                    except ValueError as e:
                        raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
Beispiel #33
0
def detect_scripts(im,
                   bounds,
                   model=pkg_resources.resource_filename(
                       __name__, 'script.mlmodel'),
                   valid_scripts=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of scripts/writing systems.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-lr/rl/vertical-lr/rl'.
        model (str): Location of the script classification model or None for default.
        valid_scripts (list): List of valid scripts.

    Returns:
        {'script_detection': True, 'text_direction': '$dir', 'boxes':
        [[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
        direction and a list of lists of reading order sorted bounding boxes
        under the key 'boxes' with each list containing the script segmentation
        of a single line. Script is a ISO15924 4 character identifier.

    Raises:
        KrakenInvalidModelException if no clstm module is available.
    """
    raise NotImplementedError(
        'Temporarily unavailable. Please open a github ticket if you want this fixed sooner.'
    )
    im_str = get_im_str(im)
    logger.info(u'Detecting scripts with {} in {} lines on {}'.format(
        model, len(bounds['boxes']), im_str))
    logger.debug(u'Loading detection model {}'.format(model))
    rnn = models.load_any(model)
    # load numerical to 4 char identifier map
    logger.debug(u'Loading label to identifier map')
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    # convert allowed scripts to labels
    val_scripts = []
    if valid_scripts:
        logger.debug(
            u'Converting allowed scripts list {}'.format(valid_scripts))
        for k, v in n2s.items():
            if v in valid_scripts:
                val_scripts.append(chr(int(k) + 0xF0000))
    else:
        valid_scripts = []
    it = rpred(rnn, im, bounds, bidi_reordering=False)
    preds = []
    logger.debug(u'Running detection')
    for pred, bbox in zip(it, bounds['boxes']):
        # substitute inherited scripts with neighboring runs
        def _subs(m, s, r=False):
            p = u''
            for c in s:
                if c in m and p and not r:
                    p += p[-1]
                elif c not in m and p and r:
                    p += p[-1]
                else:
                    p += c
            return p

        logger.debug(u'Substituting scripts')
        p = _subs([u'\U000f03e2', u'\U000f03e6'], pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(
            reversed(_subs([u'\U000f03e2', u'\U000f03e6'], reversed(p))))
        # group by valid scripts. two steps: 1. substitute common confusions
        # (Latin->Fraktur and Syriac->Arabic) if given in script list.
        if 'Arab' in valid_scripts and 'Syrc' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f0087',
                                                      u'\U000f00a0')
        if 'Latn' in valid_scripts and 'Latf' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f00d9',
                                                      u'\U000f00d7')
        # next merge adjacent scripts
        if val_scripts:
            pred.prediction = _subs(val_scripts, pred.prediction, r=True)

        # group by grapheme
        t = []
        logger.debug(u'Merging detections')
        # if line contains only a single script return whole line bounding box
        if len(set(pred.prediction)) == 1:
            logger.debug('Only one script on line. Emitting whole line bbox')
            k = ord(pred.prediction[0]) - 0xF0000
            t.append((n2s[str(k)], bbox))
        else:
            for k, g in groupby(pred, key=lambda x: x[0]):
                # convert to ISO15924 numerical identifier
                k = ord(k) - 0xF0000
                b = max_bbox(x[1] for x in g)
                t.append((n2s[str(k)], b))
        preds.append(t)
    return {
        'boxes': preds,
        'text_direction': bounds['text_direction'],
        'script_detection': True
    }