Example #1
 def test_load_pyrnn_no_seqrecognizer(self):
     """
     Test correct handling of non-SeqRecognizer pickles.
     """
     pickle.dump(u'Iámnõtãrécðçnízer', self.temp)
     self.temp.close()
     models.load_any(self.temp.name)
Example #2
 def test_load_pyrnn_no_seqrecognizer(self):
     """
     Test correct handling of non-SeqRecognizer pickles.
     """
     pickle.dump(u'Iámnõtãrécðçnízer', self.temp)
     self.temp.close()
     models.load_any(self.temp.name)
Example #3
 def test_load_pyrnn_no_seqrecognizer(self):
     """
     Test correct handling of non-SeqRecognizer pickles.
     """
     pickle.dump(u'Iámnõtãrécðçnízer', self.temp)
     self.temp.close()
     with raises(KrakenInvalidModelException):
         models.load_any(self.temp.name)
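Examples #1-#3 assume a unittest fixture that provides a writable temporary file in `self.temp`; the snippets don't show it, so the following setUp/tearDown is a hypothetical reconstruction:

    import os
    import tempfile
    import unittest

    class TestModels(unittest.TestCase):
        def setUp(self):
            # named temporary file the tests pickle into; delete=False so
            # load_any can reopen it by name after close()
            self.temp = tempfile.NamedTemporaryFile(delete=False)

        def tearDown(self):
            os.unlink(self.temp.name)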
Example #4
 def test_load_clstm(self):
     """
     Tests loading of valid clstm files.
     """
     rnn = models.load_any(
         os.path.join(resources, 'toy.clstm').encode('utf-8'))
     self.assertIsInstance(rnn, models.TorchSeqRecognizer)
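Across these snippets load_any is handed bytes (this example), plain str paths, and pathlib.Path objects (examples #26 and #28), reflecting different kraken versions. A minimal sketch, assuming a local toy.clstm file:

    from pathlib import Path
    from kraken.lib import models

    rnn = models.load_any('toy.clstm')        # str path
    rnn = models.load_any(Path('toy.clstm'))  # pathlib.Path (newer versions)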
Example #5
def ocr(ctx, model, pad, reorder, base_dir, no_segmentation, text_direction,
        threads):
    """
    Recognizes text in line images.
    """
    from kraken.lib import models

    if ctx.meta['input_format_type'] != 'image' and no_segmentation:
        raise click.BadParameter(
            'no_segmentation mode is incompatible with page/alto inputs')

    if reorder and base_dir != 'auto':
        reorder = base_dir

    # first we try to find the model in the absolute path, then ~/.kraken, then
    # LEGACY_MODEL_DIR
    nm = {}  # type: Dict[str, models.TorchSeqRecognizer]
    ign_tags = model.pop('ignore')
    for k, v in model.items():
        search = [
            v,
            os.path.join(click.get_app_dir(APP_NAME), v),
            os.path.join(LEGACY_MODEL_DIR, v)
        ]
        location = None
        for loc in search:
            if os.path.isfile(loc):
                location = loc
                break
        if not location:
            raise click.BadParameter(f'No model for {k} found')
        message(f'Loading ANN {k}\t', nl=False)
        try:
            rnn = models.load_any(location, device=ctx.meta['device'])
            nm[k] = rnn
        except Exception:
            if ctx.meta['raise_failed']:
                raise
            message('\u2717', fg='red')
            ctx.exit(1)
        message('\u2713', fg='green')

    if 'default' in nm:
        from collections import defaultdict

        nn = defaultdict(lambda: nm['default']
                         )  # type: Dict[str, models.TorchSeqRecognizer]
        nn.update(nm)
        nm = nn
    # thread count is global so setting it once is sufficient
    nm[k].nn.set_num_threads(threads)

    # set output mode
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer,
                   model=nm,
                   pad=pad,
                   no_segmentation=no_segmentation,
                   bidi_reordering=reorder,
                   tags_ignore=ign_tags)
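The defaultdict above makes every tag without an explicit model fall back to the catch-all 'default' entry. A self-contained illustration (model names are placeholders):

    from collections import defaultdict

    nm = {'default': 'catchall.mlmodel', 'Arab': 'arabic.mlmodel'}
    nn = defaultdict(lambda: nm['default'])
    nn.update(nm)
    assert nn['Arab'] == 'arabic.mlmodel'    # explicit mapping wins
    assert nn['Grek'] == 'catchall.mlmodel'  # unseen tag falls back to default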
Example #6
 def test_rpred_outbounds(self):
     """
     Tests correct handling of invalid line coordinates.
     """
     nn = load_any(os.path.join(resources, 'toy.clstm'))
     pred = rpred(nn, self.im, {'boxes': [[-1, -1, 10000, 10000]], 'text_direction': 'horizontal'}, True)
     next(pred)
Example #7
File: ketos.py Project: yufish/kraken
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images):
    st_time = time.time()
    ti = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        if not ctx.meta['verbose']:
            click.secho(u'\b\u2713', fg='green', nl=False)
            click.echo('\033[?25h\n', nl=False)

    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            for pred in it: 
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                click.secho(u'\b\u2713', fg='green', nl=False)
                click.echo('\033[?25h\n', nl=False)
            ti.add_page(im, res, records=preds)
        else:
            ti.add_page(im, res)
        fp.close()
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
Example #8
File: ketos.py Project: mittagessen/kraken
def publish(ctx, metadata, access_token, model):
    """
    Publishes a model on the zenodo model repository.
    """
    import json
    import pkg_resources

    from functools import partial
    from jsonschema import validate
    from jsonschema.exceptions import ValidationError

    from kraken import repo
    from kraken.lib import models

    with pkg_resources.resource_stream(__name__, 'metadata.schema.json') as fp:
        schema = json.load(fp)

    nn = models.load_any(model)

    if not metadata:
        author = click.prompt('author')
        affiliation = click.prompt('affiliation')
        summary = click.prompt('summary')
        description = click.edit('Write long form description (training data, transcription standards) of the model here')
        accuracy_default = None
        # take last accuracy measurement in model metadata
        if 'accuracy' in nn.nn.user_metadata and nn.nn.user_metadata['accuracy']:
            accuracy_default = nn.nn.user_metadata['accuracy'][-1][1] * 100
        accuracy = click.prompt('accuracy on test set', type=float, default=accuracy_default)
        script = [click.prompt('script', type=click.Choice(sorted(schema['properties']['script']['items']['enum'])), show_choices=True)]
        license = click.prompt('license', type=click.Choice(sorted(schema['properties']['license']['enum'])), show_choices=True)
        metadata = {
                'authors': [{'name': author, 'affiliation': affiliation}],
                'summary': summary,
                'description': description,
                'accuracy': accuracy,
                'license': license,
                'script': script,
                'name': os.path.basename(model),
                'graphemes': ['a']
        }
        while True:
            try:
                validate(metadata, schema)
            except ValidationError as e:
                message(e.message)
                metadata[e.path[-1]] = click.prompt(e.path[-1], type=float if e.schema['type'] == 'number' else str)
                continue
            break

    else:
        metadata = json.load(metadata)
        validate(metadata, schema)
    metadata['graphemes'] = [char for char in ''.join(nn.codec.c2l.keys())]
    oid = repo.publish_model(model, metadata, access_token, partial(message, '.', nl=False))
    print('\nmodel PID: {}'.format(oid))
Example #9
File: kraken.py Project: andbue/kraken
def ocr(ctx, model, pad, reorder, serialization, text_direction, lines, conv):
    """
    Recognizes text in line images.
    """
    # we do the locating and loading of the model here to spare us the overhead
    # in each worker.

    # first we try to find the model in the absolute path, then ~/.kraken, then
    # LEGACY_MODEL_DIR
    search = [
        model,
        os.path.join(click.get_app_dir(APP_NAME), model),
        os.path.join(LEGACY_MODEL_DIR, model)
    ]
    # if automatic conversion is enabled we look for a converted model in
    # ~/.kraken
    if conv is True:
        search.insert(
            0,
            os.path.join(
                click.get_app_dir(APP_NAME),
                os.path.basename(os.path.splitext(model)[0]) + '.hdf5'))
    location = None
    for loc in search:
        if os.path.isfile(loc):
            location = loc
            break
    if not location:
        raise click.BadParameter('No model found')
    click.echo('Loading RNN\t', nl=False)
    try:
        rnn = models.load_any(location)
    except Exception:
        click.secho(u'\u2717', fg='red')
        raise
    click.secho(u'\u2713', fg='green')

    # convert input model to protobuf
    if conv and rnn.kind == 'pyrnn':
        name, _ = os.path.splitext(os.path.basename(model))
        op = os.path.join(click.get_app_dir(APP_NAME), name + '.pronn')
        try:
            os.makedirs(click.get_app_dir(APP_NAME))
        except OSError:
            pass
        models.pyrnn_to_pronn(rnn, op)

    # set output mode
    ctx.meta['mode'] = serialization
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer,
                   model=rnn,
                   pad=pad,
                   bidi_reordering=reorder,
                   lines=lines)
Example #10
 def test_rpred_outbounds(self):
     """
     Tests correct handling of invalid line coordinates.
     """
     nn = load_any(os.path.join(resources, 'toy.clstm'))
     pred = rpred(nn, self.im, {
         'boxes': [[-1, -1, 10000, 10000]],
         'text_direction': 'horizontal'
     }, True)
     next(pred)
Example #11
File: ketos.py Project: QuLogic/ocropy
def transcription(ctx, font, font_style, prefill, output, images):
    st_time = time.time()
    ti = transcrib.TranscriptionInterface(font, font_style)

    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill)
        if not ctx.meta['verbose']:
            click.secho(u'\b\u2713', fg='green', nl=False)
            click.echo('\033[?25h\n', nl=False)

    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im)
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            for pred in it: 
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                click.secho(u'\b\u2713', fg='green', nl=False)
                click.echo('\033[?25h\n', nl=False)
            ti.add_page(im, records=preds)
        else:
            ti.add_page(im, res)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
Example #12
def ocr(ctx, model, pad, reorder, no_segmentation, serializer, text_direction,
        lines, threads):
    """
    Recognizes text in line images.
    """
    from kraken.lib import models

    # first we try to find the model in the absolute path, then ~/.kraken, then
    # LEGACY_MODEL_DIR
    nm = {}  # type: Dict[str, models.TorchSeqRecognizer]
    ign_scripts = model.pop('ignore')
    for k, v in model.items():
        search = [
            v,
            os.path.join(click.get_app_dir(APP_NAME), v),
            os.path.join(LEGACY_MODEL_DIR, v)
        ]
        location = None
        for loc in search:
            if os.path.isfile(loc):
                location = loc
                break
        if not location:
            raise click.BadParameter('No model for {} found'.format(k))
        message('Loading RNN {}\t'.format(k), nl=False)
        try:
            rnn = models.load_any(location, device=ctx.meta['device'])
            nm[k] = rnn
        except Exception:
            message('\u2717', fg='red')
            raise
        message('\u2713', fg='green')

    if 'default' in nm:
        from collections import defaultdict

        nn = defaultdict(lambda: nm['default']
                         )  # type: Dict[str, models.TorchSeqRecognizer]
        nn.update(nm)
        nm = nn
    # thread count is global so setting it once is sufficient
    nm[k].nn.set_num_threads(threads)

    # set output mode
    ctx.meta['mode'] = serializer
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer,
                   model=nm,
                   pad=pad,
                   no_segmentation=no_segmentation,
                   bidi_reordering=reorder,
                   script_ignore=ign_scripts,
                   lines=lines)
Example #13
def ocr(ctx, model, pad, reorder, serialization, text_direction, lines, conv):
    """
    Recognizes text in line images.
    """
    # we do the locating and loading of the model here to spare us the overhead
    # in each worker.

    # first we try to find the model in the absolute path, then ~/.kraken, then
    # LEGACY_MODEL_DIR
    nm = {}
    for k, v in model.items():
        search = [v,
                  os.path.join(click.get_app_dir(APP_NAME), v),
                  os.path.join(LEGACY_MODEL_DIR, v)]
        # if automatic conversion is enabled we look for a converted model in
        # ~/.kraken
        if conv is True:
            search.insert(0, os.path.join(click.get_app_dir(APP_NAME),
                          os.path.basename(os.path.splitext(v)[0]) + '.pronn'))
        location = None
        for loc in search:
            if os.path.isfile(loc):
                location = loc
                break
        if not location:
            raise click.BadParameter('No model for {} found'.format(k))
        message('Loading RNN {}\t'.format(k), nl=False)
        try:
            rnn = models.load_any(location.encode('utf-8'))
            nm[k] = rnn
        except Exception:
            message(u'\u2717', fg='red')
            raise
        message(u'\u2713', fg='green')

        # convert input model to protobuf
        if conv and rnn.kind == 'pyrnn':
            name, _ = os.path.splitext(os.path.basename(v))
            op = os.path.join(click.get_app_dir(APP_NAME), name.encode('utf-8') + '.pronn')
            try:
                os.makedirs(click.get_app_dir(APP_NAME))
            except OSError:
                pass
            models.pyrnn_to_pronn(rnn, op)

    if 'default' in nm:
        nn = defaultdict(lambda: nm['default'])
        nn.update(nm)
        nm = nn
    # set output mode
    ctx.meta['mode'] = serialization
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer, model=nm, pad=pad, bidi_reordering=reorder, lines=lines)
Example #14
def ocr(ctx, model, pad, reorder, no_segmentation, serializer, text_direction, lines, threads):
    """
    Recognizes text in line images.
    """
    from kraken.lib import models

    # first we try to find the model in the absolute path, then ~/.kraken, then
    # LEGACY_MODEL_DIR
    nm = {}  # type: Dict[str, models.TorchSeqRecognizer]
    ign_scripts = model.pop('ignore')
    for k, v in model.items():
        search = [v,
                  os.path.join(click.get_app_dir(APP_NAME), v),
                  os.path.join(LEGACY_MODEL_DIR, v)]
        location = None
        for loc in search:
            if os.path.isfile(loc):
                location = loc
                break
        if not location:
            raise click.BadParameter('No model {} for {} found'.format(v, k))
        message('Loading RNN {}\t'.format(k), nl=False)
        try:
            rnn = models.load_any(location, device=ctx.meta['device'])
            nm[k] = rnn
        except Exception:
            message('\u2717', fg='red')
            raise
        message('\u2713', fg='green')

    if 'default' in nm:
        from collections import defaultdict

        nn = defaultdict(lambda: nm['default'])  # type: Dict[str, models.TorchSeqRecognizer]
        nn.update(nm)
        nm = nn
    # thread count is global so setting it once is sufficient
    nm[k].nn.set_num_threads(threads)

    # set output mode
    ctx.meta['mode'] = serializer
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer,
                   model=nm,
                   pad=pad,
                   no_segmentation=no_segmentation,
                   bidi_reordering=reorder,
                   script_ignore=ign_scripts,
                   lines=lines)
Example #15
File: kraken.py Project: tianyaqu/kraken
def ocr(ctx, model, pad, hocr, lines, conv):
    """
    Recognizes text in line images.
    """
    # we do the locating and loading of the model here to spare us the overhead
    # in each worker.

    # first we try to find the model in the absolute path, then ~/.kraken, then
    # LEGACY_MODEL_DIR
    search = [model,
              os.path.join(click.get_app_dir(APP_NAME), model),
              os.path.join(LEGACY_MODEL_DIR, model)]
    # if automatic conversion is enabled we look for a converted model in
    # ~/.kraken
    if conv is True:
        search.insert(0, os.path.join(click.get_app_dir(APP_NAME),
                      os.path.basename(os.path.splitext(model)[0]) + '.hdf5'))
    location = None
    for loc in search:
        if os.path.isfile(loc):
            location = loc
            break
    if not location:
        raise click.BadParameter('No model found')
    click.echo('Loading RNN\t', nl=False)
    try:
        rnn = models.load_any(location)
    except Exception:
        click.secho(u'\u2717', fg='red')
        raise
    click.secho(u'\u2713', fg='green')

    # convert input model to protobuf
    if conv and rnn.kind == 'pyrnn':
        name, _ = os.path.splitext(os.path.basename(model))
        op = os.path.join(click.get_app_dir(APP_NAME), name + '.pronn')
        try:
            os.makedirs(click.get_app_dir(APP_NAME))
        except OSError:
            pass
        models.pyrnn_to_pronn(rnn, op)

    # set output mode
    if hocr:
        ctx.meta['mode'] = 'hocr'
    else:
        ctx.meta['mode'] = 'text'
    return partial(recognizer, model=rnn, pad=pad, lines=lines)
Example #16
def cli(format_type, model, output, files):
    """
    A script producing overlays of lines and regions from either ALTO or
    PageXML files, or from running a model to do the same.
    """
    if len(files) == 0:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image, ImageDraw

    from kraken.lib import models, xml
    from kraken import align, serialization

    if format_type == 'xml':
        fn = xml.parse_xml
    elif format_type == 'alto':
        fn = xml.parse_alto
    else:
        fn = xml.parse_page
    click.echo(f'Loading model {model}')
    net = models.load_any(model)

    for doc in files:
        click.echo(f'Processing {doc} ', nl=False)
        data = fn(doc)
        im = Image.open(data['image']).convert('RGBA')
        records = align.forced_align(data, net)
        if output == 'overlay':
            tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(tmp)
            for record in records:
                for pol in record.cuts:
                    c = next(cmap)
                    draw.polygon([tuple(x) for x in pol],
                                 fill=c,
                                 outline=c[:3])
            base_image = Image.alpha_composite(im, tmp)
            base_image.save(f'high_{os.path.basename(doc)}_algn.png')
        else:
            with open(f'{os.path.basename(doc)}_algn.xml', 'w') as fp:
                fp.write(
                    serialization.serialize(records,
                                            image_name=data['image'],
                                            regions=data['regions'],
                                            template=output))
        click.secho('\u2713', fg='green')
Example #17
File: ocr.py Project: among/fusus
    def ensureLoaded(self):
        if self.model is None:
            engine = self.engine
            C = engine.C
            tm = engine.tm
            info = tm.info
            modelPath = C.modelPath

            info(f"Loading for Kraken: {unexpanduser(modelPath)}", force=True)
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore")
                model = load_any(modelPath)
            info("model loaded", force=True)

            self.model = model
        return self.model
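The ensureLoaded pattern above caches the recognizer on first use; a stripped-down illustration of the same lazy-loading idea, with the fusus-specific engine plumbing omitted:

    class LazyModel:
        def __init__(self, path):
            self.path = path
            self.model = None

        def ensure_loaded(self):
            # load_any runs only once; later calls reuse the cached model
            if self.model is None:
                from kraken.lib.models import load_any
                self.model = load_any(self.path)
            return self.model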
Example #18
File: ketos.py Project: tewhalen/kraken
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images, segment_page):
    ti = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        message(u'\b\u2713', fg='green', nl=False)
        message('\033[?25h\n', nl=False)

    for fp in images:
        logger.info('Reading {}'.format(fp.name))
        spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            logger.info(u'Binarizing page')
            im = binarization.nlbin(im)
        if segment_page:
            logger.info(u'Segmenting page')
            res = pageseg.segment(im, text_direction, scale, maxcolseps,
                                  black_colseps)
        else:
            res = {
                'text_direction': 'horizontal-tb',
                'boxes': [(0, 0) + im.size]
            }
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            for pred in it:
                logger.info('{}'.format(pred.prediction))
                spin('Recognizing')
                preds.append(pred)
            message(u'\b\u2713', fg='green', nl=False)
            message('\033[?25h\n', nl=False)
            ti.add_page(im, res, records=preds)
        else:
            ti.add_page(im, res)
        fp.close()
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
    logger.info(u'Writing transcription to {}'.format(output.name))
    spin('Writing output')
    ti.write(output)
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
Example #19
File: ketos.py Project: mittagessen/kraken
def transcription(ctx, text_direction, scale, bw, maxcolseps,
                  black_colseps, font, font_style, prefill, pad, lines, output,
                  images):
    """
    Creates transcription environments for ground truth generation.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError('--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning('Input {} is in {} color mode. Converting to RGB'.format(fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
            else:
                with open_file(lines, 'r') as fp:
                    try:
                        fp = cast(IO[Any], fp)
                        res = json.load(fp)
                    except ValueError as e:
                        raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
Example #20
def detect_scripts(im,
                   bounds,
                   model=pkg_resources.resource_filename(
                       __name__, 'script.mlmodel'),
                   valid_scripts=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of scripts/writing systems.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-lr/rl/vertical-lr/rl'.
        model (str): Location of the script classification model or None for default.
        valid_scripts (list): List of valid scripts.

    Returns:
        {'script_detection': True, 'text_direction': '$dir', 'boxes':
        [[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
        direction and a list of lists of reading order sorted bounding boxes
        under the key 'boxes' with each list containing the script segmentation
        of a single line. Script is an ISO15924 4 character identifier.

    Raises:
        KrakenInvalidModelException if no clstm module is available.
    """
    raise NotImplementedError(
        'Temporarily unavailable. Please open a github ticket if you want this fixed sooner.'
    )
    im_str = get_im_str(im)
    logger.info(u'Detecting scripts with {} in {} lines on {}'.format(
        model, len(bounds['boxes']), im_str))
    logger.debug(u'Loading detection model {}'.format(model))
    rnn = models.load_any(model)
    # load numerical to 4 char identifier map
    logger.debug(u'Loading label to identifier map')
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    # convert allowed scripts to labels
    val_scripts = []
    if valid_scripts:
        logger.debug(
            u'Converting allowed scripts list {}'.format(valid_scripts))
        for k, v in n2s.items():
            if v in valid_scripts:
                val_scripts.append(chr(int(k) + 0xF0000))
    else:
        valid_scripts = []
    it = rpred(rnn, im, bounds, bidi_reordering=False)
    preds = []
    logger.debug(u'Running detection')
    for pred, bbox in zip(it, bounds['boxes']):
        # substitute inherited scripts with neighboring runs
        def _subs(m, s, r=False):
            p = u''
            for c in s:
                if c in m and p and not r:
                    p += p[-1]
                elif c not in m and p and r:
                    p += p[-1]
                else:
                    p += c
            return p

        logger.debug(u'Substituting scripts')
        p = _subs([u'\U000f03e2', u'\U000f03e6'], pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(
            reversed(_subs([u'\U000f03e2', u'\U000f03e6'], reversed(p))))
        # group by valid scripts. two steps: 1. substitute common confusions
        # (Latin->Fraktur and Syriac->Arabic) if given in script list.
        if 'Arab' in valid_scripts and 'Syrc' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f0087',
                                                      u'\U000f00a0')
        if 'Latn' in valid_scripts and 'Latf' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f00d9',
                                                      u'\U000f00d7')
        # next merge adjacent scripts
        if val_scripts:
            pred.prediction = _subs(val_scripts, pred.prediction, r=True)

        # group by grapheme
        t = []
        logger.debug(u'Merging detections')
        # if line contains only a single script return whole line bounding box
        if len(set(pred.prediction)) == 1:
            logger.debug('Only one script on line. Emitting whole line bbox')
            k = ord(pred.prediction[0]) - 0xF0000
            t.append((n2s[str(k)], bbox))
        else:
            for k, g in groupby(pred, key=lambda x: x[0]):
                # convert to ISO15924 numerical identifier
                k = ord(k) - 0xF0000
                b = max_bbox(x[1] for x in g)
                t.append((n2s[str(k)], b))
        preds.append(t)
    return {
        'boxes': preds,
        'text_direction': bounds['text_direction'],
        'script_detection': True
    }
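The `_subs` helper propagates the preceding character over inherited-script code points (forward pass); the reversed second pass then fixes inherited marks at the start of a line. A tiny illustration with ASCII stand-ins for the private-use script labels:

    def _subs(m, s, r=False):
        p = ''
        for c in s:
            if c in m and p and not r:
                p += p[-1]
            elif c not in m and p and r:
                p += p[-1]
            else:
                p += c
        return p

    # 'x' stands in for an inherited script label: it takes on the
    # script of the preceding character
    assert _subs(['x'], 'aaxbb') == 'aaabb'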
Example #21
File: ketos.py Project: rsharmapty/kraken
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps,
                  font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError(
            '--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning(
                    'Input {} is in {} color mode. Converting to RGB'.format(
                        fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin,
                                      text_direction,
                                      scale,
                                      maxcolseps,
                                      black_colseps,
                                      pad=pad)
            else:
                with open_file(lines, 'r') as fp:
                    try:
                        fp = cast(IO[Any], fp)
                        res = json.load(fp)
                    except ValueError as e:
                        raise click.UsageError(
                            '{} invalid segmentation: {}'.format(
                                lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
Example #22
File: kraken.py Project: brunsgaard/nidaba
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string appended to all output files
        model (unicode): Identifier for the font model to use

    Returns:
        (unicode, unicode): Storage tuple for the output file
    """
    output_path = (
        doc[0],
        os.path.splitext(storage.insert_suffix(doc[1], method, model))[0] +
        '.xml')
    logger.debug('Loading model {}'.format(model))
    try:
        rnn = models.load_any(mod_db[model])
    except Exception as e:
        raise NidabaInvalidParameterException(str(e))
    logger.debug('Reading TEI segmentation from {}'.format(doc))
    tei = OCRRecord()
    with storage.StorageFile(*doc) as seg:
        tei.load_tei(seg)

    img = Image.open(
        storage.get_abs_path(*storage.get_storage_path_url(tei.img)))
    if is_bitonal(img):
        img = img.convert('1')
    else:
        raise NidabaInvalidParameterException('Input image is not bitonal')

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    i = 0
    logger.debug('Start recognizing characters')
    for line_id, rec in izip(
            lines,
            rpred.rpred(
                rnn, img, {
                    'text_direction': 'horizontal-tb',
                    'boxes': [list(x['bbox']) for x in lines.itervalues()]
                })):
        # scope the current line and add all graphemes recognized by kraken to
        # it.
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)
        i += 1

        splits = regex.split(r'(\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset +
                                             len(segment)])
                logger.debug(
                    'Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(
                    rec.prediction[line_offset:line_offset + len(segment)]))
                tei.add_graphemes([{
                    'grapheme': x[0],
                    'bbox': x[1],
                    'confidence': int(x[2] * 100)
                } for x in rec[line_offset:line_offset + len(segment)]])
                line_offset += len(segment)
            if whitespace:
                logger.debug('Adding graphemes (whitespace): {}'.format(
                    rec.prediction[line_offset:line_offset + len(whitespace)]))
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset +
                                             len(whitespace)])
                tei.add_segment(seg_bbox)
                tei.add_graphemes([{
                    'grapheme': x[0],
                    'bbox': x[1],
                    'confidence': int(x[2] * 100)
                } for x in rec[line_offset:line_offset + len(whitespace)]])
                line_offset += len(whitespace)
    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write_tei(fp)
    return output_path
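max_bbox, used throughout these nidaba examples (#22, #33, #41), merges per-character cut boxes into one segment box. A plausible sketch of such a helper (the real implementation lives in the kraken/nidaba utilities):

    def max_bbox(boxes):
        # union of (x0, y0, x1, y1) boxes
        xs0, ys0, xs1, ys1 = zip(*boxes)
        return min(xs0), min(ys0), max(xs1), max(ys1)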
Example #23
 def test_load_any_pyrnn(self):
     """
     Test load_any loads pickled models.
     """
     rnn = models.load_any(os.path.join(resources, 'model.pyrnn.gz'))
     self.assertIsInstance(rnn, kraken.lib.lstm.SeqRecognizer)
Example #24
File: ketos.py Project: rsharmapty/kraken
def publish(ctx, metadata, access_token, model):
    """
    Publishes a model on the zenodo model repository.
    """
    import json
    import pkg_resources

    from functools import partial
    from jsonschema import validate
    from jsonschema.exceptions import ValidationError

    from kraken import repo
    from kraken.lib import models

    with pkg_resources.resource_stream(__name__, 'metadata.schema.json') as fp:
        schema = json.load(fp)

    nn = models.load_any(model)

    if not metadata:
        author = click.prompt('author')
        affiliation = click.prompt('affiliation')
        summary = click.prompt('summary')
        description = click.edit(
            'Write long form description (training data, transcription standards) of the model here'
        )
        accuracy_default = None
        # take last accuracy measurement in model metadata
        if 'accuracy' in nn.nn.user_metadata and nn.nn.user_metadata[
                'accuracy']:
            accuracy_default = nn.nn.user_metadata['accuracy'][-1][1] * 100
        accuracy = click.prompt('accuracy on test set',
                                type=float,
                                default=accuracy_default)
        script = [
            click.prompt(
                'script',
                type=click.Choice(
                    sorted(schema['properties']['script']['items']['enum'])),
                show_choices=True)
        ]
        license = click.prompt(
            'license',
            type=click.Choice(sorted(schema['properties']['license']['enum'])),
            show_choices=True)
        metadata = {
            'authors': [{
                'name': author,
                'affiliation': affiliation
            }],
            'summary': summary,
            'description': description,
            'accuracy': accuracy,
            'license': license,
            'script': script,
            'name': os.path.basename(model),
            'graphemes': ['a']
        }
        while True:
            try:
                validate(metadata, schema)
            except ValidationError as e:
                message(e.message)
                metadata[e.path[-1]] = click.prompt(
                    e.path[-1],
                    type=float if e.schema['type'] == 'number' else str)
                continue
            break

    else:
        metadata = json.load(metadata)
        validate(metadata, schema)
    metadata['graphemes'] = [char for char in ''.join(nn.codec.c2l.keys())]
    oid = repo.publish_model(model, metadata, access_token,
                             partial(message, '.', nl=False))
    print('\nmodel PID: {}'.format(oid))
Example #25
 def test_load_any_pyrnn_py3(self):
     """
     Test load_any doesn't load pickled models on python 3
     """
     rnn = models.load_any(os.path.join(resources, 'model.pyrnn.gz'))
Example #26
 def test_load_clstm(self):
     """
     Tests loading of valid clstm files.
     """
     rnn = models.load_any(resources / 'toy.clstm')
     self.assertIsInstance(rnn, models.TorchSeqRecognizer)
Example #27
import os
import sys

from itertools import cycle

from PIL import Image, ImageDraw

from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from kraken.lib import models

cmap = cycle([(230, 25, 75, 127),
              (60, 180, 75, 127),
              (255, 225, 25, 127),
              (0, 130, 200, 127),
              (245, 130, 48, 127),
              (145, 30, 180, 127),
              (70, 240, 240, 127)])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    im.save('high_{}'.format(os.path.basename(fname)))
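An illustrative invocation (script and file names are assumptions): `python overlay.py toy.clstm page1.png page2.png`, which writes a `high_page1.png` overlay into the working directory for each input image.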
Example #28
 def test_load_any_proto(self):
     """
     Test load_any loads protobuf models.
     """
     rnn = models.load_any(resources / 'model.pronn')
     self.assertIsInstance(rnn, kraken.lib.models.TorchSeqRecognizer)
Example #29
 def test_load_invalid(self):
     """
     Tests correct handling of invalid files.
     """
     models.load_any(self.temp.name)
Example #30
 def test_load_invalid(self):
     """
     Tests correct handling of invalid files.
     """
     models.load_any(self.temp.name)
Example #31
 def test_load_any_pyrnn_py3(self):
     """
     Test load_any doesn't load pickled models on python 3
     """
     rnn = models.load_any(os.path.join(resources, 'model.pyrnn.gz'))
Example #32
 def test_load_any_pyrnn(self):
     """
     Test load_any loads pickled models.
     """
     rnn = models.load_any(os.path.join(resources, 'model.pyrnn.gz'))
     self.assertIsInstance(rnn, kraken.lib.lstm.SeqRecognizer)
Example #33
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string appended to all output files
        model (unicode): Identifier for the font model to use

    Returns:
        (unicode, unicode): Storage tuple for the output file
    """
    input_path = storage.get_abs_path(*doc[1])
    output_path = (
        doc[1][0],
        os.path.splitext(storage.insert_suffix(doc[1][1], method, model))[0] +
        '.xml')
    logger.debug('Searching for model {}'.format(model))
    if model in nidaba_cfg['kraken_models']:
        model = storage.get_abs_path(*(nidaba_cfg['kraken_models'][model]))
    elif model in nidaba_cfg['ocropus_models']:
        model = storage.get_abs_path(*(nidaba_cfg['ocropus_models'][model]))
    else:
        raise NidabaInvalidParameterException('Model not defined in '
                                              'configuration')
    img = Image.open(input_path)
    logger.debug('Reading TEI segmentation from {}'.format(doc[1]))
    tei = TEIFacsimile()
    with storage.StorageFile(*doc[0]) as seg:
        tei.read(seg)

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    logger.debug('Loading model {}'.format(model))
    rnn = models.load_any(model)
    i = 0
    logger.debug('Start recognizing characters')
    for rec in rpred.rpred(rnn, img,
                           [(int(x[0]), int(x[1]), int(x[2]), int(x[3]))
                            for x in lines]):
        # scope the current line and add all graphemes recognized by kraken to
        # it.
        logger.debug('Scoping line {}'.format(lines[i][4]))
        tei.scope_line(lines[i][4])
        i += 1

        splits = regex.split(r'(\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset +
                                             len(segment)])
                logger.debug(
                    'Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(
                    rec.prediction[line_offset:line_offset + len(segment)]))
                tei.add_graphemes([
                    (x[0], x[1], int(x[2] * 100))
                    for x in rec[line_offset:line_offset + len(segment)]
                ])
                line_offset += len(segment)
            if whitespace:
                logger.debug('Adding graphemes (whitespace): {}'.format(
                    rec.prediction[line_offset:line_offset + len(whitespace)]))
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset +
                                             len(whitespace)])
                tei.add_segment(seg_bbox)
                tei.add_graphemes([
                    (x[0], x[1], int(x[2] * 100))
                    for x in rec[line_offset:line_offset + len(whitespace)]
                ])
                line_offset += len(whitespace)
    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write(fp)
    return output_path
Example #34
 def test_load_any_proto(self):
     """
     Test load_any loads protobuf models.
     """
     rnn = models.load_any(os.path.join(resources, 'model.pronn'))
     self.assertIsInstance(rnn, kraken.lib.lstm.SeqRecognizer)
Example #35
def simple_example():
    if 'posix' == os.name:
        data_dir_path = '/home/sangwook/work/dataset'
    else:
        data_dir_path = 'D:/work/dataset'
    image_filepath = data_dir_path + '/text/receipt_epapyrus/keit_20190619/크기변환_카드영수증_5-1.png'
    #image_filepath = data_dir_path + '/text/receipt_epapyrus/epapyrus_20190618/receipt_1/img01.jpg'

    try:
        input_image = Image.open(image_filepath)
    except IOError:
        print('Failed to load an image, {}.'.format(image_filepath))
        return

    #--------------------
    threshold = 0.5
    zoom = 0.5
    escale = 1.0
    border = 0.1
    perc = 80  # [1, 100].
    range = 20
    low = 5  # [1, 100].
    high = 90  # [1, 100].

    binary = binarizer(input_image, threshold, zoom, escale, border, perc,
                       range, low, high)

    #--------------------
    text_direction = 'horizontal-lr'  # Sets principal text direction. {'horizontal-lr', 'horizontal-rl', 'vertical-lr', 'vertical-rl'}.
    script_detect = False  # Enable script detection on segmenter output.
    allowed_scripts = None  # List of allowed scripts in script detection output. Ignored if disabled.
    scale = None
    maxcolseps = 2
    black_colseps = False
    remove_hlines = True
    pad = (0, 0)  # Left and right padding around lines.
    mask_filepath = None  # Segmentation mask suppressing page areas for line detection. 0-valued image regions are ignored for segmentation purposes. Disables column detection.

    segments = segmenter(binary, text_direction, script_detect,
                         allowed_scripts, scale, maxcolseps, black_colseps,
                         remove_hlines, pad, mask_filepath)
    # segments.keys() = ['text_direction', 'boxes', 'script_detection'].

    #--------------------
    # Visualize bounding boxes.
    if False:
        import cv2
        rgb = cv2.imread(image_filepath, cv2.IMREAD_COLOR)
        if rgb is None:
            print('Failed to load an image file, {}.'.format(image_filepath))
            return
        else:
            for bbox in segments['boxes']:
                x0, y0, x1, y1 = bbox
                cv2.rectangle(rgb, (x0, y0), (x1, y1), (0, 0, 255), 1,
                              cv2.LINE_AA)
            cv2.imshow('Image', rgb)
            cv2.waitKey(0)

    #--------------------
    # Download model.
    #	kraken.py get 10.5281/zenodo.2577813
    #	python kraken.py get 10.5281/zenodo.2577813
    #		~/.config/kraken
    #		~/.kraken
    #		/usr/local/share/ocropus
    #DEFAULT_MODEL = 'en-default.mlmodel'
    DEFAULT_MODEL = './en_best.mlmodel'

    #model = DEFAULT_MODEL  # Path to an recognition model or mapping of the form $script1:$model1. Add multiple mappings to run multi-model recognition based on detected scripts. Use the default keyword for adding a catch-all model. Recognition on scripts can be ignored with the model value ignore.
    pad = 16  # Left and right padding around lines.
    reorder = True  # Reorder code points to logical order.
    no_segmentation = False  # Enables non-segmentation mode treating each input image as a whole line.
    serializer = 'text'  # Switch between hOCR, ALTO, and plain text output. {'hocr', 'alto', 'abbyyxml', 'text'}.
    text_direction = 'horizontal-tb'  # Sets principal text direction in serialization output. {'horizontal-tb', 'vertical-lr', 'vertical-rl'}.
    #lines = 'lines.json'  # JSON file containing line coordinates.
    threads = 1  # Number of threads to use for OpenMP parallelization.
    device = 'cpu'  # Select device to use (cpu, cuda:0, cuda:1, ...).

    model_dict = {'ignore': []}  # type: Dict[str, Union[str, List[str]]]
    model_dict['default'] = DEFAULT_MODEL

    nm = {}  # type: Dict[str, models.TorchSeqRecognizer].
    ign_scripts = model_dict.pop('ignore')
    for k, v in model_dict.items():
        location = None
        if os.path.isfile(v):
            location = v
        if not location:
            print('No model {} for {} found.'.format(v, k))
            continue

        try:
            rnn = models.load_any(location, device=device)
            nm[k] = rnn
        except Exception:
            print('Model loading error, {}.'.format(location))
            continue

    if 'default' in nm:
        from collections import defaultdict

        nn = defaultdict(lambda: nm['default']
                         )  # type: Dict[str, models.TorchSeqRecognizer].
        nn.update(nm)
        nm = nn
    else:
        print('No default model.')
        return
    # Thread count is global so setting it once is sufficient.
    nn[k].nn.set_num_threads(threads)

    return recognizer(input_image,
                      model=nm,
                      pad=pad,
                      no_segmentation=no_segmentation,
                      bidi_reordering=reorder,
                      script_ignore=ign_scripts,
                      mode=serializer,
                      text_direction=text_direction,
                      segments=segments)
Example #36
 def test_load_any_invalid(self):
     """
     Test load_any raises the proper exception if object is neither pickle
     nor protobuf.
     """
     models.load_any(self.temp.name)
Example #37
import cv2
import numpy as np
import pandas as pd
import random
from kraken.lib.models import load_any
from kraken import rpred, binarization
from PIL import Image
from subprocess import call
from imutils import contours
import argparse
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

## ---Loading Kraken Model---
model = load_any("en-default.mlmodel")


def preprocessing_non_tabular(path):
    img = cv2.imread(path)

    ## ---Binarization of image---
    generator_image = Image.fromarray(img)
    generator_image = binarization.nlbin(generator_image)

    # ----Grayscaling Image----
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # --- performing Otsu threshold ---
    ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    cv2.imwrite("processed_image/threshold.png", thresh1)
Example #38
 def test_load_any_proto(self):
     """
     Test load_any loads protobuf models.
     """
     rnn = models.load_any(os.path.join(resources, 'model.pronn'))
     self.assertIsInstance(rnn, kraken.lib.lstm.SeqRecognizer)
Example #39
#! /usr/bin/env python

from kraken.lib import models
from kraken import rpred
from PIL import Image
from glob import glob
import sys

model = sys.argv[1]
gt = sys.argv[2] if len(sys.argv) > 2 else '.'

rnn = models.load_any(model)
ims = glob(gt + '/*.png')

for f in ims:
    print(f)
    im = Image.open(f)
    it = rpred.rpred(rnn, im, [(0, 0) + im.size])
    with open(f + '.rec.txt', 'wb') as fp:
        fp.write(next(it).prediction.encode('utf-8'))
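An illustrative invocation (paths assumed): `python rec.py model.mlmodel /path/to/gt`. Since the bounds list is just `[(0, 0) + im.size]`, each whole image is treated as a single line, and the one resulting prediction is written to a `.rec.txt` file next to each PNG.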
Example #40
File: ketos.py Project: rsharmapty/kraken
def test(ctx, model, evaluation_files, device, pad, threads, test_set):
    """
    Evaluate on a test set.
    """
    if not model:
        raise click.UsageError('No model to evaluate given.')

    import numpy as np
    from PIL import Image

    from kraken.serialization import render_report
    from kraken.lib import models
    from kraken.lib.dataset import global_align, compute_confusions, generate_input_transforms

    logger.info('Building test set from {} line images'.format(
        len(test_set) + len(evaluation_files)))

    nn = {}
    for p in model:
        message('Loading model {}\t'.format(p), nl=False)
        nn[p] = models.load_any(p)
        message('\u2713', fg='green')

    test_set = list(test_set)

    # set number of OpenMP threads
    logger.debug('Set OpenMP threads to {}'.format(threads))
    next(iter(nn.values())).nn.set_num_threads(threads)

    # merge training_files into ground_truth list
    if evaluation_files:
        test_set.extend(evaluation_files)

    if len(test_set) == 0:
        raise click.UsageError(
            'No evaluation data was provided to the test command. Use `-e` or the `test_set` argument.'
        )

    def _get_text(im):
        with open(os.path.splitext(im)[0] + '.gt.txt', 'r') as fp:
            return get_display(fp.read())

    acc_list = []
    for p, net in nn.items():
        algn_gt: List[str] = []
        algn_pred: List[str] = []
        chars = 0
        error = 0
        message('Evaluating {}'.format(p))
        logger.info('Evaluating {}'.format(p))
        batch, channels, height, width = net.nn.input
        ts = generate_input_transforms(batch, height, width, channels, pad)
        with log.progressbar(test_set, label='Evaluating') as bar:
            for im_path in bar:
                i = ts(Image.open(im_path))
                text = _get_text(im_path)
                pred = net.predict_string(i)
                chars += len(text)
                c, algn1, algn2 = global_align(text, pred)
                algn_gt.extend(algn1)
                algn_pred.extend(algn2)
                error += c
        acc_list.append((chars - error) / chars)
        confusions, scripts, ins, dels, subs = compute_confusions(
            algn_gt, algn_pred)
        rep = render_report(p, chars, error, confusions, scripts, ins, dels,
                            subs)
        logger.info(rep)
        message(rep)
    logger.info('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(
        np.mean(acc_list) * 100,
        np.std(acc_list) * 100))
    message('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(
        np.mean(acc_list) * 100,
        np.std(acc_list) * 100))
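Character accuracy per model is computed as (chars - error) / chars over the globally aligned ground truth; for example:

    # illustrative numbers: 3 alignment errors over 100 ground-truth characters
    chars, error = 100, 3
    accuracy = (chars - error) / chars  # 0.97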
Example #41
File: kraken.py Project: ryanfb/nidaba
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string appended to all output files
        model (unicode): Identifier for the font model to use

    Returns:
        (unicode, unicode): Storage tuple for the output file
    """
    input_path = storage.get_abs_path(*doc[1])
    output_path = (doc[1][0], os.path.splitext(storage.insert_suffix(doc[1][1],
                                                                     method,
                                                                     model))[0]
                   + '.xml')
    logger.debug('Searching for model {}'.format(model))
    if model in nidaba_cfg['kraken_models']:
        model = storage.get_abs_path(*(nidaba_cfg['kraken_models'][model]))
    elif model in nidaba_cfg['ocropus_models']:
        model = storage.get_abs_path(*(nidaba_cfg['ocropus_models'][model]))
    else:
        raise NidabaInvalidParameterException('Model not defined in '
                                              'configuration')
    img = Image.open(input_path)
    logger.debug('Reading TEI segmentation from {}'.format(doc[0]))
    tei = OCRRecord()
    with storage.StorageFile(*doc[0]) as seg:
        tei.load_tei(seg)

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    logger.debug('Loading model {}'.format(model))
    rnn = models.load_any(model)
    i = 0
    logger.debug('Start recognizing characters')
    for line_id, rec in zip(lines, rpred.rpred(rnn, img, [x['bbox'] for x in lines.itervalues()])):
        # scope the current line and add all graphemes recognized by kraken to
        # it.
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)
        i += 1

        splits = regex.split(u'(\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset + len(segment)])
                logger.debug('Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(rec.prediction[line_offset:line_offset+len(segment)]))
                tei.add_graphemes([{'grapheme': x[0], 
                                    'bbox': x[1],
                                    'confidence': int(x[2] * 100)} for x in rec[line_offset:line_offset+len(segment)]])
                line_offset += len(segment)
            if whitespace:
                logger.debug('Adding graphemes (whitespace): {}'.format(rec.prediction[line_offset:line_offset+len(whitespace)]))
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset + len(whitespace)])
                tei.add_segment(seg_bbox)
                tei.add_graphemes([{'grapheme': x[0], 
                                    'bbox': x[1],
                                    'confidence': int(x[2] * 100)} for x in rec[line_offset:line_offset+len(whitespace)]])
                line_offset += len(whitespace)
    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write_tei(fp)
    return output_path
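The segment/whitespace interleaving above relies on regex.split keeping the separators whenever the pattern contains a capturing group, so splits[0::2] are the word segments and splits[1::2] the whitespace runs between them, with izip_longest padding the missing final whitespace with None. Here is a standalone Python 3 sketch of the same pairing, using the stdlib re and itertools.zip_longest in place of the snippet's regex/izip_longest:

import re
from itertools import zip_longest

line = 'kraken is  a line recognizer'
# a capturing group makes re.split keep the separators in the result
splits = re.split(r'(\s+)', line)
# even indices are the segments, odd indices the whitespace between them
for segment, whitespace in zip_longest(splits[0::2], splits[1::2]):
    if segment:
        print('segment:    {!r}'.format(segment))
    if whitespace:
        print('whitespace: {!r}'.format(whitespace))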
Example #42
0
def detect_scripts(im, bounds, model=pkg_resources.resource_filename(__name__, 'script.mlmodel'), valid_scripts=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of scripts/writing systems.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-lr/rl/vertical-lr/rl'.
        model (str): Location of the script classification model or None for default.
        valid_scripts (list): List of valid scripts.

    Returns:
        {'script_detection': True, 'text_direction': '$dir', 'boxes':
        [[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
        direction and a list of lists of reading order sorted bounding boxes
        under the key 'boxes' with each list containing the script segmentation
        of a single line. Script is an ISO 15924 4-character identifier.

    Raises:
        KrakenInvalidModelException if no clstm module is available.
    """
    raise NotImplementedError('Temporarily unavailable. Please open a github ticket if you want this fixed sooner.')
    im_str = get_im_str(im)
    logger.info(u'Detecting scripts with {} in {} lines on {}'.format(model, len(bounds['boxes']), im_str))
    logger.debug(u'Loading detection model {}'.format(model))
    rnn = models.load_any(model)
    # load numerical to 4 char identifier map
    logger.debug(u'Loading label to identifier map')
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    # convert allowed scripts to labels
    val_scripts = []
    if valid_scripts:
        logger.debug(u'Converting allowed scripts list {}'.format(valid_scripts))
        for k, v in n2s.items():
            if v in valid_scripts:
                val_scripts.append(chr(int(k) + 0xF0000))
    else:
        valid_scripts = []
    it = rpred(rnn, im, bounds, bidi_reordering=False)
    preds = []
    logger.debug(u'Running detection')
    for pred, bbox in zip(it, bounds['boxes']):
        # substitute inherited scripts with neighboring runs
        def _subs(m, s, r=False):
            # Replace every character contained in m (or, with r=True, every
            # character *not* in m) with the preceding output character,
            # propagating the surrounding script run over inherited or
            # unwanted codepoints.
            p = u''
            for c in s:
                if c in m and p and not r:
                    p += p[-1]
                elif c not in m and p and r:
                    p += p[-1]
                else:
                    p += c
            return p

        logger.debug(u'Substituting scripts')
        p = _subs([u'\U000f03e2', u'\U000f03e6'], pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(reversed(_subs([u'\U000f03e2', u'\U000f03e6'], reversed(p))))
        # group by valid scripts. two steps: 1. substitute common confusions
        # (Latin->Fraktur and Syriac->Arabic) if given in script list.
        if 'Arab' in valid_scripts and 'Syrc' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f0087', u'\U000f00a0')
        if 'Latn' in valid_scripts and 'Latf' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f00d9', u'\U000f00d7')
        # next merge adjacent scripts
        if val_scripts:
            pred.prediction = _subs(val_scripts, pred.prediction, r=True)

        # group by grapheme
        t = []
        logger.debug(u'Merging detections')
        # if line contains only a single script return whole line bounding box
        if len(set(pred.prediction)) == 1:
            logger.debug('Only one script on line. Emitting whole line bbox')
            k = ord(pred.prediction[0]) - 0xF0000
            t.append((n2s[str(k)], bbox))
        else:
            for k, g in groupby(pred, key=lambda x: x[0]):
                # convert to ISO15924 numerical identifier
                k = ord(k) - 0xF0000
                b = max_bbox(x[1] for x in g)
                t.append((n2s[str(k)], b))
        preds.append(t)
    return {'boxes': preds, 'text_direction': bounds['text_direction'], 'script_detection': True}
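detect_scripts maps each ISO 15924 numeric script code k onto the plane-15 private-use codepoint 0xF0000 + k (see the chr(int(k) + 0xF0000) and ord(k) - 0xF0000 lines above), so a script-classifier "prediction" is an ordinary string whose characters are per-glyph script labels. A hedged sketch of the round trip follows; the two-entry map is illustrative, not the real iso15924.json resource:

# Sketch of the private-use-area trick: ISO 15924 numeric codes are
# shifted by 0xF0000 so a per-glyph script labelling can be carried
# in a normal prediction string.

# illustrative subset; the real mapping ships as iso15924.json
n2s = {'215': 'Latn', '160': 'Arab'}

def encode_script(num):
    return chr(0xF0000 + num)

def decode_script(c):
    return n2s[str(ord(c) - 0xF0000)]

labels = encode_script(215) * 3 + encode_script(160) * 2
print([decode_script(c) for c in labels])
# -> ['Latn', 'Latn', 'Latn', 'Arab', 'Arab']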
Example #43
0
 def test_load_any_pyrnn_py3(self):
     """
     Test load_any doesn't load pickled models on python 3
     """
     with raises(KrakenInvalidModelException):
         rnn = models.load_any(resources / 'model.pyrnn.gz')
Example #44
0
File: ketos.py Project: mittagessen/kraken
def test(ctx, model, evaluation_files, device, pad, threads, test_set):
    """
    Evaluate on a test set.
    """
    if not model:
        raise click.UsageError('No model to evaluate given.')

    import numpy as np
    from PIL import Image

    from kraken.serialization import render_report
    from kraken.lib import models
    from kraken.lib.dataset import global_align, compute_confusions, generate_input_transforms

    logger.info('Building test set from {} line images'.format(len(test_set) + len(evaluation_files)))

    nn = {}
    for p in model:
        message('Loading model {}\t'.format(p), nl=False)
        nn[p] = models.load_any(p)
        message('\u2713', fg='green')

    test_set = list(test_set)

    # set number of OpenMP threads
    logger.debug('Set OpenMP threads to {}'.format(threads))
    next(iter(nn.values())).nn.set_num_threads(threads)

    # merge evaluation_files into the test set
    if evaluation_files:
        test_set.extend(evaluation_files)

    if len(test_set) == 0:
        raise click.UsageError('No evaluation data was provided to the test command. Use `-e` or the `test_set` argument.')

    def _get_text(im):
        with open(os.path.splitext(im)[0] + '.gt.txt', 'r') as fp:
            return get_display(fp.read())

    acc_list = []
    for p, net in nn.items():
        algn_gt: List[str] = []
        algn_pred: List[str] = []
        chars = 0
        error = 0
        message('Evaluating {}'.format(p))
        logger.info('Evaluating {}'.format(p))
        batch, channels, height, width = net.nn.input
        ts = generate_input_transforms(batch, height, width, channels, pad)
        with log.progressbar(test_set, label='Evaluating') as bar:
            for im_path in bar:
                i = ts(Image.open(im_path))
                text = _get_text(im_path)
                pred = net.predict_string(i)
                chars += len(text)
                c, algn1, algn2 = global_align(text, pred)
                algn_gt.extend(algn1)
                algn_pred.extend(algn2)
                error += c
        acc_list.append((chars-error)/chars)
        confusions, scripts, ins, dels, subs = compute_confusions(algn_gt, algn_pred)
        rep = render_report(p, chars, error, confusions, scripts, ins, dels, subs)
        logger.info(rep)
        message(rep)
    logger.info('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(np.mean(acc_list) * 100, np.std(acc_list) * 100))
    message('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(np.mean(acc_list) * 100, np.std(acc_list) * 100))
Example #45
0
 def test_load_any_invalid(self):
     """
     Test load_any raises the proper exception if the object is neither
     pickle nor protobuf.
     """
     with raises(KrakenInvalidModelException):
         models.load_any(self.temp.name)
Example #46
0
 def test_load_clstm(self):
     """
     Tests loading of valid clstm files.
     """
     rnn = models.load_any(os.path.join(resources, 'toy.clstm').encode('utf-8'))
     self.assertIsInstance(rnn, models.TorchSeqRecognizer)
Example #47
0
 def setUp(self):
     self.im = Image.open(resources / 'bw.png')
     self.overfit_line = Image.open(resources / '000236.png')
     self.model = load_any(resources / 'overfit.mlmodel')