Beispiel #1
0
def serialize_segmentation(segresult: Dict[str, Any],
                           image_name: str = None,
                           image_size: Tuple[int, int] = (0, 0),
                           template: str = 'hocr') -> str:
    """
    Renders a segmentation result as an output document.

    Every segmented line is wrapped in an ``ocr_record`` with empty text and
    the collection is handed to ``serialize``.

    Args:
        segresult: Result of blla.segment. When its 'type' entry equals
                   'baselines' the records are built from 'lines',
                   otherwise from 'boxes'.
        image_name (str): Name of the source image
        image_size (tuple): Dimensions of the source image
        template (str): Selector for the serialization format. May be
                        'hocr' or 'alto'.

    Returns:
            (str) rendered template.
    """
    if segresult.get('type') != 'baselines':
        records = []
        for box in segresult['boxes']:
            # Boxes are flat coordinate sequences: even indices are x
            # values, odd indices are y values.
            xs = box[::2]
            ys = box[1::2]
            left, right = min(xs), max(xs)
            top, bottom = min(ys), max(ys)
            outline = [[left, top], [left, bottom],
                       [right, bottom], [right, top]]
            records.append(ocr_record('', [], [], outline))
    else:
        records = [ocr_record('', '', '', bl) for bl in segresult['lines']]

    return serialize(records,
                     image_name=image_name,
                     image_size=image_size,
                     regions=segresult.get('regions'),
                     template=template)
Beispiel #2
0
    def setUp(self):
        """
        Loads the bounding box and baseline record fixtures from the
        resources directory.
        """
        with open(resources / 'records.json', 'r') as box_fp:
            box_data = json.load(box_fp)
        self.box_records = [rpred.ocr_record(**kwargs) for kwargs in box_data]

        with open(resources / 'bl_records.json', 'r') as bl_fp:
            bl_data = json.load(bl_fp)
        self.bl_records = [
            rpred.ocr_record(**kwargs) for kwargs in bl_data['lines']
        ]
        self.bl_regions = bl_data['regions']
Beispiel #3
0
def get_bounding_boxes_from_transcription(path):
    """
    Collects the transcribed lines and their padded bounding boxes from a
    transcription HTML file.

    Args:
        path: Location of the HTML document, handed to ``html.parse``.

    Returns:
        A list with one dict per ``<section>`` element. Each dict has the
        keys ``writing_mode`` (content of the ``text_direction`` meta
        element, or 'horizontal-lr' when that element is absent), ``lines``
        (a list of ``{'text': ..., 'bbox': ...}`` dicts) and ``image_size``
        (size of the image embedded in the section).
    """
    tree = html.parse(path)
    etree.strip_tags(tree, etree.Comment)

    direction_meta = tree.find(".//meta[@itemprop='text_direction']")
    if direction_meta is not None:
        writing_mode = direction_meta.attrib['content']
    else:
        writing_mode = 'horizontal-lr'

    data_uri_prefix = 'data:image/png;base64,'
    pages = []
    for section in tree.xpath('//section'):
        # The page image is embedded in the section as a base64 data URI;
        # drop the prefix before decoding.
        encoded = section.find('.//img').attrib['src'][len(data_uri_prefix):]
        page_image = Image.open(BytesIO(base64.b64decode(encoded)))

        page = {
            "writing_mode": writing_mode,
            "lines": [],
            "image_size": page_image.size
        }
        pages.append(page)

        for item in section.iter('li'):
            if not item.get('contenteditable'):
                continue
            content = u''.join(item.itertext())
            # Skip items that are empty or hold nothing but whitespace.
            if not content or content.isspace():
                continue

            x0, y0, x1, y1 = [int(v) for v in item.get('data-bbox').split(',')]

            # add some margin on the edges (the bottom edge gets twice as
            # much as the other three)
            box_w = x1 - x0
            box_h = y1 - y0
            padded_box = [
                int(x0 - box_w*.025),
                int(y0 - box_h*.025),
                int(x1 + box_w*.025),
                int(y1 + box_h*.05),
            ]

            stripped = content.strip()
            # Characters in any Unicode "C" (other/control) category are
            # dropped, the rest go through translate_char.
            text = "".join(
                translate_char(ch)
                for ch in stripped
                if unicodedata.category(ch)[0] != "C"
            )
            rec = ocr_record(text, padded_box, [1.0]*len(text))
            page["lines"].append({
                'text': rec.prediction,
                'bbox': rec.cuts
            })

    return pages

# recs = get_bounding_boxes_from_transcription('../data/transcriptions/2jMfAAAAMAAJ/transcribe.html')
Beispiel #4
0
def forced_align(doc, model):
    """
    Performs a forced character alignment of text with recognition model
    output activations.

    For every entry of ``doc['lines']`` the line text is turned into an FST,
    composed with an FST built from ``model.outputs``, and the shortest path
    through the composition is converted into per-label polygon sections.

    Args:
        doc (dict): Parsed document. This function reads ``doc['image']``
                    and ``doc['lines']``; each line must provide the keys
                    'text', 'baseline' and 'boundary'.
        model (kraken.lib.model.TorchSeqRecognizer): Recognition model to use for alignment.

    Returns:
        A list with one entry per line, each the result of
        ``rpred.bidi_record`` applied to a ``kraken.rpred.ocr_record``.
    """
    # NOTE(review): the image is opened here and never explicitly closed in
    # this function.
    im = Image.open(doc['image'])
    predictor = rpred.rpred(model, im, doc)
    records = []
    for line in doc['lines']:
        bidi_text = get_display(line['text'])
        gt_fst = fst_from_text(bidi_text, model.codec)
        # Advance the predictor by one step; its return value is discarded.
        # NOTE(review): presumably this runs recognition on the next line and
        # refreshes model.outputs / predictor.box, which are read below --
        # confirm against rpred.rpred.
        next(predictor)
        lat_fst = fst_from_lattice(model.outputs)
        composed_graph = _compose(lat_fst, gt_fst)
        short_graph = _shortest_path(composed_graph)
        # Parallel lists: decoded label, polygon section and confidence.
        pred = []
        pos = []
        conf = []
        # The second argument is the last valid index along axis 2 of
        # model.outputs. NOTE(review): its meaning is defined by
        # _generate_line_record -- confirm.
        for act, label in _generate_line_record(short_graph,
                                                model.outputs.shape[2] - 1):
            # Both ends of the activation span are rescaled with
            # predictor.box.size[0] as the upper bound.
            pos.append(
                compute_polygon_section(
                    line['baseline'], line['boundary'],
                    predictor._scale_val(act[0], 0, predictor.box.size[0]),
                    predictor._scale_val(act[1], 0, predictor.box.size[0])))
            # Every aligned label is assigned a constant confidence of 1.0.
            conf.append(1.0)
            # Decode the single label; only the first element of the first
            # decoded item is kept.
            pred.append(model.codec.decode([(label, 0, 0, 0)])[0][0])
        records.append(
            rpred.bidi_record(rpred.ocr_record(pred, pos, conf, line)))
    return records
Beispiel #5
0
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segments one input image and writes the result to ``output``.

    With ``legacy`` set the image goes through ``pageseg.segment``, otherwise
    through ``blla.segment``. The result is rendered with
    ``serialization.serialize`` when this is the last processing step and the
    output mode is not 'native'; in every other case it is dumped as JSON.

    Args:
        legacy: Selects ``pageseg.segment`` instead of ``blla.segment``.
        model: Forwarded to ``blla.segment``.
        text_direction: Forwarded to whichever segmenter is used.
        scale: Forwarded to ``pageseg.segment``.
        maxcolseps: Forwarded to ``pageseg.segment``.
        black_colseps: Forwarded to ``pageseg.segment``.
        remove_hlines: Forwarded to ``pageseg.segment`` as ``no_hlines``.
        pad: Forwarded to ``pageseg.segment``.
        mask: When truthy, opened with ``Image.open`` and forwarded as the
              segmentation mask.
        device: Forwarded to ``blla.segment``.
        input: Input file. On the first processing step of a non-image
               input format it is replaced by the 'image' entry of the
               parsed input.
        output: Destination handed to ``open_file``.

    Raises:
        click.BadParameter: If the input image or the mask cannot be opened.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    ctx = click.get_current_context()
    meta = ctx.meta

    if meta['first_process']:
        input_format = meta['input_format_type']
        if input_format != 'image':
            input = get_input_parser(input_format)(input)['image']
        meta['first_process'] = False

    # Remember the first input seen as the base image; later calls keep it.
    meta.setdefault('base_image', input)

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))
    if mask:
        try:
            mask = Image.open(mask)
        except IOError as e:
            raise click.BadParameter(str(e))

    message('Segmenting\t', nl=False)
    try:
        if not legacy:
            res = blla.segment(im,
                               text_direction,
                               mask=mask,
                               model=model,
                               device=device)
        else:
            res = pageseg.segment(im,
                                  text_direction,
                                  scale,
                                  maxcolseps,
                                  black_colseps,
                                  no_hlines=remove_hlines,
                                  pad=pad,
                                  mask=mask)
    except Exception:
        # Print the failure mark, then let the error propagate unchanged.
        message('\u2717', fg='red')
        raise

    if not meta['last_process'] or meta['output_mode'] == 'native':
        with open_file(output, 'w') as fp:
            fp = cast(IO[Any], fp)
            json.dump(res, fp)
    else:
        with open_file(output, 'w', encoding='utf-8') as fp:
            fp = cast(IO[Any], fp)
            logger.info('Serializing as {} into {}'.format(
                meta['output_mode'], output))
            from kraken import serialization
            from kraken.rpred import ocr_record
            if 'type' in res and res['type'] == 'baselines':
                records = [ocr_record('', '', '', bl) for bl in res['lines']]
            else:
                records = []
                for box in res['boxes']:
                    # Even indices are x values, odd indices are y values.
                    xmin, xmax = min(box[::2]), max(box[::2])
                    ymin, ymax = min(box[1::2]), max(box[1::2])
                    outline = [[xmin, ymin], [xmin, ymax],
                               [xmax, ymax], [xmax, ymin]]
                    records.append(ocr_record('', [], [], outline))
            regions = res['regions'] if 'regions' in res else None
            rendered = serialization.serialize(
                records,
                image_name=meta['base_image'],
                image_size=im.size,
                regions=regions,
                template=meta['output_mode'])
            fp.write(rendered)
    message('\u2713', fg='green')
 def setUp(self):
     """
     Loads the record fixtures and creates a 'standard' HocrValidator.
     """
     fixture_path = os.path.join(resources, 'records.json')
     with open(fixture_path, 'r') as fp:
         raw_records = json.load(fp)
     self.records = [rpred.ocr_record(**kwargs) for kwargs in raw_records]
     self.validator = HocrValidator('standard')
Beispiel #7
0
 def setUp(self):
     """
     Loads the record fixtures from 'records.json' in the resources
     directory.
     """
     fixture_path = os.path.join(resources, 'records.json')
     with open(fixture_path, 'r') as fp:
         raw_records = json.load(fp)
     self.records = [rpred.ocr_record(**kwargs) for kwargs in raw_records]
Beispiel #8
0
 def setUp(self):
     """
     Builds ``self.records`` from the entries of the 'records.json' fixture.
     """
     with open(os.path.join(resources, 'records.json'), 'r') as fixture:
         entries = json.load(fixture)
     self.records = [rpred.ocr_record(**entry) for entry in entries]
 def setUp(self):
     """
     Builds ``self.records`` from the 'records.json' fixture and creates a
     'standard' HocrValidator.
     """
     with open(os.path.join(resources, 'records.json'), 'r') as fixture:
         entries = json.load(fixture)
     self.records = [rpred.ocr_record(**entry) for entry in entries]
     self.validator = HocrValidator('standard')