def _process_region(self, it, region, rogroup, region_image,
                     region_coords):
     """Turn the text blocks reported by a layout iterator into cells of ``region``.

     Walks ``it`` block by block (``RIL.BLOCK``). Each block whose type is
     flowing, heading, pull-out, caption or vertical text becomes a new
     ``TextRegionType`` child of ``region`` and, if ``rogroup`` is given,
     an indexed region reference in that group. Blocks of any other type
     are skipped.

     Arguments:
         it: layout iterator to walk; a falsy value means nothing to do
         region: parent region; its ``id`` prefixes the new cell IDs
         rogroup: reading order group to extend, or a falsy value
         region_image: passed to ``coordinates_for_segment`` together with
             ``region_coords`` to convert each block's bounding box
         region_coords: see ``region_image``
     """
     LOG = getLogger('processor.TesserocrSegmentTable')
     # Continue numbering after the largest index already used in the group.
     next_index = 0
     if rogroup:
         members = (rogroup.get_RegionRefIndexed() +
                    rogroup.get_OrderedGroupIndexed() +
                    rogroup.get_UnorderedGroupIndexed())
         for member in members:
             next_index = max(next_index, member.index + 1)
     # This is the same as GetComponentImages with raw_image=True, except
     # that the iterator additionally tells us the BlockType() of each block.
     while it and not it.Empty(RIL.BLOCK):
         box = it.BoundingBox(RIL.BLOCK)
         outline = coordinates_for_segment(polygon_from_x0y0x1y1(box),
                                           region_image, region_coords)
         points = points_from_polygon(outline)
         coords = CoordsType(points=points)
         ID = region.id + "_%04d" % next_index
         cell = TextRegionType(id=ID,
                               Coords=coords,
                               type=TextTypeSimpleType.PARAGRAPH)
         block_type = it.BlockType()
         is_text = True
         if block_type == PT.FLOWING_TEXT:
             # plain paragraph: keep the cell exactly as constructed
             pass
         elif block_type == PT.HEADING_TEXT:
             cell.set_type(TextTypeSimpleType.HEADING)
         elif block_type == PT.PULLOUT_TEXT:
             cell.set_type(TextTypeSimpleType.FLOATING)
         elif block_type == PT.CAPTION_TEXT:
             cell.set_type(TextTypeSimpleType.CAPTION)
         elif block_type == PT.VERTICAL_TEXT:
             cell.set_orientation(90.0)
         else:
             # non-text blocks are ignored entirely (no region, no reference)
             is_text = False
         if is_text:
             LOG.info("Detected cell '%s': %s (%s)", ID, points,
                      membername(PT, block_type))
             region.add_TextRegion(cell)
             if rogroup:
                 rogroup.add_RegionRefIndexed(
                     RegionRefIndexedType(regionRef=ID, index=next_index))
             # only accepted cells consume an index
             next_index += 1
         # advance the iterator whether or not the block was accepted
         it.Next(RIL.BLOCK)
Exemple #2
0
 def _process_words_in_line(self, result_it, line, line_xywh):
     """Annotate the words of the current text line of ``result_it`` on ``line``.

     For each word of the iterator's current line a ``WordType`` is built
     with coordinates, optional font style and one TextEquiv (text and
     confidence). The word is attached to ``line`` unless
     ``polygon_for_parent`` returns ``None`` for its outline. When the
     ``textequiv_level`` parameter is not ``'word'``, glyphs are processed
     for every word as well.

     Arguments:
         result_it: result iterator to read from; a falsy or empty iterator
             only triggers a warning
         line: text line that receives the words
         line_xywh: passed to ``coordinates_for_segment`` to convert the
             word bounding boxes
     """
     LOG = getLogger('processor.TesserocrRecognize')
     if not result_it or result_it.Empty(RIL.WORD):
         LOG.warning("No text in line '%s'", line.id)
         return
     word_no = 0
     # The loop is left explicitly once the last word of the line is reached.
     while result_it and not result_it.Empty(RIL.WORD):
         word_id = '%s_word%04d' % (line.id, word_no)
         LOG.debug("Decoding text in word '%s'", word_id)
         box = result_it.BoundingBox(RIL.WORD)
         # convert to absolute coordinates:
         outline = coordinates_for_segment(polygon_from_x0y0x1y1(box),
                                           None, line_xywh) - self.parameter['padding']
         clipped = polygon_for_parent(outline, line)
         inside_parent = clipped is not None
         if inside_parent:
             outline = clipped
         points = points_from_polygon(outline)
         word = WordType(id=word_id, Coords=CoordsType(points))
         if inside_parent:
             line.add_Word(word)
         else:
             # could happen due to rotation; the word object is still
             # filled in below, it is just never attached to the line
             LOG.info('Ignoring extant word: %s', points)
         # todo: determine if font attributes available for word level will work with LSTM models
         font = result_it.WordFontAttributes()
         if font:
             def _font_value(key):
                 """Return the attribute stored under ``key``, or None if absent."""
                 if key in font:
                     return font[key]
                 return None

             style = TextStyleType(
                 fontSize=_font_value('pointsize'),
                 fontFamily=_font_value('font_name'),
                 bold=_font_value('bold'),
                 italic=_font_value('italic'),
                 underlined=_font_value('underlined'),
                 monospace=_font_value('monospace'),
                 serif=_font_value('serif'))
             word.set_TextStyle(style) # (or somewhere in custom attribute?)
         # add word annotation unconditionally (i.e. even for glyph level):
         word_text = result_it.GetUTF8Text(RIL.WORD)
         word_conf = result_it.Confidence(RIL.WORD)/100
         word.add_TextEquiv(TextEquivType(Unicode=word_text, conf=word_conf))
         if self.parameter['textequiv_level'] != 'word':
             self._process_glyphs_in_word(result_it, word, line_xywh)
         if result_it.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
             break
         word_no += 1
         result_it.Next(RIL.WORD)
Exemple #3
0
 def _process_glyphs_in_word(self, result_it, word, word_xywh):
     """Annotate the glyphs of the current word of ``result_it`` on ``word``.

     For each symbol of the iterator's current word a ``GlyphType`` is
     built with coordinates and one TextEquiv per accepted choice of the
     symbol's choice iterator. The glyph is attached to ``word`` unless
     ``polygon_for_parent`` returns ``None`` for its outline.

     Arguments:
         result_it: result iterator to read from; a falsy or empty iterator
             only triggers a debug message
         word: word that receives the glyphs
         word_xywh: passed to ``coordinates_for_segment`` to convert the
             symbol bounding boxes
     """
     LOG = getLogger('processor.TesserocrRecognize')
     if not result_it or result_it.Empty(RIL.SYMBOL):
         LOG.debug("No glyph in word '%s'", word.id)
         return
     glyph_no = 0
     # The loop is left explicitly once the last symbol of the word is reached.
     while result_it and not result_it.Empty(RIL.SYMBOL):
         glyph_id = '%s_glyph%04d' % (word.id, glyph_no)
         LOG.debug("Decoding text in glyph '%s'", glyph_id)
         # confidence of the symbol itself; choices are measured against it
         best_conf = result_it.Confidence(RIL.SYMBOL)/100 # equals first choice?
         box = result_it.BoundingBox(RIL.SYMBOL)
         # convert to absolute coordinates:
         outline = coordinates_for_segment(polygon_from_x0y0x1y1(box),
                                           None, word_xywh) - self.parameter['padding']
         clipped = polygon_for_parent(outline, word)
         inside_parent = clipped is not None
         if inside_parent:
             outline = clipped
         points = points_from_polygon(outline)
         glyph = GlyphType(id=glyph_id, Coords=CoordsType(points))
         if inside_parent:
             word.add_Glyph(glyph)
         else:
             # could happen due to rotation
             LOG.info('Ignoring extant glyph: %s', points)
         for choice_no, choice in enumerate(result_it.GetChoiceIterator()):
             alt_text = choice.GetUTF8Text()
             alt_conf = choice.Confidence()/100
             # stop at the first choice that is too far below the symbol's
             # confidence, or once the maximum number of choices is exceeded
             conf_gap = best_conf - alt_conf
             if conf_gap > CHOICE_THRESHOLD_CONF or choice_no > CHOICE_THRESHOLD_NUM:
                 break
             # todo: consider SymbolIsSuperscript (TextStyle), SymbolIsDropcap (RelationType) etc
             glyph.add_TextEquiv(TextEquivType(
                 index=choice_no, Unicode=alt_text, conf=alt_conf))
         if result_it.IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
             break
         glyph_no += 1
         result_it.Next(RIL.SYMBOL)
    def process(self):
        """
        Recognize the text of every line of every input page with Calamari.

        Each line of each text region returned by the page's ``get_TextRegion()``
        is cropped, predicted and voted on separately. The voted text replaces the
        line's TextEquiv and any existing words are dropped. If ``textequiv_level``
        is ``word`` or ``glyph``, new word (and glyph) segments are derived from
        the character positions. Each page is then added to the output file group
        as PAGE-XML, with a metadata item listing the runtime parameters.
        """

        assert_file_grp_cardinality(self.input_file_grp, 1)
        assert_file_grp_cardinality(self.output_file_grp, 1)

        self._init_calamari()

        # The helpers below depend only on ``self``, so they are defined once.

        def _sort_chars(p):
            """Return the non-empty chars of position p reaching the cutoff, most probable first"""
            candidates = [
                c for c in p.chars
                if c.char  # XXX Note that omission probabilities are not normalized?!
                and c.probability >= self.parameter['glyph_conf_cutoff']
            ]
            candidates.sort(key=lambda c: c.probability, reverse=True)
            return candidates

        def _drop_leading_spaces(positions):
            """Return a list of positions without those at the front whose top char is a space"""
            remaining = list(positions)
            first_kept = 0
            while (first_kept < len(remaining)
                   and _sort_chars(remaining[first_kept])[0].char == " "):
                first_kept += 1
            return remaining[first_kept:]

        def _drop_trailing_spaces(positions):
            """Same as _drop_leading_spaces, but working from the end"""
            stripped = _drop_leading_spaces(reversed(positions))
            stripped.reverse()
            return stripped

        def _drop_double_spaces(positions):
            """Keep only the first position of every run of space positions"""
            kept = []
            previous_was_space = False
            for p in positions:
                is_space = p.chars[0].char == " "
                if not (is_space and previous_was_space):
                    kept.append(p)
                previous_was_space = is_space
            return kept

        def _words(s):
            """Split words based on spaces and include spaces as 'words'"""
            if not s:
                # an empty text still produces one (empty) word
                yield ''
            for _, run in itertools.groupby(s, key=lambda c: c == ' '):
                yield ''.join(run)

        def _span_points(x_start, x_end, height, coords):
            """Points of the full-height box from x_start to x_end, converted via coords"""
            box = polygon_from_x0y0x1y1([x_start, 0, x_end, height])
            return points_from_polygon(
                coordinates_for_segment(box, None, coords))

        for n, input_file in enumerate(self.input_files):
            page_id = input_file.pageId or input_file.ID
            log.info("INPUT FILE %i / %s", n, page_id)
            local_file = self.workspace.download_file(input_file)
            pcgts = page_from_file(local_file)

            page = pcgts.get_Page()
            page_image, page_xywh, _ = self.workspace.image_from_page(
                page, page_id)

            for region in pcgts.get_Page().get_TextRegion():
                region_image, region_xywh = self.workspace.image_from_segment(
                    region, page_image, page_xywh)

                textlines = region.get_TextLine()
                log.info("About to recognize %i lines of region '%s'",
                         len(textlines), region.id)
                for line in textlines:
                    log.debug("Recognizing line '%s' in region '%s'", line.id,
                              region.id)

                    line_image, line_coords = self.workspace.image_from_segment(
                        line, region_image, region_xywh)
                    pixels = np.array(line_image, dtype=np.uint8)

                    # one image in, so take the first (only) result
                    fold_results = list(
                        self.predictor.predict_raw([pixels],
                                                   progress_bar=False))[0]
                    for fold_no, fold in enumerate(fold_results):
                        fold.prediction.id = "fold_{}".format(fold_no)

                    prediction = self.voter.vote_prediction_result(
                        fold_results)
                    prediction.id = "voted"

                    # Build line text on our own
                    #
                    # Calamari does whitespace post-processing on prediction.sentence, while it does not do the same
                    # on prediction.positions. Do it on our own to have consistency.
                    #
                    # XXX Check Calamari's built-in post-processing on prediction.sentence
                    positions = _drop_double_spaces(
                        _drop_trailing_spaces(
                            _drop_leading_spaces(prediction.positions)))

                    line_text = ''.join(
                        [_sort_chars(p)[0].char for p in positions])
                    if line_text != prediction.sentence:
                        log.warning(
                            "Our own line text is not the same as Calamari's: '%s' != '%s'",
                            line_text, prediction.sentence)

                    # Delete existing results
                    if line.get_TextEquiv():
                        log.warning("Line '%s' already contained text results",
                                    line.id)
                    line.set_TextEquiv([])
                    if line.get_Word():
                        log.warning(
                            "Line '%s' already contained word segmentation",
                            line.id)
                    line.set_Word([])

                    # Save line results
                    line_equiv = TextEquivType(
                        Unicode=line_text,
                        conf=prediction.avg_char_probability)
                    line.set_TextEquiv([line_equiv])

                    # Save word results
                    #
                    # Calamari OCR does not provide word positions, so we infer word positions from a. text segmentation
                    # and b. the glyph positions. This is necessary because the PAGE XML format enforces a strict
                    # hierarchy of lines > words > glyphs.
                    level = self.parameter['textequiv_level']
                    if level not in ['word', 'glyph']:
                        continue

                    word_no = 0
                    offset = 0  # index into positions of the current word's first char
                    for word_text in _words(line_text):
                        word_length = len(word_text)
                        word_positions = positions[offset:offset + word_length]
                        offset += word_length
                        if not word_text.strip(' '):
                            # runs of spaces only advance the offset
                            continue

                        points = _span_points(
                            word_positions[0].global_start,
                            word_positions[-1].global_end,
                            line_image.height, line_coords)
                        # XXX Crop to line polygon?

                        word = WordType(id='%s_word%04d' % (line.id, word_no),
                                        Coords=CoordsType(points))
                        word.add_TextEquiv(TextEquivType(Unicode=word_text))

                        if level == 'glyph':
                            for glyph_no, p in enumerate(word_positions):
                                points = _span_points(
                                    p.global_start, p.global_end,
                                    line_image.height, line_coords)
                                glyph = GlyphType(
                                    id='%s_glyph%04d' % (word.id, glyph_no),
                                    Coords=CoordsType(points))

                                # Add predictions (= TextEquivs)
                                # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs
                                for rank, char in enumerate(_sort_chars(p),
                                                            start=1):
                                    glyph.add_TextEquiv(
                                        TextEquivType(Unicode=char.char,
                                                      index=rank,
                                                      conf=char.probability))

                                word.add_Glyph(glyph)

                        line.add_Word(word)
                        word_no += 1

            _page_update_higher_textequiv_levels('line', pcgts)

            # Add metadata about this operation and its runtime parameters:
            metadata = pcgts.get_Metadata()  # ensured by from_file()
            parameter_labels = LabelsType(
                externalModel="ocrd-tool",
                externalId="parameters",
                Label=[
                    LabelType(type_=name, value=self.parameter[name])
                    for name in self.parameter.keys()
                ])
            metadata.add_MetadataItem(
                MetadataItemType(type_="processingStep",
                                 name=self.ocrd_tool['steps'][0],
                                 value=TOOL,
                                 Labels=[parameter_labels]))

            file_id = make_file_id(input_file, self.output_file_grp)
            pcgts.set_pcGtsId(file_id)
            output_path = os.path.join(self.output_file_grp, file_id + '.xml')
            self.workspace.add_file(ID=file_id,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype=MIMETYPE_PAGE,
                                    local_filename=output_path,
                                    content=to_xml(pcgts))
Exemple #5
0
    def process(self):
        """
        Perform text recognition with Calamari on the workspace.

        If ``textequiv_level`` is ``word`` or ``glyph``, then additionally create word / glyph level segments by
        splitting at white space characters / glyph boundaries. In the case of ``glyph``, add all alternative character
        hypotheses down to ``glyph_conf_cutoff`` confidence threshold.

        All lines of a text region are cropped first and handed to the predictor in one batch. The voted
        result of each line then replaces the line's existing TextEquiv and Word annotations. Each page is
        added to the output file group as PAGE-XML.
        """
        log = getLogger('processor.CalamariRecognize')

        assert_file_grp_cardinality(self.input_file_grp, 1)
        assert_file_grp_cardinality(self.output_file_grp, 1)

        # The helpers below depend only on ``self``, so they are defined once
        # instead of once per line.

        def _sort_chars(p):
            """Filter and sort chars of prediction p"""
            chars = p.chars
            chars = [
                c for c in chars if c.char
            ]  # XXX Note that omission probabilities are not normalized?!
            chars = [
                c for c in chars if c.probability >=
                self.parameter['glyph_conf_cutoff']
            ]
            chars = sorted(chars,
                           key=lambda k: k.probability,
                           reverse=True)
            return chars

        # NOTE(review): _sort_chars(p)[0] raises IndexError when no char of a
        # position reaches glyph_conf_cutoff — confirm whether that can happen
        # for the cutoff values in use.

        def _drop_leading_spaces(positions):
            """Drop positions at the front whose most probable char is a space"""
            return list(
                itertools.dropwhile(
                    lambda p: _sort_chars(p)[0].char == " ",
                    positions))

        def _drop_trailing_spaces(positions):
            """Drop positions at the end whose most probable char is a space"""
            return list(
                reversed(_drop_leading_spaces(
                    reversed(positions))))

        def _drop_double_spaces(positions):
            """Keep only the first position of every run of space positions"""
            # NOTE(review): this inspects p.chars[0], whereas the line text is
            # built from _sort_chars(p)[0] — confirm these always agree.
            def _drop_double_spaces_generator(positions):
                last_was_space = False
                for p in positions:
                    if p.chars[0].char == " ":
                        if not last_was_space:
                            yield p
                        last_was_space = True
                    else:
                        yield p
                        last_was_space = False

            return list(_drop_double_spaces_generator(positions))

        def _words(s):
            """Split words based on spaces and include spaces as 'words'"""
            spaces = None
            word = ''
            for c in s:
                if c == ' ' and spaces is True:
                    word += c
                elif c != ' ' and spaces is False:
                    word += c
                else:
                    if word:
                        yield word
                    word = c
                    spaces = (c == ' ')
            yield word

        for (n, input_file) in enumerate(self.input_files):
            page_id = input_file.pageId or input_file.ID
            log.info("INPUT FILE %i / %s", n, page_id)
            pcgts = page_from_file(self.workspace.download_file(input_file))

            page = pcgts.get_Page()
            page_image, page_coords, _ = self.workspace.image_from_page(
                page, page_id, feature_selector=self.features)

            for region in page.get_AllRegions(classes=['Text']):
                region_image, region_coords = self.workspace.image_from_segment(
                    region,
                    page_image,
                    page_coords,
                    feature_selector=self.features)

                textlines = region.get_TextLine()
                log.info("About to recognize %i lines of region '%s'",
                         len(textlines), region.id)

                # First pass: crop all lines so they can be predicted as one batch.
                line_images_np = []
                line_coordss = []
                for line in textlines:
                    log.debug("Recognizing line '%s' in region '%s'", line.id,
                              region.id)

                    line_image, line_coords = self.workspace.image_from_segment(
                        line,
                        region_image,
                        region_coords,
                        feature_selector=self.features)
                    if ('binarized' not in line_coords['features']
                            and 'grayscale_normalized'
                            not in line_coords['features']
                            and self.network_input_channels == 1):
                        # We cannot use a feature selector for this since we don't
                        # know whether the model expects (has been trained on)
                        # binarized or grayscale images; but raw images are likely
                        # always inadequate:
                        log.warning(
                            "Using raw image for line '%s' in region '%s'",
                            line.id, region.id)

                    # An image with a zero dimension is replaced by a 1x1
                    # placeholder so the batch keeps one entry per line.
                    line_image = line_image if all(line_image.size) else [[0]]
                    line_image_np = np.array(line_image, dtype=np.uint8)
                    line_images_np.append(line_image_np)
                    line_coordss.append(line_coords)
                raw_results_all = self.predictor.predict_raw(
                    line_images_np, progress_bar=False)

                # Second pass: annotate each line with its own prediction.
                for line, line_coords, line_image_np, raw_results in zip(
                        textlines, line_coordss, line_images_np,
                        raw_results_all):

                    # Height of THIS line's image (first array axis). The
                    # ``line_image`` variable of the cropping loop must not be
                    # used here: it still refers to the region's last line and
                    # may even be the list placeholder without ``.height``.
                    line_height = line_image_np.shape[0]

                    for i, p in enumerate(raw_results):
                        p.prediction.id = "fold_{}".format(i)

                    prediction = self.voter.vote_prediction_result(raw_results)
                    prediction.id = "voted"

                    # Build line text on our own
                    #
                    # Calamari does whitespace post-processing on prediction.sentence, while it does not do the same
                    # on prediction.positions. Do it on our own to have consistency.
                    #
                    # XXX Check Calamari's built-in post-processing on prediction.sentence
                    positions = prediction.positions
                    positions = _drop_leading_spaces(positions)
                    positions = _drop_trailing_spaces(positions)
                    positions = _drop_double_spaces(positions)

                    line_text = ''.join(
                        _sort_chars(p)[0].char for p in positions)
                    if line_text != prediction.sentence:
                        log.warning(
                            "Our own line text is not the same as Calamari's: '%s' != '%s'",
                            line_text, prediction.sentence)

                    # Delete existing results
                    if line.get_TextEquiv():
                        log.warning("Line '%s' already contained text results",
                                    line.id)
                    line.set_TextEquiv([])
                    if line.get_Word():
                        log.warning(
                            "Line '%s' already contained word segmentation",
                            line.id)
                    line.set_Word([])

                    # Save line results
                    line_conf = prediction.avg_char_probability
                    line.set_TextEquiv(
                        [TextEquivType(Unicode=line_text, conf=line_conf)])

                    # Save word results
                    #
                    # Calamari OCR does not provide word positions, so we infer word positions from a. text segmentation
                    # and b. the glyph positions. This is necessary because the PAGE XML format enforces a strict
                    # hierarchy of lines > words > glyphs.
                    if self.parameter['textequiv_level'] in ['word', 'glyph']:
                        word_no = 0
                        i = 0  # index into positions of the current word's first char

                        for word_text in _words(line_text):
                            word_length = len(word_text)
                            if not all(c == ' ' for c in word_text):
                                word_positions = positions[i:i + word_length]
                                word_start = word_positions[0].global_start
                                word_end = word_positions[-1].global_end

                                polygon = polygon_from_x0y0x1y1([
                                    word_start, 0, word_end, line_height
                                ])
                                points = points_from_polygon(
                                    coordinates_for_segment(
                                        polygon, None, line_coords))
                                # XXX Crop to line polygon?

                                word = WordType(id='%s_word%04d' %
                                                (line.id, word_no),
                                                Coords=CoordsType(points))
                                word.add_TextEquiv(
                                    TextEquivType(Unicode=word_text))

                                if self.parameter[
                                        'textequiv_level'] == 'glyph':
                                    for glyph_no, p in enumerate(
                                            word_positions):
                                        glyph_start = p.global_start
                                        glyph_end = p.global_end

                                        polygon = polygon_from_x0y0x1y1([
                                            glyph_start, 0, glyph_end,
                                            line_height
                                        ])
                                        points = points_from_polygon(
                                            coordinates_for_segment(
                                                polygon, None, line_coords))

                                        glyph = GlyphType(
                                            id='%s_glyph%04d' %
                                            (word.id, glyph_no),
                                            Coords=CoordsType(points))

                                        # Add predictions (= TextEquivs)
                                        char_index_start = 1  # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs
                                        for char_index, char in enumerate(
                                                _sort_chars(p),
                                                start=char_index_start):
                                            glyph.add_TextEquiv(
                                                TextEquivType(
                                                    Unicode=char.char,
                                                    index=char_index,
                                                    conf=char.probability))

                                        word.add_Glyph(glyph)

                                line.add_Word(word)
                                word_no += 1

                            # runs of spaces only advance the position index
                            i += word_length

            _page_update_higher_textequiv_levels('line', pcgts)

            # Add metadata about this operation and its runtime parameters:
            self.add_metadata(pcgts)
            file_id = make_file_id(input_file, self.output_file_grp)
            pcgts.set_pcGtsId(file_id)
            self.workspace.add_file(ID=file_id,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype=MIMETYPE_PAGE,
                                    local_filename=os.path.join(
                                        self.output_file_grp,
                                        file_id + '.xml'),
                                    content=to_xml(pcgts))
Exemple #6
0
 def test_polygon_from_x0y0x1y1(self):
     """A box [x0, y0, x1, y1] yields the corners (x0,y0), (x1,y0), (x1,y1), (x0,y1)."""
     box = [100, 100, 200, 200]
     expected_corners = [[100, 100], [200, 100], [200, 200], [100, 200]]
     self.assertEqual(polygon_from_x0y0x1y1(box), expected_corners)
Exemple #7
0
 def _process_page(self, it, page, page_image, page_coords, page_id):
     """Turn Tesseract's block-level layout results into PAGE regions.

     Steps through ``it`` at ``RIL.BLOCK`` level. For each block, a region
     of the kind matching ``it.BlockType()`` (text, image, separator, maths,
     table, or noise as the fallback) is added to ``page``. All regions
     except separators and noise are also referenced from the page's
     top-level OrderedGroup, which is created if missing and removed again
     if it ends up empty.

     Args:
         it: block-level result iterator; a falsy value skips the loop.
         page: PAGE page element that receives the regions and whose
             ReadingOrder is created or extended.
         page_image: image passed to ``coordinates_for_segment`` together
             with ``page_coords`` to convert block outlines.
         page_coords: coordinate information belonging to ``page_image``.
         page_id: not used by this method.

     Reads ``self.parameter`` keys ``padding``, ``crop_polygons`` and
     ``sparse_text``.
     """
     LOG = getLogger('processor.TesserocrSegmentRegion')
     # equivalent to GetComponentImages with raw_image=True,
     # (which would also give raw coordinates),
     # except we are also interested in the iterator's BlockType() here,
     # and its BlockPolygon()
     index = 0
     ro = page.get_ReadingOrder()
     if not ro:
         ro = ReadingOrderType()
         page.set_ReadingOrder(ro)
     og = ro.get_OrderedGroup()
     if og:
         # start counting from largest existing index
         for elem in (og.get_RegionRefIndexed() +
                      og.get_OrderedGroupIndexed() +
                      og.get_UnorderedGroupIndexed()):
             if elem.index >= index:
                 index = elem.index + 1
     else:
         # new top-level group
         og = OrderedGroupType(id="reading-order")
         ro.set_OrderedGroup(og)
     while it and not it.Empty(RIL.BLOCK):
         # (padding will be passed to both BoundingBox and GetImage)
         # (actually, Tesseract honours padding only on the left and bottom,
         #  whereas right and top are increased less!)
         bbox = it.BoundingBox(RIL.BLOCK, padding=self.parameter['padding'])
         # sometimes these polygons are not planar, which causes
         # PIL.ImageDraw.Draw.polygon (and likely others as well)
         # to misbehave; however, PAGE coordinate semantics prohibit
         # multi-path polygons!
         # (probably a bug in Tesseract itself, cf. tesseract#2826):
         if self.parameter['crop_polygons']:
             polygon = it.BlockPolygon()
         else:
             polygon = polygon_from_x0y0x1y1(bbox)
         polygon = coordinates_for_segment(polygon, page_image, page_coords)
         # keep the unadjusted polygon around so that a rejected block
         # can still be reported with its coordinates
         polygon2 = polygon_for_parent(polygon, page)
         if polygon2 is not None:
             polygon = polygon2
         points = points_from_polygon(polygon)
         coords = CoordsType(points=points)
         if polygon2 is None:
             LOG.info('Ignoring extant region: %s', points)
             it.Next(RIL.BLOCK)
             continue
         # if xywh['w'] < 30 or xywh['h'] < 30:
         #     LOG.info('Ignoring too small region: %s', points)
         #     it.Next(RIL.BLOCK)
         #     continue
         # region_image_bin = it.GetBinaryImage(RIL.BLOCK)
         # if not region_image_bin.getbbox():
         #     LOG.info('Ignoring binary-empty region: %s', points)
         #     it.Next(RIL.BLOCK)
         #     continue
         #
         # add the region reference in the reading order element
         # (will be removed again if Separator/Noise region below)
         ID = "region%04d" % index
         og.add_RegionRefIndexed(
             RegionRefIndexedType(regionRef=ID, index=index))
         #
         # region type switch
         #
         block_type = it.BlockType()
         if block_type in [
                 PT.FLOWING_TEXT,
                 PT.HEADING_TEXT,
                 PT.PULLOUT_TEXT,
                 PT.CAPTION_TEXT,
                 # TABLE is contained in PTIsTextType, but
                 # it is a bad idea to create a TextRegion
                 # for it (better set `find_tables` False):
                 # PT.TABLE,
                 # will also get a 90° @orientation
                 # (but that can be overridden by deskew/OSD):
                 PT.VERTICAL_TEXT
         ]:
             region = TextRegionType(id=ID,
                                     Coords=coords,
                                     type=TextTypeSimpleType.PARAGRAPH)
             if block_type == PT.VERTICAL_TEXT:
                 region.set_orientation(90.0)
             elif block_type == PT.HEADING_TEXT:
                 region.set_type(TextTypeSimpleType.HEADING)
             elif block_type == PT.PULLOUT_TEXT:
                 region.set_type(TextTypeSimpleType.FLOATING)
             elif block_type == PT.CAPTION_TEXT:
                 region.set_type(TextTypeSimpleType.CAPTION)
             page.add_TextRegion(region)
             if self.parameter['sparse_text']:
                 # in sparse mode every block becomes a region of type
                 # OTHER holding one line with the same coordinates
                 region.set_type(TextTypeSimpleType.OTHER)
                 region.add_TextLine(
                     TextLineType(id=region.id + '_line', Coords=coords))
         elif block_type in [
                 PT.FLOWING_IMAGE, PT.HEADING_IMAGE, PT.PULLOUT_IMAGE
         ]:
             region = ImageRegionType(id=ID, Coords=coords)
             page.add_ImageRegion(region)
         elif block_type in [PT.HORZ_LINE, PT.VERT_LINE]:
             region = SeparatorRegionType(id=ID, Coords=coords)
             page.add_SeparatorRegion(region)
             # undo appending in ReadingOrder
             og.set_RegionRefIndexed(og.get_RegionRefIndexed()[:-1])
         elif block_type in [PT.INLINE_EQUATION, PT.EQUATION]:
             region = MathsRegionType(id=ID, Coords=coords)
             page.add_MathsRegion(region)
         elif block_type == PT.TABLE:
             # without API access to StructuredTable we cannot
             # do much for a TableRegionType (i.e. nrows, ncols,
             # coordinates of cells for recursive regions etc),
             # but this can be achieved afterwards by segment-table
             region = TableRegionType(id=ID, Coords=coords)
             page.add_TableRegion(region)
         else:
             region = NoiseRegionType(id=ID, Coords=coords)
             # the region must be handed over explicitly; calling the
             # adder without it never attaches the noise region
             page.add_NoiseRegion(region)
             # undo appending in ReadingOrder
             og.set_RegionRefIndexed(og.get_RegionRefIndexed()[:-1])
         LOG.info("Detected region '%s': %s (%s)", ID, points,
                  membername(PT, block_type))
         #
         # iterator increment
         #
         index += 1
         it.Next(RIL.BLOCK)
     if (not og.get_RegionRefIndexed() and not og.get_OrderedGroupIndexed()
             and not og.get_UnorderedGroupIndexed()):
         # schema forbids empty OrderedGroup
         ro.set_OrderedGroup(None)