def _match_graphic_elements(
    self,
    semantic_graphic_list: Sequence[SemanticGraphic],
    candidate_semantic_content_list: Sequence[SemanticContentWrapper],
    unmatched_graphics_container: SemanticMixedContentWrapper
):
    """Attach graphics to their matched content and collect the unmatched ones.

    A chained matcher is built from a bounding-box-distance matcher and a
    related-block-text matcher; an OCR based matcher is appended when
    ``self.config.use_ocr_model`` is truthy.

    Args:
        semantic_graphic_list: The graphics to find a home for.
        candidate_semantic_content_list: The content items the graphics may
            be matched against.
        unmatched_graphics_container: Receives every graphic the matcher
            reports as unmatched.

    Raises:
        AssertionError: If the OCR matcher is enabled but
            ``self.fulltext_models.ocr_model`` is falsy.

    Note:
        A match whose candidate is not a ``SemanticMixedContentWrapper`` is
        skipped here: its graphic is neither attached to the candidate nor
        added to ``unmatched_graphics_container`` by this method.
    """
    matchers: List[GraphicMatcher] = [
        BoundingBoxDistanceGraphicMatcher(),
        GraphicRelatedBlockTextGraphicMatcher()
    ]
    if self.config.use_ocr_model:
        ocr_model = self.fulltext_models.ocr_model
        assert ocr_model
        matchers.append(
            OpticalCharacterRecognitionGraphicMatcher(ocr_model=ocr_model)
        )

    match_result = ChainedGraphicMatcher(matchers).get_graphic_matches(
        semantic_graphic_list=semantic_graphic_list,
        candidate_semantic_content_list=candidate_semantic_content_list
    )

    for match in match_result.graphic_matches:
        target = match.candidate_semantic_content
        if not isinstance(target, SemanticMixedContentWrapper):
            # Only mixed-content wrappers can take additional content.
            continue
        target.add_content(match.semantic_graphic)

    leftover_graphics = match_result.unmatched_graphics
    LOGGER.info('unmatched_graphics: %r', leftover_graphics)
    for leftover_graphic in leftover_graphics:
        unmatched_graphics_container.add_content(leftover_graphic)
def append_semantic_markers_for_layout_block(
    parent_semantic_content: SemanticMixedContentWrapper,
    layout_block: LayoutBlock
) -> None:
    """Add the semantic markers of *layout_block* to the parent content.

    The markers come from ``iter_semantic_markers_for_layout_block`` and are
    added in the order that iterator yields them.

    Args:
        parent_semantic_content: The container each marker is added to.
        layout_block: The layout block the markers are derived from.
    """
    # Collect all markers up front, so the iterator is fully consumed
    # before the parent content is modified.
    collected_markers = tuple(
        iter_semantic_markers_for_layout_block(layout_block)
    )
    for marker in collected_markers:
        parent_semantic_content.add_content(marker)