def test_should_not_merge_right_adjacent_block_with_same_different_tag(
        self):
    """Two touching blocks with different tags must both be kept."""
    left = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
    # Offset the second box by the first box's own width (second offset 0),
    # which this test treats as "right adjacent".
    shifted_box = left.bounding_box.move_by(left.bounding_box.width, 0)
    right = AnnotationBlock(TAG2, shifted_box)

    result = merge_blocks([left, right])

    resulting_tags = [blk.tag for blk in result]
    assert resulting_tags == [TAG1, TAG2]
def test_should_merge_right_adjacent_block_with_same_tag(self):
    """Two touching blocks sharing a tag collapse into one covering block."""
    left = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
    # Offset the second box by the first box's own width (second offset 0),
    # which this test treats as "right adjacent".
    shifted_box = left.bounding_box.move_by(left.bounding_box.width, 0)
    right = AnnotationBlock(TAG1, shifted_box)

    result = merge_blocks([left, right])

    resulting_tags = [blk.tag for blk in result]
    assert resulting_tags == [TAG1]
    # The single remaining block must equal the first box extended to
    # include the second one.
    combined_box = left.bounding_box.include(right.bounding_box)
    assert result[0].bounding_box == combined_box
def test_should_not_merge_too_far_away_block_with_same_tag(self):
    """Same-tag blocks one unit beyond the tolerance must stay separate."""
    left = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
    # Offset = box width + tolerance + 1, i.e. just past the allowed gap.
    offset = left.bounding_box.width + DEFAULT_NEARBY_TOLERANCE + 1
    right = AnnotationBlock(TAG1, left.bounding_box.move_by(offset, 0))

    result = merge_blocks(
        [left, right],
        nearby_tolerance=DEFAULT_NEARBY_TOLERANCE
    )

    resulting_tags = [blk.tag for blk in result]
    assert resulting_tags == [TAG1, TAG1]
def test_should_merge_right_nearby_block_with_same_tag(self):
    """Same-tag blocks separated by exactly the tolerance are merged."""
    left = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
    # Offset = box width + tolerance, i.e. the gap equals the tolerance.
    offset = left.bounding_box.width + DEFAULT_NEARBY_TOLERANCE
    right = AnnotationBlock(TAG1, left.bounding_box.move_by(offset, 0))

    result = merge_blocks(
        [left, right],
        nearby_tolerance=DEFAULT_NEARBY_TOLERANCE
    )

    resulting_tags = [blk.tag for blk in result]
    assert resulting_tags == [TAG1]
    # The single remaining block must equal the first box extended to
    # include the second one.
    combined_box = left.bounding_box.include(right.bounding_box)
    assert result[0].bounding_box == combined_box
def test_should_merge_multiple_sequential_right_adjacent_blocks(self):
    """A chain of three touching same-tag blocks merges into a single block."""
    first = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
    # Each following block is offset by the width of the block before it.
    second_box = first.bounding_box.move_by(first.bounding_box.width, 0)
    second = AnnotationBlock(TAG1, second_box)
    third_box = second.bounding_box.move_by(second.bounding_box.width, 0)
    third = AnnotationBlock(TAG1, third_box)

    result = merge_blocks([first, second, third])

    resulting_tags = [blk.tag for blk in result]
    assert resulting_tags == [TAG1]
    # Expected box: the first box grown to include the second, then the third.
    combined_box = first.bounding_box.include(second.bounding_box)
    combined_box = combined_box.include(third.bounding_box)
    assert result[0].bounding_box == combined_box
def svg_page_to_blockified_png_bytes(svg_page, color_map, image_size=None):
    """Render the annotation blocks of an SVG page as PNG-encoded bytes.

    The page is wrapped in an ``SvgStructuredDocument``; the annotation
    blocks of its first page are merged, expanded and then drawn on a white
    background whose dimensions come from the page's ``viewBox`` attribute.

    Args:
        svg_page: SVG element exposing an ``attrib`` mapping that contains
            a ``viewBox`` entry.
        color_map: forwarded unchanged to ``annotated_blocks_to_image``.
        image_size: forwarded as ``scale_to_size``; defaults to ``None``.

    Returns:
        The bytes produced by saving the rendered image in PNG format.

    Raises:
        RuntimeError: if the ``viewBox`` attribute is missing or empty.
        ValueError: if ``viewBox`` does not consist of exactly four
            components, or its width/height cannot be parsed as floats.
    """
    structured_document = SvgStructuredDocument(svg_page)
    blocks = expand_blocks(
        merge_blocks(
            annotation_document_page_to_annotation_blocks(
                structured_document,
                structured_document.get_pages()[0]
            )
        )
    )
    viewbox = svg_page.attrib.get('viewBox')
    if not viewbox:
        raise RuntimeError(
            'viewbox missing on svg, available attributes: %s' %
            svg_page.attrib.keys()
        )
    # SVG allows the four viewBox numbers to be separated by whitespace
    # and/or commas, so normalise commas to spaces before splitting.
    _, _, width, height = viewbox.replace(',', ' ').split()
    image = annotated_blocks_to_image(
        blocks, color_map,
        width=float(width), height=float(height), background='white',
        scale_to_size=image_size
    )
    out = BytesIO()
    image.save(out, 'png')
    return out.getvalue()
def test_should_return_same_single_blocks(self):
    """A lone block is returned by merge_blocks as a one-element list."""
    only_block = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)

    result = merge_blocks([only_block])

    expected = [only_block]
    assert result == expected