def test_should_return_higherfont_if_font_size_is_larger(self):
    """Expect 'HIGHERFONT' when the current token's font size exceeds the previous one's."""
    smaller_font = LayoutFont(font_id='dummy', font_size=1)
    larger_font = LayoutFont(font_id='dummy', font_size=2)
    feature = get_token_font_size_feature(
        previous_token=LayoutToken('', font=smaller_font),
        current_token=LayoutToken('', font=larger_font),
    )
    assert feature == 'HIGHERFONT'
def test_should_return_higherfont_if_new_font_has_no_size(self):
    """Expect 'HIGHERFONT' when the current token's font has ``font_size=None``."""
    sized_font = LayoutFont(font_id='dummy', font_size=1)
    unsized_font = LayoutFont(font_id='dummy', font_size=None)
    feature = get_token_font_size_feature(
        previous_token=LayoutToken('', font=sized_font),
        current_token=LayoutToken('', font=unsized_font),
    )
    assert feature == 'HIGHERFONT'
def test_should_return_samefont_if_font_size_is_the_same(self):
    """Expect 'SAMEFONTSIZE' when both tokens use a font of size 1."""
    previous = LayoutToken('', font=LayoutFont(font_id='dummy', font_size=1))
    current = LayoutToken('', font=LayoutFont(font_id='dummy', font_size=1))
    feature = get_token_font_size_feature(
        previous_token=previous,
        current_token=current,
    )
    assert feature == 'SAMEFONTSIZE'
def test_should_return_lowerfont_if_font_size_is_smaller(self):
    """Expect 'LOWERFONT' when the current token's font size is below the previous one's."""
    larger_font = LayoutFont(font_id='dummy', font_size=2)
    smaller_font = LayoutFont(font_id='dummy', font_size=1)
    feature = get_token_font_size_feature(
        previous_token=LayoutToken('', font=larger_font),
        current_token=LayoutToken('', font=smaller_font),
    )
    assert feature == 'LOWERFONT'
def parse_font(self, font_node: etree.ElementBase) -> LayoutFont:
    """Build a ``LayoutFont`` from the attributes of a font XML node.

    Reads the ``ID``, ``FONTFAMILY``, ``FONTSIZE`` and ``FONTSTYLE``
    attributes. ``FONTSTYLE`` is split on single spaces and the style flags
    are set by membership of ``bold``, ``italics``, ``subscript`` and
    ``superscript`` in the resulting list.

    Args:
        font_node: element whose attributes describe the font.

    Returns:
        The parsed ``LayoutFont``. ``font_size`` is ``None`` when the
        ``FONTSIZE`` attribute is missing or empty.

    Raises:
        ValueError: if ``FONTSIZE`` is non-empty but not a valid number.
    """
    attrib = font_node.attrib
    font_styles = (attrib.get('FONTSTYLE') or '').split(' ')
    raw_font_size = attrib.get('FONTSIZE')
    # float(None) raises TypeError and float('') raises ValueError; a font
    # without a usable size is represented by font_size=None instead of
    # aborting the whole parse.
    font_size = float(raw_font_size) if raw_font_size else None
    return LayoutFont(
        font_id=attrib.get('ID'),
        font_family=attrib.get('FONTFAMILY'),
        font_size=font_size,
        is_bold='bold' in font_styles,
        is_italics='italics' in font_styles,
        is_subscript='subscript' in font_styles,
        is_superscript='superscript' in font_styles,
    )
def test_should_return_false_if_no_font_size_available(self):
    """Expect every relative-size check to be False when no token font has a size.

    Four tokens are built with ``font_size=None``; ``is_smallest_font_size``,
    ``is_largest_font_size`` and ``is_larger_than_average_font_size`` must
    each return ``False`` for all of them.
    """
    layout_tokens = [
        LayoutToken('', font=LayoutFont(font_id, font_size=None))
        for font_id in ('font1', 'font2', 'font3', 'font4')
    ]
    relative_font_size_feature = RelativeFontSizeFeature(layout_tokens)
    expected = [False, False, False, False]

    smallest_flags = list(map(
        relative_font_size_feature.is_smallest_font_size, layout_tokens
    ))
    assert smallest_flags == expected

    largest_flags = list(map(
        relative_font_size_feature.is_largest_font_size, layout_tokens
    ))
    assert largest_flags == expected

    larger_than_average_flags = list(map(
        relative_font_size_feature.is_larger_than_average_font_size,
        layout_tokens
    ))
    assert larger_than_average_flags == expected
def test_should_return_is_smallest_largest_and_larger_than_avg(self):
    """Check the relative-size flags for four tokens with font sizes 1 to 4.

    Only the size-1 token is the smallest, only the size-4 token is the
    largest, and the size-3 and size-4 tokens are larger than average.
    """
    fonts_and_sizes = (
        ('font1', 1),
        ('font2', 2),
        ('font3', 3),
        ('font4', 4),
    )
    layout_tokens = [
        LayoutToken('', font=LayoutFont(font_id, font_size=size))
        for font_id, size in fonts_and_sizes
    ]
    relative_font_size_feature = RelativeFontSizeFeature(layout_tokens)

    smallest_flags = list(map(
        relative_font_size_feature.is_smallest_font_size, layout_tokens
    ))
    assert smallest_flags == [True, False, False, False]

    largest_flags = list(map(
        relative_font_size_feature.is_largest_font_size, layout_tokens
    ))
    assert largest_flags == [False, False, False, True]

    larger_than_average_flags = list(map(
        relative_font_size_feature.is_larger_than_average_font_size,
        layout_tokens
    ))
    assert larger_than_average_flags == [False, False, True, True]
def test_should_return_higherfont_without_previous_token(self):
    """Expect 'HIGHERFONT' when ``previous_token`` is None."""
    only_token = LayoutToken('', font=LayoutFont(font_id='dummy'))
    feature = get_token_font_size_feature(
        previous_token=None,
        current_token=only_token,
    )
    assert feature == 'HIGHERFONT'
# Module preamble: imports the layout-document types and TEI helper functions,
# creates the module-level LOGGER, and defines token text constants plus
# LayoutFont fixtures. Every font fixture uses font_id 'font1' and differs only
# in the style flag(s) it sets (italics, bold, bold+italics, subscript,
# superscript).
# NOTE(review): BOLD_FONT_1 is assigned twice with identical arguments; the
# second assignment is redundant and can be removed.
# NOTE(review): `logging` is referenced here but no `import logging` is visible
# in this chunk - presumably it is imported earlier in the file; confirm.
from sciencebeam_parser.document.layout_document import (LayoutBlock, LayoutToken, LayoutFont) from sciencebeam_parser.document.tei.common import ( get_text_content, get_tei_xpath_text_content_list, iter_layout_block_tei_children, TEI_E) LOGGER = logging.getLogger(__name__) TOKEN_1 = 'token1' TOKEN_2 = 'token2' TOKEN_3 = 'token3' TOKEN_4 = 'token4' ITALICS_FONT_1 = LayoutFont(font_id='font1', is_italics=True) BOLD_FONT_1 = LayoutFont(font_id='font1', is_bold=True) BOLD_FONT_1 = LayoutFont(font_id='font1', is_bold=True) BOLD_ITALICS_FONT_1 = LayoutFont(font_id='font1', is_bold=True, is_italics=True) SUBSCRIPT_FONT_1 = LayoutFont(font_id='font1', is_subscript=True) SUPERSCRIPT_FONT_1 = LayoutFont(font_id='font1', is_superscript=True) class TestIterLayoutBlockTeiChildren: