def test_should_return_higherfont_if_font_size_is_larger(self):
     """A current font size above the previous one yields 'HIGHERFONT'."""
     smaller_token = LayoutToken(
         '', font=LayoutFont(font_id='dummy', font_size=1))
     larger_token = LayoutToken(
         '', font=LayoutFont(font_id='dummy', font_size=2))
     feature = get_token_font_size_feature(previous_token=smaller_token,
                                           current_token=larger_token)
     assert feature == 'HIGHERFONT'
 def test_should_return_higherfont_if_new_font_has_no_size(self):
     """A current font with `font_size=None` yields 'HIGHERFONT'."""
     sized_token = LayoutToken(
         '', font=LayoutFont(font_id='dummy', font_size=1))
     unsized_token = LayoutToken(
         '', font=LayoutFont(font_id='dummy', font_size=None))
     feature = get_token_font_size_feature(previous_token=sized_token,
                                           current_token=unsized_token)
     assert feature == 'HIGHERFONT'
 def test_should_return_samefont_if_font_size_is_the_same(self):
     """Equal previous and current font sizes yield 'SAMEFONTSIZE'."""
     first_token = LayoutToken(
         '', font=LayoutFont(font_id='dummy', font_size=1))
     second_token = LayoutToken(
         '', font=LayoutFont(font_id='dummy', font_size=1))
     feature = get_token_font_size_feature(previous_token=first_token,
                                           current_token=second_token)
     assert feature == 'SAMEFONTSIZE'
 def test_should_return_lowerfont_if_font_size_is_smaller(self):
     """A current font size below the previous one yields 'LOWERFONT'."""
     larger_token = LayoutToken(
         '', font=LayoutFont(font_id='dummy', font_size=2))
     smaller_token = LayoutToken(
         '', font=LayoutFont(font_id='dummy', font_size=1))
     feature = get_token_font_size_feature(previous_token=larger_token,
                                           current_token=smaller_token)
     assert feature == 'LOWERFONT'
Exemple #5
0
 def parse_font(self, font_node: etree.ElementBase) -> LayoutFont:
     """Build a `LayoutFont` from the attributes of a font node.

     Reads the `ID`, `FONTFAMILY`, `FONTSIZE` and `FONTSTYLE` attributes
     of `font_node`.

     Args:
         font_node: element whose attributes describe the font.

     Returns:
         A `LayoutFont`; `font_size` is `None` when the `FONTSIZE`
         attribute is missing or empty, and each style flag is set when
         the corresponding keyword appears in `FONTSTYLE`.

     Raises:
         ValueError: if `FONTSIZE` is present but not a valid number.
     """
     attrib = font_node.attrib
     # split() without a separator tolerates repeated / non-space
     # whitespace and yields an empty list for a missing attribute.
     font_styles = (attrib.get('FONTSTYLE') or '').split()
     # float(None) would raise TypeError; a font without a size is
     # represented as font_size=None instead.
     font_size_str = attrib.get('FONTSIZE')
     font_size = float(font_size_str) if font_size_str else None
     return LayoutFont(font_id=attrib.get('ID'),
                       font_family=attrib.get('FONTFAMILY'),
                       font_size=font_size,
                       is_bold='bold' in font_styles,
                       is_italics='italics' in font_styles,
                       is_subscript='subscript' in font_styles,
                       is_superscript='superscript' in font_styles)
 def test_should_return_false_if_no_font_size_available(self):
     """All relative font size checks are False when no token has a size."""
     font_ids = ('font1', 'font2', 'font3', 'font4')
     tokens = [
         LayoutToken('', font=LayoutFont(font_id, font_size=None))
         for font_id in font_ids
     ]
     feature = RelativeFontSizeFeature(tokens)
     all_false = [False, False, False, False]

     smallest_flags = [
         feature.is_smallest_font_size(token) for token in tokens
     ]
     assert smallest_flags == all_false

     largest_flags = [
         feature.is_largest_font_size(token) for token in tokens
     ]
     assert largest_flags == all_false

     above_average_flags = [
         feature.is_larger_than_average_font_size(token)
         for token in tokens
     ]
     assert above_average_flags == all_false
 def test_should_return_is_smallest_largest_and_larger_than_avg(self):
     """Checks the three relative size flags for font sizes 1 to 4."""
     font_ids = ('font1', 'font2', 'font3', 'font4')
     tokens = [
         LayoutToken('', font=LayoutFont(font_id, font_size=size))
         for size, font_id in enumerate(font_ids, start=1)
     ]
     feature = RelativeFontSizeFeature(tokens)

     smallest_flags = [
         feature.is_smallest_font_size(token) for token in tokens
     ]
     assert smallest_flags == [True, False, False, False]

     largest_flags = [
         feature.is_largest_font_size(token) for token in tokens
     ]
     assert largest_flags == [False, False, False, True]

     above_average_flags = [
         feature.is_larger_than_average_font_size(token)
         for token in tokens
     ]
     assert above_average_flags == [False, False, True, True]
 def test_should_return_higherfont_without_previous_token(self):
     """With `previous_token=None` the feature is 'HIGHERFONT'."""
     only_token = LayoutToken('', font=LayoutFont(font_id='dummy'))
     feature = get_token_font_size_feature(previous_token=None,
                                           current_token=only_token)
     assert feature == 'HIGHERFONT'
from sciencebeam_parser.document.layout_document import (LayoutBlock,
                                                         LayoutToken,
                                                         LayoutFont)
from sciencebeam_parser.document.tei.common import (
    get_text_content, get_tei_xpath_text_content_list,
    iter_layout_block_tei_children, TEI_E)

LOGGER = logging.getLogger(__name__)

# Token text values shared by the tests in this module.
TOKEN_1 = 'token1'
TOKEN_2 = 'token2'
TOKEN_3 = 'token3'
TOKEN_4 = 'token4'

# Font fixtures: all use the id 'font1' and differ only in the style
# flag(s) that are enabled.
ITALICS_FONT_1 = LayoutFont(font_id='font1', is_italics=True)

BOLD_FONT_1 = LayoutFont(font_id='font1', is_bold=True)

BOLD_ITALICS_FONT_1 = LayoutFont(font_id='font1',
                                 is_bold=True,
                                 is_italics=True)

SUBSCRIPT_FONT_1 = LayoutFont(font_id='font1', is_subscript=True)

SUPERSCRIPT_FONT_1 = LayoutFont(font_id='font1', is_superscript=True)


class TestIterLayoutBlockTeiChildren: