def test_should_add_lb_element_before_token_without_tag(self):
    """Check an empty line followed by an untagged token.

    The only child of the parent is expected to be a TeiTagNames.LB
    element, with the token text carried on that element's tail.
    """
    front = E.front()
    lines = [
        TeiLine([]),
        TeiLine([_tei_text(TOKEN_1, None)]),
    ]
    tei_parent = _lines_to_tei(front, lines)
    children = list(tei_parent)
    assert [child.tag for child in children] == [TeiTagNames.LB]
    first_child = children[0]
    assert first_child.tail == TOKEN_1
def test_should_preserve_space_after_lb_within_tokens_with_same_tag(self):
    """Check two lines sharing a tag where the second token starts with a space.

    Both tokens are expected inside one element for the shared tag, with
    the TeiTagNames.LB element between them and the leading space kept
    after it.
    """
    front = E.front()
    first_line = TeiLine([_tei_text(TOKEN_1, TAG_1)])
    second_line = TeiLine([_tei_text(' ' + TOKEN_2, TAG_1)])
    tei_parent = _lines_to_tei(front, [first_line, second_line])
    children = list(tei_parent)
    assert [child.tag for child in children] == [TAG_1]
    actual_xml = _to_xml(children[0])
    expected_xml = '<{tag1}>{token1}<{lb}/> {token2}</{tag1}>'.format(
        tag1=TAG_1, token1=TOKEN_1, token2=TOKEN_2, lb=TeiTagNames.LB)
    assert actual_xml == expected_xml
def test_should_not_include_standalone_space_after_lb_in_tag_before_other_different_tag(
        self):
    """Check an untagged space between two lines with different tags.

    The space is expected outside the first tag's element (after its
    closing tag), while the TeiTagNames.LB element stays inside it.
    """
    front = E.front()
    first_line = TeiLine([_tei_text(TOKEN_1, TAG_1)])
    second_line = TeiLine([
        _tei_text(' ', None),
        _tei_text(TOKEN_2, TAG_2),
    ])
    tei_parent = _lines_to_tei(front, [first_line, second_line])
    actual_xml = _to_xml(tei_parent)
    expected_xml = (
        '<front><{tag1}>{token1}<{lb}/></{tag1}> <{tag2}>{token2}</{tag2}></front>'
        .format(
            tag1=TAG_1, tag2=TAG_2,
            token1=TOKEN_1, token2=TOKEN_2,
            lb=TeiTagNames.LB))
    assert actual_xml == expected_xml
def test_should_map_tag_to_tei_path(self):
    """Check that a tag is replaced by its entry in tag_to_tei_path_mapping.

    A token tagged TAG_1 with a mapping of TAG_1 to TAG_2 is expected to
    produce a single TAG_2 element holding the token text.
    """
    front = E.front()
    lines = [TeiLine([_tei_text(TOKEN_1, TAG_1)])]
    mapping = {TAG_1: TAG_2}
    tei_parent = _lines_to_tei(
        front, lines, tag_to_tei_path_mapping=mapping)
    children = list(tei_parent)
    assert [child.tag for child in children] == [TAG_2]
    first_child = children[0]
    assert first_child.text == TOKEN_1
def test_should_map_tag_to_nested_tei_path(self):
    """Check that a slash-separated mapping value yields nested elements.

    With TAG_1 mapped to 'parent/child', the token is expected inside a
    'child' element that is itself the only child of a 'parent' element.
    """
    front = E.front()
    lines = [TeiLine([_tei_text(TOKEN_1, TAG_1)])]
    mapping = {TAG_1: 'parent/child'}
    tei_parent = _lines_to_tei(
        front, lines, tag_to_tei_path_mapping=mapping)

    outer_elements = list(tei_parent)
    assert [element.tag for element in outer_elements] == ['parent']

    inner_elements = list(outer_elements[0])
    assert [element.tag for element in inner_elements] == ['child']
    assert inner_elements[0].text == TOKEN_1
def test_should_combine_tokens(self):
    """Check that same-tag tokens on one line are merged into one element.

    The element text is expected to be the two tokens separated by the
    space that prefixes the second token.
    """
    front = E.front()
    tokens = [
        _tei_text(TOKEN_1, TAG_1),
        _tei_text(' ' + TOKEN_2, TAG_1),
    ]
    tei_parent = _lines_to_tei(front, [TeiLine(tokens)])
    children = list(tei_parent)
    assert [child.tag for child in children] == [TAG_1]
    combined_text = children[0].text
    assert combined_text == ' '.join((TOKEN_1, TOKEN_2))
def test_should_use_common_path_between_similar_nested_tag_paths(self):
    """Check that two mapped paths with the same first segment share it.

    TAG_1 and TAG_2 map to 'parent/child1' and 'parent/child2'; the
    output is expected to hold one 'parent' element containing both
    children rather than two separate 'parent' elements.
    """
    front = E.front()
    lines = [
        TeiLine([
            _tei_text(TOKEN_1, TAG_1),
            _tei_text(TOKEN_2, TAG_2),
        ])
    ]
    mapping = {
        TAG_1: 'parent/child1',
        TAG_2: 'parent/child2'
    }
    tei_parent = _lines_to_tei(
        front, lines, tag_to_tei_path_mapping=mapping)
    actual_xml = _to_xml(tei_parent)
    expected_xml = (
        '<front><parent><child1>{token1}</child1>'
        '<child2>{token2}</child2></parent></front>'.format(
            token1=TOKEN_1, token2=TOKEN_2))
    assert actual_xml == expected_xml
def test_should_apply_default_tag(self):
    """Check that an empty-string tag uses the DEFAULT_TAG_KEY mapping.

    With DEFAULT_TAG_KEY mapped to 'other', the token is expected to end
    up in a single 'other' element.
    """
    front = E.front()
    lines = [TeiLine([_tei_text(TOKEN_1, '')])]
    mapping = {DEFAULT_TAG_KEY: 'other'}
    tei_parent = _lines_to_tei(
        front, lines, tag_to_tei_path_mapping=mapping)
    child_tags = [child.tag for child in list(tei_parent)]
    assert child_tags == ['other']
def test_should_convert_single_token(self):
    """Check that one tagged token becomes one element with that tag and text."""
    front = E.front()
    lines = [TeiLine([_tei_text(TOKEN_1, TAG_1)])]
    tei_parent = _lines_to_tei(front, lines)
    children = list(tei_parent)
    assert [child.tag for child in children] == [TAG_1]
    assert [child.text for child in children] == [TOKEN_1]