def test_language_shift(value, expected_language, normalized):
    """Verify attributes, serialization, equality and hashing of a LanguageShift.

    ``value`` is the shift code handed to ``LanguageShift.of``;
    ``expected_language`` and ``normalized`` are the values the resulting
    token is required to report.
    """
    token = LanguageShift.of(value)
    twin = LanguageShift.of(value)
    unrelated = ValueToken.of(r"%bar")

    # Plain attributes: the code is kept verbatim and the token is never
    # lemmatizable.
    assert token.value == value
    assert token.clean_value == value
    assert token.get_key() == f"LanguageShift⁝{value}"
    assert token.lemmatizable is False
    assert token.normalized == normalized
    assert token.language == expected_language

    # Serialized form, checked through the shared serialization helper.
    expected_dump = {
        "type": "LanguageShift",
        "normalized": normalized,
        "language": token.language.name,
    }
    assert_token_serialization(token, expected_dump)

    # Two shifts built from the same code are equal and hash alike.
    assert token == twin
    assert hash(token) == hash(twin)

    # A different token differs in equality and hash; a ValueToken carrying
    # the same text is still not equal to the shift.
    assert token != unrelated
    assert hash(token) != hash(unrelated)
    assert token != ValueToken.of(value)
def test_text_line_of_iterable(code: str, language: Language) -> None:
    """Build a TextLine from raw tokens and compare it with the expected content.

    The input mixes words, two language shifts (``code`` and ``%sb``), a lone
    determinative and a broken-away span. The expected tokens spell out the
    language and enclosure type each token must carry afterwards.
    """
    broken_away = frozenset({EnclosureType.BROKEN_AWAY})

    source_tokens = [
        Word.of([Reading.of_name("first")]),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")]),
        LanguageShift.of("%sb"),
        LoneDeterminative.of([Determinative.of([Reading.of_name("third")])]),
        Word.of([BrokenAway.open(), Reading.of_name("fourth")]),
        UnknownNumberOfSigns.of(),
        BrokenAway.close(),
    ]

    # Expected pieces for the part of the line inside the broken-away span.
    fourth_reading = Reading.of(
        (ValueToken(broken_away, ErasureState.NONE, "fourth"),)
    ).set_enclosure_type(broken_away)
    fourth_word = Word.of([BrokenAway.open(), fourth_reading], DEFAULT_LANGUAGE)
    third_determinative = LoneDeterminative.of(
        [Determinative.of([Reading.of_name("third")])],
        Language.AKKADIAN,
    )

    expected_tokens = (
        Word.of([Reading.of_name("first")], DEFAULT_LANGUAGE),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")], language),
        LanguageShift.of("%sb"),
        third_determinative,
        fourth_word,
        UnknownNumberOfSigns(broken_away, ErasureState.NONE),
        BrokenAway.close().set_enclosure_type(broken_away),
    )

    line = TextLine.of_iterable(LINE_NUMBER, source_tokens)

    assert line.line_number == LINE_NUMBER
    assert line.content == expected_tokens
    # The key combines the line's ATF with the keys of all content tokens.
    assert (
        line.key
        == f"TextLine⁞{line.atf}⟨{'⁚'.join(token.get_key() for token in expected_tokens)}⟩"
    )
    assert line.atf == f"1. first {code} second %sb {{third}} [fourth ...]"
def test_parse_atf_language_shifts(code: str, expected_language: Language) -> None:
    """Parse a line with two language shifts and compare against a hand-built line.

    The same word appears three times: before any shift, after ``code`` and
    after ``%sb``. The expected words carry ``DEFAULT_LANGUAGE``,
    ``expected_language`` and ``Language.AKKADIAN`` respectively.
    """
    word = "ha-am"
    readings = [Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")]
    atf = f"1. {word} {code} {word} %sb {word}"

    expected_line = TextLine.of_iterable(
        LineNumber(1),
        (
            Word.of(readings, DEFAULT_LANGUAGE),
            LanguageShift.of(code),
            Word.of(readings, expected_language),
            LanguageShift.of("%sb"),
            Word.of(readings, Language.AKKADIAN),
        ),
    )
    expected = Text((expected_line,))

    assert parse_atf_lark(atf).lines == expected.lines
def test_parse_normalized_akkadain_shift() -> None:
    """Parse a line using the ``%n`` shift and compare against a hand-built line.

    The word after ``%n`` is expected as an ``AkkadianWord``; the word after
    the following ``%sux`` shift is expected as a ``Word`` in
    ``Language.SUMERIAN``.
    """
    word = "ha"
    atf = f"1. {word} %n {word} %sux {word}"

    expected_line = TextLine.of_iterable(
        LineNumber(1),
        (
            Word.of((Reading.of_name(word),), DEFAULT_LANGUAGE),
            LanguageShift.normalized_akkadian(),
            AkkadianWord.of((ValueToken.of(word),)),
            LanguageShift.of("%sux"),
            Word.of((Reading.of_name(word),), Language.SUMERIAN),
        ),
    )
    expected = Text((expected_line,))

    assert parse_atf_lark(atf).lines == expected.lines
def expected_transliteration(language: Language) -> Sequence[Token]:
    """Return the expected token tuple for a "bu %es [kur ...]"-shaped line.

    The first word carries ``language``; the word following the ``%es`` shift
    carries ``Language.EMESAL``. Every token from the opening to the closing
    ``BrokenAway`` marker, apart from the opening marker itself, is given the
    ``BROKEN_AWAY`` enclosure type.
    """
    broken_away = frozenset({EnclosureType.BROKEN_AWAY})

    kur_reading = Reading.of(
        (ValueToken(broken_away, ErasureState.NONE, "kur"),)
    ).set_enclosure_type(broken_away)

    leading_word = Word.of([Reading.of_name("bu")], language)
    shift = LanguageShift.of("%es")
    broken_word = Word.of([BrokenAway.open(), kur_reading], Language.EMESAL)
    unknown_signs = UnknownNumberOfSigns(broken_away, ErasureState.NONE)
    closing = BrokenAway.close().set_enclosure_type(broken_away)

    return (leading_word, shift, broken_word, unknown_signs, closing)
from ebl.transliteration.domain.markup import EmphasisPart, LanguagePart, StringPart from ebl.transliteration.domain.note_line import NoteLine from ebl.transliteration.domain.sign_tokens import Reading from ebl.transliteration.domain.tokens import ( EnclosureType, ErasureState, LanguageShift, Token, UnknownNumberOfSigns, ValueToken, ) from ebl.transliteration.domain.word_tokens import Word TRANSLITERATION: Sequence[Token] = ( Word.of([Reading.of_name("bu")]), LanguageShift.of("%es"), Word.of([BrokenAway.open(), Reading.of_name("kur")]), UnknownNumberOfSigns.of(), BrokenAway.close(), ) EXPECTED_ATF = "bu %es [kur ...]" def expected_transliteration(language: Language) -> Sequence[Token]: return ( Word.of([Reading.of_name("bu")], language), LanguageShift.of("%es"), Word.of( [ BrokenAway.open(), Reading.of((ValueToken(
LineNumber(1), [ Word.of(unique_lemma=(WordId("nu I"), ), parts=[Reading.of_name("bu")]) ], ), ), ( TextLine.of_iterable( LineNumber(1), [ Word.of(unique_lemma=(WordId("nu I"), ), parts=[Reading.of_name("bu")]) ], ), TextLine.of_iterable(LineNumber(1), [LanguageShift.of("%sux")]), TextLine.of_iterable(LineNumber(1), [LanguageShift.of("%sux")]), ), ( TextLine.of_iterable( LineNumber(1), [ Word.of(unique_lemma=(WordId("nu I"), ), parts=[Reading.of_name("bu")]) ], ), TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("mu")])]), TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("mu")])]), ),
def ebl_atf_text_line__greek_shift(self, value):
    """Return a ``LanguageShift`` created from the string form of ``value``.

    NOTE(review): presumably called by the ATF parser with the matched Greek
    shift token -- confirm against the grammar.
    """
    shift_code = str(value)
    return LanguageShift.of(shift_code)
def ebl_atf_text_line__normalized_akkadian_shift(self, value):
    """Return a ``LanguageShift`` created from the string form of ``value``.

    NOTE(review): presumably called by the ATF parser with the matched
    normalized-Akkadian shift token -- confirm against the grammar.
    """
    shift_code = str(value)
    return LanguageShift.of(shift_code)
from ebl.transliteration.domain.sign_tokens import Divider, Reading from ebl.transliteration.domain.tokens import ( Column, CommentaryProtocol, ErasureState, Joiner, LanguageShift, Tabulation, UnknownNumberOfSigns, ValueToken, Variant, ) TOKENS = [ UnknownNumberOfSigns(frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE), LanguageShift.of("%sux"), DocumentOrientedGloss.open(), ] def test_value_token(): value = "value" token = ValueToken.of(value) equal = ValueToken.of(value) other = ValueToken.of("anothervalue") assert token.value == value assert token.clean_value == value assert token.get_key() == f"ValueToken⁝{value}" assert token.lemmatizable is False
Joiner.hyphen(), UnknownNumberOfSigns.of(), Joiner.hyphen(), Reading.of_name("ad"), )), ), ) ], ), ( "1. %grc ΑαΒβΓγΔδΕεΖζΗηΘθΙιΚκΛλΜμΝνΞξΟοΠπΡρΣσςΤτΥυΦφΧχΨψΩω", [ TextLine.of_iterable( LineNumber(1), ( LanguageShift.of("%grc"), GreekWord.of([ GreekLetter.of(letter) for letter in ("ΑαΒβΓγΔδΕεΖζΗηΘθΙιΚκΛλΜμ" "ΝνΞξΟοΠπΡρΣσςΤτΥυΦφΧχΨψΩω") ]), ), ) ], ), ( "1. %akkgrc α", [ TextLine.of_iterable( LineNumber(1), (