def test_language_shift(value, expected_language, normalized):
    """Check attributes, serialization, equality and hashing of a LanguageShift.

    The three arguments are presumably supplied by a pytest parametrization
    that is not visible in this block: ``value`` is the shift code passed to
    ``LanguageShift.of``, ``expected_language`` is the language the resulting
    token must report and ``normalized`` is its expected ``normalized`` flag.
    """
    token = LanguageShift.of(value)
    twin = LanguageShift.of(value)
    different = ValueToken.of(r"%bar")

    # Plain attributes of the token.
    assert token.value == value
    assert token.clean_value == value
    assert token.get_key() == f"LanguageShift⁝{value}"
    assert token.lemmatizable is False
    assert token.normalized == normalized
    assert token.language == expected_language

    # Serialized form of the token.
    assert_token_serialization(
        token,
        {
            "type": "LanguageShift",
            "normalized": normalized,
            "language": token.language.name,
        },
    )

    # Two shifts built from the same value are interchangeable.
    assert token == twin
    assert hash(token) == hash(twin)

    # A token of another type with another value differs in both respects.
    assert token != different
    assert hash(token) != hash(different)

    # Even with the same value, a ValueToken is not equal to the shift.
    assert token != ValueToken.of(value)
def test_text_line_of_iterable(code: str, language: Language) -> None:
    """Check the content, key and ATF of a line built by ``TextLine.of_iterable``.

    The input tokens are created without explicit languages or enclosure
    information; the expected tokens spell out the language and the
    broken-away enclosure that the resulting line content must carry.

    ``code`` is the language shift code placed after the first word and
    ``language`` is the language expected on the word following that shift.
    """
    broken_away = frozenset({EnclosureType.BROKEN_AWAY})

    given = [
        Word.of([Reading.of_name("first")]),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")]),
        LanguageShift.of("%sb"),
        LoneDeterminative.of([Determinative.of([Reading.of_name("third")])]),
        Word.of([BrokenAway.open(), Reading.of_name("fourth")]),
        UnknownNumberOfSigns.of(),
        BrokenAway.close(),
    ]

    # The reading inside the brackets is expected to be marked as broken away,
    # both on the reading itself and on its name part.
    fourth_name = ValueToken(broken_away, ErasureState.NONE, "fourth")
    fourth_reading = Reading.of((fourth_name, )).set_enclosure_type(broken_away)

    expected_tokens = (
        Word.of([Reading.of_name("first")], DEFAULT_LANGUAGE),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")], language),
        LanguageShift.of("%sb"),
        LoneDeterminative.of(
            [Determinative.of([Reading.of_name("third")])],
            Language.AKKADIAN,
        ),
        Word.of([BrokenAway.open(), fourth_reading], DEFAULT_LANGUAGE),
        UnknownNumberOfSigns(broken_away, ErasureState.NONE),
        BrokenAway.close().set_enclosure_type(broken_away),
    )

    line = TextLine.of_iterable(LINE_NUMBER, given)

    assert line.line_number == LINE_NUMBER
    assert line.content == expected_tokens

    # The line key embeds the ATF followed by the keys of all tokens.
    token_keys = "⁚".join(token.get_key() for token in expected_tokens)
    assert line.key == f"TextLine⁞{line.atf}⟨{token_keys}⟩"

    assert line.atf == f"1. first {code} second %sb {{third}} [fourth ...]"
def test_parse_atf_language_shifts(code: str,
                                   expected_language: Language) -> None:
    """Check that parsed words carry the language of the preceding shift.

    The same word is written three times: before any shift, after the shift
    given by ``code`` and after ``%sb``. The parsed lines must equal a line
    whose words have ``DEFAULT_LANGUAGE``, ``expected_language`` and
    ``Language.AKKADIAN`` respectively.
    """
    word = "ha-am"
    atf = f"1. {word} {code} {word} %sb {word}"

    word_parts = [Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")]
    expected_tokens = (
        Word.of(word_parts, DEFAULT_LANGUAGE),
        LanguageShift.of(code),
        Word.of(word_parts, expected_language),
        LanguageShift.of("%sb"),
        Word.of(word_parts, Language.AKKADIAN),
    )
    expected_line = TextLine.of_iterable(LineNumber(1), expected_tokens)
    expected = Text((expected_line, ))

    assert parse_atf_lark(atf).lines == expected.lines
def test_parse_normalized_akkadain_shift() -> None:
    """Check parsing of a line that shifts to normalized Akkadian and back.

    After ``%n`` the word is expected to be an ``AkkadianWord``; after
    ``%sux`` it is expected to be a regular ``Word`` in Sumerian.
    """
    word = "ha"
    atf = f"1. {word} %n {word} %sux {word}"

    expected_tokens = (
        Word.of((Reading.of_name(word), ), DEFAULT_LANGUAGE),
        LanguageShift.normalized_akkadian(),
        AkkadianWord.of((ValueToken.of(word), )),
        LanguageShift.of("%sux"),
        Word.of((Reading.of_name(word), ), Language.SUMERIAN),
    )
    expected_line = TextLine.of_iterable(LineNumber(1), expected_tokens)
    expected = Text((expected_line, ))

    assert parse_atf_lark(atf).lines == expected.lines
Ejemplo n.º 5
0
def expected_transliteration(language: Language) -> Sequence[Token]:
    """Return the expected tokens for ``bu %es [kur ...]``.

    Args:
        language: Language assigned to the first word ("bu"), i.e. the word
            that precedes the ``%es`` shift.

    Returns:
        A tuple of tokens: the first word in ``language``, the ``%es`` shift,
        an Emesal word whose reading is marked as broken away, an unknown
        number of signs inside the broken-away enclosure and the closing
        bracket carrying the same enclosure.
    """
    broken_away = frozenset({EnclosureType.BROKEN_AWAY})

    first_word = Word.of([Reading.of_name("bu")], language)
    shift = LanguageShift.of("%es")

    # "kur" sits inside the brackets, so both the reading and its name part
    # carry the broken-away enclosure.
    kur_name = ValueToken(broken_away, ErasureState.NONE, "kur")
    kur_reading = Reading.of((kur_name, )).set_enclosure_type(broken_away)
    broken_word = Word.of([BrokenAway.open(), kur_reading], Language.EMESAL)

    unknown_signs = UnknownNumberOfSigns(broken_away, ErasureState.NONE)
    closing = BrokenAway.close().set_enclosure_type(broken_away)

    return (first_word, shift, broken_word, unknown_signs, closing)
Ejemplo n.º 6
0
from ebl.transliteration.domain.markup import EmphasisPart, LanguagePart, StringPart
from ebl.transliteration.domain.note_line import NoteLine
from ebl.transliteration.domain.sign_tokens import Reading
from ebl.transliteration.domain.tokens import (
    EnclosureType,
    ErasureState,
    LanguageShift,
    Token,
    UnknownNumberOfSigns,
    ValueToken,
)
from ebl.transliteration.domain.word_tokens import Word

# Token fixture: the word "bu", a "%es" language shift, then the word "kur"
# and an unknown number of signs between an opening and a closing broken-away
# bracket. No languages or enclosure types are set explicitly here.
TRANSLITERATION: Sequence[Token] = (
    Word.of([Reading.of_name("bu")]),
    LanguageShift.of("%es"),
    Word.of([BrokenAway.open(), Reading.of_name("kur")]),
    UnknownNumberOfSigns.of(),
    BrokenAway.close(),
)
# ATF text matching TRANSLITERATION.
# NOTE(review): presumably the expected serialization of the tokens above;
# confirm against the tests that use it.
EXPECTED_ATF = "bu %es [kur ...]"


def expected_transliteration(language: Language) -> Sequence[Token]:
    return (
        Word.of([Reading.of_name("bu")], language),
        LanguageShift.of("%es"),
        Word.of(
            [
                BrokenAway.open(),
                Reading.of((ValueToken(
         LineNumber(1),
         [
             Word.of(unique_lemma=(WordId("nu I"), ),
                     parts=[Reading.of_name("bu")])
         ],
     ),
 ),
 (
     TextLine.of_iterable(
         LineNumber(1),
         [
             Word.of(unique_lemma=(WordId("nu I"), ),
                     parts=[Reading.of_name("bu")])
         ],
     ),
     TextLine.of_iterable(LineNumber(1), [LanguageShift.of("%sux")]),
     TextLine.of_iterable(LineNumber(1), [LanguageShift.of("%sux")]),
 ),
 (
     TextLine.of_iterable(
         LineNumber(1),
         [
             Word.of(unique_lemma=(WordId("nu I"), ),
                     parts=[Reading.of_name("bu")])
         ],
     ),
     TextLine.of_iterable(LineNumber(1),
                          [Word.of([Reading.of_name("mu")])]),
     TextLine.of_iterable(LineNumber(1),
                          [Word.of([Reading.of_name("mu")])]),
 ),
 def ebl_atf_text_line__greek_shift(self, value):
     """Build a ``LanguageShift`` token from the string form of ``value``.

     NOTE(review): the name suggests a parser-transformer callback for the
     Greek shift rule; the enclosing class is not visible here, so confirm.
     """
     shift_code = str(value)
     return LanguageShift.of(shift_code)
 def ebl_atf_text_line__normalized_akkadian_shift(self, value):
     """Build a ``LanguageShift`` token from the string form of ``value``.

     NOTE(review): the name suggests a parser-transformer callback for the
     normalized Akkadian shift rule; the enclosing class is not visible
     here, so confirm.
     """
     shift_code = str(value)
     return LanguageShift.of(shift_code)
from ebl.transliteration.domain.sign_tokens import Divider, Reading
from ebl.transliteration.domain.tokens import (
    Column,
    CommentaryProtocol,
    ErasureState,
    Joiner,
    LanguageShift,
    Tabulation,
    UnknownNumberOfSigns,
    ValueToken,
    Variant,
)

# Sample tokens of three different kinds: an unknown number of signs marked
# as broken away, a "%sux" language shift and an opening document-oriented
# gloss.
# NOTE(review): how this list is used is not visible in this part of the file.
TOKENS = [
    UnknownNumberOfSigns(frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE),
    LanguageShift.of("%sux"),
    DocumentOrientedGloss.open(),
]


def test_value_token():
    """Check the basic attributes of a ``ValueToken`` built from a plain string."""
    value = "value"
    token = ValueToken.of(value)
    # NOTE(review): `equal` and `other` are not used by the assertions visible
    # here; presumably they feed equality/hash checks further down — confirm.
    equal = ValueToken.of(value)
    other = ValueToken.of("anothervalue")

    assert token.value == value
    assert token.clean_value == value
    # The key is the type name and the value joined by "⁝".
    assert token.get_key() == f"ValueToken⁝{value}"
    assert token.lemmatizable is False
                     Joiner.hyphen(),
                     UnknownNumberOfSigns.of(),
                     Joiner.hyphen(),
                     Reading.of_name("ad"),
                 )),
             ),
         )
     ],
 ),
 (
     "1. %grc ΑαΒβΓγΔδΕεΖζΗηΘθΙιΚκΛλΜμΝνΞξΟοΠπΡρΣσςΤτΥυΦφΧχΨψΩω",
     [
         TextLine.of_iterable(
             LineNumber(1),
             (
                 LanguageShift.of("%grc"),
                 GreekWord.of([
                     GreekLetter.of(letter)
                     for letter in ("ΑαΒβΓγΔδΕεΖζΗηΘθΙιΚκΛλΜμ"
                                    "ΝνΞξΟοΠπΡρΣσςΤτΥυΦφΧχΨψΩω")
                 ]),
             ),
         )
     ],
 ),
 (
     "1. %akkgrc α",
     [
         TextLine.of_iterable(
             LineNumber(1),
             (