Ejemplo n.º 1
0
def test_note_line():
    parts = (
        StringPart("this is a note "),
        EmphasisPart("italic text"),
        LanguagePart.of_transliteration(Language.AKKADIAN, TRANSLITERATION),
        LanguagePart.of_transliteration(Language.SUMERIAN, TRANSLITERATION),
        LanguagePart.of_transliteration(Language.EMESAL, TRANSLITERATION),
    )
    line = NoteLine(parts)

    assert line.parts == (
        StringPart("this is a note "),
        EmphasisPart("italic text"),
        LanguagePart(Language.AKKADIAN,
                     expected_transliteration(Language.AKKADIAN)),
        LanguagePart(Language.SUMERIAN,
                     expected_transliteration(Language.SUMERIAN)),
        LanguagePart(Language.EMESAL,
                     expected_transliteration(Language.EMESAL)),
    )
    assert line.atf == (
        "#note: this is a note "
        "@i{italic text}"
        f"@akk{{{EXPECTED_ATF}}}@sux{{{EXPECTED_ATF}}}@es{{{EXPECTED_ATF}}}")
    assert line.lemmatization == (
        LemmatizationToken("this is a note "),
        LemmatizationToken("@i{italic text}"),
        LemmatizationToken(f"@akk{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@sux{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@es{{{EXPECTED_ATF}}}"),
    )
def expected_language_part(language: Language, transliteration: str) -> LanguagePart:
    return LanguagePart.of_transliteration(language, parse_text(transliteration))
from ebl.transliteration.domain.markup import (
    BibliographyPart,
    EmphasisPart,
    LanguagePart,
    MarkupPart,
    StringPart,
    rstrip,
    title_case,
    to_title,
)
from ebl.transliteration.domain.sign_tokens import Divider, Reading

PUNCTUATION = ";,:.-–—"
TEXT = "sed nec tortor varius, iaculis."
LANGUAGE_PART = LanguagePart(
    Language.AKKADIAN,
    [Reading.of_name("kur"), Divider.of(":")])
BIBLIOGRAPHY_PART = BibliographyPart(
    Reference(BibliographyId("1"), ReferenceType.DISCUSSION,
              TEXT + PUNCTUATION))


@pytest.mark.parametrize(  # pyre-ignore[56]
    "part,expected",
    [
        (
            StringPart(f"{PUNCTUATION}A{PUNCTUATION}A{PUNCTUATION}"),
            StringPart(f"{PUNCTUATION}A{PUNCTUATION}A"),
        ),
        (
            EmphasisPart(f"{PUNCTUATION}A{PUNCTUATION}A{PUNCTUATION}"),
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
class TransliteratedFragmentFactory(FragmentFactory):
    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
 def ebl_atf_text_line__language_part(
         self, language: Token,
         transliteration: Sequence[EblToken]) -> LanguagePart:
     return LanguagePart.of_transliteration(Language.of_atf(f"%{language}"),
                                            transliteration)
Ejemplo n.º 7
0
        ),
        (
            Text.of_iterable([NoteLine((StringPart("this is a note "), ))]),
            Text.of_iterable(
                [NoteLine((StringPart("this is another note "), ))]),
            Text.of_iterable(
                [NoteLine((StringPart("this is another note "), ))]),
        ),
        (
            Text.of_iterable([NoteLine((StringPart("this is a note "), ))]),
            Text.of_iterable([NoteLine((EmphasisPart("this is a note "), ))]),
            Text.of_iterable([NoteLine((EmphasisPart("this is a note "), ))]),
        ),
        (
            Text.of_iterable([
                NoteLine((LanguagePart.of_transliteration(
                    Language.AKKADIAN, (ValueToken.of("bu"), )), ))
            ]),
            Text.of_iterable([
                NoteLine((LanguagePart.of_transliteration(
                    Language.AKKADIAN,
                    (Word.of([Reading.of_name("bu")]), )), ))
            ]),
            Text.of_iterable([
                NoteLine((LanguagePart.of_transliteration(
                    Language.AKKADIAN,
                    (Word.of([Reading.of_name("bu")]), )), ))
            ]),
        ),
    ],
)
def test_merge(old: Text, new: Text, expected: Text) -> None:
     SealDollarLine(1),
     {
         "type": "SealDollarLine",
         "prefix": "$",
         "content": [OneOfTokenSchema().dump(ValueToken.of(" seal 1"))],
         "number": 1,
         "displayValue": "seal 1",
     },
 ),
 (
     NoteLine(
         (
             StringPart("a note "),
             EmphasisPart("italic"),
             LanguagePart.of_transliteration(
                 Language.AKKADIAN, [Word.of([Reading.of_name("bu")])]
             ),
             BibliographyPart.of(BibliographyId("A"), "1-4"),
         )
     ),
     {
         "type": "NoteLine",
         "prefix": "#note: ",
         "parts": [
             {"type": "StringPart", "text": "a note "},
             {"type": "EmphasisPart", "text": "italic"},
             {
                 "type": "LanguagePart",
                 "language": Language.AKKADIAN.name,
                 "tokens": [
                     OneOfTokenSchema().dump(Word.of([Reading.of_name("bu")]))
Ejemplo n.º 9
0
 def make_part(self, data, **kwargs) -> LanguagePart:
     return LanguagePart.of_transliteration(data["language"],
                                            data["tokens"])