Example #1
0
def test_updating_alignment(client, bibliography, sign_repository, signs,
                            text_repository):
    """POST /alignment updates the chapter; the change is visible on GET."""
    allow_signs(signs, sign_repository)
    chapter = ChapterFactory.build()
    allow_references(chapter, bibliography)
    text_repository.create_chapter(chapter)

    alignment = 0
    omitted_words = (1, )

    # Build the expected chapter from the inside out instead of one
    # deeply nested attr.evolve expression: the first manuscript line of
    # the first variant is replaced by a single aligned word.
    aligned_word = Word.of(
        [
            Reading.of_name("ku"),
            Joiner.hyphen(),
            BrokenAway.open(),
            Reading.of_name("nu"),
            Joiner.hyphen(),
            Reading.of_name("ši"),
            BrokenAway.close(),
        ],
        alignment=alignment,
        variant=Word.of(
            [Logogram.of_name("KU")],
            language=Language.SUMERIAN,
        ),
    )
    old_manuscript = chapter.lines[0].variants[0].manuscripts[0]
    new_manuscript = attr.evolve(
        old_manuscript,
        line=TextLine.of_iterable(
            old_manuscript.line.line_number,
            (aligned_word, ),
        ),
        omitted_words=omitted_words,
    )
    new_variant = attr.evolve(
        chapter.lines[0].variants[0],
        manuscripts=(new_manuscript, ),
    )
    new_line = attr.evolve(chapter.lines[0], variants=(new_variant, ))
    updated_chapter = attr.evolve(chapter, lines=(new_line, ))

    expected_chapter = ApiChapterSchema().dump(updated_chapter)

    post_result = client.simulate_post(
        create_chapter_url(chapter, "/alignment"),
        body=json.dumps(DTO),
    )

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected_chapter

    get_result = client.simulate_get(create_chapter_url(chapter))

    assert get_result.status == falcon.HTTP_OK
    assert get_result.json == expected_chapter
Example #2
0
 def make_token(self, data, **kwargs):
     """Deserialize a Logogram token and restore its enclosure and erasure state."""
     logogram = Logogram.of(
         data["name_parts"],
         data["sub_index"],
         data["modifiers"],
         data["flags"],
         data["sign"],
         data["surrogate"],
     )
     enclosed = logogram.set_enclosure_type(frozenset(data["enclosure_type"]))
     return enclosed.set_erasure(data["erasure"])
def test_logogram(
    name_parts,
    sub_index,
    modifiers,
    flags,
    sign,
    surrogate,
    expected_value,
    expected_clean_value,
    expected_name,
) -> None:
    """Logogram.of yields a token whose properties and serialization match."""
    logogram = Logogram.of(name_parts, sub_index, modifiers, flags, sign,
                           surrogate)

    # The sign, when present, is part of the token key alongside the name.
    if sign:
        expected_parts = (*name_parts, sign)
    else:
        expected_parts = name_parts
    part_keys = "⁚".join(token.get_key() for token in expected_parts)

    assert logogram.value == expected_value
    assert logogram.clean_value == expected_clean_value
    assert logogram.get_key() == f"Logogram⁝{expected_value}⟨{part_keys}⟩"
    assert logogram.name_parts == name_parts
    assert logogram.name == expected_name
    assert logogram.modifiers == tuple(modifiers)
    assert logogram.flags == tuple(flags)
    assert logogram.lemmatizable is False
    assert logogram.sign == sign
    assert logogram.surrogate == tuple(surrogate)

    serialized = {
        "type": "Logogram",
        "name": expected_name,
        "nameParts": OneOfTokenSchema().dump(name_parts, many=True),
        "subIndex": sub_index,
        "modifiers": modifiers,
        "flags": [flag.value for flag in flags],
        "surrogate": OneOfTokenSchema().dump(surrogate, many=True),
        "sign": sign and OneOfTokenSchema().dump(sign),
    }
    assert_token_serialization(logogram, serialized)
from ebl.transliteration.domain.word_tokens import Word
from ebl.fragmentarium.domain.joins import Join, Joins

# Collection names used by the fragment and join repositories —
# NOTE(review): presumably database collection names; confirm backing store.
COLLECTION = "fragments"
JOINS_COLLECTION = "joins"


ANOTHER_LEMMATIZED_FRAGMENT = attr.evolve(
    TransliteratedFragmentFactory.build(),
    text=Text(
        (
            TextLine(
                LineNumber(1),
                (
                    Word.of(
                        [Logogram.of_name("GI", 6)], unique_lemma=(WordId("ginâ I"),)
                    ),
                    Word.of([Reading.of_name("ana")], unique_lemma=(WordId("ana II"),)),
                    Word.of([Reading.of_name("ana")], unique_lemma=(WordId("ana II"),)),
                    Word.of(
                        [
                            Reading.of_name("u", 4),
                            Joiner.hyphen(),
                            Reading.of_name("šu"),
                        ],
                        unique_lemma=(WordId("ūsu I"),),
                    ),
                    AkkadianWord.of(
                        [ValueToken.of("ana")], unique_lemma=(WordId("normalized I"),)
                    ),
                ),
 def ebl_atf_text_line__surrogate(
     self, name, sub_index, modifiers, flags, surrogate
 ):
     """Build a Logogram carrying a surrogate reading and no sign."""
     name_tokens = tuple(name.children)
     return Logogram.of(
         name_tokens, sub_index, modifiers, flags, None, surrogate.children
     )
 def ebl_atf_text_line__logogram(self, name, sub_index, modifiers, flags, sign=None):
     """Build a Logogram from its parsed name parts and optional sign."""
     parts = tuple(name.children)
     return Logogram.of(parts, sub_index, modifiers, flags, sign)
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Factory for a transliterated fragment whose words carry lemmata.

    The transliteration parallels TransliteratedFragmentFactory's ``text``
    but attaches ``unique_lemma`` word ids to the lemmatizable words.
    """

    # Five text lines followed by one of every non-text line type
    # ($-lines, @-lines, a note line, and parallel lines).
    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Non-text lines: dollar lines, at-lines, a note, and parallels.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
class TransliteratedFragmentFactory(FragmentFactory):
    """Factory for a fragment with a full transliteration but no lemmata.

    Provides a fixed ``text`` plus the matching ``signs``, ``folios``,
    transliteration ``record``, and ``line_to_vec`` encoding.
    """

    # Five text lines followed by one of every non-text line type
    # ($-lines, @-lines, a note line, and parallel lines).
    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        # Non-text lines: dollar lines, at-lines, a note, and parallels.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    # Sign readings, one newline-separated row per text line above —
    # NOTE(review): pairing with the transliteration assumed; confirm.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    # Two sample folios from different collections.
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    # A single transliteration record entry.
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    # Line-to-vec encoding: five text lines followed by a single ruling.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
                         Joiner.hyphen(),
                         Reading.of_name("bu"),
                     ])
                 ],
                 ErasureState.NONE,
             ), ),
         ),
     ],
 ),
 (
     "1. KIMIN {u₂#}[...] {u₂#} [...]",
     [
         TextLine.of_iterable(
             LineNumber(1),
             (
                 Word.of([Logogram.of_name("KIMIN")]),
                 Word.of(parts=[
                     Determinative.of([
                         Reading.of_name(
                             "u", 2, flags=[atf.Flag.DAMAGE])
                     ]),
                     BrokenAway.open(),
                     UnknownNumberOfSigns.of(),
                     BrokenAway.close(),
                 ]),
                 LoneDeterminative.of_value(
                     [
                         Determinative.of([
                             Reading.of_name(
                                 "u", 2, flags=[atf.Flag.DAMAGE])
                         ])
Example #10
0
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.normalized_akkadian import AkkadianWord
from ebl.transliteration.domain.sign_tokens import Logogram, Reading
from ebl.transliteration.domain.text import Text
from ebl.transliteration.domain.text_line import TextLine
from ebl.transliteration.domain.tokens import ErasureState, Joiner, ValueToken
from ebl.transliteration.domain.word_tokens import Word

# Collection name used by the fragment repository —
# NOTE(review): presumably a database collection name; confirm backing store.
COLLECTION = "fragments"

ANOTHER_LEMMATIZED_FRAGMENT = attr.evolve(
    TransliteratedFragmentFactory.build(),
    text=Text((TextLine(
        LineNumber(1),
        (
            Word.of([Logogram.of_name("GI", 6)],
                    unique_lemma=(WordId("ginâ I"), )),
            Word.of([Reading.of_name("ana")],
                    unique_lemma=(WordId("ana II"), )),
            Word.of([Reading.of_name("ana")],
                    unique_lemma=(WordId("ana II"), )),
            Word.of(
                [
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of_name("šu"),
                ],
                unique_lemma=(WordId("ūsu I"), ),
            ),
            AkkadianWord.of([ValueToken.of("ana")],
                            unique_lemma=(WordId("normalized I"), )),
     "1]2",
     Word.of([
         Number.of((ValueToken.of("1"), BrokenAway.close(),
                    ValueToken.of("2")))
     ]),
 ),
 (
     "1[2",
     Word.of([
         Number.of((ValueToken.of("1"), BrokenAway.open(),
                    ValueToken.of("2")))
     ]),
 ),
 ("ʾ", Word.of([Reading.of_name("ʾ")])),
 ("du₁₁", Word.of([Reading.of_name("du", 11)])),
 ("GAL", Word.of([Logogram.of_name("GAL")])),
 (
     "kur(GAL)",
     Word.of(
         [Reading.of_name("kur", sign=Grapheme.of(SignName("GAL")))]),
 ),
 (
     "KUR(GAL)",
     Word.of(
         [Logogram.of_name("KUR", sign=Grapheme.of(SignName("GAL")))]),
 ),
 (
     "kur(|GAL|)",
     Word.of(
         [Reading.of_name("kur", sign=CompoundGrapheme.of(["GAL"]))]),
 ),