Example #1
0
 def make_token(self, data, **kwargs):
     """Rebuild a Reading token from its deserialized *data* mapping.

     Restores the enclosure types and erasure state on top of the
     freshly constructed reading.
     """
     reading = Reading.of(
         data["name_parts"],
         data["sub_index"],
         data["modifiers"],
         data["flags"],
         data["sign"],
     )
     enclosed = reading.set_enclosure_type(frozenset(data["enclosure_type"]))
     return enclosed.set_erasure(data["erasure"])
def test_text_line_of_iterable(code: str, language: Language) -> None:
    """TextLine.of_iterable assigns languages and enclosure types to tokens.

    Words following a language shift receive that shift's language, and
    tokens inside the broken-away brackets receive the BROKEN_AWAY
    enclosure type.
    """
    # Raw input tokens, before any language/enclosure visitation.
    tokens = [
        Word.of([Reading.of_name("first")]),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")]),
        LanguageShift.of("%sb"),
        LoneDeterminative.of([Determinative.of([Reading.of_name("third")])]),
        Word.of([BrokenAway.open(),
                 Reading.of_name("fourth")]),
        UnknownNumberOfSigns.of(),
        BrokenAway.close(),
    ]
    # Expected output: "first" keeps the default language, "second" gets the
    # language of the preceding shift, and everything after BrokenAway.open()
    # carries EnclosureType.BROKEN_AWAY.
    expected_tokens = (
        Word.of([Reading.of_name("first")], DEFAULT_LANGUAGE),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")], language),
        LanguageShift.of("%sb"),
        LoneDeterminative.of([Determinative.of([Reading.of_name("third")])],
                             Language.AKKADIAN),
        Word.of(
            [
                BrokenAway.open(),
                Reading.of((ValueToken(
                    frozenset({EnclosureType.BROKEN_AWAY}),
                    ErasureState.NONE,
                    "fourth",
                ), )).set_enclosure_type(frozenset({EnclosureType.BROKEN_AWAY
                                                    })),
            ],
            DEFAULT_LANGUAGE,
        ),
        UnknownNumberOfSigns(frozenset({EnclosureType.BROKEN_AWAY}),
                             ErasureState.NONE),
        BrokenAway.close().set_enclosure_type(
            frozenset({EnclosureType.BROKEN_AWAY})),
    )
    line = TextLine.of_iterable(LINE_NUMBER, tokens)

    assert line.line_number == LINE_NUMBER
    assert line.content == expected_tokens
    # The line key is derived from the ATF text plus every token's key.
    assert (
        line.key ==
        f"TextLine⁞{line.atf}⟨{'⁚'.join(token.get_key() for token in expected_tokens)}⟩"
    )
    assert line.atf == f"1. first {code} second %sb {{third}} [fourth ...]"
Example #3
0
def expected_transliteration(language: Language) -> Sequence[Token]:
    """Tokens expected after visitation: *language* on the first word,
    EMESAL after the %es shift, and BROKEN_AWAY enclosure on the tokens
    inside the brackets."""
    broken_away = frozenset({EnclosureType.BROKEN_AWAY})
    kur = Reading.of(
        (ValueToken(broken_away, ErasureState.NONE, "kur"), )
    ).set_enclosure_type(broken_away)
    return (
        Word.of([Reading.of_name("bu")], language),
        LanguageShift.of("%es"),
        Word.of([BrokenAway.open(), kur], Language.EMESAL),
        UnknownNumberOfSigns(broken_away, ErasureState.NONE),
        BrokenAway.close().set_enclosure_type(broken_away),
    )
def test_variant():
    """Variant of a reading and a divider: value, key, and serialization."""
    first = Reading.of([ValueToken.of("sa"), BrokenAway.open(), ValueToken.of("l")])
    second = Divider.of(":")
    token = Variant.of(first, second)

    value = "sa[l/:"
    part_keys = "⁚".join(part.get_key() for part in token.tokens)

    assert token.value == value
    assert token.clean_value == "sal/:"
    assert token.tokens == (first, second)
    assert token.parts == token.tokens
    assert token.get_key() == f"Variant⁝{value}⟨{part_keys}⟩"
    assert token.lemmatizable is False

    assert_token_serialization(
        token,
        {
            "type": "Variant",
            "tokens": OneOfTokenSchema().dump([first, second], many=True),
        },
    )
def test_reading(
    name_parts,
    sub_index,
    modifiers,
    flags,
    sign,
    expected_value,
    expected_clean_value,
    expected_name,
) -> None:
    """Reading construction: accessors, key, and round-trip serialization.

    Parameter names are fixed by the pytest.mark.parametrize decorator.
    """
    token = Reading.of(name_parts, sub_index, modifiers, flags, sign)

    # The sign, when present, is appended to the name parts.
    parts = (*name_parts, sign) if sign else name_parts
    part_keys = "⁚".join(part.get_key() for part in parts)

    assert token.value == expected_value
    assert token.clean_value == expected_clean_value
    assert token.get_key() == f"Reading⁝{expected_value}⟨{part_keys}⟩"
    assert token.name_parts == name_parts
    assert token.name == expected_name
    assert token.modifiers == tuple(modifiers)
    assert token.flags == tuple(flags)
    assert token.lemmatizable is False
    assert token.sign == sign

    assert_token_serialization(
        token,
        {
            "type": "Reading",
            "name": expected_name,
            "nameParts": OneOfTokenSchema().dump(name_parts, many=True),
            "subIndex": sub_index,
            "modifiers": modifiers,
            "flags": [flag.value for flag in flags],
            # `sign and ...` keeps None as-is and dumps an actual token.
            "sign": sign and OneOfTokenSchema().dump(sign),
        },
    )
 def ebl_atf_text_line__reading(self, name, sub_index, modifiers, flags, sign=None):
     """Grammar-rule callback: build a Reading from the parsed name tree."""
     name_parts = tuple(name.children)
     return Reading.of(name_parts, sub_index, modifiers, flags, sign)
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Fragment factory whose text carries lemmatization.

    Mirrors the lines of TransliteratedFragmentFactory but attaches
    unique_lemma word IDs to the lemmatizable words.
    """

    # Same transliteration as the parent factory, with unique_lemma added.
    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Non-text lines: dollar lines, at-lines, a note, and parallels.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
class TransliteratedFragmentFactory(FragmentFactory):
    """Fragment factory with a multi-line transliteration, signs, folios,
    a transliteration record, and a line-to-vec encoding."""

    # Five text lines followed by dollar/at/note/parallel lines.
    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    # Sign transliteration: one newline-separated group per text line above.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    # Encoding of the lines above: five text lines, then the single ruling.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
Example #9
0
from ebl.corpus.domain.manuscript import Manuscript
from ebl.transliteration.domain.text_id import TextId
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.sign_tokens import Reading
from ebl.transliteration.domain.text_line import TextLine
from ebl.transliteration.domain.tokens import ValueToken
from ebl.transliteration.domain.word_tokens import Word
from ebl.transliteration.domain.genre import Genre
from ebl.transliteration.domain.labels import SurfaceLabel
from ebl.transliteration.domain.atf import Surface
from ebl.corpus.web.extant_lines import ExtantLinesSchema
from ebl.transliteration.application.line_number_schemas import OneOfLineNumberSchema

# A single obverse surface label, shared by the manuscript-line fixtures.
LABELS = (SurfaceLabel.from_label(Surface.OBVERSE), )
# Minimal manuscript text line: line 2 containing the single word "ku".
MANUSCRIPT_TEXT_1 = TextLine(LineNumber(2),
                             (Word.of([Reading.of([ValueToken.of("ku")])]), ))


def test_extant_lines_schema() -> None:
    manuscript = Manuscript(1)
    manuscript_line = ManuscriptLine(1, LABELS, MANUSCRIPT_TEXT_1)
    variant = LineVariant(tuple(), manuscripts=(manuscript_line, ))
    text_line = Line(LineNumber(1), (variant, ))
    chapter = Chapter(TextId(Genre.LITERATURE, 0, 0),
                      manuscripts=(manuscript, ),
                      lines=(text_line, ))
    assert ExtantLinesSchema().dump(chapter) == {
        "extantLines": {
            str(manuscript.siglum): {
                " ".join(label.to_value() for label in manuscript_line.labels):
                [{
         )
     ],
 ),
 (
     "1. [... r]u?-u₂-qu na-a[n-...]\n2. ši-[ku-...-ku]-nu\n3. [...]-ku",
     [
         TextLine.of_iterable(
             LineNumber(1),
             (
                 Word.of(
                     (BrokenAway.open(), UnknownNumberOfSigns.of())),
                 Word.of(parts=[
                     Reading.of(
                         (
                             ValueToken.of("r"),
                             BrokenAway.close(),
                             ValueToken.of("u"),
                         ),
                         flags=[atf.Flag.UNCERTAIN],
                     ),
                     Joiner.hyphen(),
                     Reading.of_name("u", 2),
                     Joiner.hyphen(),
                     Reading.of_name("qu"),
                 ]),
                 Word.of(parts=[
                     Reading.of_name("na"),
                     Joiner.hyphen(),
                     Reading.of((
                         ValueToken.of("a"),
                         BrokenAway.open(),
                         ValueToken.of("n"),
Example #11
0
    fragment_repository.create(ANOTHER_LEMMATIZED_FRAGMENT)

    assert lemma_repository.query_lemmas("ana", False) == [["ana II"],
                                                           ["ana I"]]


@pytest.mark.parametrize(
    "parts,expected",
    [
        (
            [
                Reading.of(
                    [ValueToken.of("ana")],
                    flags=[
                        Flag.DAMAGE,
                        Flag.COLLATION,
                        Flag.UNCERTAIN,
                        Flag.CORRECTION,
                    ],
                )
            ],
            [["ana I"]],
        ),
        (
            [
                BrokenAway.open(),
                PerhapsBrokenAway.open(),
                Reading.of([ValueToken.of("ana")]),
                PerhapsBrokenAway.close(),
                BrokenAway.close(),
            ],
Example #12
0
         )
     ]),
 ),
 (
     Text.of_iterable([ControlLine("$", " double ruling")]),
     Text.of_iterable([RulingDollarLine(atf.Ruling.DOUBLE)]),
     Text.of_iterable([RulingDollarLine(atf.Ruling.DOUBLE)]),
 ),
 (
     Text.of_iterable([
         TextLine.of_iterable(
             LineNumber(1),
             [
                 Word.of([
                     Variant.of(
                         Reading.of([ValueToken.of("k[ur")]),
                         Reading.of([ValueToken.of("r[a")]),
                     )
                 ]),
                 BrokenAway.close(),
             ],
         )
     ]),
     Text.of_iterable([
         TextLine.of_iterable(
             LineNumber(1),
             [
                 Word.of([
                     Variant.of(
                         Reading.of([
                             ValueToken.of("k"),
 {
     "type": "TextLine",
     "prefix": "1.",
     "lineNumber": OneOfLineNumberSchema().dump(LineNumber(1)),
     "content": OneOfTokenSchema().dump(
         [
             DocumentOrientedGloss.open(),
             Word.of(
                 [
                     Reading.of(
                         (
                             ValueToken(
                                 frozenset(
                                     {EnclosureType.DOCUMENT_ORIENTED_GLOSS}
                                 ),
                                 ErasureState.NONE,
                                 "bu",
                             ),
                         )
                     ).set_enclosure_type(
                         frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS})
                     )
                 ]
             ).set_enclosure_type(
                 frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS})
             ),
             LoneDeterminative.of(
                 [
                     Determinative.of(
                         [
                             Reading.of(
             frozenset({
                 EnclosureType.DOCUMENT_ORIENTED_GLOSS
             })), )).set_enclosure_type(
                 frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS})),
         DocumentOrientedGloss.close().set_enclosure_type(
             frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS})),
     ),
 ),
 ((
     "ku[r ...]",
     (
         Word.of((Reading.of((
             ValueToken.of("ku"),
             BrokenAway.open(),
             ValueToken(
                 frozenset({EnclosureType.BROKEN_AWAY}),
                 ErasureState.NONE,
                 "r",
             ),
         )), )),
         Word.of((
             UnknownNumberOfSigns.of().set_enclosure_type(
                 frozenset({EnclosureType.BROKEN_AWAY})),
             BrokenAway.close().set_enclosure_type(
                 frozenset({EnclosureType.BROKEN_AWAY})),
         )).set_enclosure_type(frozenset({EnclosureType.BROKEN_AWAY})),
     ),
 )),
 ((
     "{k[ur}-X]",
     (Word.of((
Example #15
0
 (LINE, LINE, LINE),
 (
     Line(
         LineNumber(1),
         (LineVariant(
             RECONSTRUCTION,
             NOTE,
             (ManuscriptLine(
                 MANUSCRIPT_ID,
                 LABELS,
                 TextLine(
                     LineNumber(1),
                     (Word.of(
                         [
                             Reading.of([
                                 ValueToken.of("ku"),
                                 BrokenAway.close(),
                             ]),
                             Joiner.hyphen(),
                             Reading.of_name("nu"),
                             Joiner.hyphen(),
                             Reading.of_name("si"),
                         ],
                         unique_lemma=(WordId("word"), ),
                         alignment=0,
                     ), ),
                 ),
             ), ),
         ), ),
         IS_SECOND_LINE_OF_PARALLELISM,
         IS_BEGINNING_OF_SECTION,
     ),
 ("x:ti",
  Word.of([UnclearSign.of(),
           Joiner.colon(),
           Reading.of_name("ti")])),
 (
     "ti-X",
     Word.of([
         Reading.of_name("ti"),
         Joiner.hyphen(),
         UnidentifiedSign.of()
     ]),
 ),
 (
     "r]u-u₂-qu",
     Word.of([
         Reading.of((ValueToken.of("r"), BrokenAway.close(),
                     ValueToken.of("u"))),
         Joiner.hyphen(),
         Reading.of_name("u", 2),
         Joiner.hyphen(),
         Reading.of_name("qu"),
     ]),
 ),
 (
     "ru?-u₂-qu",
     Word.of([
         Reading.of_name("ru", flags=[atf.Flag.UNCERTAIN]),
         Joiner.hyphen(),
         Reading.of_name("u", 2),
         Joiner.hyphen(),
         Reading.of_name("qu"),
     ]),