def test_constructor_sets_correct_fields() -> None:
    """Check the attributes exposed by the shared ``CHAPTER`` fixture.

    Every attribute of the chapter, of its first manuscript and of its
    first line is compared with the module-level constant of the same
    meaning. Assertions run in a fixed order so the first mismatch
    reported is always the same one.
    """
    # Chapter-level attributes.
    assert CHAPTER.text_id == TextId(GENRE, CATEGORY, INDEX)
    assert CHAPTER.classification == CLASSIFICATION
    assert CHAPTER.stage == STAGE
    assert CHAPTER.version == VERSION
    assert CHAPTER.name == CHAPTER_NAME
    assert CHAPTER.order == ORDER
    assert CHAPTER.uncertain_fragments == (MUSEUM_NUMBER, )

    # Attributes of the first (index 0) manuscript.
    manuscript = CHAPTER.manuscripts[0]
    assert manuscript.id == MANUSCRIPT_ID
    # The siglum is compared against one built from the same four parts.
    expected_siglum = Siglum(PROVENANCE, PERIOD, TYPE, SIGLUM_DISAMBIGUATOR)
    assert manuscript.siglum == expected_siglum
    assert manuscript.siglum_disambiguator == SIGLUM_DISAMBIGUATOR
    assert manuscript.museum_number == MUSEUM_NUMBER
    assert manuscript.accession == ACCESSION
    assert manuscript.period_modifier == PERIOD_MODIFIER
    assert manuscript.period == PERIOD
    assert manuscript.provenance == PROVENANCE
    assert manuscript.type == TYPE
    assert manuscript.notes == NOTES
    assert manuscript.colophon == COLOPHON
    assert manuscript.unplaced_lines == UNPLACED_LINES
    assert manuscript.references == REFERENCES

    # Attributes of the first (index 0) line.
    first_line = CHAPTER.lines[0]
    assert first_line.number == LINE_NUMBER
    assert first_line.variants == (LINE_VARIANT_1, )
    assert (first_line.is_second_line_of_parallelism ==
            IS_SECOND_LINE_OF_PARALLELISM)
    assert first_line.is_beginning_of_section == IS_BEGINNING_OF_SECTION
    assert first_line.translation == TRANSLATION

    # Remaining chapter-level attributes.
    assert CHAPTER.signs == SIGNS
    assert CHAPTER.record == RECORD
def test_invalid_extent() -> None:
    """Expect ``ValueError`` for a translation extent ending at line 2.

    The chapter built here contains only line 1, while the translation
    on that line declares an extent of ``LineNumber(2)``.
    """
    # Every object is created inside the ``raises`` block, so the error
    # is accepted no matter which constructor reports it.
    with pytest.raises(ValueError):
        text_id = TextId(GENRE, 0, 0)
        manuscripts = (Manuscript(MANUSCRIPT_ID), )
        number = LineNumber(1)
        variants = (LINE_VARIANT_1, )
        translation = TranslationLine(tuple(), extent=Extent(LineNumber(2)))
        only_line = Line(number, variants, translation=(translation, ))
        Chapter(text_id, manuscripts=manuscripts, lines=(only_line, ))
def test_parallel_text(cf, chapter, display_value) -> None:
    """Check the accessors and derived values of a ``ParallelText`` line.

    ``cf``, ``chapter`` and ``display_value`` are supplied from outside
    this function (presumably by a parametrize decorator that is not
    part of this excerpt -- confirm against the full test module).
    """
    literature_id = TextId(Genre.LITERATURE, 1, 1)
    first_line = LineNumber(1)
    parallel = ParallelText(cf, literature_id, chapter, first_line)

    # The constructor arguments are readable back from the instance.
    assert parallel.has_cf is cf
    assert parallel.text == literature_id
    assert parallel.chapter == chapter
    assert parallel.line_number == first_line

    # Values derived from the arguments: the ATF form is the display
    # value behind a "// " prefix, and the lemmatization is a single
    # token holding the display value.
    assert parallel.display_value == display_value
    assert parallel.atf == Atf(f"// {display_value}")
    assert parallel.lemmatization == (LemmatizationToken(display_value), )
Esempio n. 4
0
def test_text_constructor_sets_correct_fields() -> None:
    """Check the attributes of the shared ``TEXT`` fixture and of its
    first chapter listing against the module-level constants."""
    # Text-level attributes.
    assert TEXT.id == TextId(GENRE, CATEGORY, INDEX)
    assert TEXT.genre == GENRE
    assert TEXT.category == CATEGORY
    assert TEXT.index == INDEX
    assert TEXT.name == NAME
    assert TEXT.has_doi == HAS_DOI
    assert TEXT.number_of_verses == VERSES
    assert TEXT.approximate_verses == APPROXIMATE
    assert TEXT.intro == INTRO

    # Attributes of the first (index 0) chapter entry.
    first_chapter = TEXT.chapters[0]
    assert first_chapter.stage == STAGE
    assert first_chapter.name == CHAPTER_NAME
    assert first_chapter.translation == TRANSLATION
    assert first_chapter.uncertain_fragments == UNCERTAIN_FRAGMENTS
def test_overlapping_languages() -> None:
    """Build a chapter whose translations overlap but differ in language.

    Line 1 carries an ``"en"`` translation with an extent of
    ``LineNumber(2)``; line 2 carries a ``"de"`` translation. There is no
    assertion: the test passes as long as construction raises nothing.
    """
    text_id = TextId(GENRE, 0, 0)
    manuscripts = (Manuscript(MANUSCRIPT_ID), )

    english = TranslationLine(tuple(), "en", Extent(LineNumber(2)))
    first_line = Line(
        LineNumber(1),
        (LINE_VARIANT_1, ),
        translation=(english, ),
    )

    german = TranslationLine(tuple(), "de")
    second_line = Line(
        LineNumber(2),
        (LINE_VARIANT_2, ),
        translation=(german, ),
    )

    Chapter(text_id, manuscripts=manuscripts, lines=(first_line, second_line))
Esempio n. 6
0
def test_extant_lines_schema() -> None:
    """Dump a one-manuscript, one-line chapter with ``ExtantLinesSchema``.

    The expected result nests, under ``"extantLines"``, the manuscript's
    siglum (as a string), then the space-joined label values of the
    manuscript line, then a one-element list describing the line.
    """
    witness = Manuscript(1)
    witness_line = ManuscriptLine(1, LABELS, MANUSCRIPT_TEXT_1)
    only_variant = LineVariant(tuple(), manuscripts=(witness_line, ))
    line = Line(LineNumber(1), (only_variant, ))
    chapter = Chapter(
        TextId(Genre.LITERATURE, 0, 0),
        manuscripts=(witness, ),
        lines=(line, ),
    )

    dumped = ExtantLinesSchema().dump(chapter)

    # Keys of the two nesting levels below "extantLines".
    siglum_key = str(witness.siglum)
    label_values = [label.to_value() for label in witness_line.labels]
    labels_key = " ".join(label_values)
    extant_entry = {
        "isSideBoundary": False,
        "lineNumber": OneOfLineNumberSchema().dump(line.number),
    }

    assert dumped == {
        "extantLines": {
            siglum_key: {
                labels_key: [extant_entry],
            },
        },
    }
import pytest

from ebl.transliteration.domain.text_id import TextId
from ebl.transliteration.domain.genre import Genre


@pytest.mark.parametrize(
    ("text_id", "expected"),
    [
        (TextId(Genre.LITERATURE, 0, 0), "L 0.0"),
        (TextId(Genre.LEXICOGRAPHY, 5, 8), "Lex V.8"),
        (TextId(Genre.MEDICINE, 1, 3), "Med I.3"),
    ],
)
def test_str(text_id, expected) -> None:
    """``str`` of a ``TextId`` must equal the expected citation string.

    In the cases above the category appears as a Roman numeral (``5`` ->
    ``V``, ``1`` -> ``I``) except for ``0``, which stays ``0``.
    """
    rendered = str(text_id)
    assert rendered == expected
def test_find_raises_exception_if_text_not_found(text_repository) -> None:
    """``find`` must raise ``NotFoundError`` for the requested text id.

    Presumably the ``text_repository`` fixture holds no text with this id
    -- confirm against the fixture definition.
    """
    missing_id = TextId(Genre.LITERATURE, 1, 1)
    with pytest.raises(NotFoundError):
        text_repository.find(missing_id)
Esempio n. 9
0
 def id(self) -> TextId:
     """Return the ``TextId`` built from this object's ``genre``,
     ``category`` and ``index`` attributes, in that order."""
     genre, category, index = self.genre, self.category, self.index
     return TextId(genre, category, index)
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Fragment factory whose ``text`` contains lemmatized words.

    Overrides ``text`` with a text that mirrors the structure of
    ``TransliteratedFragmentFactory.text`` (text lines numbered 1, 2, 3, 6
    and 7 followed by the same non-text lines) and attaches
    ``unique_lemma`` values to words on lines 2, 3, 6 and 7.
    """
    text = Text((
        # Line 1: no word carries a ``unique_lemma``.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2: the three middle words are lemmatized.
        # NOTE(review): besides the lemmas, this line differs from line 2 of
        # TransliteratedFragmentFactory: the first word has no
        # BrokenAway.close(), and "u₄" / "š[u" are given as plain reading
        # names instead of structured tokens -- confirm this is intended.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 3: the three middle words are lemmatized; the first and last
        # words only hold the broken-away markers.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 6: the last two words are lemmatized.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        # Line 7: only the AkkadianWord after the language shift is
        # lemmatized.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Non-text lines: dollar lines, at lines, a note line and three
        # parallel lines, one instance of each class.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # Note made of a plain string, an emphasized part and a
        # transliterated Akkadian part.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
class TransliteratedFragmentFactory(FragmentFactory):
    """Fragment factory with a fixed transliteration and related data.

    Defines ``text`` (text lines numbered 1, 2, 3, 6 and 7 followed by one
    instance of each non-text line class), plus matching ``signs``,
    ``folios``, ``record`` and ``line_to_vec`` values. No word in ``text``
    carries a ``unique_lemma``.
    """
    text = Text((
        # Line 1: mixes words, column/tabulation markers, a variant, a
        # divider and a commentary protocol token.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2: a broken-away marker opens inside the last reading and is
        # closed in the final word.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 3: broken-away markers open and close inside readings.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 6: contains an unclear sign and a word with an in-word
        # newline.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        # Line 7: a variant word, then a shift to normalized Akkadian
        # followed by an AkkadianWord.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        # Non-text lines: dollar lines, at lines, a note line and three
        # parallel lines, one instance of each class.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # Note made of a plain string, an emphasized part and a
        # transliterated Akkadian part.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    # Sign string with five newline-separated rows; the rows appear to
    # correspond to the five text lines above, in order -- TODO confirm.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    # Two folios, both numbered "3".
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    # A single record entry of type TRANSLITERATION for user "test".
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    # One encoding tuple: five TEXT_LINE entries followed by SINGLE_RULING.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
             "surface": {
                 "status": ["CORRECTION"],
                 "surface": "OBVERSE",
                 "abbreviation": "o",
                 "text": "",
             },
             "column": None,
         },
         "lineNumber": OneOfLineNumberSchema().dump(LineNumber(1)),
         "exists": True,
     },
 ),
 (
     ParallelText(
         True,
         TextId(Genre.LITERATURE, 1, 1),
         ChapterName(Stage.OLD_BABYLONIAN, "version", "name"),
         LineNumber(1),
         True,
         ChapterName(Stage.OLD_BABYLONIAN, "implicit", "name"),
     ),
     {
         "type": "ParallelText",
         "prefix": "//",
         "content": [
             OneOfTokenSchema().dump(
                 ValueToken.of('cf. L I.1 OB "version" "name" 1')
             )
         ],
         "displayValue": 'cf. L I.1 OB "version" "name" 1',
         "hasCf": True,
# Fixtures for line 3: the manuscript text of line 1 with its line number
# replaced by 3, wrapped in a single-manuscript variant and a line.
MANUSCRIPT_TEXT_3 = attr.evolve(
    MANUSCRIPT_TEXT_1,
    line_number=LineNumber(3),
)
LINE_VARIANT_3 = LineVariant(
    LINE_RECONSTRUCTION,
    None,
    (ManuscriptLine(MANUSCRIPT_ID, (), MANUSCRIPT_TEXT_3), ),
)
LINE_3 = Line(
    LineNumber(3),
    (LINE_VARIANT_3, ),
)

# Identifier shared by the chapter fixtures in this module.
TEXT_ID = TextId(GENRE, CATEGORY, INDEX)

# Record with one author (role EDITOR) and one translator (language "en").
RECORD = Record(
    (
        Author("Author", "Test", AuthorRole.EDITOR, ""),
    ),
    (
        Translator("Author", "Test", "", "en"),
    ),
    "",
)
CHAPTER = Chapter(
    TEXT_ID,
    CLASSIFICATION,
    STAGE,
    VERSION,
    CHAPTER_NAME,
    ORDER,
    (Manuscript(
        MANUSCRIPT_ID,
        SIGLUM_DISAMBIGUATOR,
        MUSEUM_NUMBER,
        ACCESSION,
        PERIOD_MODIFIER,
        PERIOD,
        PROVENANCE,
Esempio n. 14
0
                                     [atf.Status.CORRECTION]),
             ColumnLabel.from_int(3, [atf.Status.UNCERTAIN]),
         ),
         LineNumber(1),
     ),
 ),
 (
     "// F K.1 1",
     ParallelFragment(False, MuseumNumber.of("K.1"), False, Labels(),
                      LineNumber(1)),
 ),
 (
     '// cf. L I.1 OB "my name" 1',
     ParallelText(
         True,
         TextId(Genre.LITERATURE, 1, 1),
         ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
         LineNumber(1),
     ),
 ),
 (
     '// cf. L I.1 OB "my version" "my name" 1',
     ParallelText(
         True,
         TextId(Genre.LITERATURE, 1, 1),
         ChapterName(Stage.OLD_BABYLONIAN, "my version", "my name"),
         LineNumber(1),
     ),
 ),
 (
     "// L I.1 1",
Esempio n. 15
0
 def ebl_atf_text_line__text_id(self, genre, category, number) -> TextId:
     """Build a ``TextId`` from a genre, a category and a number.

     ``genre`` is converted with ``Genre(...)`` and ``number`` with
     ``int``. ``category`` is either the literal ``"0"`` or a Roman
     numeral converted with ``roman.fromRoman``. Presumably this is a
     grammar-rule callback -- confirm against the enclosing class.
     """
     text_genre = Genre(genre)
     if category == "0":
         # "0" is mapped directly instead of going through fromRoman.
         category_number = 0
     else:
         category_number = roman.fromRoman(category)
     return TextId(text_genre, category_number, int(number))
 def make_id(self, data, **kwargs) -> TextId:
     """Build a ``TextId`` from the ``"genre"``, ``"category"`` and
     ``"index"`` entries of *data*; extra keyword arguments are unused."""
     genre = data["genre"]
     category = data["category"]
     index = data["index"]
     return TextId(genre, category, index)
Esempio n. 17
0
            Line(
                LineNumber(1),
                (LineVariant(
                    (Caesura.certain(), ),
                    None,
                    (MANUSCRIPT_LINE.update_alignments([None]), ),
                ), ),
            ),
        ),
    ],
)
def test_merge_line(old: Line, new: Line, expected: Line) -> None:
    """Merging *new* into *old* must produce a line equal to *expected*."""
    merged = old.merge(new)
    assert merged == expected


# Values passed to the ``CHAPTER`` fixture defined just below.
TEXT_ID = TextId(Genre.LITERATURE, 0, 0)
CLASSIFICATION = Classification.ANCIENT
STAGE = Stage.NEO_BABYLONIAN
VERSION = "A"
CHAPTER_NAME = "I"
ORDER = 1

# Manuscript-related fixtures.
MUSEUM_NUMBER = MuseumNumber.of("K.1")
MANUSCRIPT = Manuscript(MANUSCRIPT_ID)
CHAPTER = Chapter(
    TEXT_ID,
    CLASSIFICATION,
    STAGE,
    VERSION,
    CHAPTER_NAME,
    ORDER,
    (MANUSCRIPT, ),