def test_constructor_sets_correct_fields():
    """Check that the module-level CHAPTER exposes the values it was built from.

    The assertions cover the chapter itself, its first manuscript and its
    first line, in that order.
    """
    # Chapter-level fields.
    assert CHAPTER.text_id == TextId(GENRE, CATEGORY, INDEX)
    assert CHAPTER.classification == CLASSIFICATION
    assert CHAPTER.stage == STAGE
    assert CHAPTER.version == VERSION
    assert CHAPTER.name == CHAPTER_NAME
    assert CHAPTER.order == ORDER
    assert CHAPTER.uncertain_fragments == (MUSEUM_NUMBER,)

    # Fields of the first manuscript.
    first_manuscript = CHAPTER.manuscripts[0]
    expected_siglum = Siglum(PROVENANCE, PERIOD, TYPE, SIGLUM_DISAMBIGUATOR)
    assert first_manuscript.id == MANUSCRIPT_ID
    assert first_manuscript.siglum == expected_siglum
    assert first_manuscript.siglum_disambiguator == SIGLUM_DISAMBIGUATOR
    assert first_manuscript.museum_number == MUSEUM_NUMBER
    assert first_manuscript.accession == ACCESSION
    assert first_manuscript.period_modifier == PERIOD_MODIFIER
    assert first_manuscript.period == PERIOD
    assert first_manuscript.provenance == PROVENANCE
    assert first_manuscript.type == TYPE
    assert first_manuscript.notes == NOTES
    assert first_manuscript.colophon == COLOPHON
    assert first_manuscript.unplaced_lines == UNPLACED_LINES
    assert first_manuscript.references == REFERENCES

    # Fields of the first line.
    first_line = CHAPTER.lines[0]
    assert first_line.number == LINE_NUMBER
    assert first_line.variants == (LINE_VARIANT_1,)
    assert first_line.is_second_line_of_parallelism == IS_SECOND_LINE_OF_PARALLELISM
    assert first_line.is_beginning_of_section == IS_BEGINNING_OF_SECTION
    assert first_line.translation == TRANSLATION

    # Remaining chapter-level fields.
    assert CHAPTER.signs == SIGNS
    assert CHAPTER.record == RECORD
def test_invalid_extent() -> None:
    """Expect ValueError for a translation extent that points at line 2.

    The chapter built here contains only line 1, while the translation of
    that line declares an extent of line 2.
    """
    with pytest.raises(ValueError):
        # Everything is constructed inside the guarded block, as before, so
        # a ValueError raised by any of the constructors is accepted.
        text_id = TextId(GENRE, 0, 0)
        manuscripts = (Manuscript(MANUSCRIPT_ID),)
        line_number = LineNumber(1)
        translation = TranslationLine(tuple(), extent=Extent(LineNumber(2)))
        only_line = Line(
            line_number,
            (LINE_VARIANT_1,),
            translation=(translation,),
        )
        Chapter(text_id, manuscripts=manuscripts, lines=(only_line,))
def test_parallel_text(cf, chapter, display_value) -> None:
    """Check the attributes and derived values of a ParallelText line.

    ``cf``, ``chapter`` and ``display_value`` are supplied by the caller
    (the parametrization is not visible in this block).
    """
    expected_text = TextId(Genre.LITERATURE, 1, 1)
    expected_number = LineNumber(1)

    parallel = ParallelText(cf, expected_text, chapter, expected_number)

    # Values passed to the constructor are exposed unchanged.
    assert parallel.has_cf is cf
    assert parallel.text == expected_text
    assert parallel.chapter == chapter
    assert parallel.line_number == expected_number

    # Derived representations are all based on the display value.
    assert parallel.display_value == display_value
    assert parallel.atf == Atf(f"// {display_value}")
    assert parallel.lemmatization == (LemmatizationToken(display_value),)
def test_text_constructor_sets_correct_fields() -> None:
    """Check that the module-level TEXT exposes the values it was built from."""
    # Identity of the text.
    assert TEXT.id == TextId(GENRE, CATEGORY, INDEX)
    assert TEXT.genre == GENRE
    assert TEXT.category == CATEGORY
    assert TEXT.index == INDEX

    # Descriptive metadata.
    assert TEXT.name == NAME
    assert TEXT.has_doi == HAS_DOI
    assert TEXT.number_of_verses == VERSES
    assert TEXT.approximate_verses == APPROXIMATE
    assert TEXT.intro == INTRO

    # Fields of the first chapter entry.
    first_chapter = TEXT.chapters[0]
    assert first_chapter.stage == STAGE
    assert first_chapter.name == CHAPTER_NAME
    assert first_chapter.translation == TRANSLATION
    assert first_chapter.uncertain_fragments == UNCERTAIN_FRAGMENTS
def test_overlapping_languages() -> None:
    """Build a chapter whose translations overlap in extent but differ in language.

    Line 1 carries an "en" translation extending to line 2, and line 2
    carries its own "de" translation. The test has no assertion: it passes
    as long as constructing the chapter does not raise.
    """
    text_id = TextId(GENRE, 0, 0)
    manuscripts = (Manuscript(MANUSCRIPT_ID),)

    english = TranslationLine(tuple(), "en", Extent(LineNumber(2)))
    first_line = Line(LineNumber(1), (LINE_VARIANT_1,), translation=(english,))

    german = TranslationLine(tuple(), "de")
    second_line = Line(LineNumber(2), (LINE_VARIANT_2,), translation=(german,))

    Chapter(text_id, manuscripts=manuscripts, lines=(first_line, second_line))
def test_extant_lines_schema() -> None:
    """Check the shape ExtantLinesSchema produces for a one-line chapter.

    The expected dump nests siglum -> joined label values -> list of extant
    line entries, each with ``isSideBoundary`` and a dumped ``lineNumber``.
    """
    witness = Manuscript(1)
    witness_line = ManuscriptLine(1, LABELS, MANUSCRIPT_TEXT_1)
    only_variant = LineVariant(tuple(), manuscripts=(witness_line,))
    only_line = Line(LineNumber(1), (only_variant,))
    chapter = Chapter(
        TextId(Genre.LITERATURE, 0, 0),
        manuscripts=(witness,),
        lines=(only_line,),
    )

    dumped = ExtantLinesSchema().dump(chapter)

    siglum_key = str(witness.siglum)
    label_key = " ".join(item.to_value() for item in witness_line.labels)
    extant_entry = {
        "isSideBoundary": False,
        "lineNumber": OneOfLineNumberSchema().dump(only_line.number),
    }
    expected = {"extantLines": {siglum_key: {label_key: [extant_entry]}}}

    assert dumped == expected
import pytest

from ebl.transliteration.domain.text_id import TextId
from ebl.transliteration.domain.genre import Genre


@pytest.mark.parametrize(
    ("text_id", "expected"),
    [
        (TextId(Genre.LITERATURE, 0, 0), "L 0.0"),
        (TextId(Genre.LEXICOGRAPHY, 5, 8), "Lex V.8"),
        (TextId(Genre.MEDICINE, 1, 3), "Med I.3"),
    ],
)
def test_str(text_id, expected) -> None:
    """Check the string form of a TextId.

    In the cases above the expected string is a genre abbreviation, then the
    category (``0`` or a Roman numeral), a dot and the index.
    """
    rendered = str(text_id)
    assert rendered == expected
def test_find_raises_exception_if_text_not_found(text_repository) -> None:
    """Expect NotFoundError when ``find`` is called with the id used here."""
    with pytest.raises(NotFoundError):
        # The id is built inside the guarded block, exactly as before.
        missing_id = TextId(Genre.LITERATURE, 1, 1)
        text_repository.find(missing_id)
def id(self) -> TextId:
    """Return the TextId built from this object's genre, category and index."""
    genre, category, index = self.genre, self.category, self.index
    return TextId(genre, category, index)
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Variant of TransliteratedFragmentFactory with lemmatized sample text.

    Overrides only ``text``. The text lines numbered 2, 3, 6 and 7 carry
    ``unique_lemma`` values on some of their words; line 1 and all non-text
    lines have none.
    """

    text = Text((
        # Line 1: no word carries a unique_lemma.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2: three lemmatized words.
        # NOTE(review): this line is not token-for-token the parent factory's
        # line 2. The first word has no BrokenAway.close(), and the last
        # lemmatized word uses Reading.of_name("u₄") / Reading.of_name("š[u")
        # where the parent uses Reading.of_name("u", 4) and an explicit
        # BrokenAway.open() token. Confirm this is intentional.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)], unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")], unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Line 3: three lemmatized words, two of them partly broken away.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ), parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Line 6: two lemmatized words; the second contains an in-word newline.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ), parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        # Line 7: a language shift followed by a lemmatized AkkadianWord.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")], unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Non-text lines: one sample of each $-line, @-line, note and
        # parallel-line type used by this fixture.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(), LineNumber(1), False),
    ))
class TransliteratedFragmentFactory(FragmentFactory):
    """Variant of FragmentFactory with a fixed sample transliteration.

    Sets ``text`` (five text lines followed by one sample of several
    $-line, @-line, note and parallel-line types) together with matching
    ``signs``, ``folios``, ``record`` and ``line_to_vec`` values.
    """

    text = Text((
        # Line 1: words, a logogram with surrogate, columns, a tabulation,
        # a variant, a flagged divider, a commentary protocol and a number.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2: breakage opens inside the last reading and closes in the
        # following word.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Line 3: breakage closes and reopens in the middle of readings.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Line 6: an unclear sign and a word containing an in-word newline.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        # Line 7: a variant with a compound grapheme, then a language shift
        # and an AkkadianWord.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        # Non-text lines: one sample of each $-line, @-line, note and
        # parallel-line type used by this fixture.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(), LineNumber(1), False),
    ))
    # Five newline-separated rows of sign names.
    # NOTE(review): presumably one row per text line above, in order; confirm.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    # A one-element tuple holding a single encoding: five TEXT_LINE entries
    # followed by SINGLE_RULING.
    # NOTE(review): presumably mirrors the five text lines and the single
    # ruling line in `text`; confirm against the encoder.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
"surface": { "status": ["CORRECTION"], "surface": "OBVERSE", "abbreviation": "o", "text": "", }, "column": None, }, "lineNumber": OneOfLineNumberSchema().dump(LineNumber(1)), "exists": True, }, ), ( ParallelText( True, TextId(Genre.LITERATURE, 1, 1), ChapterName(Stage.OLD_BABYLONIAN, "version", "name"), LineNumber(1), True, ChapterName(Stage.OLD_BABYLONIAN, "implicit", "name"), ), { "type": "ParallelText", "prefix": "//", "content": [ OneOfTokenSchema().dump( ValueToken.of('cf. L I.1 OB "version" "name" 1') ) ], "displayValue": 'cf. L I.1 OB "version" "name" 1', "hasCf": True,
MANUSCRIPT_TEXT_3 = attr.evolve(MANUSCRIPT_TEXT_1, line_number=LineNumber(3)) LINE_VARIANT_3 = LineVariant( LINE_RECONSTRUCTION, None, (ManuscriptLine(MANUSCRIPT_ID, tuple(), MANUSCRIPT_TEXT_3), ), ) LINE_3 = Line(LineNumber(3), (LINE_VARIANT_3, )) RECORD = Record( (Author("Author", "Test", AuthorRole.EDITOR, ""), ), (Translator("Author", "Test", "", "en"), ), "", ) TEXT_ID = TextId(GENRE, CATEGORY, INDEX) CHAPTER = Chapter( TEXT_ID, CLASSIFICATION, STAGE, VERSION, CHAPTER_NAME, ORDER, (Manuscript( MANUSCRIPT_ID, SIGLUM_DISAMBIGUATOR, MUSEUM_NUMBER, ACCESSION, PERIOD_MODIFIER, PERIOD, PROVENANCE,
[atf.Status.CORRECTION]), ColumnLabel.from_int(3, [atf.Status.UNCERTAIN]), ), LineNumber(1), ), ), ( "// F K.1 1", ParallelFragment(False, MuseumNumber.of("K.1"), False, Labels(), LineNumber(1)), ), ( '// cf. L I.1 OB "my name" 1', ParallelText( True, TextId(Genre.LITERATURE, 1, 1), ChapterName(Stage.OLD_BABYLONIAN, "", "my name"), LineNumber(1), ), ), ( '// cf. L I.1 OB "my version" "my name" 1', ParallelText( True, TextId(Genre.LITERATURE, 1, 1), ChapterName(Stage.OLD_BABYLONIAN, "my version", "my name"), LineNumber(1), ), ), ( "// L I.1 1",
def ebl_atf_text_line__text_id(self, genre, category, number) -> TextId:
    """Build a TextId from its genre, category and number parts.

    ``genre`` is converted with ``Genre``. ``category`` is ``0`` when it
    equals the string "0" and is otherwise parsed as a Roman numeral.
    ``number`` is converted with ``int``.
    """
    # Conversions run in the original argument order: genre, category, number.
    parsed_genre = Genre(genre)
    if category == "0":
        parsed_category = 0
    else:
        parsed_category = roman.fromRoman(category)
    parsed_index = int(number)
    return TextId(parsed_genre, parsed_category, parsed_index)
def make_id(self, data, **kwargs) -> TextId:
    """Build a TextId from the "genre", "category" and "index" entries of ``data``.

    Extra keyword arguments are accepted and ignored.
    """
    genre = data["genre"]
    category = data["category"]
    index = data["index"]
    return TextId(genre, category, index)
Line( LineNumber(1), (LineVariant( (Caesura.certain(), ), None, (MANUSCRIPT_LINE.update_alignments([None]), ), ), ), ), ), ], ) def test_merge_line(old: Line, new: Line, expected: Line) -> None: assert old.merge(new) == expected TEXT_ID = TextId(Genre.LITERATURE, 0, 0) CLASSIFICATION = Classification.ANCIENT STAGE = Stage.NEO_BABYLONIAN VERSION = "A" CHAPTER_NAME = "I" ORDER = 1 MANUSCRIPT = Manuscript(MANUSCRIPT_ID) MUSEUM_NUMBER = MuseumNumber.of("K.1") CHAPTER = Chapter( TEXT_ID, CLASSIFICATION, STAGE, VERSION, CHAPTER_NAME, ORDER, (MANUSCRIPT, ),