class ManuscriptLineFactory(factory.Factory):
    class Meta:
        model = ManuscriptLine

    manuscript_id = factory.Sequence(lambda n: n)
    labels = (
        SurfaceLabel.from_label(Surface.OBVERSE),
        ColumnLabel.from_label("iii", [Status.COLLATION, Status.CORRECTION]),
    )
    line = factory.Sequence(
        lambda n: TextLine.of_iterable(
            LineNumber(n),
            (
                Word.of(
                    [
                        Reading.of_name("ku"),
                        Joiner.hyphen(),
                        BrokenAway.open(),
                        Reading.of_name("nu"),
                        Joiner.hyphen(),
                        Reading.of_name("ši"),
                        BrokenAway.close(),
                    ]
                ),
            ),
        )
    )
    paratext = (NoteLine((StringPart("note"),)), RulingDollarLine(Ruling.SINGLE))
    omitted_words = (1,)

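# A minimal usage sketch (assumed, not part of the original module): with
# factory_boy's build() strategy the factory returns an in-memory
# ManuscriptLine, and the Sequence attributes yield increasing values per call.
def test_manuscript_line_factory_sketch() -> None:
    manuscript_line = ManuscriptLineFactory.build()
    assert manuscript_line.labels[0] == SurfaceLabel.from_label(Surface.OBVERSE)
    assert manuscript_line.omitted_words == (1,)
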
def test_updating_alignment(
    client, bibliography, sign_repository, signs, text_repository
):
    allow_signs(signs, sign_repository)
    chapter = ChapterFactory.build()
    allow_references(chapter, bibliography)
    text_repository.create_chapter(chapter)

    alignment = 0
    omitted_words = (1,)
    updated_chapter = attr.evolve(
        chapter,
        lines=(
            attr.evolve(
                chapter.lines[0],
                variants=(
                    attr.evolve(
                        chapter.lines[0].variants[0],
                        manuscripts=(
                            attr.evolve(
                                chapter.lines[0].variants[0].manuscripts[0],
                                line=TextLine.of_iterable(
                                    chapter.lines[0].variants[0].manuscripts[0].line.line_number,
                                    (
                                        Word.of(
                                            [
                                                Reading.of_name("ku"),
                                                Joiner.hyphen(),
                                                BrokenAway.open(),
                                                Reading.of_name("nu"),
                                                Joiner.hyphen(),
                                                Reading.of_name("ši"),
                                                BrokenAway.close(),
                                            ],
                                            alignment=alignment,
                                            variant=Word.of(
                                                [Logogram.of_name("KU")],
                                                language=Language.SUMERIAN,
                                            ),
                                        ),
                                    ),
                                ),
                                omitted_words=omitted_words,
                            ),
                        ),
                    ),
                ),
            ),
        ),
    )
    expected_chapter = ApiChapterSchema().dump(updated_chapter)

    post_result = client.simulate_post(
        create_chapter_url(chapter, "/alignment"), body=json.dumps(DTO)
    )

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected_chapter

    get_result = client.simulate_get(create_chapter_url(chapter))
    assert get_result.status == falcon.HTTP_OK
    assert get_result.json == expected_chapter

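# The POST/GET pair above is the pattern used throughout these endpoint tests:
# the POST response is checked against the expected serialized chapter, and a
# follow-up GET confirms the update was actually persisted.
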
def test_updating_alignment(
    corpus, text_repository, bibliography, changelog, signs, sign_repository, user, when
) -> None:
    alignment = 0
    omitted_words = (1,)
    updated_chapter = attr.evolve(
        CHAPTER,
        lines=(
            attr.evolve(
                CHAPTER.lines[0],
                variants=(
                    attr.evolve(
                        CHAPTER.lines[0].variants[0],
                        manuscripts=(
                            attr.evolve(
                                CHAPTER.lines[0].variants[0].manuscripts[0],
                                line=TextLine.of_iterable(
                                    CHAPTER.lines[0].variants[0].manuscripts[0].line.line_number,
                                    (
                                        Word.of(
                                            [
                                                Reading.of_name("ku"),
                                                Joiner.hyphen(),
                                                BrokenAway.open(),
                                                Reading.of_name("nu"),
                                                Joiner.hyphen(),
                                                Reading.of_name("ši"),
                                                BrokenAway.close(),
                                            ],
                                            alignment=alignment,
                                        ),
                                    ),
                                ),
                                omitted_words=omitted_words,
                            ),
                        ),
                    ),
                ),
            ),
        ),
    )
    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    alignment_update = Alignment(
        (
            (
                (
                    ManuscriptLineAlignment(
                        (AlignmentToken("ku-[nu-ši]", alignment),), omitted_words
                    ),
                ),
            ),
        )
    )

    assert corpus.update_alignment(CHAPTER.id_, alignment_update, user) == updated_chapter

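# The nesting of the Alignment payload above mirrors the chapter structure,
# outermost to innermost: lines -> variants -> manuscript lines. A minimal
# sketch for one line with one variant and one manuscript (an illustrative
# helper, not part of the suite; values reused from the test above):
def make_single_manuscript_alignment(value: str, index: int) -> Alignment:
    return Alignment(
        (  # one entry per chapter line
            (  # one entry per variant of that line
                (ManuscriptLineAlignment((AlignmentToken(value, index),), (1,)),),
            ),
        )
    )
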
def test_text_line_of_iterable(code: str, language: Language) -> None:
    tokens = [
        Word.of([Reading.of_name("first")]),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")]),
        LanguageShift.of("%sb"),
        LoneDeterminative.of([Determinative.of([Reading.of_name("third")])]),
        Word.of([BrokenAway.open(), Reading.of_name("fourth")]),
        UnknownNumberOfSigns.of(),
        BrokenAway.close(),
    ]
    expected_tokens = (
        Word.of([Reading.of_name("first")], DEFAULT_LANGUAGE),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")], language),
        LanguageShift.of("%sb"),
        LoneDeterminative.of(
            [Determinative.of([Reading.of_name("third")])], Language.AKKADIAN
        ),
        Word.of(
            [
                BrokenAway.open(),
                Reading.of(
                    (
                        ValueToken(
                            frozenset({EnclosureType.BROKEN_AWAY}),
                            ErasureState.NONE,
                            "fourth",
                        ),
                    )
                ).set_enclosure_type(frozenset({EnclosureType.BROKEN_AWAY})),
            ],
            DEFAULT_LANGUAGE,
        ),
        UnknownNumberOfSigns(frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE),
        BrokenAway.close().set_enclosure_type(frozenset({EnclosureType.BROKEN_AWAY})),
    )
    line = TextLine.of_iterable(LINE_NUMBER, tokens)

    assert line.line_number == LINE_NUMBER
    assert line.content == expected_tokens
    assert (
        line.key
        == f"TextLine⁞{line.atf}⟨{'⁚'.join(token.get_key() for token in expected_tokens)}⟩"
    )
    assert line.atf == f"1. first {code} second %sb {{third}} [fourth ...]"

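# What this test pins down: TextLine.of_iterable stamps each Word with the
# language active at its position. Words before any shift get DEFAULT_LANGUAGE,
# words after a shift get that shift's language (%sb resolves to Akkadian in
# the expected tokens), and tokens inside [...] additionally acquire
# EnclosureType.BROKEN_AWAY.
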
def test_updating_lines_edit(
    corpus, text_repository, bibliography, changelog, signs, sign_repository, user, when
) -> None:
    updated_chapter = attr.evolve(
        CHAPTER,
        lines=(
            attr.evolve(
                CHAPTER.lines[0],
                number=LineNumber(1, True),
                variants=(
                    attr.evolve(
                        CHAPTER.lines[0].variants[0],
                        manuscripts=(
                            attr.evolve(
                                CHAPTER.lines[0].variants[0].manuscripts[0],
                                line=TextLine.of_iterable(
                                    LineNumber(1, True),
                                    (
                                        Word.of(
                                            [
                                                Reading.of_name("nu"),
                                                Joiner.hyphen(),
                                                BrokenAway.open(),
                                                Reading.of_name("ku"),
                                                Joiner.hyphen(),
                                                Reading.of_name("ši"),
                                                BrokenAway.close(),
                                            ]
                                        ),
                                    ),
                                ),
                            ),
                        ),
                    ),
                ),
            ),
        ),
        signs=("ABZ075 KU ABZ207a\\u002F207b\\u0020X\nKU\nABZ075",),
        parser_version=ATF_PARSER_VERSION,
    )
    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    assert (
        corpus.update_lines(
            CHAPTER.id_, LinesUpdate([], set(), {0: updated_chapter.lines[0]}), user
        )
        == updated_chapter
    )

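# LinesUpdate's three arguments appear to be (new lines, indices of deleted
# lines, {index: edited line}); here nothing is added or deleted and line 0 is
# replaced, which also forces signs and parser_version to be recomputed on the
# expected chapter.
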
def expected_transliteration(language: Language) -> Sequence[Token]:
    return (
        Word.of([Reading.of_name("bu")], language),
        LanguageShift.of("%es"),
        Word.of(
            [
                BrokenAway.open(),
                Reading.of(
                    (
                        ValueToken(
                            frozenset({EnclosureType.BROKEN_AWAY}),
                            ErasureState.NONE,
                            "kur",
                        ),
                    )
                ).set_enclosure_type(frozenset({EnclosureType.BROKEN_AWAY})),
            ],
            Language.EMESAL,
        ),
        UnknownNumberOfSigns(frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE),
        BrokenAway.close().set_enclosure_type(frozenset({EnclosureType.BROKEN_AWAY})),
    )

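# Note the enclosure bookkeeping here: tokens *after* BrokenAway.open() carry
# EnclosureType.BROKEN_AWAY in their enclosure set, and the closing bracket is
# tagged via set_enclosure_type, while the opening bracket itself stays
# untagged because the enclosure only starts after it.
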
class LineVariantFactory(factory.Factory):
    class Meta:
        model = LineVariant

    class Params:
        manuscript_id = factory.Sequence(lambda n: n)
        manuscript = factory.SubFactory(
            ManuscriptLineFactory,
            manuscript_id=factory.SelfAttribute("..manuscript_id"),
        )

    reconstruction = (
        LanguageShift.normalized_akkadian(),
        AkkadianWord.of((ValueToken.of("buāru"),)),
        MetricalFootSeparator.uncertain(),
        BrokenAway.open(),
        UnknownNumberOfSigns.of(),
        Caesura.certain(),
        AkkadianWord.of(
            (
                UnknownNumberOfSigns.of(),
                BrokenAway.close(),
                Joiner.hyphen(),
                ValueToken.of("buāru"),
            ),
            (Flag.DAMAGE,),
        ),
    )
    note = factory.fuzzy.FuzzyChoice([None, NoteLine((StringPart("a note"),))])
    manuscripts = factory.List([factory.SelfAttribute("..manuscript")], TupleFactory)
    intertext = factory.fuzzy.FuzzyChoice([tuple(), (StringPart("bar"),)])
    parallel_lines = factory.List(
        [
            factory.SubFactory(ParallelCompositionFactory),
            factory.SubFactory(ParallelTextFactory),
            factory.SubFactory(ParallelFragmentFactory),
        ],
        TupleFactory,
    )

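# A sketch of the Params wiring above (assumed, not from the original module):
# manuscript_id is a parameter, and SelfAttribute("..manuscript_id") makes the
# nested ManuscriptLine share it, so an override propagates down and the built
# variant is internally consistent.
def test_line_variant_factory_sketch() -> None:
    variant = LineVariantFactory.build(manuscript_id=42)
    assert variant.manuscripts[0].manuscript_id == 42
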
def test_variant():
    reading = Reading.of([ValueToken.of("sa"), BrokenAway.open(), ValueToken.of("l")])
    divider = Divider.of(":")
    variant = Variant.of(reading, divider)

    expected_value = "sa[l/:"
    assert variant.value == expected_value
    assert variant.clean_value == "sal/:"
    assert variant.tokens == (reading, divider)
    assert variant.parts == variant.tokens
    assert (
        variant.get_key()
        == f"Variant⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in variant.tokens)}⟩"
    )
    assert variant.lemmatizable is False

    serialized = {
        "type": "Variant",
        "tokens": OneOfTokenSchema().dump([reading, divider], many=True),
    }
    assert_token_serialization(variant, serialized)

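# The key format asserted above follows the convention used across these token
# tests: type and value separated by "⁝" (lines use "⁞" instead), with the
# constituent token keys joined by "⁚" inside "⟨…⟩".
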
)
from ebl.transliteration.domain.tokens import Joiner, UnknownNumberOfSigns, ValueToken


@pytest.mark.parametrize(  # pyre-ignore[56]
    "word,expected,lemmatizable",
    [
        (AkkadianWord.of((ValueToken.of("ibnû"),)), "ibnû", True),
        (
            AkkadianWord.of(
                (ValueToken.of("ibnû"),),
                (Flag.UNCERTAIN, Flag.DAMAGE, Flag.CORRECTION),
            ),
            "ibnû?#!",
            True,
        ),
        (AkkadianWord.of((BrokenAway.open(), ValueToken.of("ibnû"))), "[ibnû", True),
        (
            AkkadianWord.of(
                (
                    BrokenAway.open(),
                    PerhapsBrokenAway.open(),
                    ValueToken.of("ib"),
                    PerhapsBrokenAway.close(),
                    ValueToken.of("nû"),
                    BrokenAway.close(),
                )
            ),
            "[(ib)nû]",
            True,
        ),
        (
            AkkadianWord.of(
                (
                    BrokenAway.open(),
def test_updating_lemmatization(
    client, bibliography, sign_repository, signs, text_repository
):
    allow_signs(signs, sign_repository)
    chapter: Chapter = ChapterFactory.build()
    allow_references(chapter, bibliography)
    text_repository.create_chapter(chapter)

    updated_chapter = attr.evolve(
        chapter,
        lines=(
            attr.evolve(
                chapter.lines[0],
                variants=(
                    attr.evolve(
                        chapter.lines[0].variants[0],
                        reconstruction=(
                            chapter.lines[0].variants[0].reconstruction[0],
                            chapter.lines[0].variants[0].reconstruction[1].set_unique_lemma(
                                LemmatizationToken(
                                    chapter.lines[0].variants[0].reconstruction[1].value,
                                    (WordId("aklu I"),),
                                )
                            ),
                            *chapter.lines[0].variants[0].reconstruction[2:6],
                            chapter.lines[0].variants[0].reconstruction[6].set_unique_lemma(
                                LemmatizationToken(
                                    chapter.lines[0].variants[0].reconstruction[6].value,
                                    tuple(),
                                )
                            ),
                        ),
                        manuscripts=(
                            attr.evolve(
                                chapter.lines[0].variants[0].manuscripts[0],
                                line=TextLine.of_iterable(
                                    chapter.lines[0].variants[0].manuscripts[0].line.line_number,
                                    (
                                        Word.of(
                                            [
                                                Reading.of_name("ku"),
                                                Joiner.hyphen(),
                                                BrokenAway.open(),
                                                Reading.of_name("nu"),
                                                Joiner.hyphen(),
                                                Reading.of_name("ši"),
                                                BrokenAway.close(),
                                            ],
                                            unique_lemma=[WordId("aklu I")],
                                        ),
                                    ),
                                ),
                            ),
                        ),
                    ),
                ),
            ),
        ),
    )
    expected = create_chapter_dto(updated_chapter)

    post_result = client.simulate_post(
        create_chapter_url(chapter, "/lemmatization"), body=json.dumps(DTO)
    )

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected

    get_result = client.simulate_get(create_chapter_url(chapter))
    assert get_result.status == falcon.HTTP_OK
    assert get_result.json == expected

    assert sign.flags == tuple(flags)
    assert sign.lemmatizable is False

    serialized = {"type": "UnclearSign", "flags": ["!"]}
    assert_token_serialization(sign, serialized)


@pytest.mark.parametrize(  # pyre-ignore[56]
    "name_parts,sub_index,modifiers,flags,sign,expected_value,expected_clean_value,"
    "expected_name",
    [
        ((ValueToken.of("kur"),), 1, [], [], None, "kur", "kur", "kur"),
        ((ValueToken.of("kurʾ"),), 1, [], [], None, "kurʾ", "kurʾ", "kurʾ"),
        ((ValueToken.of("ʾ"),), 1, [], [], None, "ʾ", "ʾ", "ʾ"),
        (
            (ValueToken.of("k"), BrokenAway.open(), ValueToken.of("ur")),
            1,
            [],
            [],
            None,
            "k[ur",
            "kur",
            "kur",
        ),
        (
            (ValueToken.of("ku"), BrokenAway.close(), ValueToken.of("r")),
            1,
            [],
            [],
            None,
            "ku]r",
def ebl_atf_text_line__close_broken_away(self, _):
    return BrokenAway.close()

def ebl_atf_text_line__open_broken_away(self, _):
    return BrokenAway.open()

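# These two transformer callbacks are where the ATF brackets become tokens:
# "]" maps to BrokenAway.close() and "[" to BrokenAway.open(), so an input like
# "ku-[nu-ši]" parses into Reading "ku", a hyphen Joiner, BrokenAway.open(),
# Readings "nu" and "ši", and BrokenAway.close() (cf. the factories above).
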
from ebl.transliteration.domain.note_line import NoteLine
from ebl.transliteration.domain.sign_tokens import Reading
from ebl.transliteration.domain.tokens import (
    EnclosureType,
    ErasureState,
    LanguageShift,
    Token,
    UnknownNumberOfSigns,
    ValueToken,
)
from ebl.transliteration.domain.word_tokens import Word

TRANSLITERATION: Sequence[Token] = (
    Word.of([Reading.of_name("bu")]),
    LanguageShift.of("%es"),
    Word.of([BrokenAway.open(), Reading.of_name("kur")]),
    UnknownNumberOfSigns.of(),
    BrokenAway.close(),
)

EXPECTED_ATF = "bu %es [kur ...]"


def expected_transliteration(language: Language) -> Sequence[Token]:
    return (
        Word.of([Reading.of_name("bu")], language),
        LanguageShift.of("%es"),
        Word.of(
            [
                BrokenAway.open(),
                Reading.of(
                    (
                        ValueToken(
                            frozenset({EnclosureType.BROKEN_AWAY}),
)


@pytest.mark.parametrize(  # pyre-ignore[56]
    "atf,expected",
    [
        ("...", Word.of([UnknownNumberOfSigns.of()])),
        ("x", Word.of([UnclearSign.of()])),
        ("X", Word.of([UnidentifiedSign.of()])),
        ("x?", Word.of([UnclearSign.of([atf.Flag.UNCERTAIN])])),
        ("X#", Word.of([UnidentifiedSign.of([atf.Flag.DAMAGE])])),
        ("12", Word.of([Number.of_name("12")])),
        (
            "1]2",
            Word.of(
                [Number.of((ValueToken.of("1"), BrokenAway.close(), ValueToken.of("2")))]
            ),
        ),
        (
            "1[2",
            Word.of(
                [Number.of((ValueToken.of("1"), BrokenAway.open(), ValueToken.of("2")))]
            ),
        ),
        ("ʾ", Word.of([Reading.of_name("ʾ")])),
        ("du₁₁", Word.of([Reading.of_name("du", 11)])),
        ("GAL", Word.of([Logogram.of_name("GAL")])),
        (
            "kur(GAL)",
(
    Line(
        LineNumber(1),
        (
            LineVariant(
                RECONSTRUCTION,
                NOTE,
                (
                    ManuscriptLine(
                        MANUSCRIPT_ID,
                        LABELS,
                        TextLine(
                            LineNumber(1),
                            (
                                Word.of(
                                    [
                                        Reading.of(
                                            [ValueToken.of("ku"), BrokenAway.close()]
                                        ),
                                        Joiner.hyphen(),
                                        Reading.of_name("nu"),
                                        Joiner.hyphen(),
                                        Reading.of_name("si"),
                                    ],
                                    unique_lemma=(WordId("word"),),
                                    alignment=0,
                                ),
                            ),
                        ),
                    ),
                ),
            ),
        ),
        IS_SECOND_LINE_OF_PARALLELISM,
        IS_BEGINNING_OF_SECTION,
    ),
    LineNumber(1),
    [
        Word.of(
            unique_lemma=(WordId("nu I"),),
            parts=[
                PerhapsBrokenAway.open(),
                Reading.of_name("ku", flags=[atf.Flag.DAMAGE, atf.Flag.UNCERTAIN]),
                PerhapsBrokenAway.close(),
            ],
        ),
        Word.of(
            unique_lemma=(WordId("bu I"),),
            parts=[
                BrokenAway.open(),
                Reading.of_name("bu"),
                BrokenAway.close(),
            ],
        ),
        Word.of(
            unique_lemma=(WordId("kunu I"),),
            alignment=4,
            parts=[
                Reading.of_name("ku"),
                Joiner.hyphen(),
                BrokenAway.open(),
                Reading.of_name("nu", flags=[atf.Flag.DAMAGE, atf.Flag.UNCERTAIN]),
                BrokenAway.close(),
class TransliteratedFragmentFactory(FragmentFactory):
    text = Text(
        (
            TextLine.of_iterable(
                LineNumber(1, True),
                (
                    Word.of([UnidentifiedSign.of()]),
                    Word.of(
                        [
                            Logogram.of_name(
                                "BA",
                                surrogate=[
                                    Reading.of_name("ku"),
                                    Joiner.hyphen(),
                                    Reading.of_name("u", 4),
                                ],
                            )
                        ]
                    ),
                    Column.of(),
                    Tabulation.of(),
                    Word.of(
                        [
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            Joiner.hyphen(),
                            Reading.of_name("ku"),
                            BrokenAway.close(),
                            Joiner.hyphen(),
                            Reading.of_name("nu"),
                            Joiner.hyphen(),
                            Reading.of_name("ši"),
                        ]
                    ),
                    Variant.of(Divider.of(":"), Reading.of_name("ku")),
                    Word.of(
                        [BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close()]
                    ),
                    Column.of(2),
                    Divider.of(":", ("@v",), (Flag.DAMAGE,)),
                    CommentaryProtocol.of("!qt"),
                    Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
                ),
            ),
            TextLine.of_iterable(
                LineNumber(2, True),
                (
                    Word.of(
                        [BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close()]
                    ),
                    Word.of([Logogram.of_name("GI", 6)]),
                    Word.of([Reading.of_name("ana")]),
                    Word.of(
                        [
                            Reading.of_name("u", 4),
                            Joiner.hyphen(),
                            Reading.of(
                                (
                                    ValueToken.of("š"),
                                    BrokenAway.open(),
                                    ValueToken.of("u"),
                                )
                            ),
                        ]
                    ),
                    Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
                ),
            ),
            TextLine.of_iterable(
                LineNumber(3, True),
                (
                    Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                    Word.of(
                        [
                            Reading.of(
                                (
                                    ValueToken.of("k"),
                                    BrokenAway.close(),
                                    ValueToken.of("i"),
                                )
                            ),
                            Joiner.hyphen(),
                            Reading.of_name("du"),
                        ]
                    ),
                    Word.of([Reading.of_name("u")]),
                    Word.of(
                        [
                            Reading.of_name("ba"),
                            Joiner.hyphen(),
                            Reading.of_name("ma"),
                            Joiner.hyphen(),
                            Reading.of(
                                (
                                    ValueToken.of("t"),
                                    BrokenAway.open(),
                                    ValueToken.of("i"),
                                )
                            ),
                        ]
                    ),
                    Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
                ),
            ),
            TextLine.of_iterable(
                LineNumber(6, True),
                (
                    Word.of(
                        [BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close()]
                    ),
                    Word.of([UnclearSign.of([Flag.DAMAGE])]),
                    Word.of([Reading.of_name("mu")]),
                    Word.of(
                        [
                            Reading.of_name("ta"),
                            Joiner.hyphen(),
                            Reading.of_name("ma"),
                            InWordNewline.of(),
                            Joiner.hyphen(),
                            Reading.of_name("tu", 2),
                        ]
                    ),
                ),
            ),
            TextLine.of_iterable(
                LineNumber(7, True),
                (
                    Word.of(
                        [Variant.of(Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"]))]
                    ),
                    LanguageShift.normalized_akkadian(),
                    AkkadianWord.of([ValueToken.of("kur")]),
                ),
            ),
            StateDollarLine(
                atf.Qualification.AT_LEAST,
                1,
                ScopeContainer(atf.Surface.OBVERSE, ""),
                atf.State.MISSING,
                None,
            ),
            ImageDollarLine("1", None, "numbered diagram of triangle"),
            RulingDollarLine(atf.Ruling.SINGLE),
            LooseDollarLine("this is a loose line"),
            SealDollarLine(1),
            SealAtLine(1),
            HeadingAtLine(1),
            ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
            SurfaceAtLine(
                SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")
            ),
            ObjectAtLine(
                ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")
            ),
            DiscourseAtLine(atf.Discourse.DATE),
            DivisionAtLine("paragraph", 5),
            CompositeAtLine(atf.Composite.DIV, "part", 1),
            NoteLine(
                (
                    StringPart("a note "),
                    EmphasisPart("italic"),
                    LanguagePart.of_transliteration(
                        Language.AKKADIAN, (Word.of([Reading.of_name("bu")]),)
                    ),
                )
            ),
            ParallelComposition(False, "my name", LineNumber(1)),
            ParallelText(
                True,
                TextId(CorpusGenre.LITERATURE, 1, 1),
                ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
                LineNumber(1),
                False,
            ),
            ParallelFragment(
                False, MuseumNumber.of("K.1"), True, Labels(), LineNumber(1), False
            ),
        )
    )
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|"
    )
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION),))
    line_to_vec = (
        (
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.SINGLE_RULING,
        ),
    )

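# The line_to_vec encoding above appears to track only the line types relevant
# to vectorization: one TEXT_LINE per TextLine in the text (five here) and
# SINGLE_RULING for the RulingDollarLine; the remaining dollar/at lines do not
# contribute entries.
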
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    text = Text(
        (
            TextLine.of_iterable(
                LineNumber(1, True),
                (
                    Word.of([UnidentifiedSign.of()]),
                    Word.of(
                        [
                            Logogram.of_name(
                                "BA",
                                surrogate=[
                                    Reading.of_name("ku"),
                                    Joiner.hyphen(),
                                    Reading.of_name("u", 4),
                                ],
                            )
                        ]
                    ),
                    Column.of(),
                    Tabulation.of(),
                    Word.of(
                        [
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            Joiner.hyphen(),
                            Reading.of_name("ku"),
                            BrokenAway.close(),
                            Joiner.hyphen(),
                            Reading.of_name("nu"),
                            Joiner.hyphen(),
                            Reading.of_name("ši"),
                        ]
                    ),
                    Variant.of(Divider.of(":"), Reading.of_name("ku")),
                    Word.of(
                        [BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close()]
                    ),
                    Column.of(2),
                    Divider.of(":", ("@v",), (Flag.DAMAGE,)),
                    CommentaryProtocol.of("!qt"),
                    Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
                ),
            ),
            TextLine.of_iterable(
                LineNumber(2, True),
                (
                    Word.of(
                        [BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close()]
                    ),
                    Word.of(
                        [Logogram.of_name("GI", 6)], unique_lemma=(WordId("ginâ I"),)
                    ),
                    Word.of([Reading.of_name("ana")], unique_lemma=(WordId("ana I"),)),
                    Word.of(
                        [
                            Reading.of_name("u", 4),
                            Joiner.hyphen(),
                            Reading.of(
                                (
                                    ValueToken.of("š"),
                                    BrokenAway.open(),
                                    ValueToken.of("u"),
                                )
                            ),
                        ],
                        unique_lemma=(WordId("ūsu I"),),
                    ),
                    Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
                ),
            ),
            TextLine.of_iterable(
                LineNumber(3, True),
                (
                    Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                    Word.of(
                        unique_lemma=(WordId("kīdu I"),),
                        parts=[
                            Reading.of(
                                (
                                    ValueToken.of("k"),
                                    BrokenAway.close(),
                                    ValueToken.of("i"),
                                )
                            ),
                            Joiner.hyphen(),
                            Reading.of_name("du"),
                        ],
                    ),
                    Word.of(unique_lemma=(WordId("u I"),), parts=[Reading.of_name("u")]),
                    Word.of(
                        unique_lemma=(WordId("bamātu I"),),
                        parts=[
                            Reading.of_name("ba"),
                            Joiner.hyphen(),
                            Reading.of_name("ma"),
                            Joiner.hyphen(),
                            Reading.of(
                                (
                                    ValueToken.of("t"),
                                    BrokenAway.open(),
                                    ValueToken.of("i"),
                                )
                            ),
                        ],
                    ),
                    Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
                ),
            ),
            TextLine.of_iterable(
                LineNumber(6, True),
                (
                    Word.of(
                        [BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close()]
                    ),
                    Word.of([UnclearSign.of([Flag.DAMAGE])]),
                    Word.of(
                        unique_lemma=(WordId("mu I"),), parts=[Reading.of_name("mu")]
                    ),
                    Word.of(
                        unique_lemma=(WordId("tamalāku I"),),
                        parts=[
                            Reading.of_name("ta"),
                            Joiner.hyphen(),
                            Reading.of_name("ma"),
                            InWordNewline.of(),
                            Joiner.hyphen(),
                            Reading.of_name("tu", 2),
                        ],
                    ),
                ),
            ),
            TextLine.of_iterable(
                LineNumber(7, True),
                (
                    Word.of(
                        [Variant.of(Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"]))]
                    ),
                    LanguageShift.normalized_akkadian(),
                    AkkadianWord.of(
                        [ValueToken.of("kur")], unique_lemma=(WordId("normalized I"),)
                    ),
                ),
            ),
            StateDollarLine(
                atf.Qualification.AT_LEAST,
                1,
                ScopeContainer(atf.Surface.OBVERSE, ""),
                atf.State.MISSING,
                None,
            ),
            ImageDollarLine("1", None, "numbered diagram of triangle"),
            RulingDollarLine(atf.Ruling.SINGLE),
            LooseDollarLine("this is a loose line"),
            SealDollarLine(1),
            SealAtLine(1),
            HeadingAtLine(1),
            ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
            SurfaceAtLine(
                SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")
            ),
            ObjectAtLine(
                ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")
            ),
            DiscourseAtLine(atf.Discourse.DATE),
            DivisionAtLine("paragraph", 5),
            CompositeAtLine(atf.Composite.DIV, "part", 1),
            NoteLine(
                (
                    StringPart("a note "),
                    EmphasisPart("italic"),
                    LanguagePart.of_transliteration(
                        Language.AKKADIAN, (Word.of([Reading.of_name("bu")]),)
                    ),
                )
            ),
            ParallelComposition(False, "my name", LineNumber(1)),
            ParallelText(
                True,
                TextId(CorpusGenre.LITERATURE, 1, 1),
                ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
                LineNumber(1),
                False,
            ),
            ParallelFragment(
                False, MuseumNumber.of("K.1"), True, Labels(), LineNumber(1), False
            ),
        )
    )

def test_updating_manuscript_lemmatization(
    corpus, text_repository, bibliography, changelog, signs, sign_repository, user, when
) -> None:
    updated_chapter = attr.evolve(
        CHAPTER,
        lines=(
            attr.evolve(
                CHAPTER.lines[0],
                variants=(
                    attr.evolve(
                        CHAPTER.lines[0].variants[0],
                        reconstruction=(
                            CHAPTER.lines[0].variants[0].reconstruction[0],
                            CHAPTER.lines[0].variants[0].reconstruction[1].set_unique_lemma(
                                LemmatizationToken(
                                    CHAPTER_WITHOUT_DOCUMENTS.lines[0].variants[0].reconstruction[1].value,
                                    (WordId("aklu I"),),
                                )
                            ),
                            *CHAPTER.lines[0].variants[0].reconstruction[2:6],
                            CHAPTER.lines[0].variants[0].reconstruction[6].set_unique_lemma(
                                LemmatizationToken(
                                    CHAPTER.lines[0].variants[0].reconstruction[6].value,
                                    tuple(),
                                )
                            ),
                        ),
                        manuscripts=(
                            attr.evolve(
                                CHAPTER.lines[0].variants[0].manuscripts[0],
                                line=TextLine.of_iterable(
                                    CHAPTER.lines[0].variants[0].manuscripts[0].line.line_number,
                                    (
                                        Word.of(
                                            [
                                                Reading.of_name("ku"),
                                                Joiner.hyphen(),
                                                BrokenAway.open(),
                                                Reading.of_name("nu"),
                                                Joiner.hyphen(),
                                                Reading.of_name("ši"),
                                                BrokenAway.close(),
                                            ],
                                            unique_lemma=(WordId("aklu I"),),
                                        ),
                                    ),
                                ),
                            ),
                        ),
                    ),
                ),
            ),
        ),
    )
    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    lemmatization: ChapterLemmatization = (
        (
            LineVariantLemmatization(
                (
                    LemmatizationToken("%n"),
                    LemmatizationToken("buāru", (WordId("aklu I"),)),
                    LemmatizationToken("(|)"),
                    LemmatizationToken("["),
                    LemmatizationToken("..."),
                    LemmatizationToken("||"),
                    LemmatizationToken("...]-buāru#", tuple()),
                ),
                ((LemmatizationToken("ku-[nu-ši]", (WordId("aklu I"),)),),),
            ),
        ),
    )

    assert (
        corpus.update_manuscript_lemmatization(CHAPTER.id_, lemmatization, user)
        == updated_chapter
    )

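# Like the alignment payload, ChapterLemmatization nests lines -> variants,
# where each variant pairs the reconstruction tokens with a tuple of manuscript
# token tuples. A minimal sketch (illustrative helper, not part of the suite):
def make_single_variant_lemmatization(reconstruction, manuscript) -> ChapterLemmatization:
    return ((LineVariantLemmatization(tuple(reconstruction), (tuple(manuscript),)),),)
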
def make_token(self, data, **kwargs):
    return (
        BrokenAway.of(data["side"])
        .set_enclosure_type(frozenset(data["enclosure_type"]))
        .set_erasure(data["erasure"])
    )

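# This deserializer rebuilds a BrokenAway token from its dumped fields: "side"
# selects open vs. close, and the enclosure types and erasure state recorded at
# dump time are reapplied, since set_enclosure_type/set_erasure return new
# tokens rather than mutating in place.
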
                LineNumber(9, False, None, "b")),
                (Word.of((UnknownNumberOfSigns.of(),)),),
            )
        ],
    ),
    ("1. ($___$)", [TextLine.of_iterable(LineNumber(1), (Tabulation.of(),))]),
    (
        "1. ... [...] [(...)]",
        [
            TextLine.of_iterable(
                LineNumber(1),
                (
                    Word.of((UnknownNumberOfSigns.of(),)),
                    Word.of(
                        (
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            BrokenAway.close(),
                        )
                    ),
                    Word.of(
                        (
                            BrokenAway.open(),
                            PerhapsBrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            PerhapsBrokenAway.close(),
                            BrokenAway.close(),
                        )
                    ),
                ),
            )
        ],
    ),
    (
"ʾABDEGHIKLMNPSTUYZabcdefghiklmnpqrstuwyzÉâêîûāĒēīŠšūṣṭ₄" ), [], ], ), ("ibnû?", [ValueToken.of("ibnû"), [Flag.UNCERTAIN]]), ("ibnû#", [ValueToken.of("ibnû"), [Flag.DAMAGE]]), ("ibnû!", [ValueToken.of("ibnû"), [Flag.CORRECTION]]), ("ibnû#?", [ValueToken.of("ibnû"), [Flag.DAMAGE, Flag.UNCERTAIN]]), ("ibnû?#", [ValueToken.of("ibnû"), [Flag.UNCERTAIN, Flag.DAMAGE]]), ( "ibnû?#!", [ValueToken.of("ibnû"), [Flag.UNCERTAIN, Flag.DAMAGE, Flag.CORRECTION]], ), ("ibnû##", [ValueToken.of("ibnû"), [Flag.DAMAGE]]), ("[ibnû]", [BrokenAway.open(), ValueToken.of("ibnû"), BrokenAway.close(), []]), ("ib[nû", [ValueToken.of("ib"), BrokenAway.open(), ValueToken.of("nû"), []]), ("ib]nû", [ValueToken.of("ib"), BrokenAway.close(), ValueToken.of("nû"), []]), ( "i[b]nû", [ ValueToken.of("i"), BrokenAway.open(), ValueToken.of("b"), BrokenAway.close(), ValueToken.of("nû"), [], ], ), ("ibnû?]", [ValueToken.of("ibnû"), BrokenAway.close(), [Flag.UNCERTAIN]]), (
def map_line(atf) -> Sequence[Token]:
    visitor = EnclosureUpdater()
    parse_line(f"1. {atf}").accept(visitor)
    return visitor.tokens


@pytest.mark.parametrize(
    "atf, expected",
    [
        ("...", (Word.of((UnknownNumberOfSigns.of(),)),)),
        (
            "[...]",
            (
                Word.of(
                    (
                        BrokenAway.open(),
                        UnknownNumberOfSigns(
                            frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE
                        ),
                        BrokenAway.close().set_enclosure_type(
                            frozenset({EnclosureType.BROKEN_AWAY})
                        ),
                    )
                ),
            ),
        ),
        (
            "(...)",
            (
                Word.of(
                    (
                        PerhapsBrokenAway.open(),
                        UnknownNumberOfSigns(
                            frozenset({EnclosureType.PERHAPS}), ErasureState.NONE
                        ),
                        PerhapsBrokenAway.close().set_enclosure_type(
                            frozenset({EnclosureType.PERHAPS})
                        ),
                    )
                ),
            ),