Exemple #1
0
def test_fossey_schema():
    """Check that FosseySchema loads and dumps a fully populated Fossey."""
    museum_number = MuseumNumber.of("K.4562")
    sign_path = "M15,21.7c-0.1-0.1-0.2-0.4-0.2-0.8c-0.1-1-0.1-1.2-0.5-1.3c-0.2"
    schema = FosseySchema()

    # Serialized form, using the camelCase keys of the schema.
    serialized = dict(
        page=405,
        number=25728,
        suffix="B",
        reference="Mai: MDP, VI, 11.I, 11",
        newEdition="Paulus AOAT 50, 981",
        secondaryLiterature="NABU 1997/1",
        cdliNumber="P123456",
        museumNumber=MuseumNumberSchema().dump(museum_number),
        externalProject="dcclt",
        notes="Das Zeichen ist eigentlich ZA₇",
        date="Marduk-apla-iddina I, 1171-1159 BC",
        transliteration="me-luḫ-ḫa",
        sign=sign_path,
    )
    # Domain object carrying the same values, in positional order.
    expected = Fossey(
        405,
        25728,
        "B",
        "Mai: MDP, VI, 11.I, 11",
        "Paulus AOAT 50, 981",
        "NABU 1997/1",
        "P123456",
        museum_number,
        "dcclt",
        "Das Zeichen ist eigentlich ZA₇",
        "Marduk-apla-iddina I, 1171-1159 BC",
        "me-luḫ-ḫa",
        sign_path,
    )

    assert schema.load(serialized) == expected
    assert schema.dump(expected) == serialized
def test_find_transliterated(database, fragment_repository):
    """Check that only transliterated fragments are listed, X.1 before X.2."""
    first = TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.1"))
    second = TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.2"))

    def as_document(fragment):
        # Serialized fragment keyed by its museum number.
        return {**SCHEMA.dump(fragment), "_id": str(fragment.number)}

    # Inserted as X.2, an extra FragmentFactory fragment, then X.1.
    documents = [
        as_document(second),
        SCHEMA.dump(FragmentFactory.build()),
        as_document(first),
    ]
    database[COLLECTION].insert_many(documents)

    expected_numbers = [first.number, second.number]
    assert fragment_repository.query_transliterated_numbers() == expected_numbers
def test_schema():
    """Check that FragmentPagerInfoSchema dumps each number under its own key."""
    # Distinct numbers, so that swapping "previous" and "next" in the schema
    # would make the assertion fail.
    previous_number = MuseumNumber.of("X.1")
    next_number = MuseumNumber.of("X.3")
    pager_info = FragmentPagerInfo(previous_number, next_number)
    assert FragmentPagerInfoSchema().dump(pager_info) == {
        "next": "X.3",
        "previous": "X.1",
    }
def test_fragment_matcher_route_error(client, fragmentarium, user):
    """Check that matching against the id "X.-1" yields HTTP 422."""
    invalid_id = "X.-1"
    stored_fragments = (
        TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.0")),
        TransliteratedFragmentFactory.build(
            number=MuseumNumber.of("X.1"),
            line_to_vec=(LineToVecEncoding.from_list([1, 1, 2]),),
        ),
    )
    for stored in stored_fragments:
        fragmentarium.create(stored)

    response = client.simulate_get(f"/fragments/{invalid_id}/match")
    assert response.status == falcon.HTTP_UNPROCESSABLE_ENTITY
def test_add_lowest_join_transliteration(user):
    """Check that updating X.2, which is joined with X.1, raises NotLowestJoinError."""
    lower_join = Join(MuseumNumber.of("X.1"), is_in_fragmentarium=True)
    fragment = FragmentFactory.build(
        number=MuseumNumber.of("X.2"),
        joins=Joins([[lower_join]]),
    )
    parsed_text = parse_atf_lark(Atf("1. x x"))
    update = TransliterationUpdate(parsed_text, fragment.notes)

    with pytest.raises(NotLowestJoinError):
        fragment.update_lowest_join_transliteration(update, user)
def test_query_next_and_previous_fragment(museum_numbers, fragment_repository):
    """Check previous/next lookups for every number, wrapping around at the ends.

    ``museum_numbers`` is a sequence of museum number strings; one fragment is
    created per entry. For each entry the repository must return its
    neighbours in the sequence, where the first entry's previous is the last
    one and the last entry's next is the first one.
    """
    # Parse every number once and reuse it for creation and comparison.
    numbers = [MuseumNumber.of(value) for value in museum_numbers]
    for number in numbers:
        fragment_repository.create(FragmentFactory.build(number=number))

    count = len(numbers)
    for position, number in enumerate(numbers):
        results = fragment_repository.query_next_and_previous_fragment(number)
        # The modulo gives the circular neighbours at both ends.
        assert results.previous == numbers[(position - 1) % count]
        assert results.next == numbers[(position + 1) % count]
Exemple #7
0
def test_generate_annotations(client, photo_repository):
    """Check the annotations route when annotation generation is requested."""
    fragment_number = MuseumNumber.of("K.2")

    # Single bounding box returned by the stubbed generate endpoint.
    bounding_box = {
        "top_left_x": 0.0,
        "top_left_y": 0.0,
        "width": 10.0,
        "height": 10.0,
        "probability": 0.99,
    }
    httpretty.register_uri(
        httpretty.POST,
        "http://localhost:8001/generate",
        body=json.dumps({"boundaryResults": [bounding_box]}),
        content_type="image/jpeg",
    )

    response = client.simulate_get(
        f"/fragments/{fragment_number}/annotations",
        params={"generateAnnotations": True},
    )

    assert response.status == falcon.HTTP_OK
    assert response.json is not None
    assert response.json["fragmentNumber"] == "K.2"
    geometry = response.json["annotations"][0]["geometry"]
    assert geometry["x"] == 0.0
    assert response.json["annotations"][0]["geometry"]["y"] == 0.0
    def rank_line_to_vec(self, candidate: str) -> LineToVecRanking:
        """Rank the transliterated fragments against the candidate fragment.

        The candidate's line-to-vec encodings are obtained through
        ``self._parse_candidate``. Every entry returned by the repository,
        except the one carrying the candidate's own museum number, is scored
        with both ``score`` and ``score_weighted`` and inserted into a
        ``LineToVecRanker``.

        Args:
            candidate: Museum number of the candidate fragment, as a string.

        Returns:
            The ranker's ranking; no scores are inserted when the candidate
            has no line-to-vec encodings.
        """
        candidate_line_to_vecs = self._parse_candidate(candidate)
        line_to_vec_entries = (
            self._fragment_repository.query_transliterated_line_to_vec())
        ranker = LineToVecRanker()
        if not candidate_line_to_vecs:
            return ranker.ranking

        # Parse the candidate number once instead of once per entry.
        candidate_number = MuseumNumber.of(candidate)
        other_entries = (
            entry for entry in line_to_vec_entries
            if entry.museum_number != candidate_number
        )
        for entry in other_entries:
            line_to_vec_score = LineToVecScore(
                entry.museum_number,
                entry.script,
                score(candidate_line_to_vecs, entry.line_to_vec),
            )
            line_to_vec_weighted_score = LineToVecScore(
                entry.museum_number,
                entry.script,
                score_weighted(candidate_line_to_vecs, entry.line_to_vec),
            )
            ranker.insert_score(line_to_vec_score,
                                line_to_vec_weighted_score)

        return ranker.ranking
Exemple #9
0
def test_find(fragment_repository, when, fragment_matcher):
    """Check that _parse_candidate returns the stored fragment's line_to_vec."""
    number = MuseumNumber.of("BM.11")
    expected_line_to_vec = (LineToVecEncoding.from_list((1, 2, 1, 1)), )
    stored_fragment = FragmentFactory.build(
        number=number, line_to_vec=expected_line_to_vec)
    stub = when(fragment_repository).query_by_museum_number(number)
    stub.thenReturn(stored_fragment)

    assert fragment_matcher._parse_candidate("BM.11") == expected_line_to_vec
Exemple #10
0
def test_signs_get(
    client,
    annotations_repository,
    photo_repository,
    when,
    fragment_repository,
    text_with_labels,
):
    """Check the sign images route for an annotated, transliterated fragment.

    A fragment K.2 and one annotation for the sign "signName" are stored; the
    first entry of the response must describe that fragment.
    """
    fragment = TransliteratedFragmentFactory.build(
        number=MuseumNumber.of("K.2"), text=text_with_labels)
    fragment_repository.create(fragment)

    annotation_data = AnnotationDataFactory.build(sign_name="signName",
                                                  path=[2, 0, 0])
    annotation = AnnotationFactory.build(data=annotation_data)
    annotations_repository.create_or_update(
        AnnotationsFactory.build(fragment_number="K.2",
                                 annotations=[annotation]))

    result = client.simulate_get("/signs/signName/images")

    # Check the status before touching the body, so that a failed request is
    # reported as such instead of as an indexing error on the payload.
    assert result.status == falcon.HTTP_OK
    assert len(result.json) > 0
    result_json = result.json[0]

    assert result_json["fragmentNumber"] == str(fragment.number)
    assert isinstance(result_json["image"], str)
    assert result_json["script"] == fragment.script
    assert result_json["label"] == "i Stone wig Stone wig 2"
def test_generate_annotations(annotations_repository, photo_repository,
                              changelog, when):
    """Check that EblAiClient turns one boundary result into one annotation."""
    fragment_number = MuseumNumber.of("X.0")

    # Payload served by the stubbed generate endpoint.
    bounding_box = {
        "top_left_x": 0.0,
        "top_left_y": 0.0,
        "width": 10.0,
        "height": 10.0,
        "probability": 0.99,
    }
    response_body = json.dumps({"boundaryResults": [bounding_box]})
    httpretty.register_uri(
        httpretty.POST,
        "http://mock-localhost:8001/generate",
        body=response_body,
        content_type="image/jpeg",
    )

    photo = create_test_photo(fragment_number)
    ai_client = EblAiClient("http://mock-localhost:8001")
    generated = ai_client.generate_annotations(fragment_number, photo)

    assert generated.fragment_number == fragment_number
    assert len(generated.annotations) == 1
    first_geometry = generated.annotations[0].geometry
    assert first_geometry.x == 0.0
    assert generated.annotations[0].geometry.y == 0.0
def test_number_deserialization():
    """Check that the "museumNumber" key is loaded into Fragment.number."""
    number = MuseumNumber.of("Z.1.b")
    base_data = FragmentSchema().dump(LemmatizedFragmentFactory.build())
    serialized_number = MuseumNumberSchema().dump(number)
    # Override the museum number of the dumped fragment.
    data = {**base_data, "museumNumber": serialized_number}

    loaded_fragment = FragmentSchema().load(data)
    assert loaded_fragment.number == number
def test_fragment_matcher_route(client, fragmentarium, user):
    """Check the match route's scores for X.15 against the stored X.326."""
    fragment_id = "X.15"

    candidate = TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.15"))
    other = TransliteratedFragmentFactory.build(
        number=MuseumNumber.of("X.326"),
        line_to_vec=(LineToVecEncoding.from_list([1, 1, 2]),),
    )

    def score_entry(value):
        # One ranking row for the only other fragment.
        return {"museumNumber": "X.326", "script": other.script, "score": value}

    expected_body = {
        "score": [score_entry(3)],
        "scoreWeighted": [score_entry(5)],
    }
    for stored in (candidate, other):
        fragmentarium.create(stored)

    response = client.simulate_get(f"/fragments/{fragment_id}/match")
    assert response.status == falcon.HTTP_OK
    assert response.json == expected_body
Exemple #14
0
def test_empty_line_to_vec(fragment_matcher, when):
    """Check that a candidate built without line_to_vec yields an empty ranking."""
    candidate_id = "BM.11"
    other_line_to_vec = (LineToVecEncoding.from_list([2, 1, 1]), )
    candidate = FragmentFactory.build(number=MuseumNumber.of("BM.11"))
    first_other = FragmentFactory.build(number=MuseumNumber.of("X.1"),
                                        line_to_vec=other_line_to_vec)
    second_other = FragmentFactory.build(number=MuseumNumber.of("X.2"),
                                         line_to_vec=other_line_to_vec)

    repository = fragment_matcher._fragment_repository
    candidate_stub = when(repository).query_by_museum_number(
        MuseumNumber.of(candidate_id))
    candidate_stub.thenReturn(candidate)

    entries = [
        LineToVecEntry(candidate.number, "N/A", tuple()),
        LineToVecEntry(first_other.number, "N/A", other_line_to_vec),
        LineToVecEntry(second_other.number, "N/A", other_line_to_vec),
    ]
    when(repository).query_transliterated_line_to_vec().thenReturn(entries)

    expected_ranking = LineToVecRanking([], [])
    assert fragment_matcher.rank_line_to_vec(candidate_id) == expected_ranking
def test_line_to_vec_ranking_schema():
    """Check the dumped form of a LineToVecRanking with two entries per list."""
    first_number = MuseumNumber.of("X.0")
    second_number = MuseumNumber.of("X.1")
    ranking = LineToVecRanking(
        score=[
            LineToVecScore(first_number, "N/A", 10),
            LineToVecScore(second_number, "N/A", 4),
        ],
        score_weighted=[
            LineToVecScore(first_number, "N/A", 15),
            LineToVecScore(second_number, "N/A", 7),
        ],
    )

    def dumped(museum_number, value):
        # Expected serialized form of a single score.
        return {"museumNumber": museum_number, "score": value, "script": "N/A"}

    expected = {
        "score": [dumped("X.0", 10), dumped("X.1", 4)],
        "scoreWeighted": [dumped("X.0", 15), dumped("X.1", 7)],
    }
    assert LineToVecRankingSchema().dump(ranking) == expected
def test_update_update_transliteration_not_lowest_join(
    fragment_updater, user, fragment_repository, when
):
    """Check that updating X.2, which is joined with X.1, raises NotLowestJoinError."""
    number = MuseumNumber.of("X.2")
    lower_join = Join(MuseumNumber.of("X.1"), is_in_fragmentarium=True)
    stored_fragment = TransliteratedFragmentFactory.build(
        number=number,
        joins=Joins([[lower_join]]),
    )

    lookup = when(fragment_repository).query_by_museum_number(number)
    lookup.thenReturn(stored_fragment)

    with pytest.raises(NotLowestJoinError):
        fragment_updater.update_transliteration(
            number,
            TransliterationUpdate(parse_atf_lark("1. x"), "updated notes", "X"),
            user,
            False,
        )
def test_parallel_fragment(cf, duplicates, labels, display_value) -> None:
    """Check the attributes and derived values of a ParallelFragment line."""
    museum_number = MuseumNumber.of("K.1")
    line_number = LineNumber(1)
    parallel = ParallelFragment(cf, museum_number, duplicates, labels, line_number)

    # Constructor arguments are exposed unchanged.
    assert parallel.has_cf is cf
    assert parallel.museum_number == museum_number
    assert parallel.has_duplicates is duplicates
    assert parallel.labels == labels
    assert parallel.line_number == line_number

    # Values derived from the display value.
    expected_atf = Atf(f"// {display_value}")
    expected_lemmatization = (LemmatizationToken(display_value), )
    assert parallel.display_value == display_value
    assert parallel.atf == expected_atf
    assert parallel.lemmatization == expected_lemmatization
def test_generate_annotations_error(annotations_repository, photo_repository,
                                    changelog, when):
    """Check that a 404 from the generate endpoint raises EblAiApiError."""
    fragment_number = MuseumNumber.of("X.0")
    photo = create_test_photo(fragment_number)

    ai_client = EblAiClient("http://localhost:8001")

    httpretty.register_uri(
        httpretty.POST,
        "http://localhost:8001/generate",
        status=404,
    )

    expected_message = "Ebl-Ai-Api Error with status code: 404"
    with pytest.raises(EblAiApiError, match=expected_message):
        ai_client.generate_annotations(fragment_number, photo)
def test_update_transliteration(
    number,
    ignore_lowest_join,
    fragment_updater,
    user,
    fragment_repository,
    changelog,
    parallel_line_injector,
    when,
):
    """Check that the updater returns the injected fragment and False."""
    built_fragment = TransliteratedFragmentFactory.build(
        number=number,
        joins=Joins([[Join(MuseumNumber.of("X.1"), is_in_fragmentarium=True)]]),
        line_to_vec=None,
    )
    fragment_number = built_fragment.number
    transliteration = TransliterationUpdate(
        parse_atf_lark(Atf("1. x x\n2. x")), "updated notes", "X X\nX"
    )
    updated_fragment = built_fragment.update_transliteration(transliteration, user)
    injected_text = parallel_line_injector.inject_transliteration(
        updated_fragment.text
    )
    expected_fragment = updated_fragment.set_text(injected_text)

    # The repository is stubbed to return the already updated fragment.
    lookup = when(fragment_repository).query_by_museum_number(fragment_number)
    lookup.thenReturn(updated_fragment)
    # Both changelog payloads are built from the updated fragment.
    changelog_payload = {
        "_id": str(fragment_number),
        **SCHEMA.dump(updated_fragment),
    }
    when(changelog).create(
        "fragments",
        user.profile,
        changelog_payload,
        changelog_payload,
    ).thenReturn()
    when(fragment_repository).update_transliteration(updated_fragment).thenReturn()

    result = fragment_updater.update_transliteration(
        fragment_number, transliteration, user, ignore_lowest_join
    )

    assert result == (expected_fragment, False)
Exemple #20
0
def test_sort_scores_to_list():
    """Check that sort_scores_to_list orders these scores as 12, 4, 2."""
    def make_score(number, value):
        return LineToVecScore(MuseumNumber.of(number), "N/A", value)

    unsorted_scores = [
        make_score("X.1", 4),
        make_score("X.2", 12),
        make_score("X.3", 2),
    ]
    expected_order = [
        make_score("X.2", 12),
        make_score("X.1", 4),
        make_score("X.3", 2),
    ]

    assert sort_scores_to_list(unsorted_scores) == expected_order
def test_query_by_parallel_line_exists(database, fragment_repository):
    """Check a query for a fragment whose parallel line points to a stored K.1."""
    parallel_number = MuseumNumber.of("K.1")
    parallel_line = ParallelFragment(
        False, parallel_number, True, Labels(), LineNumber(1), True
    )
    fragment = FragmentFactory.build(text=Text((parallel_line,)))
    parallel_fragment = FragmentFactory.build(number=parallel_number)

    # Both fragments are stored without the "joins" field.
    documents = [
        FragmentSchema(exclude=["joins"]).dump(stored)
        for stored in (fragment, parallel_fragment)
    ]
    database[COLLECTION].insert_many(documents)

    found = fragment_repository.query_by_museum_number(fragment.number)
    assert found == fragment
Exemple #22
0
def test_generate_annotations(
    annotations_repository, photo_repository, changelog, when
):
    """Check that the service returns what the stubbed AI client generates."""
    fragment_number = MuseumNumber.of("X.0")
    photo = create_test_photo("K.2")

    photo_lookup = when(photo_repository).query_by_file_name(f"{fragment_number}.jpg")
    photo_lookup.thenReturn(photo)

    ai_client = EblAiClient("mock-localhost:8001")
    service = AnnotationsService(
        ai_client, annotations_repository, photo_repository, changelog
    )

    expected = Annotations(fragment_number, tuple())
    generation = when(ai_client).generate_annotations(fragment_number, photo, 0)
    generation.thenReturn(expected)

    generated = service.generate_annotations(fragment_number, 0)
    assert isinstance(generated, Annotations)
    assert generated == expected
def test_updating_manuscripts(corpus, text_repository, bibliography, changelog,
                              signs, sign_repository, user, when) -> None:
    """Check that update_manuscripts returns the chapter with the new manuscript."""
    uncertain_fragments = (MuseumNumber.of("K.1"), )

    def single_line(reading_name):
        # Transliteration holding one text line with a single one-reading word.
        word = Word.of([Reading.of_name(reading_name)])
        return Transliteration.of_iterable([
            TextLine.of_iterable(LineNumber(1, True), (word, ))
        ])

    updated_manuscript = attr.evolve(
        CHAPTER.manuscripts[0],
        colophon=single_line("ba"),
        unplaced_lines=single_line("ku"),
        notes="Updated manuscript.",
    )
    updated_chapter = attr.evolve(
        CHAPTER,
        manuscripts=(updated_manuscript, ),
        uncertain_fragments=uncertain_fragments,
        signs=("KU ABZ075 ABZ207a\\u002F207b\\u0020X\nBA\nKU", ),
    )
    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    manuscripts = (updated_chapter.manuscripts[0], )
    result = corpus.update_manuscripts(CHAPTER.id_, manuscripts,
                                       uncertain_fragments, user)
    assert result == updated_chapter
def create(include_documents: bool) -> Tuple[Chapter, dict]:
    """Build a chapter from factories together with its expected DTO dict.

    Args:
        include_documents: Forwarded to ``ReferenceFactory.build`` as
            ``with_document``; when false, ``"joins"`` is excluded from the
            serialized manuscripts.

    Returns:
        A ``(chapter, dto)`` pair, where ``dto`` is assembled by hand from the
        chapter's attributes.
    """
    references = (ReferenceFactory.build(with_document=include_documents),)
    manuscript = ManuscriptFactory.build(references=references)

    # Two manuscript lines that both point at the same manuscript.
    first_manuscript_line = ManuscriptLineFactory.build(manuscript_id=manuscript.id)
    second_manuscript_line = ManuscriptLineFactory.build(manuscript_id=manuscript.id)
    line = LineFactory.build(
        variants=(
            LineVariantFactory.build(
                manuscripts=(first_manuscript_line, second_manuscript_line),
                parallel_lines=(ParallelComposition(False, "name", LineNumber(1)),),
            ),
        )
    )

    chapter = ChapterFactory.build(
        manuscripts=(manuscript,),
        uncertain_fragments=(MuseumNumber.of("K.1"),),
        lines=(line,),
    )
    dto = {
        "textId": {
            "genre": chapter.text_id.genre.value,
            "category": chapter.text_id.category,
            "index": chapter.text_id.index,
        },
        "classification": chapter.classification.value,
        "stage": chapter.stage.value,
        "version": chapter.version,
        "name": chapter.name,
        "order": chapter.order,
        "signs": list(chapter.signs),
        "record": RecordSchema().dump(chapter.record),
        "parserVersion": chapter.parser_version,
        "manuscripts": ApiManuscriptSchema(
            exclude=[] if include_documents else ["joins"]
        ).dump(chapter.manuscripts, many=True),
        "uncertainFragments": [str(number) for number in chapter.uncertain_fragments],
        "lines": [
            {
                "number": line.number.label,
                "variants": [
                    {
                        # Reconstruction ATF, followed by the note (if any) and
                        # each parallel line, every one on its own line.
                        "reconstruction": "".join(
                            [
                                convert_to_atf(None, variant.reconstruction),
                                f"\n{variant.note.atf}" if variant.note else "",
                                *[
                                    f"\n{parallel_line.atf}"
                                    for parallel_line in variant.parallel_lines
                                ],
                            ]
                        ),
                        "reconstructionTokens": OneOfTokenSchema().dump(
                            variant.reconstruction, many=True
                        ),
                        "intertext": "".join(part.value for part in variant.intertext),
                        "manuscripts": [
                            {
                                "manuscriptId": manuscript_line.manuscript_id,
                                "labels": [
                                    label.to_value() for label in manuscript_line.labels
                                ],
                                # Line number ATF without its last character
                                # (presumably the trailing "." - TODO confirm).
                                "number": manuscript_line.line.line_number.atf[:-1]
                                if isinstance(manuscript_line.line, TextLine)
                                else "",
                                # Line ATF with the line number and the one
                                # character after it removed, followed by the
                                # paratext lines.
                                "atf": "\n".join(
                                    [
                                        manuscript_line.line.atf[
                                            len(manuscript_line.line.line_number.atf)
                                            + 1 :
                                        ]
                                        if isinstance(manuscript_line.line, TextLine)
                                        else "",
                                        *[
                                            line.atf
                                            for line in manuscript_line.paratext
                                        ],
                                    ]
                                ).strip(),
                                "atfTokens": (
                                    OneOfLineSchema().dump(manuscript_line.line)[
                                        "content"
                                    ]
                                ),
                                "omittedWords": list(manuscript_line.omitted_words),
                            }
                            for manuscript_line in variant.manuscripts
                        ],
                    }
                    for variant in line.variants
                ],
                "isSecondLineOfParallelism": line.is_second_line_of_parallelism,
                "isBeginningOfSection": line.is_beginning_of_section,
                "translation": "\n".join(
                    translation.atf for translation in line.translation
                ),
            }
            for line in chapter.lines
        ],
    }

    return chapter, dto
Exemple #25
0
 def ebl_atf_text_line__museum_number(self, children) -> MuseumNumber:
     """Concatenate the children and build a MuseumNumber from the result."""
     number_text = "".join(children)
     return MuseumNumber.of(number_text)
def parse_museum_number(number: str) -> MuseumNumber:
    """Parse a museum number string.

    Raises:
        DataError: If ``MuseumNumber.of`` raises ``ValueError``; the original
            error is chained as the cause.
    """
    try:
        museum_number = MuseumNumber.of(number)
    except ValueError as error:
        raise DataError(error) from error
    else:
        return museum_number
def test_folio_pager_exception(fragment_repository):
    """Check that the folio pager query raises NotFoundError for this lookup."""
    # Build the argument outside the raises block, so that only the
    # repository call itself can satisfy the expectation.
    museum_number = MuseumNumber.of("1841-07-26.54")
    with pytest.raises(NotFoundError):
        fragment_repository.query_next_and_previous_folio("test", "test", museum_number)
def test_record():
    """Check that a Fragment exposes the record it was constructed with."""
    expected_record = RecordFactory.build()
    number = MuseumNumber.of("X.1")
    assert Fragment(number, record=expected_record).record == expected_record
class TransliteratedFragmentFactory(FragmentFactory):
    """Fragment factory with a fixed transliteration.

    Overrides ``text``, ``signs``, ``folios``, ``record`` and ``line_to_vec``
    with constant values. The text contains five text lines (numbered 1', 2',
    3', 6' and 7'), followed by dollar lines, at-lines, a note line and three
    parallel lines.
    """

    text = Text((
        # Text line 1'.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Text line 2'.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Text line 3'.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Text line 6' (the numbering skips 4' and 5').
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        # Text line 7', ending with a shift to normalized Akkadian.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        # Dollar lines.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At-lines.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # Note line with plain, emphasized and transliterated parts.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines: composition, text and fragment.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    # Five newline-separated rows of signs, presumably one per text line in
    # `text` above - TODO confirm.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    # One encoding tuple: five TEXT_LINE entries followed by SINGLE_RULING.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Factory variant whose ``text`` contains lemmatized words.

    Overrides ``text`` with five text lines (numbered 1, 2, 3, 6 and 7)
    followed by dollar lines, at lines, a note line and parallel lines.
    Line 1 carries no lemmas; some words in lines 2, 3, 6 and 7 are built
    with a ``unique_lemma``.
    """

    text = Text((
        # Line 1: no token carries a unique_lemma.
        # NOTE(review): the second LineNumber argument (True) is presumably
        # the prime flag -- confirm against LineNumber.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2: the three middle words are lemmatized ("ginâ I", "ana I",
        # "ūsu I"); the first and last words only hold the broken-away
        # markers and unknown-number-of-signs tokens and have no lemma.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 3: lemmatized words "kīdu I", "u I" and "bamātu I". Here the
        # word parts are passed via the ``parts`` keyword instead of
        # positionally.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        # Reading built from explicit value tokens so that a
                        # BrokenAway marker sits inside the reading.
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 6 (numbers 4 and 5 are not used in this text): lemmatized
        # words "mu I" and "tamalāku I"; the latter contains an in-word
        # newline token.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        # Line 7: a word holding a reading/compound-grapheme variant, then a
        # shift to normalized Akkadian and an AkkadianWord lemmatized as
        # "normalized I".
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Dollar lines: state, image, ruling, loose and seal.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At lines: seal, heading, column, surface, object, discourse,
        # division and composite.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # Note line mixing plain text, emphasis and an Akkadian
        # transliteration part.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines: one each for a composition, a text and a fragment.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))