Ejemplo n.º 1
0
def test_extent_overlapping_languages() -> None:
    """Build a text whose "en" translation extends to line 2, which carries a "de" translation.

    The test has no assertion; it passes as long as construction does not raise.
    """
    first_line = TextLine.of_iterable(
        LineNumber(1), [Word.of([Reading.of_name("bu")])]
    )
    english = TranslationLine((), "en", Extent(LineNumber(2)))
    second_line = TextLine.of_iterable(
        LineNumber(2), [Word.of([Reading.of_name("bu")])]
    )
    german = TranslationLine((), "de")

    Text.of_iterable([first_line, english, second_line, german])
Ejemplo n.º 2
0
def test_extent_before_translation() -> None:
    """Expect ``ValueError`` when a translation's extent names line 1 but follows lines 1 and 2."""
    # Everything is built inside the context manager so that a ValueError
    # raised by any of the constructors is accepted, as in a single expression.
    with pytest.raises(ValueError):
        first_line = TextLine.of_iterable(
            LineNumber(1), [Word.of([Reading.of_name("bu")])]
        )
        second_line = TextLine.of_iterable(
            LineNumber(2), [Word.of([Reading.of_name("bu")])]
        )
        late_translation = TranslationLine((), "en", Extent(LineNumber(1)))

        Text.of_iterable([first_line, second_line, late_translation])
def text_with_labels():
    """Return a text with two text lines separated by column, surface and object at-lines."""
    unlabelled_line = TextLine.of_iterable(
        LineNumber(1), [Word.of([Reading.of_name("bu")])]
    )
    column = ColumnAtLine(ColumnLabel.from_int(1))
    surface = SurfaceAtLine(SurfaceLabel([], atf.Surface.SURFACE, "Stone wig"))
    object_ = ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "Stone wig"))
    labelled_line = TextLine.of_iterable(
        LineNumber(2), [Word.of([Reading.of_name("bu")])]
    )

    lines = [unlabelled_line, column, surface, object_, labelled_line]
    return Text.of_iterable(lines)
Ejemplo n.º 4
0
def test_exent_overlapping() -> None:
    """Expect ``ValueError`` when an extent reaches a line that has its own translation.

    Neither translation passes a language, so both rely on the
    ``TranslationLine`` default.
    """
    # Built inside the context manager so a ValueError from any constructor
    # is accepted, exactly as with one nested expression.
    with pytest.raises(ValueError):
        first_line = TextLine.of_iterable(
            LineNumber(1), [Word.of([Reading.of_name("bu")])]
        )
        extended = TranslationLine((), extent=Extent(LineNumber(2)))
        second_line = TextLine.of_iterable(
            LineNumber(2), [Word.of([Reading.of_name("bu")])]
        )
        plain = TranslationLine(())

        Text.of_iterable([first_line, extended, second_line, plain])
Ejemplo n.º 5
0
def test_labels(text_with_labels) -> None:
    """Check ``labels``: line 1 has no labels, line 2 has column, surface and object labels."""
    actual = text_with_labels.labels

    without_labels = LineLabel(None, None, None, LineNumber(1))
    with_labels = LineLabel(
        ColumnLabel.from_int(1),
        SurfaceLabel([], atf.Surface.SURFACE, "Stone wig"),
        ObjectLabel([], atf.Object.OBJECT, "Stone wig"),
        LineNumber(2),
    )

    assert actual == [without_labels, with_labels]
def test_invalid_extent() -> None:
    """Expect ``ValueError`` for a chapter whose only line (1) has a translation extent of line 2."""
    # All construction happens inside the context manager so that a
    # ValueError from any step is accepted.
    with pytest.raises(ValueError):
        text_id = TextId(GENRE, 0, 0)
        manuscripts = (Manuscript(MANUSCRIPT_ID),)
        number = LineNumber(1)
        translation = (TranslationLine((), extent=Extent(LineNumber(2))),)
        only_line = Line(number, (LINE_VARIANT_1,), translation=translation)

        Chapter(text_id, manuscripts=manuscripts, lines=(only_line,))
class ParallelCompositionFactory(factory.Factory):
    """factory_boy factory producing ``ParallelComposition`` objects."""

    class Meta:
        model = ParallelComposition

    # Faker-generated values, drawn for each built object.
    has_cf = factory.Faker("boolean")
    name = factory.Faker("sentence")
    # Plain constant evaluated once at class definition: always line number 1.
    line_number = LineNumber(1)
Ejemplo n.º 8
0
 def make_line_number(self, data: dict, **kwargs) -> LineNumber:
     """Build a ``LineNumber`` from four entries of *data*; extra keyword arguments are ignored."""
     # Positional order expected by the LineNumber call.
     field_order = ("number", "has_prime", "prefix_modifier", "suffix_modifier")
     return LineNumber(*[data[field] for field in field_order])
Ejemplo n.º 9
0
def test_dump_line():
    """Dump a text with one text line, an empty line and a control line via ``TextSchema``."""
    number = LineNumber(1)
    word = Word.of(
        parts=[Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")]
    )
    lines = (
        TextLine.of_iterable(number, [word]),
        EmptyLine(),
        ControlLine("#", " comment"),
    )
    text = Text(lines, "1.0.0")

    dumped = TextSchema().dump(text)
    expected = {
        "lines": OneOfLineSchema().dump(text.lines, many=True),
        "parser_version": text.parser_version,
        "numberOfLines": 1,
    }

    assert dumped == expected
class ManuscriptLineFactory(factory.Factory):
    """factory_boy factory producing ``ManuscriptLine`` objects."""

    class Meta:
        model = ManuscriptLine

    # The sequence counter itself is used as the manuscript id.
    manuscript_id = factory.Sequence(lambda n: n)
    # Fixed labels: obverse surface plus column "iii" with two status flags.
    labels = (
        SurfaceLabel.from_label(Surface.OBVERSE),
        ColumnLabel.from_label("iii", [Status.COLLATION, Status.CORRECTION]),
    )
    # One text line per build; only the line number varies with the sequence
    # counter, the word (ku-[nu-ši]) is always the same.
    line = factory.Sequence(
        lambda n: TextLine.of_iterable(
            LineNumber(n),
            (
                Word.of(
                    [
                        Reading.of_name("ku"),
                        Joiner.hyphen(),
                        BrokenAway.open(),
                        Reading.of_name("nu"),
                        Joiner.hyphen(),
                        Reading.of_name("ši"),
                        BrokenAway.close(),
                    ]
                ),
            ),
        )
    )
    # Constant paratext: a note line followed by a single ruling.
    paratext = (NoteLine((StringPart("note"),)), RulingDollarLine(Ruling.SINGLE))
    # NOTE(review): presumably indices of omitted words — confirm against ManuscriptLine.
    omitted_words = (1,)
Ejemplo n.º 11
0
def test_update_lemmatization() -> None:
    """Replace the first token's lemma with "nu I" and check the text ``update_lemmatization`` returns."""
    token_rows = list(map(list, TEXT.lemmatization.tokens))
    first_token = token_rows[0][0]
    token_rows[0][0] = LemmatizationToken(first_token.value, (WordId("nu I"),))
    lemmatization = Lemmatization(token_rows)

    number = LineNumber(1)
    lemmatized_word = Word.of(
        unique_lemma=(WordId("nu I"),),
        parts=[Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")],
    )
    expected_lines = (
        TextLine(number, (lemmatized_word,)),
        RulingDollarLine(atf.Ruling.SINGLE),
    )
    expected = Text(expected_lines, TEXT.parser_version)

    updated = TEXT.update_lemmatization(lemmatization)
    assert updated == expected
Ejemplo n.º 12
0
def test_updating(client, bibliography, sign_repository, signs,
                  text_repository) -> None:
    """POST edited lines for a stored chapter, then check the POST and a following GET response."""
    allow_signs(signs, sign_repository)
    chapter = ChapterFactory.build()
    allow_references(chapter, bibliography)
    text_repository.create_chapter(chapter)

    # The only change is the number of the first line, which becomes 1'.
    renumbered_line = attr.evolve(chapter.lines[0], number=LineNumber(1, True))
    updated_chapter = attr.evolve(
        chapter,
        lines=(renumbered_line, ),
        parser_version=ATF_PARSER_VERSION,
    )

    edited = []
    for index, line in enumerate(create_chapter_dto(updated_chapter)["lines"]):
        edited.append({"index": index, "line": line})
    body = {"new": [], "deleted": [], "edited": edited}

    lines_url = create_chapter_url(chapter, "/lines")
    post_result = client.simulate_post(lines_url, body=json.dumps(body))

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == create_chapter_dto(updated_chapter)

    get_result = client.simulate_get(create_chapter_url(chapter))

    assert get_result.status == falcon.HTTP_OK
    assert get_result.json == create_chapter_dto(updated_chapter)
def test_extant_lines() -> None:
    """Check ``extant_lines`` of a two-line chapter.

    The expected mapping holds a single ``ExtantLine`` for line 1, keyed by
    the manuscript's siglum and the manuscript line's labels.
    """
    manuscript = Manuscript(MANUSCRIPT_ID)
    manuscript_line = LINE_VARIANT_1.manuscripts[0]
    first_line = Line(LineNumber(1), (LINE_VARIANT_1, ))
    second_line = Line(LineNumber(2), (LINE_VARIANT_2, ))
    chapter = Chapter(
        TEXT_ID,
        manuscripts=(manuscript, ),
        lines=(first_line, second_line),
    )

    extant = chapter.extant_lines
    siglum = manuscript.siglum
    labels = manuscript_line.labels
    expected_line = ExtantLine(manuscript_line.labels, LineNumber(1), True)

    assert extant == {siglum: {labels: [expected_line]}}
def test_overlapping_languages() -> None:
    """Build a chapter whose "en" translation on line 1 extends to line 2, which has a "de" translation.

    The test has no assertion; it passes as long as construction does not raise.
    """
    text_id = TextId(GENRE, 0, 0)
    manuscripts = (Manuscript(MANUSCRIPT_ID), )

    english = TranslationLine((), "en", Extent(LineNumber(2)))
    first_line = Line(LineNumber(1), (LINE_VARIANT_1, ), translation=(english, ))

    german = TranslationLine((), "de")
    second_line = Line(LineNumber(2), (LINE_VARIANT_2, ), translation=(german, ))

    Chapter(text_id, manuscripts=manuscripts, lines=(first_line, second_line))
def test_statistics(database, fragment_repository):
    """Insert three fragments and check the transliterated-fragment and line counts.

    The texts contain one, three and zero text lines respectively; the
    expected counts are 2 transliterated fragments and 4 lines.
    """
    one_text_line = Text(
        (
            TextLine(
                LineNumber(1),
                (
                    Word.of([Reading.of_name("first")]),
                    Word.of([Reading.of_name("line")]),
                ),
            ),
            ControlLine("#", "ignore"),
            EmptyLine(),
        )
    )
    three_text_lines = Text(
        (
            ControlLine("#", "ignore"),
            TextLine(LineNumber(1), (Word.of([Reading.of_name("second")]),)),
            TextLine(LineNumber(2), (Word.of([Reading.of_name("third")]),)),
            ControlLine("#", "ignore"),
            TextLine(LineNumber(3), (Word.of([Reading.of_name("fourth")]),)),
        )
    )
    texts = (one_text_line, three_text_lines, Text())

    # Each fragment is built and dumped in turn, in the order listed above.
    documents = [SCHEMA.dump(FragmentFactory.build(text=text)) for text in texts]
    database[COLLECTION].insert_many(documents)

    assert fragment_repository.count_transliterated_fragments() == 2
    assert fragment_repository.count_lines() == 4
def test_updating_lines_edit(corpus, text_repository, bibliography, changelog,
                             signs, sign_repository, user, when) -> None:
    """Edit line 0 of ``CHAPTER`` through ``update_lines`` and expect the updated chapter back."""
    old_line = CHAPTER.lines[0]
    old_variant = CHAPTER.lines[0].variants[0]
    old_manuscript_line = CHAPTER.lines[0].variants[0].manuscripts[0]

    # Build the replacement bottom-up: text line -> manuscript line ->
    # variant -> line -> chapter.
    new_text_line = TextLine.of_iterable(
        LineNumber(1, True),
        (Word.of([
            Reading.of_name("nu"),
            Joiner.hyphen(),
            BrokenAway.open(),
            Reading.of_name("ku"),
            Joiner.hyphen(),
            Reading.of_name("ši"),
            BrokenAway.close(),
        ]), ),
    )
    new_manuscript_line = attr.evolve(old_manuscript_line, line=new_text_line)
    new_variant = attr.evolve(old_variant, manuscripts=(new_manuscript_line, ))
    new_line = attr.evolve(
        old_line,
        number=LineNumber(1, True),
        variants=(new_variant, ),
    )
    updated_chapter = attr.evolve(
        CHAPTER,
        lines=(new_line, ),
        signs=("ABZ075 KU ABZ207a\\u002F207b\\u0020X\nKU\nABZ075", ),
        parser_version=ATF_PARSER_VERSION,
    )

    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    lines_update = LinesUpdate([], set(), {0: updated_chapter.lines[0]})
    result = corpus.update_lines(CHAPTER.id_, lines_update, user)

    assert result == updated_chapter
class ParallelTextFactory(factory.Factory):
    """factory_boy factory producing ``ParallelText`` objects."""

    class Meta:
        model = ParallelText

    has_cf = factory.Faker("boolean")
    # Text id and chapter name are delegated to their own factories.
    text = factory.SubFactory(TextIdFactory)
    chapter = factory.SubFactory(ChapterNameFactory)
    # Plain constant evaluated once at class definition: always line number 1.
    line_number = LineNumber(1)
    # Both default to None unless a caller overrides them.
    exists = None
    implicit_chapter = None
class ParallelFragmentFactory(factory.Factory):
    """factory_boy factory producing ``ParallelFragment`` objects."""

    class Meta:
        model = ParallelFragment

    has_cf = factory.Faker("boolean")
    # Museum numbers are "X" plus the sequence counter as a string.
    museum_number = factory.Sequence(lambda n: MuseumNumber("X", str(n)))
    has_duplicates = factory.Faker("boolean")
    labels = factory.SubFactory(LabelsFactory)
    # Plain constant evaluated once at class definition: always line number 1.
    line_number = LineNumber(1)
    # Defaults to None unless a caller overrides it.
    exists = None
def test_parallel_composition(cf, display_value) -> None:
    """Check the attributes and the derived display value, ATF and lemmatization of a ``ParallelComposition``."""
    expected_name = "my name"
    expected_number = LineNumber(1)
    composition = ParallelComposition(cf, expected_name, expected_number)

    # Constructor arguments are exposed unchanged.
    assert composition.has_cf is cf
    assert composition.name == expected_name
    assert composition.line_number == expected_number

    # Derived representations are all based on the display value.
    assert composition.display_value == display_value
    assert composition.atf == Atf(f"// {display_value}")
    assert composition.lemmatization == (LemmatizationToken(display_value), )
Ejemplo n.º 20
0
def test_query_lemmas_ignores_in_value(parts, expected, fragment_repository,
                                       lemma_repository):
    """Store a fragment whose only word is built from *parts* with lemma "ana I", then query "ana"."""
    number = LineNumber(1)
    lemmatized_word = Word.of(parts, unique_lemma=(WordId("ana I"), ))
    text = Text.of_iterable([TextLine.of_iterable(number, [lemmatized_word])])
    fragment = FragmentFactory.build(text=text, signs="DIŠ")
    fragment_repository.create(fragment)

    found = lemma_repository.query_lemmas("ana", False)

    assert found == expected
def test_parallel_text(cf, chapter, display_value) -> None:
    """Check the attributes and the derived display value, ATF and lemmatization of a ``ParallelText``."""
    expected_text = TextId(Genre.LITERATURE, 1, 1)
    expected_number = LineNumber(1)
    parallel = ParallelText(cf, expected_text, chapter, expected_number)

    # Constructor arguments are exposed unchanged.
    assert parallel.has_cf is cf
    assert parallel.text == expected_text
    assert parallel.chapter == chapter
    assert parallel.line_number == expected_number

    # Derived representations are all based on the display value.
    assert parallel.display_value == display_value
    assert parallel.atf == Atf(f"// {display_value}")
    assert parallel.lemmatization == (LemmatizationToken(display_value), )
def test_parallel_fragment(cf, duplicates, labels, display_value) -> None:
    """Check the attributes and the derived display value, ATF and lemmatization of a ``ParallelFragment``."""
    expected_museum_number = MuseumNumber.of("K.1")
    expected_number = LineNumber(1)
    parallel = ParallelFragment(
        cf, expected_museum_number, duplicates, labels, expected_number
    )

    # Constructor arguments are exposed unchanged.
    assert parallel.has_cf is cf
    assert parallel.museum_number == expected_museum_number
    assert parallel.has_duplicates is duplicates
    assert parallel.labels == labels
    assert parallel.line_number == expected_number

    # Derived representations are all based on the display value.
    assert parallel.display_value == display_value
    assert parallel.atf == Atf(f"// {display_value}")
    assert parallel.lemmatization == (LemmatizationToken(display_value), )
class LineFactory(factory.Factory):
    """factory_boy factory producing ``Line`` objects with a single variant."""

    class Meta:
        model = Line

    # Declared under Params: helper values for the declarations below
    # (factory_boy does not pass Params entries to the model).
    class Params:
        manuscript_id = factory.Sequence(lambda n: n)
        # The variant receives the manuscript_id parameter declared above.
        variant = factory.SubFactory(
            LineVariantFactory, manuscript_id=factory.SelfAttribute("..manuscript_id")
        )

    # Line number follows the sequence counter.
    number = factory.Sequence(lambda n: LineNumber(n))
    # A one-element collection holding the ``variant`` parameter.
    # NOTE(review): TupleFactory presumably yields a tuple — confirm its definition.
    variants = factory.List([factory.SelfAttribute("..variant")], TupleFactory)
    is_second_line_of_parallelism = factory.Faker("boolean")
    is_beginning_of_section = factory.Faker("boolean")
    # Constant translation: a single "en" line reading "foo", with None as third argument.
    translation = (TranslationLine((StringPart("foo"),), "en", None),)
class ManuscriptFactory(factory.Factory):
    """factory_boy factory producing ``Manuscript`` objects."""

    class Meta:
        model = Manuscript

    # Ids start at 1 (sequence counter + 1).
    id = factory.Sequence(lambda n: n + 1)
    siglum_disambiguator = factory.Faker("word")
    # Odd counters get a museum number (else None); even counters get an
    # accession (else an empty string).
    museum_number = factory.Sequence(
        lambda n: MuseumNumber("M", str(n)) if pydash.is_odd(n) else None
    )
    accession = factory.Sequence(lambda n: f"A.{n}" if pydash.is_even(n) else "")
    period_modifier = factory.fuzzy.FuzzyChoice(PeriodModifier)
    # Random choices that exclude Period.NONE, Provenance.STANDARD_TEXT and
    # ManuscriptType.NONE respectively.
    period = factory.fuzzy.FuzzyChoice(set(Period) - {Period.NONE})
    provenance = factory.fuzzy.FuzzyChoice(set(Provenance) - {Provenance.STANDARD_TEXT})
    type = factory.fuzzy.FuzzyChoice(set(ManuscriptType) - {ManuscriptType.NONE})
    notes = factory.Faker("sentence")
    # Constant one-line transliterations, both numbered 1' (LineNumber(1, True)).
    colophon = Transliteration.of_iterable(
        [TextLine.of_iterable(LineNumber(1, True), (Word.of([Reading.of_name("ku")]),))]
    )
    unplaced_lines = Transliteration.of_iterable(
        [TextLine.of_iterable(LineNumber(1, True), (Word.of([Reading.of_name("nu")]),))]
    )
    # A single reference built with with_document=True.
    references = factory.List(
        [factory.SubFactory(ReferenceFactory, with_document=True)], TupleFactory
    )
def test_updating_manuscripts(corpus, text_repository, bibliography, changelog,
                              signs, sign_repository, user, when) -> None:
    """Update the first manuscript of ``CHAPTER`` through ``update_manuscripts`` and expect the updated chapter back."""
    uncertain_fragments = (MuseumNumber.of("K.1"), )

    new_colophon = Transliteration.of_iterable([
        TextLine.of_iterable(LineNumber(1, True),
                             (Word.of([Reading.of_name("ba")]), ))
    ])
    new_unplaced_lines = Transliteration.of_iterable([
        TextLine.of_iterable(LineNumber(1, True),
                             (Word.of([Reading.of_name("ku")]), ))
    ])
    edited_manuscript = attr.evolve(
        CHAPTER.manuscripts[0],
        colophon=new_colophon,
        unplaced_lines=new_unplaced_lines,
        notes="Updated manuscript.",
    )
    updated_chapter = attr.evolve(
        CHAPTER,
        manuscripts=(edited_manuscript, ),
        uncertain_fragments=uncertain_fragments,
        signs=("KU ABZ075 ABZ207a\\u002F207b\\u0020X\nBA\nKU", ),
    )

    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    manuscripts = (updated_chapter.manuscripts[0], )
    result = corpus.update_manuscripts(CHAPTER.id_, manuscripts,
                                       uncertain_fragments, user)

    assert result == updated_chapter
def test_parse_normalized_akkadain_shift() -> None:
    """Parse a line that shifts to normalized Akkadian (%n) and then to %sux.

    The word after ``%n`` is expected as an ``AkkadianWord``; the word after
    ``%sux`` is expected as a ``Word`` with ``Language.SUMERIAN``.
    """
    word = "ha"
    line = f"1. {word} %n {word} %sux {word}"

    number = LineNumber(1)
    expected_tokens = (
        Word.of((Reading.of_name(word), ), DEFAULT_LANGUAGE),
        LanguageShift.normalized_akkadian(),
        AkkadianWord.of((ValueToken.of(word), )),
        LanguageShift.of("%sux"),
        Word.of((Reading.of_name(word), ), Language.SUMERIAN),
    )
    expected = Text((TextLine.of_iterable(number, expected_tokens), ))

    parsed_lines = parse_atf_lark(line).lines
    assert parsed_lines == expected.lines
def test_parse_atf_language_shifts(code: str,
                                   expected_language: Language) -> None:
    """Parse a line with a *code* shift followed by ``%sb``.

    The word after *code* is expected in *expected_language*; the word after
    ``%sb`` is expected in ``Language.AKKADIAN``.
    """
    word = "ha-am"
    parts = [Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")]
    line = f"1. {word} {code} {word} %sb {word}"

    number = LineNumber(1)
    expected_tokens = (
        Word.of(parts, DEFAULT_LANGUAGE),
        LanguageShift.of(code),
        Word.of(parts, expected_language),
        LanguageShift.of("%sb"),
        Word.of(parts, Language.AKKADIAN),
    )
    expected = Text((TextLine.of_iterable(number, expected_tokens), ))

    parsed_lines = parse_atf_lark(line).lines
    assert parsed_lines == expected.lines
Ejemplo n.º 28
0
def test_extant_lines_schema() -> None:
    """Dump a one-line chapter with ``ExtantLinesSchema`` and check the nested "extantLines" mapping."""
    manuscript = Manuscript(1)
    manuscript_line = ManuscriptLine(1, LABELS, MANUSCRIPT_TEXT_1)
    variant = LineVariant((), manuscripts=(manuscript_line, ))
    text_line = Line(LineNumber(1), (variant, ))
    chapter = Chapter(
        TextId(Genre.LITERATURE, 0, 0),
        manuscripts=(manuscript, ),
        lines=(text_line, ),
    )

    dumped = ExtantLinesSchema().dump(chapter)

    # Outer key is the siglum as a string; inner key is the labels' values
    # joined with single spaces.
    siglum_key = str(manuscript.siglum)
    labels_key = " ".join(label.to_value() for label in manuscript_line.labels)
    expected_entry = {
        "isSideBoundary": False,
        "lineNumber": OneOfLineNumberSchema().dump(text_line.number),
    }

    assert dumped == {"extantLines": {siglum_key: {labels_key: [expected_entry]}}}
def test_query_by_parallel_line_exists(database, fragment_repository):
    """Store a fragment with a parallel line pointing at another stored fragment, then query it back.

    The ``ParallelFragment`` is built with ``True`` as its last positional
    argument, and the queried fragment is expected to equal the built one.
    """
    parallel_number = MuseumNumber.of("K.1")
    parallel_line = ParallelFragment(
        False, parallel_number, True, Labels(), LineNumber(1), True
    )
    fragment = FragmentFactory.build(text=Text((parallel_line,)))
    parallel_fragment = FragmentFactory.build(number=parallel_number)

    # A fresh schema (without "joins") is used for each dumped fragment.
    documents = [
        FragmentSchema(exclude=["joins"]).dump(stored)
        for stored in (fragment, parallel_fragment)
    ]
    database[COLLECTION].insert_many(documents)

    found = fragment_repository.query_by_museum_number(fragment.number)
    assert found == fragment
def test_parse_dividers() -> None:
    """Parse one line of eight dividers carrying different modifiers and flags."""
    line = r'1. :? :#! :# ::? :.@v /@19* :"@20@c ;@v@19!'

    no_modifiers = ()
    no_flags = ()
    dividers = (
        Divider.of(":", no_modifiers, (atf.Flag.UNCERTAIN, )),
        Divider.of(":", no_modifiers, (atf.Flag.DAMAGE, atf.Flag.CORRECTION)),
        Divider.of(":", no_modifiers, (atf.Flag.DAMAGE, )),
        Divider.of("::", no_modifiers, (atf.Flag.UNCERTAIN, )),
        Divider.of(":.", ("@v", ), no_flags),
        Divider.of("/", ("@19", ), (atf.Flag.COLLATION, )),
        Divider.of(':"', ("@20", "@c"), no_flags),
        Divider.of(";", ("@v", "@19"), (atf.Flag.CORRECTION, )),
    )
    expected_tokens = [TextLine.of_iterable(LineNumber(1), dividers)]

    parsed_lines = parse_atf_lark(line).lines
    assert parsed_lines == Text.of_iterable(expected_tokens).lines