Example 1
def test_extent_before_translation() -> None:
    with pytest.raises(ValueError):
        Text.of_iterable(
            [
                TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]),
                TextLine.of_iterable(LineNumber(2), [Word.of([Reading.of_name("bu")])]),
                TranslationLine(tuple(), "en", Extent(LineNumber(1))),
            ]
        )
Example 2
def test_extent_overlapping_languages() -> None:
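    # Overlapping extents are allowed here because the two translations use
    # different languages ("en" and "de"), so no exception is expected.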
    Text.of_iterable(
        [
            TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]),
            TranslationLine(tuple(), "en", Extent(LineNumber(2))),
            TextLine.of_iterable(LineNumber(2), [Word.of([Reading.of_name("bu")])]),
            TranslationLine(tuple(), "de"),
        ]
    )
Example 3
def test_extent_overlapping() -> None:
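    # Both translations use the same (default) language, so the overlapping
    # extent must be rejected.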
    with pytest.raises(ValueError):
        Text.of_iterable(
            [
                TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]),
                TranslationLine(tuple(), extent=Extent(LineNumber(2))),
                TextLine.of_iterable(LineNumber(2), [Word.of([Reading.of_name("bu")])]),
                TranslationLine(tuple()),
            ]
        )
Example 4
def parse_atf_lark(atf_):
    def parse_line_(line: str, line_number: int):
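        # Returns (parsed line, None) on success, or (None, error data) when
        # parsing or validation fails.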
        try:
            parsed_line = parse_line(line) if line else EmptyLine()
            validate_line(parsed_line)
            return parsed_line, None
        except PARSE_ERRORS as ex:
            return (None,
                    create_transliteration_error_data(ex, line, line_number))

    def check_errors(pairs):
        errors = [error for line, error in pairs if error is not None]
        if any(errors):
            raise TransliterationError(errors)

    lines = atf_.split("\n")
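    # Drop trailing empty lines while keeping the original order and line numbers.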
    lines = list(dropwhile(lambda line: line == "", reversed(lines)))
    lines.reverse()
    lines = [parse_line_(line, number) for number, line in enumerate(lines)]
    check_errors(lines)
    lines = tuple(pair[0] for pair in lines)

    text = Text(lines, f"{atf.ATF_PARSER_VERSION}")

    if pydash.duplicates(text.labels):
        raise DataError("Duplicate labels.")
    else:
        return text
Example 5
def test_update_lemmatization() -> None:
    tokens = [list(line) for line in TEXT.lemmatization.tokens]
    tokens[0][0] = LemmatizationToken(tokens[0][0].value, (WordId("nu I"),))
    lemmatization = Lemmatization(tokens)

    expected = Text(
        (
            TextLine(
                LineNumber(1),
                (
                    Word.of(
                        unique_lemma=(WordId("nu I"),),
                        parts=[
                            Reading.of_name("ha"),
                            Joiner.hyphen(),
                            Reading.of_name("am"),
                        ],
                    ),
                ),
            ),
            RulingDollarLine(atf.Ruling.SINGLE),
        ),
        TEXT.parser_version,
    )

    assert TEXT.update_lemmatization(lemmatization) == expected
Example 6
class TransliterationUpdate:
    text: Text = Text()
    notes: str = ""
    signs: str = attr.ib(default="")

    @signs.validator
    def _check_signs(self, _attribute, value) -> None:
        questionable_lines = self._get_questionable_lines(value)
        if questionable_lines:
            raise TransliterationError(
                [
                    {"description": "Invalid value", "lineNumber": line_number}
                    for line_number in questionable_lines
                ]
            )

    def _get_questionable_lines(self, value: str) -> List[int]:
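        # Map the indices of sign lines containing "?" back to 1-based line
        # numbers within the full text.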
        lines = [line.atf for line in self.text.lines]
        text_lines = [line.atf for line in self.text.text_lines]
        signs = value.split("\n")

        def get_line_number(text_line_number: int) -> int:
            line = text_lines[text_line_number]
            return lines.index(line) + 1

        return [
            get_line_number(index) for index, line in enumerate(signs) if "?" in line
        ]
Example 7
def test_dump_line():
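    # numberOfLines is 1 because only the text line is counted, not the empty
    # or control lines.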
    text = Text(
        (
            TextLine.of_iterable(
                LineNumber(1),
                [
                    Word.of(
                        parts=[
                            Reading.of_name("ha"),
                            Joiner.hyphen(),
                            Reading.of_name("am"),
                        ]
                    )
                ],
            ),
            EmptyLine(),
            ControlLine("#", " comment"),
        ),
        "1.0.0",
    )

    assert TextSchema().dump(text) == {
        "lines": OneOfLineSchema().dump(text.lines, many=True),
        "parser_version": text.parser_version,
        "numberOfLines": 1,
    }
Example 8
def test_parse_ruling_dollar_line(prefix, ruling, expected_ruling, status,
                                  expected_status, status_space, parenthesis):
    ruling = f"{ruling} ruling"
    ruling_with_status = (f"{ruling} {status}" if
                          (status and status_space) else f"{ruling}{status}")
    line = f"({ruling_with_status})" if parenthesis else ruling_with_status
    assert (parse_atf_lark(f"{prefix}{line}").lines == Text.of_iterable(
        [RulingDollarLine(expected_ruling, expected_status)]).lines)
Example 9
def test_statistics(database, fragment_repository):
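    # Two transliterated fragments (with one and three text lines) plus an empty one.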
    database[COLLECTION].insert_many(
        [
            SCHEMA.dump(
                FragmentFactory.build(
                    text=Text(
                        (
                            TextLine(
                                LineNumber(1),
                                (
                                    Word.of([Reading.of_name("first")]),
                                    Word.of([Reading.of_name("line")]),
                                ),
                            ),
                            ControlLine("#", "ignore"),
                            EmptyLine(),
                        )
                    )
                )
            ),
            SCHEMA.dump(
                FragmentFactory.build(
                    text=Text(
                        (
                            ControlLine("#", "ignore"),
                            TextLine(
                                LineNumber(1), (Word.of([Reading.of_name("second")]),)
                            ),
                            TextLine(
                                LineNumber(2), (Word.of([Reading.of_name("third")]),)
                            ),
                            ControlLine("#", "ignore"),
                            TextLine(
                                LineNumber(3), (Word.of([Reading.of_name("fourth")]),)
                            ),
                        )
                    )
                )
            ),
            SCHEMA.dump(FragmentFactory.build(text=Text())),
        ]
    )
    assert fragment_repository.count_transliterated_fragments() == 2
    assert fragment_repository.count_lines() == 4
Example 10
def text_with_labels():
    return Text.of_iterable([
        TextLine.of_iterable(LineNumber(1),
                             [Word.of([Reading.of_name("bu")])]),
        ColumnAtLine(ColumnLabel.from_int(1)),
        SurfaceAtLine(SurfaceLabel([], atf.Surface.SURFACE, "Stone wig")),
        ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "Stone wig")),
        TextLine.of_iterable(LineNumber(2),
                             [Word.of([Reading.of_name("bu")])]),
    ])
Example 11
def test_load_line(lines):
    parser_version = "2.3.1"
    serialized_lines = OneOfLineSchema().dump(lines, many=True)
    assert TextSchema().load(
        {
            "lines": serialized_lines,
            "parser_version": parser_version,
            "numberOfLines": 1,
        }
    ) == Text.of_iterable(lines).set_parser_version(parser_version)
Example 12
class InterestingFragmentFactory(FragmentFactory):
    collection = "Kuyunjik"  # pyre-ignore[15]
    publication = ""  # pyre-ignore[15]
    joins: Sequence[str] = tuple()
    text = Text()
    notes = ""
    uncurated_references = (
        UncuratedReference("7(0)"),
        UncuratedReference("CAD 51", (34, 56)),
        UncuratedReference("7(1)"),
    )
    references = tuple()
Example 13
class Manuscript:
    id: int
    siglum_disambiguator: str = ""
    museum_number: Optional[MuseumNumber] = None
    accession: str = attr.ib(default="")
    period_modifier: PeriodModifier = PeriodModifier.NONE
    period: Period = Period.NEO_ASSYRIAN
    provenance: Provenance = attr.ib(default=Provenance.NINEVEH)
    type: ManuscriptType = ManuscriptType.LIBRARY
    notes: str = ""
    colophon: Text = Text()
    unplaced_lines: Text = Text()
    references: Sequence[Reference] = tuple()
    joins: Joins = Joins()
    is_in_fragmentarium: bool = False

    @accession.validator
    def validate_accession(self, _, value) -> None:
        if self.museum_number and value:
            raise ValueError("Accession given when museum number present.")

    @provenance.validator
    def validate_provenance(self, _, value) -> None:
        if is_invalid_standard_text(value, self.period, self.type):
            raise ValueError(
                "Manuscript must not have period and type when provenance is Standard Text."
            )
        elif is_invalid_non_standard_text(value, self.period, self.type):
            raise ValueError(
                "Manuscript must have period and type unless provenance is Standard Text."
            )

    @property
    def text_lines(self) -> Sequence[TextLine]:
        return [*self.colophon.text_lines, *self.unplaced_lines.text_lines]

    @property
    def siglum(self) -> Siglum:
        return Siglum(self.provenance, self.period, self.type,
                      self.siglum_disambiguator)
Example 14
def test_query_lemmas_ignores_in_value(parts, expected, fragment_repository,
                                       lemma_repository):
    fragment = FragmentFactory.build(
        text=Text.of_iterable([
            TextLine.of_iterable(
                LineNumber(1),
                [Word.of(parts, unique_lemma=(WordId("ana I"), ))])
        ]),
        signs="DIŠ",
    )
    fragment_repository.create(fragment)

    assert lemma_repository.query_lemmas("ana", False) == expected
Example 15
class ManuscriptFactory(factory.Factory):
    class Meta:
        model = Manuscript

    id = factory.Sequence(lambda n: n + 1)
    siglum_disambiguator = factory.Faker("word")
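    # Alternate museum number and accession so both are never set at once,
    # which would fail Manuscript.validate_accession.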
    museum_number = factory.Sequence(
        lambda n: MuseumNumber("M", str(n)) if pydash.is_odd(n) else None
    )
    accession = factory.Sequence(lambda n: f"A.{n}" if pydash.is_even(n) else "")
    period_modifier = factory.fuzzy.FuzzyChoice(PeriodModifier)
    period = factory.fuzzy.FuzzyChoice(set(Period) - {Period.NONE})
    provenance = factory.fuzzy.FuzzyChoice(set(Provenance) - {Provenance.STANDARD_TEXT})
    type = factory.fuzzy.FuzzyChoice(set(ManuscriptType) - {ManuscriptType.NONE})
    notes = factory.Faker("sentence")
    colophon = Transliteration.of_iterable(
        [TextLine.of_iterable(LineNumber(1, True), (Word.of([Reading.of_name("ku")]),))]
    )
    unplaced_lines = Transliteration.of_iterable(
        [TextLine.of_iterable(LineNumber(1, True), (Word.of([Reading.of_name("nu")]),))]
    )
    references = factory.List(
        [factory.SubFactory(ReferenceFactory, with_document=True)], TupleFactory
    )
Example 16
def test_updating_manuscripts(corpus, text_repository, bibliography, changelog,
                              signs, sign_repository, user, when) -> None:
    uncertain_fragments = (MuseumNumber.of("K.1"), )
    updated_chapter = attr.evolve(
        CHAPTER,
        manuscripts=(attr.evolve(
            CHAPTER.manuscripts[0],
            colophon=Transliteration.of_iterable([
                TextLine.of_iterable(LineNumber(1, True),
                                     (Word.of([Reading.of_name("ba")]), ))
            ]),
            unplaced_lines=Transliteration.of_iterable([
                TextLine.of_iterable(LineNumber(1, True),
                                     (Word.of([Reading.of_name("ku")]), ))
            ]),
            notes="Updated manuscript.",
        ), ),
        uncertain_fragments=uncertain_fragments,
        signs=("KU ABZ075 ABZ207a\\u002F207b\\u0020X\nBA\nKU", ),
    )
    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    manuscripts = (updated_chapter.manuscripts[0], )
    assert (corpus.update_manuscripts(CHAPTER.id_, manuscripts,
                                      uncertain_fragments,
                                      user) == updated_chapter)
Example 17
def test_parse_normalized_akkadian_shift() -> None:
    word = "ha"
    line = f"1. {word} %n {word} %sux {word}"

    expected = Text((TextLine.of_iterable(
        LineNumber(1),
        (
            Word.of((Reading.of_name(word), ), DEFAULT_LANGUAGE),
            LanguageShift.normalized_akkadian(),
            AkkadianWord.of((ValueToken.of(word), )),
            LanguageShift.of("%sux"),
            Word.of((Reading.of_name(word), ), Language.SUMERIAN),
        ),
    ), ))

    assert parse_atf_lark(line).lines == expected.lines
Example 18
def test_parse_atf_language_shifts(code: str,
                                   expected_language: Language) -> None:
    word = "ha-am"
    parts = [Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")]
    line = f"1. {word} {code} {word} %sb {word}"

    expected = Text((TextLine.of_iterable(
        LineNumber(1),
        (
            Word.of(parts, DEFAULT_LANGUAGE),
            LanguageShift.of(code),
            Word.of(parts, expected_language),
            LanguageShift.of("%sb"),
            Word.of(parts, Language.AKKADIAN),
        ),
    ), ))

    assert parse_atf_lark(line).lines == expected.lines
Example 19
def test_get(client, text_repository):
    chapter = ChapterFactory.build(
        lines=tuple(),
        manuscripts=(
            ManuscriptFactory.build(references=tuple()),
            ManuscriptFactory.build(colophon=Text(), references=tuple()),
            ManuscriptFactory.build(references=tuple()),
        ),
    )
    text_repository.create_chapter(chapter)

    result = client.simulate_get(create_chapter_url(chapter, "/colophons"))

    assert result.status == falcon.HTTP_OK
    assert result.json == [{
        "siglum": str(manuscript.siglum),
        "text": TextSchema().dump(manuscript.colophon),
    } for manuscript in chapter.manuscripts
                           if not manuscript.colophon.is_empty]
Example 20
def test_query_by_parallel_line_exists(database, fragment_repository):
    parallel_number = MuseumNumber.of("K.1")
    fragment = FragmentFactory.build(
        text=Text(
            (
                ParallelFragment(
                    False, parallel_number, True, Labels(), LineNumber(1), True
                ),
            )
        )
    )
    parallel_fragment = FragmentFactory.build(number=parallel_number)
    database[COLLECTION].insert_many(
        [
            FragmentSchema(exclude=["joins"]).dump(fragment),
            FragmentSchema(exclude=["joins"]).dump(parallel_fragment),
        ]
    )

    assert fragment_repository.query_by_museum_number(fragment.number) == fragment
Example 21
def test_parse_dividers() -> None:
    line, expected_tokens = (
        r'1. :? :#! :# ::? :.@v /@19* :"@20@c ;@v@19!',
        [
            TextLine.of_iterable(
                LineNumber(1),
                (
                    Divider.of(":", tuple(), (atf.Flag.UNCERTAIN, )),
                    Divider.of(":", tuple(),
                               (atf.Flag.DAMAGE, atf.Flag.CORRECTION)),
                    Divider.of(":", tuple(), (atf.Flag.DAMAGE, )),
                    Divider.of("::", tuple(), (atf.Flag.UNCERTAIN, )),
                    Divider.of(":.", ("@v", ), tuple()),
                    Divider.of("/", ("@19", ), (atf.Flag.COLLATION, )),
                    Divider.of(':"', ("@20", "@c"), tuple()),
                    Divider.of(";", ("@v", "@19"), (atf.Flag.CORRECTION, )),
                ),
            )
        ],
    )
    assert parse_atf_lark(line).lines == Text.of_iterable(
        expected_tokens).lines
Example 22
def test_combinations(
    qualification,
    extent,
    scope,
    state,
    status,
    expected_qualification,
    expected_extent,
    expected_scope,
    expected_state,
    expected_status,
):
    line = " ".join(["$", qualification, extent, scope, state, status])
    expected_line = StateDollarLine(
        expected_qualification,
        expected_extent,
        expected_scope,
        expected_state,
        expected_status,
    )
    assert parse_atf_lark(line).lines == Text.of_iterable([expected_line]).lines
Example 23
def test_of_iterable() -> None:
    assert Text.of_iterable(LINES) == Text(LINES, atf.ATF_PARSER_VERSION)
Example 24
ANOTHER_LEMMATIZED_FRAGMENT = attr.evolve(
    TransliteratedFragmentFactory.build(),
    text=Text(
        (
            TextLine(
                LineNumber(1),
                (
                    Word.of(
                        [Logogram.of_name("GI", 6)], unique_lemma=(WordId("ginâ I"),)
                    ),
                    Word.of([Reading.of_name("ana")], unique_lemma=(WordId("ana II"),)),
                    Word.of([Reading.of_name("ana")], unique_lemma=(WordId("ana II"),)),
                    Word.of(
                        [
                            Reading.of_name("u", 4),
                            Joiner.hyphen(),
                            Reading.of_name("šu"),
                        ],
                        unique_lemma=(WordId("ūsu I"),),
                    ),
                    AkkadianWord.of(
                        [ValueToken.of("ana")], unique_lemma=(WordId("normalized I"),)
                    ),
                ),
            ),
        )
    ),
    signs="MI DIŠ DIŠ UD ŠU",
)
Example 25
def test_set_text():
    fragment = FragmentFactory.build()
    text = Text((ParallelCompositionFactory.build(), ))
    updated_fragment = fragment.set_text(text)

    assert updated_fragment.text == text
Example 26
def test_text():
    fragment = FragmentFactory.build()
    assert fragment.text == Text()
Example 27
def test_parse_note_line() -> None:
    markup = "this is a note @i{italic text}@akk{kur}@sux{kur}"
    atf = f"#note: {markup}"
    expected_line = NoteLine(parse_markup(markup))
    assert parse_atf_lark(atf).lines == Text.of_iterable([expected_line]).lines
Example 28
def test_parse_state_dollar_line_surface_ambiguity(line, expected_line):
    assert parse_atf_lark(line).lines == Text.of_iterable([expected_line]).lines
Example 29
def test_parse_state_dollar_line(prefix, parenthesis, line, expected_line):
    atf_ = f"{prefix}({line})" if parenthesis else f"{prefix}{line}"
    assert parse_atf_lark(atf_).lines == Text.of_iterable([expected_line]).lines
Example 30
class Fragment:
    number: MuseumNumber
    accession: str = ""
    cdli_number: str = ""
    bm_id_number: str = ""
    publication: str = ""
    description: str = ""
    collection: str = ""
    script: str = ""
    museum: str = ""
    width: Measure = Measure()
    length: Measure = Measure()
    thickness: Measure = Measure()
    joins: Joins = Joins()
    record: Record = Record()
    folios: Folios = Folios()
    text: Text = Text()
    signs: str = ""
    notes: str = ""
    references: Sequence[Reference] = tuple()
    uncurated_references: Optional[Sequence[UncuratedReference]] = None
    genres: Sequence[Genre] = tuple()
    line_to_vec: Tuple[LineToVecEncodings, ...] = tuple()

    @property
    def is_lowest_join(self) -> bool:
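        # A fragment without any joins counts as its own lowest join.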
        return (self.joins.lowest or self.number) == self.number

    def set_references(self, references: Sequence[Reference]) -> "Fragment":
        return attr.evolve(self, references=references)

    def set_text(self, text: Text) -> "Fragment":
        return attr.evolve(self, text=text)

    def update_lowest_join_transliteration(
            self, transliteration: TransliterationUpdate,
            user: User) -> "Fragment":
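        # Only the lowest fragment in a join may receive a non-empty transliteration.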
        if transliteration.text.is_empty or self.is_lowest_join:
            return self.update_transliteration(transliteration, user)
        else:
            raise NotLowestJoinError(
                "Transliteration must be empty unless fragment is the lowest in join."
            )

    def update_transliteration(self, transliteration: TransliterationUpdate,
                               user: User) -> "Fragment":
        record = self.record.add_entry(self.text.atf, transliteration.text.atf,
                                       user)
        text = self.text.merge(transliteration.text)

        return attr.evolve(
            self,
            text=text,
            notes=transliteration.notes,
            signs=transliteration.signs,
            record=record,
            line_to_vec=create_line_to_vec(text.lines),
        )

    def set_genres(self, genres_new: Sequence[Genre]) -> "Fragment":
        return attr.evolve(self, genres=tuple(genres_new))

    def update_lemmatization(self, lemmatization: Lemmatization) -> "Fragment":
        text = self.text.update_lemmatization(lemmatization)
        return attr.evolve(self, text=text)

    def get_matching_lines(self, query: TransliterationQuery) -> Lines:
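        # query.match yields ranges of matching sign lines; consecutive duplicate
        # ranges are collapsed by groupby before slicing the text lines.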
        line_numbers = query.match(self.signs)
        lines = [line.atf for line in self.text.text_lines]

        return tuple(
            tuple(lines[numbers[0]:numbers[1] + 1])
            for numbers, _ in groupby(line_numbers))