def test_dump_line():
    """Dumping a Text serializes its lines, parser version, and line count."""
    lines = (
        TextLine.of_iterable(
            LineNumber(1),
            [
                Word.of(
                    parts=[
                        Reading.of_name("ha"),
                        Joiner.hyphen(),
                        Reading.of_name("am"),
                    ]
                )
            ],
        ),
        EmptyLine(),
        ControlLine("#", " comment"),
    )
    text = Text(lines, "1.0.0")
    expected = {
        "lines": OneOfLineSchema().dump(text.lines, many=True),
        "parser_version": text.parser_version,
        "numberOfLines": 1,
    }
    assert TextSchema().dump(text) == expected
def parse_line_(line: str, line_number: int):
    """Parse and validate a single transliteration line.

    Returns a ``(parsed_line, error)`` pair: on success the parsed
    (or, for a blank input, empty) line and ``None``; when parsing or
    validation raises one of ``PARSE_ERRORS``, ``None`` and the error
    data built from the exception, the raw line, and its number.
    """
    try:
        result = EmptyLine() if not line else parse_line(line)
        validate_line(result)
    except PARSE_ERRORS as error:
        return None, create_transliteration_error_data(error, line, line_number)
    return result, None
def make_manuscript_line(self, data: dict, **kwargs) -> ManuscriptLine:
    """Build a ``ManuscriptLine`` from deserialized schema data.

    ``data["atf"]`` holds an optional text line (present when
    ``data["number"]`` is non-empty) followed by paratext lines; each
    part is parsed separately.

    Raises:
        ValidationError: if any part of the ATF fails to parse.
    """
    atf_lines = data["atf"].split("\n")
    number = data["number"]
    try:
        if number:
            # First ATF line belongs to the numbered text line.
            text_line = parse_text_line(f"{number}. {atf_lines[0]}")
            paratext_lines = atf_lines[1:]
        else:
            text_line = EmptyLine()
            paratext_lines = atf_lines
        return ManuscriptLine(
            data["manuscript_id"],
            tuple(data["labels"]),
            text_line,
            tuple(parse_paratext(paratext) for paratext in paratext_lines),
            tuple(data["omitted_words"]),
        )
    except PARSE_ERRORS as error:
        raise ValidationError(
            f"Invalid manuscript line: {data['atf']}.", "atf"
        ) from error
def test_statistics(database, fragment_repository):
    """Fragments with at least one TextLine count as transliterated;
    line counting sums TextLines across all such fragments."""
    fragment_with_one_line = FragmentFactory.build(
        text=Text(
            (
                TextLine(
                    LineNumber(1),
                    (
                        Word.of([Reading.of_name("first")]),
                        Word.of([Reading.of_name("line")]),
                    ),
                ),
                ControlLine("#", "ignore"),
                EmptyLine(),
            )
        )
    )
    fragment_with_three_lines = FragmentFactory.build(
        text=Text(
            (
                ControlLine("#", "ignore"),
                TextLine(LineNumber(1), (Word.of([Reading.of_name("second")]),)),
                TextLine(LineNumber(2), (Word.of([Reading.of_name("third")]),)),
                ControlLine("#", "ignore"),
                TextLine(LineNumber(3), (Word.of([Reading.of_name("fourth")]),)),
            )
        )
    )
    fragment_without_text = FragmentFactory.build(text=Text())
    database[COLLECTION].insert_many(
        [
            SCHEMA.dump(fragment)
            for fragment in (
                fragment_with_one_line,
                fragment_with_three_lines,
                fragment_without_text,
            )
        ]
    )
    assert fragment_repository.count_transliterated_fragments() == 2
    assert fragment_repository.count_lines() == 4
def empty_line(self, _children):
    """Produce an ``EmptyLine``; the visited children are ignored."""
    return EmptyLine()
from ebl.dictionary.domain.word import WordId from ebl.transliteration.domain import atf from ebl.transliteration.domain.enclosure_tokens import BrokenAway, PerhapsBrokenAway from ebl.transliteration.domain.line import ControlLine, EmptyLine, Line from ebl.transliteration.domain.line_number import LineNumber from ebl.transliteration.domain.sign_tokens import Reading from ebl.transliteration.domain.text_line import TextLine from ebl.transliteration.domain.tokens import Joiner, LanguageShift from ebl.transliteration.domain.word_tokens import Word @pytest.mark.parametrize( # pyre-ignore[56] "old,new,expected", [ ( EmptyLine(), TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]), TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]), ), ( TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]), ControlLine("#", " comment"), ControlLine("#", " comment"), ), ( TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]), TextLine.of_iterable(LineNumber(2),
def test_is_end_of_side(paratext, is_end) -> None:
    """A manuscript line's end-of-side flag is driven by its paratext."""
    manuscript_line = ManuscriptLineFactory.build(
        line=EmptyLine(), paratext=paratext
    )
    assert manuscript_line.is_end_of_side is is_end
import pytest from ebl.tests.factories.corpus import ManuscriptLineFactory from ebl.transliteration.domain import atf from ebl.transliteration.domain.dollar_line import StateDollarLine from ebl.transliteration.domain.line import EmptyLine from ebl.transliteration.domain.line_number import LineNumber from ebl.transliteration.domain.text_line import TextLine from ebl.transliteration.domain.note_line import NoteLine from ebl.transliteration.domain.markup import StringPart @pytest.mark.parametrize( "line,is_beginning", [ (EmptyLine(), False), (TextLine(LineNumber(2)), False), (TextLine(LineNumber(1)), True), ], ) def test_is_beginning_of_side(line, is_beginning) -> None: line = ManuscriptLineFactory.build(line=line) assert line.is_beginning_of_side is is_beginning @pytest.mark.parametrize( # pyre-ignore[56] "paratext,is_end", [ (tuple(), False), ((NoteLine((StringPart("note"), )), ), False), ((StateDollarLine(None, atf.Extent.SEVERAL, None, None,
parse_text_line("1. kur")), ), ( [f" {MANUSCRIPTS[0].siglum} 1. kur"], ManuscriptLine(MANUSCRIPTS[0].id, tuple(), parse_text_line("1. kur")), ), ( [ f"{MANUSCRIPTS[0].siglum} o iii", "#note: a note", "$ single ruling" ], ManuscriptLine( MANUSCRIPTS[0].id, parse_labels("o iii"), EmptyLine(), (parse_paratext("#note: a note"), parse_paratext("$ single ruling")), ), ), ( [f"{MANUSCRIPTS[0].siglum}"], ManuscriptLine(MANUSCRIPTS[0].id, tuple(), EmptyLine()), ), ], ) def test_parse_manuscript(lines, expected) -> None: atf = "\n".join(lines) assert parse_manuscript(atf) == expected
def test_empty_line() -> None:
    """An EmptyLine has no lemmatization, an empty ATF, and a typed key."""
    empty = EmptyLine()
    assert empty.lemmatization == ()
    assert empty.key == f"EmptyLine⁞⁞{hash(empty)}"
    assert empty.atf == ""
def test_empty_line() -> None:
    """An EmptyLine has no lemmatization, an empty ATF, and a typed key."""
    empty = EmptyLine()
    assert empty.lemmatization == ()
    assert empty.key == f"EmptyLine⁞⁞{hash(empty)}"
    assert empty.atf == ""


def test_control_line() -> None:
    """A ControlLine exposes its prefix, content, key, and lemmatization."""
    control = ControlLine("#", "only")
    assert control.prefix == "#"
    assert control.content == "only"
    assert control.key == f"ControlLine⁞#only⁞{hash(control)}"
    assert control.lemmatization == (LemmatizationToken("only"),)


@pytest.mark.parametrize(  # pyre-ignore[56]
    "line,lemmatization",
    [
        (ControlLine("#", " a comment"), (LemmatizationToken(" a comment"),)),
        (EmptyLine(), tuple()),
    ],
)
def test_update_lemmatization(line, lemmatization) -> None:
    """Re-applying a line's own lemmatization leaves it unchanged."""
    assert line.update_lemmatization(lemmatization) == line
NOTE, (ManuscriptLine(MANUSCRIPT_ID, LABELS, MANUSCRIPT_TEXT_1, PARATEXT, OMITTED_WORDS), ), PARALLEL_LINES, ) LINE_1 = Line( LINE_NUMBER, (LINE_VARIANT_1, ), IS_SECOND_LINE_OF_PARALLELISM, IS_BEGINNING_OF_SECTION, TRANSLATION, ) LINE_VARIANT_2 = LineVariant( LINE_RECONSTRUCTION, None, (ManuscriptLine(MANUSCRIPT_ID, tuple(), EmptyLine()), )) LINE_2 = Line(LineNumber(2), (LINE_VARIANT_2, )) MANUSCRIPT_TEXT_3 = attr.evolve(MANUSCRIPT_TEXT_1, line_number=LineNumber(3)) LINE_VARIANT_3 = LineVariant( LINE_RECONSTRUCTION, None, (ManuscriptLine(MANUSCRIPT_ID, tuple(), MANUSCRIPT_TEXT_3), ), ) LINE_3 = Line(LineNumber(3), (LINE_VARIANT_3, )) RECORD = Record( (Author("Author", "Test", AuthorRole.EDITOR, ""), ), (Translator("Author", "Test", "", "en"), ), "", )
Text.of_iterable([ TextLine.of_iterable( LineNumber(1), [ Word.of([ Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am"), ]) ], ), ControlLine("#", " comment"), ]), ), ( Text.of_iterable([EmptyLine()]), Text.of_iterable([RulingDollarLine(atf.Ruling.SINGLE)]), Text.of_iterable([RulingDollarLine(atf.Ruling.SINGLE)]), ), ( Text.of_iterable([ RulingDollarLine(atf.Ruling.DOUBLE), RulingDollarLine(atf.Ruling.SINGLE), EmptyLine(), ]), Text.of_iterable( [RulingDollarLine(atf.Ruling.DOUBLE), EmptyLine()]), Text.of_iterable( [RulingDollarLine(atf.Ruling.DOUBLE), EmptyLine()]),
).set_enclosure_type( frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS}) ) ] ).set_enclosure_type( frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS}) ), DocumentOrientedGloss.close().set_enclosure_type( frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS}) ), ], many=True, ), }, ), (EmptyLine(), {"type": "EmptyLine", "prefix": "", "content": []}), ( LooseDollarLine("end of side"), { "type": "LooseDollarLine", "prefix": "$", "content": [OneOfTokenSchema().dump(ValueToken.of(" (end of side)"))], "text": "end of side", "displayValue": "(end of side)", }, ), ( ImageDollarLine("1", "a", "great"), { "type": "ImageDollarLine", "prefix": "$",
def make_line(self, data, **kwargs) -> EmptyLine:
    """Deserialize to an ``EmptyLine``; ``data`` and ``kwargs`` are ignored
    since an empty line carries no payload beyond its type."""
    return EmptyLine()
ControlLine("#", " comment"), ), "1.0.0", ) assert TextSchema().dump(text) == { "lines": OneOfLineSchema().dump(text.lines, many=True), "parser_version": text.parser_version, "numberOfLines": 1, } @pytest.mark.parametrize( "lines", [ [EmptyLine()], [ControlLine("#", " comment")], [ TextLine.of_iterable( LineNumber(1), [ Word.of( unique_lemma=(WordId("nu I"),), parts=[Reading.of_name("nu")] ), Word.of(alignment=1, parts=[Reading.of_name("nu")]), LanguageShift.of("%sux"), LoneDeterminative.of( [Determinative.of([Reading.of_name("nu")])], language=Language.SUMERIAN, ), Erasure.open(),