def test_metrical_foot_separator(separator: MetricalFootSeparator,
                                 is_uncertain: bool, value: str) -> None:
    assert separator.value == value
    assert separator.is_uncertain == is_uncertain

    serialized = {"type": "MetricalFootSeparator", "isUncertain": is_uncertain}
    assert_token_serialization(separator, serialized)
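Every example on this page checks serialization through an assert_token_serialization helper that the snippets themselves do not show. The sketch below is a guess at what such a helper plausibly does, assuming it merges fields shared by all tokens into the expected dict before comparing it with the OneOfTokenSchema dump used throughout the examples; the module path and the merged field names are assumptions, not taken from this page.

from ebl.transliteration.application.token_schemas import OneOfTokenSchema  # assumed module path


def assert_token_serialization(token, expected_serialized: dict) -> None:
    # Assumed behaviour: fields common to every token are merged in first,
    # which would explain why the expected dicts in these tests list only
    # the type-specific keys.
    expected = {
        "value": token.value,
        "cleanValue": token.clean_value,
        "enclosureType": [type_.name for type_ in token.enclosure_type],  # assumed field
        "erasure": token.erasure.name,  # assumed field
        **expected_serialized,
    }
    assert OneOfTokenSchema().dump(token) == expected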
Code example #2
def test_greek_word(
    word: GreekWord,
    expected: str,
    language: Language,
    lemmatizable: bool,
    alignable: bool,
) -> None:
    assert word.value == expected
    assert word.clean_value == expected.translate(str.maketrans("", "", "[]()<>#?!"))
    assert word.language == language
    assert word.normalized is False
    assert word.lemmatizable is lemmatizable
    assert word.alignable is alignable

    serialized = {
        "type": "GreekWord",
        "parts": OneOfTokenSchema().dump(word.parts, many=True),
        "uniqueLemma": [],
        "alignment": None,
        "variant": None,
        "lemmatizable": lemmatizable,
        "alignable": alignable,
        "normalized": word.normalized,
        "language": language.name,
    }
    assert_token_serialization(word, serialized)
Code example #3
def test_language_shift(value, expected_language, normalized):
    shift = LanguageShift.of(value)
    equal = LanguageShift.of(value)
    other = ValueToken.of(r"%bar")

    assert shift.value == value
    assert shift.clean_value == value
    assert shift.get_key() == f"LanguageShift⁝{value}"
    assert shift.lemmatizable is False
    assert shift.normalized == normalized
    assert shift.language == expected_language

    serialized = {
        "type": "LanguageShift",
        "normalized": normalized,
        "language": shift.language.name,
    }
    assert_token_serialization(shift, serialized)

    assert shift == equal
    assert hash(shift) == hash(equal)

    assert shift != other
    assert hash(shift) != hash(other)

    assert shift != ValueToken.of(value)
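The value, expected_language, and normalized parameters come from a parametrize list that is not part of this snippet. A minimal sketch of a plausible parametrization follows; the shift codes, their Language mappings, the normalized flags, and the module paths are assumptions based on common ATF conventions, not taken from this page.

import pytest

from ebl.transliteration.domain.language import Language  # assumed module path

LANGUAGE_SHIFT_PARAMETERS = [
    ("%sux", Language.SUMERIAN, False),  # assumed: Sumerian shift, not normalized
    ("%akk", Language.AKKADIAN, False),  # assumed: Akkadian shift
    ("%n", Language.AKKADIAN, True),     # assumed: normalized Akkadian shift
]


@pytest.mark.parametrize("value,expected_language,normalized", LANGUAGE_SHIFT_PARAMETERS)
def test_language_shift(value, expected_language, normalized):
    ...  # body as in the example above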
Code example #4
def test_joiner(joiner, expected_value):
    assert joiner.value == expected_value
    assert joiner.clean_value == expected_value
    assert joiner.get_key() == f"Joiner⁝{expected_value}"
    assert joiner.lemmatizable is False

    serialized = {"type": "Joiner"}
    assert_token_serialization(joiner, serialized)
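The joiner and expected_value parameters are likewise supplied by a parametrize list outside the snippet. The sketch below shows a plausible parametrization; Joiner.hyphen() is used by the Determinative and LinguisticGloss examples further down this page, while the other factory names, their values, and the module path are assumptions.

import pytest

from ebl.transliteration.domain.tokens import Joiner  # assumed module path

JOINER_PARAMETERS = [
    (Joiner.hyphen(), "-"),  # hyphen() also appears in other examples on this page
    (Joiner.colon(), ":"),   # assumed factory and value
    (Joiner.dot(), "."),     # assumed factory and value
    (Joiner.plus(), "+"),    # assumed factory and value
]


@pytest.mark.parametrize("joiner,expected_value", JOINER_PARAMETERS)
def test_joiner(joiner, expected_value):
    ...  # body as in the example above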
Code example #5
def test_column_with_number():
    column = Column.of(1)

    expected_value = "&1"
    assert column.value == expected_value
    assert column.clean_value == expected_value
    assert column.get_key() == f"Column⁝{expected_value}"
    assert column.lemmatizable is False

    serialized = {"type": "Column", "number": 1}
    assert_token_serialization(column, serialized)
Code example #6
def test_tabulation():
    value = "($___$)"
    tabulation = Tabulation.of()

    assert tabulation.value == value
    assert tabulation.clean_value == value
    assert tabulation.get_key() == f"Tabulation⁝{value}"
    assert tabulation.lemmatizable is False

    serialized = {"type": "Tabulation"}
    assert_token_serialization(tabulation, serialized)
Code example #7
def test_line_break():
    value = "|"
    line_break = LineBreak.of()

    assert line_break.value == value
    assert line_break.clean_value == value
    assert line_break.get_key() == f"LineBreak⁝{value}"
    assert line_break.lemmatizable is False

    serialized = {"type": "LineBreak"}
    assert_token_serialization(line_break, serialized)
Code example #8
def test_greek_letter() -> None:
    alphabet = "α"
    flag = atf.Flag.UNCERTAIN
    greek_letter = GreekLetter.of(alphabet, [flag])

    assert greek_letter.value == f"{alphabet}{flag.value}"
    assert greek_letter.clean_value == alphabet
    assert greek_letter.get_key() == f"GreekLetter⁝{greek_letter.value}"
    assert greek_letter.lemmatizable is False

    serialized = {"type": "GreekLetter", "letter": alphabet, "flags": [flag.value]}
    assert_token_serialization(greek_letter, serialized)
Code example #9
def test_compound_grapheme() -> None:
    compound = CompoundGrapheme.of(["BI", "IS"])

    expected_value = "|BI.IS|"
    assert compound.name == SignName(expected_value)
    assert compound.value == expected_value
    assert compound.clean_value == expected_value
    assert (compound.get_key() ==
            f"CompoundGrapheme⁝{expected_value}⟨ValueToken⁝BI⁚ValueToken⁝IS⟩")

    serialized = {"type": "CompoundGrapheme", "compound_parts": ["BI", "IS"]}
    assert_token_serialization(compound, serialized)
Code example #10
def test_unclear_sign() -> None:
    sign = UnclearSign.of()

    expected_value = "x"
    assert sign.value == expected_value
    assert sign.clean_value == expected_value
    assert sign.get_key() == f"UnclearSign⁝{expected_value}"
    assert sign.flags == tuple()
    assert sign.lemmatizable is False

    serialized = {"type": "UnclearSign", "flags": []}
    assert_token_serialization(sign, serialized)
Code example #11
def test_commentary_protocol(protocol_enum):
    value = protocol_enum.value
    protocol = CommentaryProtocol.of(value)

    assert protocol.value == value
    assert protocol.clean_value == value
    assert protocol.get_key() == f"CommentaryProtocol⁝{value}"
    assert protocol.lemmatizable is False
    assert protocol.protocol == protocol_enum

    serialized = {"type": "CommentaryProtocol"}
    assert_token_serialization(protocol, serialized)
Code example #12
def test_in_word_new_line():
    newline = InWordNewline(frozenset({EnclosureType.BROKEN_AWAY}),
                            ErasureState.NONE)

    expected_value = ";"
    assert newline.value == expected_value
    assert newline.clean_value == expected_value
    assert newline.get_key() == f"InWordNewline⁝{expected_value}"
    assert newline.lemmatizable is False

    serialized = {"type": "InWordNewline"}
    assert_token_serialization(newline, serialized)
Code example #13
def test_unclear_sign_with_flags() -> None:
    flags = [atf.Flag.CORRECTION]
    sign = UnclearSign.of(flags)

    expected_value = "x!"
    assert sign.value == expected_value
    assert sign.clean_value == "x"
    assert sign.get_key() == f"UnclearSign⁝{expected_value}"
    assert sign.flags == tuple(flags)
    assert sign.lemmatizable is False

    serialized = {"type": "UnclearSign", "flags": ["!"]}
    assert_token_serialization(sign, serialized)
Code example #14
def test_unknown_number_of_signs():
    unknown_number_of_signs = UnknownNumberOfSigns(
        frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE
    )

    expected_value = "..."
    assert unknown_number_of_signs.value == expected_value
    assert unknown_number_of_signs.clean_value == expected_value
    assert unknown_number_of_signs.get_key() == f"UnknownNumberOfSigns⁝{expected_value}"
    assert unknown_number_of_signs.lemmatizable is False

    serialized = {"type": "UnknownNumberOfSigns"}
    assert_token_serialization(unknown_number_of_signs, serialized)
Code example #15
def test_unidentified_sign_with_flags() -> None:
    flags = [atf.Flag.DAMAGE]
    sign = UnidentifiedSign.of(flags)

    expected_value = "X#"
    assert sign.value == expected_value
    assert sign.clean_value == "X"
    assert sign.get_key() == f"UnidentifiedSign⁝{expected_value}"
    assert sign.flags == tuple(flags)
    assert sign.lemmatizable is False

    serialized = {"type": "UnidentifiedSign", "flags": ["#"]}
    assert_token_serialization(sign, serialized)
Code example #16
def test_enclosure(enclosure_class, type_, sides, side):
    value = sides[side]
    enclosure = enclosure_class.of(side)

    assert enclosure.value == value
    assert enclosure.clean_value == ""
    assert enclosure.get_key() == f"{type_}⁝{value}"
    assert enclosure.side == side
    assert enclosure.is_open == (side == Side.LEFT)
    assert enclosure.is_close == (side == Side.RIGHT)
    assert enclosure.lemmatizable is False

    serialized = {"type": type_, "side": side.name}
    assert_token_serialization(enclosure, serialized)
Code example #17
def test_egyptian_metrical_feet_separator():
    egyptian_metrical_feet_separator = EgyptianMetricalFeetSeparator.of(
        (atf.Flag.UNCERTAIN,)
    )
    expected_value = "•?"
    assert egyptian_metrical_feet_separator.value == expected_value
    assert egyptian_metrical_feet_separator.clean_value == "•"
    assert (
        egyptian_metrical_feet_separator.get_key()
        == f"EgyptianMetricalFeetSeparator⁝{expected_value}"
    )
    assert egyptian_metrical_feet_separator.lemmatizable is False

    serialized = {"type": "EgyptianMetricalFeetSeparator", "flags": ["?"]}
    assert_token_serialization(egyptian_metrical_feet_separator, serialized)
Code example #18
def test_determinative():
    parts = [Reading.of_name("kur"), Joiner.hyphen(), Reading.of_name("kur")]
    determinative = Determinative.of(parts)

    expected_value = f"{{{''.join(part.value for part in parts)}}}"
    expected_clean_value = f"{{{''.join(part.clean_value for part in parts)}}}"
    expected_parts = f"⟨{'⁚'.join(part.get_key() for part in parts)}⟩"
    assert determinative.value == expected_value
    assert determinative.clean_value == expected_clean_value
    assert determinative.get_key() == f"Determinative⁝{expected_value}{expected_parts}"
    assert determinative.parts == tuple(parts)
    assert determinative.lemmatizable is False

    serialized = {
        "type": "Determinative",
        "parts": OneOfTokenSchema().dump(parts, many=True),
    }
    assert_token_serialization(determinative, serialized)
Code example #19
def test_linguistic_gloss():
    parts = [Reading.of_name("kur"), Joiner.hyphen(), Reading.of_name("kur")]
    gloss = LinguisticGloss.of(parts)

    expected_value = f"{{{{{''.join(part.value for part in parts)}}}}}"
    expected_clean_value = f"{{{{{''.join(part.clean_value for part in parts)}}}}}"
    expected_parts = f"⟨{'⁚'.join(part.get_key() for part in parts)}⟩"
    assert gloss.value == expected_value
    assert gloss.clean_value == expected_clean_value
    assert gloss.get_key() == f"LinguisticGloss⁝{expected_value}{expected_parts}"
    assert gloss.parts == tuple(parts)
    assert gloss.lemmatizable is False

    serialized = {
        "type": "LinguisticGloss",
        "parts": OneOfTokenSchema().dump(parts, many=True),
    }
    assert_token_serialization(gloss, serialized)
Code example #20
def test_grapheme(name, modifiers, flags, expected_value,
                  expected_clean_value) -> None:
    grapheme = Grapheme.of(name, modifiers, flags)

    assert grapheme.name == name
    assert grapheme.value == expected_value
    assert grapheme.clean_value == expected_clean_value
    assert grapheme.get_key() == f"Grapheme⁝{expected_value}"
    assert grapheme.modifiers == tuple(modifiers)
    assert grapheme.flags == tuple(flags)
    assert grapheme.lemmatizable is False

    serialized = {
        "type": "Grapheme",
        "name": name,
        "modifiers": modifiers,
        "flags": [flag.value for flag in flags],
    }
    assert_token_serialization(grapheme, serialized)
Code example #21
def test_value_token():
    value = "value"
    token = ValueToken.of(value)
    equal = ValueToken.of(value)
    other = ValueToken.of("anothervalue")

    assert token.value == value
    assert token.clean_value == value
    assert token.get_key() == f"ValueToken⁝{value}"
    assert token.lemmatizable is False

    serialized = {"type": "ValueToken"}
    assert_token_serialization(token, serialized)

    assert token == equal
    assert hash(token) == hash(equal)

    assert token != other
    assert hash(token) != hash(other)
Code example #22
def test_divider() -> None:
    value = ":"
    modifiers = ("@v", )
    flags = (atf.Flag.UNCERTAIN, )
    divider = Divider.of(value, modifiers, flags)

    expected_value = ":@v?"
    assert divider.value == expected_value
    assert divider.clean_value == ":@v"
    assert divider.get_key() == f"Divider⁝{expected_value}"
    assert divider.lemmatizable is False

    serialized = {
        "type": "Divider",
        "divider": value,
        "modifiers": list(modifiers),
        "flags": ["?"],
    }
    assert_token_serialization(divider, serialized)
Code example #23
def test_variant():
    reading = Reading.of([ValueToken.of("sa"), BrokenAway.open(), ValueToken.of("l")])
    divider = Divider.of(":")
    variant = Variant.of(reading, divider)

    expected_value = "sa[l/:"
    assert variant.value == expected_value
    assert variant.clean_value == "sal/:"
    assert variant.tokens == (reading, divider)
    assert variant.parts == variant.tokens
    assert (
        variant.get_key()
        == f"Variant⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in variant.tokens)}⟩"
    )
    assert variant.lemmatizable is False

    serialized = {
        "type": "Variant",
        "tokens": OneOfTokenSchema().dump([reading, divider], many=True),
    }
    assert_token_serialization(variant, serialized)
Code example #24
def test_akkadian_word(word: AkkadianWord, expected: str,
                       lemmatizable: bool) -> None:
    assert word.value == expected
    assert word.clean_value == expected.translate(
        str.maketrans("", "", "[]()<>#?!"))
    assert word.lemmatizable is lemmatizable
    assert word.alignable is lemmatizable

    serialized = {
        "type": "AkkadianWord",
        "parts": OneOfTokenSchema().dump(word.parts, many=True),
        "modifiers": [modifier.value for modifier in word.modifiers],
        "uniqueLemma": [],
        "alignment": None,
        "variant": None,
        "lemmatizable": lemmatizable,
        "alignable": lemmatizable,
        "normalized": True,
        "language": "AKKADIAN",
    }
    assert_token_serialization(word, serialized)
Code example #25
def test_logogram(
    name_parts,
    sub_index,
    modifiers,
    flags,
    sign,
    surrogate,
    expected_value,
    expected_clean_value,
    expected_name,
) -> None:
    logogram = Logogram.of(name_parts, sub_index, modifiers, flags, sign,
                           surrogate)

    expected_parts = (*name_parts, sign) if sign else name_parts
    assert logogram.value == expected_value
    assert logogram.clean_value == expected_clean_value
    assert (
        logogram.get_key() ==
        f"Logogram⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in expected_parts)}⟩"
    )
    assert logogram.name_parts == name_parts
    assert logogram.name == expected_name
    assert logogram.modifiers == tuple(modifiers)
    assert logogram.flags == tuple(flags)
    assert logogram.lemmatizable is False
    assert logogram.sign == sign
    assert logogram.surrogate == tuple(surrogate)

    serialized = {
        "type": "Logogram",
        "name": expected_name,
        "nameParts": OneOfTokenSchema().dump(name_parts, many=True),
        "subIndex": sub_index,
        "modifiers": modifiers,
        "flags": [flag.value for flag in flags],
        "surrogate": OneOfTokenSchema().dump(surrogate, many=True),
        "sign": sign and OneOfTokenSchema().dump(sign),
    }
    assert_token_serialization(logogram, serialized)
Code example #26
def test_number(
    name_parts,
    modifiers,
    flags,
    sign,
    expected_value,
    expected_clean_value,
    expected_name,
) -> None:
    number = Number.of(name_parts, modifiers, flags, sign)

    expected_sub_index = 1
    expected_parts = (*name_parts, sign) if sign else name_parts
    assert number.value == expected_value
    assert number.clean_value == expected_clean_value
    assert (
        number.get_key() ==
        f"Number⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in expected_parts)}⟩"
    )
    assert number.name_parts == name_parts
    assert number.name == expected_name
    assert number.sub_index == expected_sub_index
    assert number.modifiers == tuple(modifiers)
    assert number.flags == tuple(flags)
    assert number.lemmatizable is False
    assert number.sign == sign

    serialized = {
        "type": "Number",
        "name": expected_name,
        "nameParts": OneOfTokenSchema().dump(name_parts, many=True),
        "modifiers": modifiers,
        "subIndex": expected_sub_index,
        "flags": [flag.value for flag in flags],
        "sign": sign and OneOfTokenSchema().dump(sign),
    }
    assert_token_serialization(number, serialized)
Code example #27
def test_reading(
    name_parts,
    sub_index,
    modifiers,
    flags,
    sign,
    expected_value,
    expected_clean_value,
    expected_name,
) -> None:
    reading = Reading.of(name_parts, sub_index, modifiers, flags, sign)

    expected_parts = (*name_parts, sign) if sign else name_parts
    assert reading.value == expected_value
    assert reading.clean_value == expected_clean_value
    assert (
        reading.get_key() ==
        f"Reading⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in expected_parts)}⟩"
    )
    assert reading.name_parts == name_parts
    assert reading.name == expected_name
    assert reading.modifiers == tuple(modifiers)
    assert reading.flags == tuple(flags)
    assert reading.lemmatizable is False
    assert reading.sign == sign

    serialized = {
        "type": "Reading",
        "name": expected_name,
        "nameParts": OneOfTokenSchema().dump(name_parts, many=True),
        "subIndex": sub_index,
        "modifiers": modifiers,
        "flags": [flag.value for flag in flags],
        "sign": sign and OneOfTokenSchema().dump(sign),
    }
    assert_token_serialization(reading, serialized)
Code example #28
def test_lone_determinative(language):
    value = "{mu}"
    parts = [Determinative.of([Reading.of_name("mu")])]
    lone_determinative = LoneDeterminative.of(parts, language)

    equal = LoneDeterminative.of(parts, language)
    other_language = LoneDeterminative.of(parts, Language.UNKNOWN)
    other_parts = LoneDeterminative.of(
        [Determinative.of([Reading.of_name("bu")])], language)

    assert lone_determinative.value == value
    assert lone_determinative.lemmatizable is False
    assert lone_determinative.language == language
    assert lone_determinative.normalized is False
    assert lone_determinative.unique_lemma == tuple()

    serialized = {
        "type": "LoneDeterminative",
        "uniqueLemma": [],
        "normalized": False,
        "language": lone_determinative.language.name,
        "lemmatizable": lone_determinative.lemmatizable,
        "alignable": lone_determinative.lemmatizable,
        "erasure": ErasureState.NONE.name,
        "parts": OneOfTokenSchema().dump(parts, many=True),
    }
    assert_token_serialization(lone_determinative, serialized)

    assert lone_determinative == equal
    assert hash(lone_determinative) == hash(equal)

    for not_equal in [other_language, other_parts]:
        assert lone_determinative != not_equal
        assert hash(lone_determinative) != hash(not_equal)

    assert lone_determinative != ValueToken.of(value)
Code example #29
def test_caesura(caesura: Caesura, is_uncertain: bool, value: str) -> None:
    assert caesura.value == value
    assert caesura.is_uncertain == is_uncertain

    serialized = {"type": "Caesura", "isUncertain": is_uncertain}
    assert_token_serialization(caesura, serialized)
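The caesura fixtures are also parametrized outside the snippet. A sketch of a plausible parametrization follows; the certain()/uncertain() factories, the "||" and "(||)" values, and the module path are assumptions, not taken from this page.

import pytest

from ebl.transliteration.domain.normalized_akkadian import Caesura  # assumed module path

CAESURA_PARAMETERS = [
    (Caesura.certain(), False, "||"),     # assumed factory and value
    (Caesura.uncertain(), True, "(||)"),  # assumed factory and value
]


@pytest.mark.parametrize("caesura,is_uncertain,value", CAESURA_PARAMETERS)
def test_caesura(caesura, is_uncertain, value):
    ...  # body as in the example above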