def test_modern_sheva_1():
    """Check when the "sheva-modern-double-sound" rule is reported.

    Rule under test: `sheva` is voiced when under the first of two letters,
    both representing the same consonant or consonants with identical place
    and manner of articulation.

    Every word is parsed exactly once; the flattened parse is compared with
    the expected part names, and the same parse result is then checked for
    the presence (or absence) of the rule name.
    """
    RULE_NAME = "sheva-modern-double-sound"

    # (word, expected flat parts, whether RULE_NAME must be reported)
    cases = [
        (
            r"שָׁכְחוּ",  # sha-che-chu (same sound)
            ["shin", "qamats-gadol", "khaf", "sheva-na", "het", "shuruq"],
            True,
        ),
        (
            r"מָכְרוּ",  # mach-ru (negative example)
            ["mem", "qamats-gadol", "khaf", "sheva-na", "resh", "shuruq"],
            False,
        ),
        (
            r"שָׁדַדְתְּ",  # sha-da-det (same place of articulation)
            [
                "shin",
                "qamats-gadol",
                "dalet",
                "patah",
                "dalet",
                "sheva-nah",
                "tav",
                "dagesh-qal",
                "sheva-nah",
            ],
            True,
        ),
        (
            r"שָׁלַלְתְּ",  # sha-lalt (negative example)
            [
                "shin",
                "qamats-gadol",
                "lamed",
                "patah",
                "lamed",
                "sheva-nah",
                "tav",
                "dagesh-qal",
                "sheva-nah",
            ],
            False,
        ),
    ]

    for word, parts, rule_expected in cases:
        # Both assertions inspect this single parse result.
        parsed = Parser().parse(word)
        assert parts == parsed.flat()
        if rule_expected:
            assert RULE_NAME in parsed.rules.flat()
        else:
            assert RULE_NAME not in parsed.rules.flat()
def test_modern_sheva_3():
    """Check when the "sheva-modern-voiced-before-glottal" rule is reported.

    Rule under test: a `sheva` under the first letter of a word is voiced
    if the second letter is a glottal consonant (`alef`, `he`, `ayin`).
    """
    RULE_NAME = "sheva-modern-voiced-before-glottal"

    def rules_after_checking_parts(text, expected_parts):
        # Parse once, verify the flattened parts, then hand back the flat
        # list of rule names for the caller to inspect.
        result = Parser().parse(text)
        assert expected_parts == result.flat()
        return result.rules.flat()

    # te-a-rim: second letter is alef, so the rule is expected.
    glottal_rules = rules_after_checking_parts(
        r"תְּאָרִים",
        [
            "tav",
            "dagesh-qal",
            "sheva-na",
            "alef",
            "qamats-gadol",
            "resh",
            "hiriq-male-yod",
            "eim-qria-yod",
            "mem-sofit",
        ],
    )
    assert RULE_NAME in glottal_rules

    # te-ma-rim (traditional); tma-rim (modern); (negative example)
    non_glottal_rules = rules_after_checking_parts(
        r"תְּמָרִים",
        [
            "tav",
            "dagesh-qal",
            "sheva-na",
            "mem",
            "qamats-gadol",
            "resh",
            "hiriq-male-yod",
            "eim-qria-yod",
            "mem-sofit",
        ],
    )
    assert RULE_NAME not in non_glottal_rules
def test_modern_sheva_4():
    """Check when the "sheva-modern-voiced-prefix" rule is reported.

    Rule under test: a `sheva` under the first letter of a word is voiced
    if this letter represents one of the prefix-morphemes (`be-`, `ve-`,
    `ke-`, `le-`, `te-`).

    Some cases only verify the flattened parts and make no assertion about
    the rule; they are marked below.
    """
    # TODO: requires root word to determine if the first letter is a prefix or not.
    RULE_NAME = "sheva-modern-voiced-prefix"

    word = r"בְּרֵיחָהּ"  # be-rei-cha
    parts = [
        "bet",
        "dagesh-qal",
        "sheva-na",
        "resh",
        "tsere",
        "eim-qria-yod",
        "het",
        "qamats-gadol",
        "mapiq-he",
        "mapiq",
    ]
    parsed = Parser().parse(word)
    assert parts == parsed.flat()
    assert RULE_NAME in parsed.rules.flat()

    word = r"בְּרֵיכָה"  # brei-cha
    parts = [
        "bet",
        "dagesh-qal",
        "sheva-na",
        "resh",
        "tsere",
        "eim-qria-yod",
        "khaf",
        "qamats-gadol",
        "eim-qria-he",
    ]
    parsed = Parser().parse(word)
    assert parts == parsed.flat()
    # TODO: need base word before asserting that RULE_NAME is NOT reported here.

    # Pending cases, not exercised yet.
    # NOTE: each pair requires emphasis information.
    #   בְּחִישָׁה  be-chi-sha  /  בְּחִישָׁה  bchi-sha
    #   וְרוֹדִים  ve-ro-dim   /  וְרוּדִים  vru-dim
    #   כְּרָזָה  ke-ra-za    /  כְּרָזָה  kra-za

    word = r"לְפָּרִיז"  # le-pa-riz
    parts = [
        "lamed",
        "sheva-na",
        "pe",
        "dagesh-qal",
        "qamats-gadol",
        "resh",
        "hiriq-male-yod",
        "eim-qria-yod",
        "zayin",
    ]
    # NOTE(review): the sonorant rule is disabled for this word, presumably so
    # that it does not apply to the initial lamed instead of the prefix rule;
    # confirm against the parser's rule ordering.
    parsed = Parser(disabled=["sheva-modern-voiced-sonorant"]).parse(word)
    assert parts == parsed.flat()
    assert RULE_NAME in parsed.rules.flat()

    word = r"תְּבַלּוּ"  # te-va-lu
    parts = [
        "tav",
        "dagesh-qal",
        "sheva-na",
        "vet",
        "patah",
        "lamed",
        "dagesh-hazaq",
        "shuruq",
    ]
    parsed = Parser().parse(word)
    assert parts == parsed.flat()
    assert RULE_NAME in parsed.rules.flat()

    # Only the flattened parts are verified for this word; no rule assertion.
    word = r"תְּבַלּוּל"
    parts = [
        "tav",
        "dagesh-qal",
        "sheva-na",
        "vet",
        "patah",
        "lamed",
        "dagesh-hazaq",
        "shuruq",
        "lamed",
    ]
    parsed = Parser().parse(word)
    assert parts == parsed.flat()
def test_modern_sheva_2():
    """Check when the "sheva-modern-voiced-sonorant" rule is reported.

    Rule under test: a `sheva` under the first letter of a word is voiced
    if this letter is a sonorant in modern pronunciation (`yod`, `lamed`,
    `mem`, `nun`, `resh`).
    """
    RULE_NAME = "sheva-modern-voiced-sonorant"

    # (word, expected flat parts, whether RULE_NAME must be reported)
    scenarios = (
        (
            r"נְמָלִים",  # ne-ma-lim
            [
                "nun",
                "sheva-na",
                "mem",
                "qamats-gadol",
                "lamed",
                "hiriq-male-yod",
                "eim-qria-yod",
                "mem-sofit",
            ],
            True,
        ),
        (
            # ge-ma-lim (traditional); gma-lim (modern); (negative example)
            r"גְּמָלִים",
            [
                "gimel",
                "dagesh-qal",
                "sheva-na",
                "mem",
                "qamats-gadol",
                "lamed",
                "hiriq-male-yod",
                "eim-qria-yod",
                "mem-sofit",
            ],
            False,
        ),
        (
            r"מְנִיָּה",  # me-ni-ya
            [
                "mem",
                "sheva-na",
                "nun",
                "hiriq",
                "yod",
                "dagesh-hazaq",
                "qamats-gadol",
                "eim-qria-he",
            ],
            True,
        ),
        (
            # be-ni-ya (traditional); bni-ya (modern); (negative example)
            r"בְּנִיָּה",
            [
                "bet",
                "dagesh-qal",
                "sheva-na",
                "nun",
                "hiriq",
                "yod",
                "dagesh-hazaq",
                "qamats-gadol",
                "eim-qria-he",
            ],
            False,
        ),
    )

    for text, expected_parts, should_fire in scenarios:
        result = Parser().parse(text)
        assert expected_parts == result.flat()
        if should_fire:
            assert RULE_NAME in result.rules.flat()
        else:
            assert RULE_NAME not in result.rules.flat()