def test_sheva_nah_before_bgdkft_with_dagesh():
    """A `sheva` preceding a BGDKFT letter that carries a `dagesh` is `sheva-nah`."""
    cases = [
        (
            r"הָפְכִּי",  # hof-khi
            [
                "he",
                "qamats-qatan",
                "fe",
                "sheva-nah",
                "kaf",
                "dagesh-qal",
                "hiriq-male-yod",
                "eim-qria-yod",
            ],
        ),
        (
            r"קָרְבָּן",  # kor-ban
            [
                "qof",
                "qamats-qatan",
                "resh",
                "sheva-nah",
                "bet",
                "dagesh-qal",
                "qamats",  # TODO: should be qamats-gadol
                "nun-sofit",
            ],
        ),
    ]
    # Each case gets a fresh parser so no state carries over between words.
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_bgdkft_not_after_vowel():
    """A `dagesh` in a BGDKFT letter that does NOT follow a vowel is `dagesh-qal`.

    Rule name: dagesh-qal-bgdkf.
    """
    cases = [
        (
            r"בָּרָא",  # ba-ra
            [
                "bet",
                "dagesh-qal",
                "qamats-gadol",
                "resh",
                "qamats-gadol",
                "eim-qria-alef",
            ],
        ),
        (
            "דָּבָר",  # da-var
            ["dalet", "dagesh-qal", "qamats-gadol", "vet", "qamats", "resh"],
        ),
        (
            "פֶּה",  # peh
            ["pe", "dagesh-qal", "segol", "eim-qria-he"],
        ),
        (
            "מִדְבָּר",  # mid-bar
            [
                "mem",
                "hiriq",
                "dalet",
                "sheva-nah",
                "bet",
                "dagesh-qal",
                "qamats",
                "resh",
            ],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_qamats_qatan_unstresssed_closed():
    """A `qamats` in a closed, unstressed syllable parses as `qamats-qatan`."""
    cases = [
        (
            r"וַיָּ֥קָם",
            [
                "vav",
                "patah",
                "yod",
                "dagesh-hazaq",
                "qamats-gadol",
                "qof",
                "qamats-qatan",
                "mem-sofit",
            ],
        ),
        (
            r"רָחְבָּהּ",
            [
                "resh",
                "qamats-qatan",
                "het",
                "sheva-nah",
                "bet",
                "dagesh-qal",
                "qamats-gadol",
                "mapiq-he",
                "mapiq",
            ],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_sheva_na_ending_sah():
    """A `sheva` before `sav+qamats`, `he` is `sheva-na`.

    Rule name: sheva-na-ending-sah.
    """
    cases = [
        (
            r"פָשְׂתָה",  # fa-se-sah (Leviticus 13:28)
            [
                "fe",
                "qamats-gadol",
                "sin",
                "sheva-na",
                "sav",
                "qamats-gadol",
                "eim-qria-he",
            ],
        ),
        # non-doubled sound
        (
            r"שָׁבְתָה",  # sha-ve-sa (Leviticus 26:35)
            [
                "shin",
                "qamats-gadol",
                "vet",
                "sheva-na",
                "sav",
                "qamats-gadol",
                "eim-qria-he",
            ],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_sheva_nah_ending_iym_iys():
    """A `sheva` before `hiriq`, `yod`, `mem-sofit|sav` is `sheva-nah`.

    Rule name: sheva-nah-ending-iy-m|s.
    """
    cases = [
        # mem-sofit
        (
            "בָּטְנִים",  # bot-nim (Genesis 43:11)
            [
                "bet",
                "dagesh-qal",
                "qamats-qatan",
                "tet",
                "sheva-nah",
                "nun",
                "hiriq-male-yod",
                "eim-qria-yod",
                "mem-sofit",
            ],
        ),
        # sav
        (
            r"גָּפְרִית",  # gof-ris (Deuteronomy 29:22)
            [
                "gimel",
                "dagesh-qal",
                "qamats-qatan",
                "fe",
                "sheva-nah",
                "resh",
                "hiriq-male-yod",
                "eim-qria-yod",
                "sav",
            ],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_qamats_qatan_next_accent():
    """A `qamats` in a closed syllable carrying a non-first accent is `qamats-qatan`."""
    cases = [
        (
            r"וַיָּ֩שָׁב֩",  # va-ya-shov (Genesis 33:16)
            [
                "vav",
                "patah",
                "yod",
                "dagesh-hazaq",
                "qamats-gadol",
                "shin",
                "qamats-qatan",
                "vet",
            ],
        ),
        (
            r"וַיָּ֨רָץ֙",  # va-ya-rotz (Numbers 17:12)
            [
                "vav",
                "patah",
                "yod",
                "dagesh-hazaq",
                "qamats-gadol",
                "resh",
                "qamats-qatan",
                "tsadi-sofit",
            ],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_vav_dagesh_hazaq():
    """A `vav` with `dagesh` that follows or carries a vowel is `vav`, `dagesh-hazaq`.

    Rule name: dagesh-hazaq-default.
    """
    cases = [
        (
            r"חַוָּה",  # cha-vah
            ["het", "patah", "vav", "dagesh-hazaq", "qamats-gadol", "eim-qria-he"],
        ),
        (
            r"וְיִשְׁתַּחֲוּוּ",  # ve-yish-ta-cha-vu
            [
                "vav",
                "sheva-na",
                "yod",
                "hiriq",
                "shin",
                "sheva-nah",
                "tav",
                "dagesh-qal",
                "patah",
                "het",
                "hataf-patah",
                "vav",
                "dagesh-hazaq",
                "shuruq",
            ],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_break_before_vowel():
    """Syllabification breaks before a vowel (syllable-before-vowel)."""
    text = r"בָּרָא"  # ba-ra
    expected = [
        ["bet", "dagesh-qal", "qamats-gadol"],
        ["resh", "qamats-gadol", "eim-qria-alef"],
    ]
    parser = Parser()
    # The same parser instance both parses the word and syllabifies the result.
    syllables = parser.syllabify(parser.parse(text))
    assert expected == syllables
def test_patah_genuvah_het_ayin():
    """`patah` on a final `het|ayin` is `patah-genuvah` (patah-genuvah).

    This function used to be named `test_patah_genuvah`, the same name as a
    later test in this module. The later definition rebinds the name at import
    time, so pytest never collected this one and its assertions never ran.
    It now has a unique name; `test_patah_genuvah` still refers to the later
    definition.
    """
    cases = [
        (
            r"נֹחַ",  # no-ah
            ["nun", "holam-haser", "het", "patah-genuvah"],
        ),
        (
            r"הָרֵעַ",  # ha-rei-a
            ["he", "qamats-gadol", "resh", "tsere", "ayin", "patah-genuvah"],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_vav_is_holam_male():
    """`vav`, `holam` NOT after a vowel or sheva is `holam-male`.

    Rule name: eim-qria-vav-is-holam-male.
    """
    cases = [
        (r"אוֹר", ["alef", "holam-male-vav", "resh"]),  # or
        (r"בּוֹא", ["bet", "dagesh-qal", "holam-male-vav", "alef"]),  # bo
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_vav_holam_after_vowel():
    """`vav` with `holam_haser` after a vowel or sheva is `vav`, `holam-haser`.

    Rule name: !eim-qria-vav-is-holam-male.
    """
    cases = [
        (
            r"עֲוֺן",  # a-von
            ["ayin", "hataf-patah", "vav", "holam-haser", "nun-sofit"],
        ),
        (
            r"מִצְוֺת",  # mits-voth
            ["mem", "hiriq", "tsadi", "sheva-nah", "vav", "holam-haser", "sav"],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_glide_uy():
    """A bare `yod` following `shuruq` is `yod-glide` (glide-uy)."""
    cases = [
        (
            r"צִפּוּי",  # tsi-puy (Numbers 17:3)
            ["tsadi", "hiriq", "pe", "dagesh-hazaq", "shuruq", "yod-glide"],
        ),
        # words that start with shuruq don't count
        (
            r"וּמִי",  # u-mi (Deuteronomy 4:8)
            ["shuruq", "mem", "hiriq-male-yod", "eim-qria-yod"],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_no_break_sheva_nah():
    """Check the syllable grouping of a word containing a `sheva-nah`."""
    text = r"יִשְׁרְצוּ"
    expected = [
        ["yod", "hiriq", "shin", "sheva-nah"],
        ["resh", "sheva-na"],
        ["tsadi", "shuruq"],
    ]
    parser = Parser()
    syllables = parser.syllabify(parser.parse(text))
    assert expected == syllables
def test_sheva_na_double_letter():
    """A `sheva` before a repeat of the same letter is `sheva-na`.

    Rule name: sheva-na-double-letter.
    """
    cases = [
        (
            r"הַלְלוּ",  # ha-le-lu
            ["he", "patah", "lamed", "sheva-na", "lamed", "shuruq"],
        ),
        (
            r"הִנְנִי",  # hi-ne-ni (has vowel under second letter)
            [
                "he",
                "hiriq",
                "nun",
                "sheva-na",
                "nun",
                "hiriq-male-yod",
                "eim-qria-yod",
            ],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_sheva_nah_qamats_letter_end():
    """A `sheva` before `qamats`, (`mapiq-he|mem-sofit|nun-sofit`) is `sheva-nah`.

    Rule name: sheva-nah-ending-qamats-letter.
    """
    cases = [
        # qamats-mapiq-he
        (
            r"לְעָבְדָהּ",  # le-ov-dah (Genesis 2:15)
            [
                "lamed",
                "sheva-na",
                "ayin",
                "qamats-qatan",
                "vet",
                "sheva-nah",
                "dalet",
                "qamats-gadol",
                "mapiq-he",
                "mapiq",
            ],
        ),
        # qamats-mem-sofit
        (
            r"אָכְלָם",  # okh-lam (Genesis 14:11)
            [
                "alef",
                "qamats-qatan",
                "khaf",
                "sheva-nah",
                "lamed",
                "qamats-gadol",
                "mem-sofit",
            ],
        ),
        # qamats-nun-sofit
        (
            r"יָקְטָן",  # yok-tan (Genesis 10:25)
            [
                "yod",
                "qamats-qatan",
                "qof",
                "sheva-nah",
                "tet",
                "qamats-gadol",
                "nun-sofit",
            ],
        ),
        # segol-mapiq-he
        # no example
        # segol-mem-sofit
        (
            r"חָקְכֶם",
            ["het", "qamats-qatan", "qof", "sheva-nah", "khaf", "segol", "mem-sofit"],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_mitzvot_matzot():
    """Several spellings of *mitzvot* parse the same way; *matzot* parses differently."""
    # NOTE(review): the mitzvot literals look alike when rendered; going by the
    # variable names they presumably differ only in code points (e.g. a
    # zero-width non-joiner or a precomposed character). Keep them byte-exact.
    zwnj = r"מִצְוֹת"  # mitz-voth
    haser_for_vav = r"מִצְוֺת"  # mitz-voth
    precomposed = r"מִצְוֹת"  # mitz-voth
    matzot = r"מַצּוֹת"  # ma-tzot

    mitzvot_parts = [
        "mem",
        "hiriq",
        "tsadi",
        "sheva-nah",
        "vav",
        "holam-haser",
        "sav",
    ]
    for spelling in (zwnj, haser_for_vav, precomposed):
        assert mitzvot_parts == Parser().parse(spelling).flat()

    matzot_parts = [
        "mem",
        "patah",
        "tsadi",
        "dagesh-hazaq",
        "holam-male-vav",
        "sav",
    ]
    assert matzot_parts == Parser().parse(matzot).flat()
def test_sheva_na_khaf_sofit_qamats():
    """A `sheva` on `lamed|shin|sav` before `khaf-sofit+qamats-gadol` is `sheva-na`.

    Rule name: sheva-na-ending-l|sh|s-kha.
    """
    cases = [
        # lamed
        (
            r"יִשְׁאָלְךָ",  # yish-ale-kha (Deuteronomy 6:20)
            [
                "yod",
                "hiriq",
                "shin",
                "sheva-nah",
                "alef",
                "qamats-gadol",
                "lamed",
                "sheva-na",
                "khaf-sofit",
                "qamats-gadol",
            ],
        ),
        # shin
        (
            r"יִירָשְׁךָ",  # yi-ra-she-kha (Genesis 15:4)
            [
                "yod",
                "hiriq-male-yod",
                "eim-qria-yod",
                "resh",
                "qamats-gadol",
                "shin",
                "sheva-na",
                "khaf-sofit",
                "qamats-gadol",
            ],
        ),
        # sav
        (
            r"בְּכֹרָתְךָ",  # be-kho-ra-te-kha (Genesis 25:31)
            [
                "bet",
                "dagesh-qal",
                "sheva-na",
                "khaf",
                "holam-haser",
                "resh",
                "qamats-gadol",
                "sav",
                "sheva-na",
                "khaf-sofit",
                "qamats-gadol",
            ],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_sheva_double_end():
    """Two `sheva` at the end of a word are both `sheva-nah` (sheva-double-end)."""
    text = r"אַנְתְּ"  # ahnt
    expected = [
        "alef",
        "patah",
        "nun",
        "sheva-nah",
        "tav",
        "dagesh-qal",
        "sheva-nah",
    ]
    actual = Parser().parse(text).flat()
    assert expected == actual
def test_patah_genuvah():
    """A `patah` on a final `het|ayin|mapiq-he` is `patah-genuvah`."""
    cases = [
        # het
        (
            r"נֹחַ",  # no-akh (Genesis 5:29)
            ["nun", "holam-haser", "het", "patah-genuvah"],
        ),
        # ayin
        (
            r"רֹעַ",  # ro-a (Deuteronomy 28:20)
            ["resh", "holam-haser", "ayin", "patah-genuvah"],
        ),
        # mapiq-he
        (
            r"נֹהַּ",  # no-ah (Ezekiel 7:11)
            ["nun", "holam-haser", "mapiq-he", "mapiq", "patah-genuvah"],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_enabled_rules():
    """Parsing with explicit `enabled`/`disabled` rule lists yields a plain `holam`."""
    text = r"נֹחַ"  # no-ah
    expected = ["nun", "holam", "het", "patah-genuvah"]
    parser = Parser(
        enabled=["vowel-patah-genuvah"],
        disabled=["vowel-holam-haser-default"],
    )
    assert expected == parser.parse(text).flat()
def test_sheva_merahef():
    """A `sheva` before a BGDKFT letter without `dagesh` is `sheva-merahef`."""
    text = r"מַלְכֵי"
    expected = [
        "mem",
        "patah",
        "lamed",
        "sheva-merahef",
        "khaf",
        "tsere",
        "eim-qria-yod",
    ]
    actual = Parser().parse(text).flat()
    assert expected == actual
def test_double_sheva_middle():
    """Two mid-word `sheva` are `sheva-nah` then `sheva-na` (sheva-double-middle)."""
    text = r"יִמְשְׁלוּ"  # yim-she-lu
    expected = [
        "yod",
        "hiriq",
        "mem",
        "sheva-nah",
        "shin",
        "sheva-na",
        "lamed",
        "shuruq",
    ]
    actual = Parser().parse(text).flat()
    assert expected == actual
def test_sheva_nah_after_accent():
    """A `sheva` following an accent is `sheva-nah` (sheva-nah-after-accent)."""
    text = r"קֵ֑דְמָה"  # keid-ma (Leviticus 16:14)
    expected = [
        "qof",
        "tsere",
        "dalet",
        "sheva-nah",
        "mem",
        "qamats-gadol",
        "eim-qria-he",
    ]
    actual = Parser().parse(text).flat()
    assert expected == actual
def test_sheva_nah_after_initial_vav_with_patah():
    """A `sheva` after an initial `vav` with `patah` is `sheva-nah`.

    Rule name: sheva-nah-after-short-vowel.
    """
    text = r"וַיְהִי"  # vay-he (feels weird, but Simanim confirms)
    expected = [
        "vav",
        "patah",
        "yod",
        "sheva-nah",
        "he",
        "hiriq-male-yod",
        "eim-qria-yod",
    ]
    actual = Parser().parse(text).flat()
    assert expected == actual
def test_sheva_na_under_dagesh_hazaq():
    """A `sheva` under a `dagesh-hazaq` is `sheva-na` (sheva-na-dagesh-hazaq)."""
    text = r"הַבְּאֵר"  # ha-be-eir
    expected = [
        "he",
        "patah",
        "bet",
        "dagesh-hazaq",
        "sheva-na",
        "alef",
        "tsere",
        "resh",
    ]
    actual = Parser().parse(text).flat()
    assert expected == actual
def test_sheva_na_after_holam_alef():
    """`holam` + `alef` counts as a long vowel (sheva-na-after-long-vowel)."""
    text = r"יֹאמְרוּ"  # yo-me-ru (Exodus 4:1)
    expected = [
        "yod",
        "holam-haser",
        "eim-qria-alef",
        "mem",
        "sheva-na",
        "resh",
        "shuruq",
    ]
    actual = Parser().parse(text).flat()
    assert expected == actual
def test_strict_no_break_after_hataf():
    """(strict) No syllable break after a hataf-vowel (syl-none-after-hataf)."""
    text = "אֲשֶׁר"

    # Default mode: the hataf-vowel closes its own syllable.
    default_parser = Parser()
    split_syllables = [["alef", "hataf-patah"], ["shin", "segol", "resh"]]
    assert split_syllables == default_parser.syllabify(default_parser.parse(text))

    # Strict mode: the whole word stays together as one syllable.
    strict_parser = Parser()
    joined_syllables = [["alef", "hataf-patah", "shin", "segol", "resh"]]
    assert joined_syllables == strict_parser.syllabify(
        strict_parser.parse(text), strict=True
    )
def test_simple_syllables():
    """Single-syllable words: one closed and one open."""
    cases = [
        (r"מַת", [["mem", "patah", "sav"]], "simple closed syllable"),
        (
            r"מִי",
            [["mem", "hiriq-male-yod", "eim-qria-yod"]],
            "simple open syllable",
        ),
    ]
    for text, expected, label in cases:
        parser = Parser()  # fresh parser for every word
        assert expected == parser.syllabify(parser.parse(text)), label
def test_eim_qria_yod():
    """A bare `yod` after `hiriq|tsere|segol` is `eim-qria-yod` (eim-qria-yod)."""
    # NOTE: `yod` after `hiriq` is already `hiriq-male`
    cases = [
        (
            r"אֵין",  # ein (tsere)
            ["alef", "tsere", "eim-qria-yod", "nun-sofit"],
        ),
        (
            r"אֵלֶיךָ",  # ei-lecha (segol)
            [
                "alef",
                "tsere",
                "lamed",
                "segol",
                "eim-qria-yod",
                "khaf-sofit",
                "qamats-gadol",
            ],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()
def test_eim_qria_alef():
    """A bare `alef` after `qamats|patah|segol|tsere|holam|shuruq` is `eim-qria-alef`.

    Rule name: eim-qria-alef.
    """
    cases = [
        (
            r"נָא",  # na (qamats)
            ["nun", "qamats-gadol", "eim-qria-alef"],
        ),
        (
            r"חַטַּאת",  # ha-tat (patah)
            [
                "het",
                "patah",
                "tet",
                "dagesh-hazaq",
                "patah",
                "eim-qria-alef",
                "sav",
            ],
        ),
        (
            r"יֵרֶא",  # ya-re (segol)
            ["yod", "tsere", "resh", "segol", "eim-qria-alef"],
        ),
        (
            r"צֵא",  # tsei (tsere)
            ["tsadi", "tsere", "eim-qria-alef"],
        ),
        (
            r"בֹּא",  # bo (holam)
            ["bet", "dagesh-qal", "holam-haser", "eim-qria-alef"],
        ),
        (
            r"הוּא",  # hu (shuruq)
            ["he", "shuruq", "eim-qria-alef"],
        ),
    ]
    for text, expected in cases:
        assert expected == Parser().parse(text).flat()