def test_akkadian_syllabifier(self):
    """Check that the Akkadian syllabifier splits "epištašu" as expected."""
    expected = ['e', 'piš', 'ta', 'šu']
    # Syllabify the sample word with a freshly built syllabifier.
    actual = AkkadianSyllabifier().syllabify("epištašu")
    self.assertEqual(actual, expected)
class BoundForm(object):
    """
    Compute the bound form of a noun, suitable for adding suffixed pronouns.
    """

    # Stems whose bound form is listed case by case instead of being derived
    # from the shape of the base. They are looked up before the shape rules,
    # because those rules would otherwise return a generic result first.
    _LEXICAL_BOUND_FORMS = {
        'nakr': 'naker',   # nakrum > naker
        'ab': 'abi',       # abum
        'aḫ': 'aḫi',       # aḫum
        'qīšt': 'qīšti',   # qīštum
    }

    def __init__(self):
        """Create the syllabifier, stemmer and CV-pattern helpers."""
        self.syllabifier = Syllabifier()
        self.stemmer = Stemmer()
        self.cv_patterner = CVPattern()

    def get_bound_form(self, noun, gender):
        """
        Return the bound form of a noun, given its gender.

        The rules follow the section numbering of Huehnergard Appendix 6.C
        referenced in the comments below.

        :param noun: the noun; it is syllabified and stemmed by the helpers
        :param gender: gender of the noun, passed on to the stemmer
        :return: the bound form as a string, or None when no rule applies
        """
        syllables = self.syllabifier.syllabify(noun)
        stem = self.stemmer.get_stem(noun, gender)
        cv_pattern = self.cv_patterner.get_cv_pattern(stem)
        syllable_count = len(syllables)

        # Case-by-case stems win over the shape-based rules.
        if stem in self._LEXICAL_BOUND_FORMS:
            return self._LEXICAL_BOUND_FORMS[stem]

        # Every rule below inspects the last two segments of the pattern.
        if len(cv_pattern) < 2:
            return None
        penult, last = cv_pattern[-2], cv_pattern[-1]

        # Appendix 6.C.1: base in -VC
        if penult[0] == 'V' and last[0] == 'C':
            # awīlum > awīl, bēlum > bēl
            if syllable_count > 1:
                return stem

        # Appendix 6.C.2: base in -C₁C₁
        if last[:2] == penult[:2]:
            # a. 1-syllable
            if syllable_count == 2:
                return stem + 'i'
            if syllable_count > 2:
                # b. 2-syllable, -tt
                if last[2] + penult[2] == 'tt':
                    return stem + 'i'
                # c. 2-syllable, other
                return stem[:-1]

        # Appendix 6.C.3: base in -C₁C₂, i.e. pVrs.
        # The second character of the stem is repeated before the final
        # consonant.
        if last[0] == penult[0] and last[1] != penult[1]:
            return stem[:-1] + stem[1] + stem[-1]

        # Appendix 6.C.4: base in -Ct (fem.)
        # NOTE(review): 6.C.3 above does not exclude C₂ = t, so it appears to
        # take precedence over this branch for most stems -- confirm the
        # intended order. Fem. Ptcpl. (māḫirtum -> māḫirat) is not handled.
        if last[2] == 't' and penult[0] == 'C':
            if syllable_count > 2:
                return stem + 'i'
            if syllable_count > 1:
                # These are case by case
                if stem in ['mārt']:
                    return stem[:-1] + stem[1] + stem[-1]

        # No rule matched.
        return None
def find_stress(self, word):
    """
    Find the stressed syllable in a word.

    The general logic follows Huehnergard 3rd edition (pgs. 3-4).

    Syllable weights:
    (a) Light: ending in a short vowel: e.g., -a, -ba
    (b) Heavy: ending in a long vowel marked with a macron, or in a
    short vowel plus a consonant: e.g., -ā, -bā, -ak, -bak
    (c) Ultraheavy: ending in a long vowel marked with a circumflex,
    in any long vowel plus a consonant: e.g., -â, -bâ, -āk, -bāk, -âk, -bâk.

    Stress rules:
    (a) If the last syllable is ultraheavy, it bears the stress.
    (b) Otherwise, stress falls on the last non-final heavy or ultraheavy
    syllable.
    (c) Words that contain no non-final heavy or ultraheavy syllables have
    the stress fall on the first syllable.

    Syllables that match none of the weight patterns above are kept in the
    output and treated as light, so the result always spells out the whole
    input.

    :param word: a string (or list of syllables) in Akkadian
    :return: a list of syllables with stressed syllable surrounded by "[]";
        an empty list when there are no syllables
    """
    if isinstance(word, str):
        # Only a string needs syllabifying; a syllable list is used as given.
        word = Syllabifier().syllabify(word)

    syllables = list(word)
    if not syllables:
        # Nothing to mark; rule (c) would otherwise index an empty list.
        return []

    # Classify every syllable by weight. The checks run from heaviest to
    # lightest, and "Light" doubles as the fallback for unclassifiable input.
    weights = []
    for syllable in syllables:
        size = len(syllable)
        weight = "Light"
        if size == 1:
            # -â is ultraheavy, -ā is heavy, -a is light.
            if self._is_circumflex_vowel(syllable):
                weight = "Ultraheavy"
            elif self._is_macron_vowel(syllable):
                weight = "Heavy"
        elif size == 2:
            first, second = syllable[0], syllable[1]
            if self._is_consonant(first) \
                    and self._is_circumflex_vowel(second):
                # -bâ
                weight = "Ultraheavy"
            elif (self._is_macron_vowel(first)
                    or self._is_circumflex_vowel(first)) \
                    and self._is_consonant(second):
                # -āk, -âk
                weight = "Ultraheavy"
            elif self._is_consonant(first) \
                    and self._is_macron_vowel(second):
                # -bā
                weight = "Heavy"
            elif self._is_short_vowel(first) \
                    and self._is_consonant(second):
                # -ak
                weight = "Heavy"
        elif size == 3:
            nucleus = syllable[1]
            if self._is_macron_vowel(nucleus) \
                    or self._is_circumflex_vowel(nucleus):
                # -bāk, -bâk
                weight = "Ultraheavy"
            elif self._is_short_vowel(nucleus):
                # -bak
                weight = "Heavy"
        weights.append(weight)

    final = len(syllables) - 1
    # Rule (c) is the default: stress the first syllable.
    stressed = 0
    if weights[final] == "Ultraheavy":
        # Rule (a)
        stressed = final
    else:
        # Rule (b): walk backwards over the non-final syllables.
        for position in range(final - 1, -1, -1):
            if weights[position] in ("Heavy", "Ultraheavy"):
                stressed = position
                break

    return [
        "[{}]".format(syllable) if position == stressed else syllable
        for position, syllable in enumerate(syllables)
    ]
def __init__(self):
    """Create the syllabifier, stemmer and CV-pattern helpers."""
    # One helper object per task, stored on the instance for later use.
    self.syllabifier = Syllabifier()
    self.stemmer = Stemmer()
    self.cv_patterner = CVPattern()
def find_stress(self, word):
    """
    Find the stressed syllable in a word.

    The general logic follows Huehnergard 3rd edition (pgs. 3-4).

    Syllable weights:
    (a) Light: ending in a short vowel: e.g., -a, -ba
    (b) Heavy: ending in a long vowel marked with a macron, or in a
    short vowel plus a consonant: e.g., -ā, -bā, -ak, -bak
    (c) Ultraheavy: ending in a long vowel marked with a circumflex,
    in any long vowel plus a consonant: e.g., -â, -bâ, -āk, -bāk, -âk, -bâk.

    Stress rules:
    (a) If the last syllable is ultraheavy, it bears the stress.
    (b) Otherwise, stress falls on the last non-final heavy or ultraheavy
    syllable.
    (c) Words that contain no non-final heavy or ultraheavy syllables have
    the stress fall on the first syllable.

    Syllables that match none of the weight patterns above are kept in the
    output and treated as light, so the result always spells out the whole
    input.

    :param word: a string (or list of syllables) in Akkadian
    :return: a list of syllables with stressed syllable surrounded by "[]";
        an empty list when there are no syllables
    """
    if isinstance(word, str):
        # Only a string needs syllabifying; a syllable list is used as given.
        word = Syllabifier().syllabify(word)

    syllables = list(word)
    if not syllables:
        # Nothing to mark; rule (c) would otherwise index an empty list.
        return []

    # Classify every syllable by weight. The checks run from heaviest to
    # lightest, and "Light" doubles as the fallback for unclassifiable input.
    weights = []
    for syllable in syllables:
        size = len(syllable)
        weight = "Light"
        if size == 1:
            # -â is ultraheavy, -ā is heavy, -a is light.
            if self._is_circumflex_vowel(syllable):
                weight = "Ultraheavy"
            elif self._is_macron_vowel(syllable):
                weight = "Heavy"
        elif size == 2:
            first, second = syllable[0], syllable[1]
            if self._is_consonant(first) \
                    and self._is_circumflex_vowel(second):
                # -bâ
                weight = "Ultraheavy"
            elif (self._is_macron_vowel(first)
                    or self._is_circumflex_vowel(first)) \
                    and self._is_consonant(second):
                # -āk, -âk
                weight = "Ultraheavy"
            elif self._is_consonant(first) \
                    and self._is_macron_vowel(second):
                # -bā
                weight = "Heavy"
            elif self._is_short_vowel(first) \
                    and self._is_consonant(second):
                # -ak
                weight = "Heavy"
        elif size == 3:
            nucleus = syllable[1]
            if self._is_macron_vowel(nucleus) \
                    or self._is_circumflex_vowel(nucleus):
                # -bāk, -bâk
                weight = "Ultraheavy"
            elif self._is_short_vowel(nucleus):
                # -bak
                weight = "Heavy"
        weights.append(weight)

    final = len(syllables) - 1
    # Rule (c) is the default: stress the first syllable.
    stressed = 0
    if weights[final] == "Ultraheavy":
        # Rule (a)
        stressed = final
    else:
        # Rule (b): walk backwards over the non-final syllables.
        for position in range(final - 1, -1, -1):
            if weights[position] in ("Heavy", "Ultraheavy"):
                stressed = position
                break

    return [
        "[{}]".format(syllable) if position == stressed else syllable
        for position, syllable in enumerate(syllables)
    ]