예제 #1
0
파일: test_stem.py 프로젝트: vierth/cltk
 def test_akkadian_syllabifier(self):
     """Check that the Akkadian syllabifier splits "epištašu" as expected."""
     expected = ['e', 'piš', 'ta', 'šu']
     # Syllabify a single sample word and compare against the known split.
     actual = AkkadianSyllabifier().syllabify("epištašu")
     self.assertEqual(actual, expected)
예제 #2
0
 def test_akkadian_syllabifier(self):
     """The syllabifier must break "epištašu" into e / piš / ta / šu."""
     splitter = AkkadianSyllabifier()
     result = splitter.syllabify("epištašu")
     # First argument is the produced value, second the expected one.
     self.assertEqual(result, ['e', 'piš', 'ta', 'šu'])
예제 #3
0
class BoundForm(object):
    """
    Return the bound form of a noun, suitable for adding suffixed pronouns.
    """

    def __init__(self):
        # Helpers used by get_bound_form: syllabify the noun, strip it down
        # to its stem, and describe the stem as a consonant/vowel pattern.
        self.syllabifier = Syllabifier()
        self.stemmer = Stemmer()
        self.cv_patterner = CVPattern()

    def get_bound_form(self, noun, gender):
        """
        Return bound form of noun, given its gender.

        The rules follow the sections of Huehnergard Appendix 6.C named in
        the comments below. Each CV-pattern entry is indexed as
        ``entry[0]`` = 'C' or 'V', ``entry[1]`` = an index that is equal for
        a repeated consonant, ``entry[2]`` = the letter itself.

        :param noun: the noun; it is syllabified and stemmed as given
        :param gender: gender of the noun, passed on to the stemmer
        :return: the bound form as a string, or None when no rule applies
        """
        syllables = self.syllabifier.syllabify(noun)
        stem = self.stemmer.get_stem(noun, gender)
        cv_pattern = self.cv_patterner.get_cv_pattern(stem)

        # Every rule inspects the last two pattern entries; without them no
        # rule can apply (and indexing [-2] would raise IndexError).
        if len(cv_pattern) < 2:
            return None

        syllable_count = len(syllables)
        last, penult = cv_pattern[-1], cv_pattern[-2]

        # Based on Huehnergard Appendix 6.C.1: base in -VC
        if (penult[0], last[0]) == ('V', 'C') or stem == 'nakr':
            # Lexical exceptions are tested before the syllable-count rules:
            # nakrum and abum/aḫum have only two syllables, so the generic
            # two-syllable return below would otherwise always win and these
            # special forms could never be produced.
            # nakrum > naker
            if stem == 'nakr':
                return 'naker'
            # c. abum, aḫum
            if stem in ('ab', 'aḫ'):
                return stem + 'i'
            # a. 2-syllable: awīlum > awīl
            if syllable_count > 2:
                return stem
            # b. 1-syllable: bēlum > bēl
            if syllable_count > 1:
                return stem

        # Appendix 6.C.2: base in -C₁C₁
        if last[:2] == penult[:2]:
            # a. 1-syllable
            if 3 > syllable_count > 1:
                return stem + 'i'
            # b. 2-syllable, -tt
            if syllable_count > 2 and last[2] + penult[2] == 'tt':
                return stem + 'i'
            # c. 2-syllable, other
            if syllable_count > 2:
                return stem[:-1]

        # Appendix 6.C.3: base in -C₁C₂, C₂ ≠ t, i.e. pVrs
        # NOTE(review): this test does not exclude a final t, so a -Ct base
        # with two different consonants appears to return here and never
        # reach 6.C.4 below. Left unchanged; confirm the intended behavior.
        if last[0] == penult[0] and last[1] != penult[1]:
            # Repeat the stem's second character before the final consonant.
            return stem[:-1] + stem[1] + stem[-1]

        # Appendix 6.C.4: base in -Ct (fem.)
        if last[2] == 't' and penult[0] == 'C':
            if syllable_count > 2:
                return stem + 'i'
            # Need to deal with fem. Ptcpl. māḫirtum -> māḫirat
            if syllable_count > 1:
                # These are case by case
                if stem in ['qīšt']:
                    return stem + 'i'
                if stem in ['mārt']:
                    return stem[:-1] + stem[1] + stem[-1]

        # No rule matched.
        return None
예제 #4
0
    def find_stress(self, word):
        """
        Find the stressed syllable in a word.

        Syllable weight follows Huehnergard 3rd edition (pgs. 3-4):
        (a) Light: ending in a short vowel: e.g., -a, -ba
        (b) Heavy: ending in a long vowel marked with a macron, or in a
        short vowel plus a consonant: e.g., -ā, -bā, -ak, -bak
        (c) Ultraheavy: ending in a long vowel marked with a circumflex, or
        in any long vowel plus a consonant: e.g., -â, -bâ, -āk, -bāk, -âk, -bâk.

        Stress placement:
        (a) If the last syllable is ultraheavy, it bears the stress.
        (b) Otherwise, stress falls on the last non-final heavy or ultraheavy
        syllable.
        (c) Words that contain no non-final heavy or ultraheavy syllables have
        the stress fall on the first syllable.

        A syllable that fits none of the weight patterns is kept in the
        result and is never selected by rules (a) or (b).

        :param word: a string (or list of syllables) in Akkadian
        :return: a list of syllables with stressed syllable surrounded by
            "[]"; an empty list when there are no syllables
        """
        if isinstance(word, str):
            # Only a raw string needs syllabifying; a list is used as given.
            word = Syllabifier().syllabify(word)

        # Work on a copy so the caller's list is never modified.
        syllables = list(word)
        if not syllables:
            # Nothing to stress; rule (c) would otherwise index an empty list.
            return []

        def weigh(syllable):
            """Return "Ultraheavy", "Heavy" or "Light"; None if no shape fits."""
            size = len(syllable)
            if size == 1:
                # -â / -ā / -a
                if self._is_circumflex_vowel(syllable):
                    return "Ultraheavy"
                if self._is_macron_vowel(syllable):
                    return "Heavy"
                if self._is_short_vowel(syllable):
                    return "Light"
            elif size == 2:
                first, second = syllable[0], syllable[1]
                # -bâ
                if self._is_consonant(first) and self._is_circumflex_vowel(second):
                    return "Ultraheavy"
                # -āk, -âk
                if (self._is_macron_vowel(first) or self._is_circumflex_vowel(first)) \
                        and self._is_consonant(second):
                    return "Ultraheavy"
                # -bā
                if self._is_consonant(first) and self._is_macron_vowel(second):
                    return "Heavy"
                # -ak
                if self._is_short_vowel(first) and self._is_consonant(second):
                    return "Heavy"
                # -ba
                if self._is_consonant(first) and self._is_short_vowel(second):
                    return "Light"
            elif size == 3:
                # Only the middle character is inspected: -bāk, -bâk / -bak
                middle = syllable[1]
                if self._is_macron_vowel(middle) or self._is_circumflex_vowel(middle):
                    return "Ultraheavy"
                if self._is_short_vowel(middle):
                    return "Heavy"
            return None

        weights = [weigh(syllable) for syllable in syllables]

        # Rule (c) is the fallback: stress the first syllable.
        stressed = 0
        if weights[-1] == "Ultraheavy":
            # Rule (a)
            stressed = len(syllables) - 1
        else:
            # Rule (b): walk backwards over the non-final syllables.
            for position in range(len(syllables) - 2, -1, -1):
                if weights[position] in ("Ultraheavy", "Heavy"):
                    stressed = position
                    break

        syllables[stressed] = "[{}]".format(syllables[stressed])
        return syllables
예제 #5
0
 def __init__(self):
     """Create the syllabifier, stemmer and CV-pattern helpers kept on the instance."""
     # NOTE(review): Syllabifier, Stemmer and CVPattern are defined outside
     # this excerpt; what their constructors do cannot be confirmed from here.
     self.syllabifier = Syllabifier()
     self.stemmer = Stemmer()
     self.cv_patterner = CVPattern()
예제 #6
0
파일: stress.py 프로젝트: TylerKirby/cltk
    def find_stress(self, word):
        """
        Find the stressed syllable in a word.

        The general logic follows Huehnergard 3rd edition (pgs. 3-4).

        Syllable weights:
        (a) Light: ending in a short vowel: e.g., -a, -ba
        (b) Heavy: ending in a long vowel marked with a macron, or in a
        short vowel plus a consonant: e.g., -ā, -bā, -ak, -bak
        (c) Ultraheavy: ending in a long vowel marked with a circumflex, or
        in any long vowel plus a consonant: e.g., -â, -bâ, -āk, -bāk, -âk, -bâk.

        Stress rules:
        (a) If the last syllable is ultraheavy, it bears the stress.
        (b) Otherwise, stress falls on the last non-final heavy or ultraheavy
        syllable.
        (c) Words that contain no non-final heavy or ultraheavy syllables have
        the stress fall on the first syllable.

        Syllables whose shape matches no weight pattern stay in the returned
        list (unweighted) instead of being dropped.

        :param word: a string (or list of syllables) in Akkadian
        :return: a list of syllables with stressed syllable surrounded by
            "[]"; an empty list when there are no syllables
        """
        # A string is syllabified first; anything else is treated as an
        # already syllabified sequence.
        if isinstance(word, str):
            word = Syllabifier().syllabify(word)

        marked = list(word)
        if not marked:
            # Guard: rule (c) below needs at least one syllable.
            return []

        # Weight of each syllable, position-aligned with `marked`.
        # None means "no known shape".
        weights = []
        for syllable in marked:
            weight = None
            length = len(syllable)
            if length == 1:
                if self._is_circumflex_vowel(syllable):
                    weight = "Ultraheavy"      # -â
                elif self._is_macron_vowel(syllable):
                    weight = "Heavy"           # -ā
                elif self._is_short_vowel(syllable):
                    weight = "Light"           # -a
            elif length == 2:
                onset, coda = syllable[0], syllable[1]
                if self._is_consonant(onset) and self._is_circumflex_vowel(coda):
                    weight = "Ultraheavy"      # -bâ
                elif (self._is_macron_vowel(onset) or self._is_circumflex_vowel(onset)) \
                        and self._is_consonant(coda):
                    weight = "Ultraheavy"      # -āk, -âk
                elif self._is_consonant(onset) and self._is_macron_vowel(coda):
                    weight = "Heavy"           # -bā
                elif self._is_short_vowel(onset) and self._is_consonant(coda):
                    weight = "Heavy"           # -ak
                elif self._is_consonant(onset) and self._is_short_vowel(coda):
                    weight = "Light"           # -ba
            elif length == 3:
                # Three-character syllables are judged by the middle character.
                nucleus = syllable[1]
                if self._is_macron_vowel(nucleus) or self._is_circumflex_vowel(nucleus):
                    weight = "Ultraheavy"      # -bāk, -bâk
                elif self._is_short_vowel(nucleus):
                    weight = "Heavy"           # -bak
            weights.append(weight)

        final = len(marked) - 1
        if weights[final] == "Ultraheavy":
            # Rule (a)
            target = final
        else:
            # Rule (b): nearest heavy/ultraheavy syllable before the final one;
            # rule (c): default to the first syllable when there is none.
            target = next(
                (index for index in range(final - 1, -1, -1)
                 if weights[index] in ("Ultraheavy", "Heavy")),
                0,
            )

        marked[target] = "[{}]".format(marked[target])
        return marked