예제 #1
0
 def _setup(self, word) -> list:
     """Prepare a single word for syllable processing.

     A one-character word is returned unchanged as a one-element list.
     Otherwise the first entry of ``self.constants.PREFIXES`` that the word
     starts with is split off; when the remainder still contains a vowel,
     prefix and remainder are processed independently and their results
     concatenated. In every other case the word is processed as a whole.

     :param word: the word to prepare.
     :return: the processed pieces with blank entries removed.
     """
     if len(word) == 1:
         return [word]

     # Only the first matching prefix is ever considered.
     matched = next(
         (candidate for candidate in self.constants.PREFIXES
          if word.startswith(candidate)),
         None)
     if matched is not None:
         head, tail = StringUtils.split_on(word, matched)
         if self._contains_vowels(tail):
             pieces = self._process(head) + self._process(tail)
             return StringUtils.remove_blank_spaces(pieces)
         # A vowel-less remainder (a word like "pror" can arise from elision)
         # is not split; fall through and process the whole word.

     return StringUtils.remove_blank_spaces(self._process(word))
예제 #2
0
    def _setup(self, word) -> list:
        """Prepare a single word for syllable processing.

        A one-character word is returned unchanged as a one-element list.
        Otherwise the first entry of ``self.constants.PREFIXES`` that the word
        starts with is split off; when the remainder still contains a vowel,
        prefix and remainder are processed independently and their results
        concatenated, else the word is processed as a whole. Words with no
        matching prefix are looked up in ``self.constants.UI_EXCEPTIONS`` and,
        if present, the stored value is returned as-is; otherwise the word is
        processed as a whole.

        :param word: the word to prepare.
        :return: the processed pieces with blank entries removed, or the
            stored ``UI_EXCEPTIONS`` value for the word.
        """
        if len(word) == 1:
            return [word]

        # Only the first matching prefix is ever considered.
        matched = next(
            (candidate for candidate in self.constants.PREFIXES
             if word.startswith(candidate)),
            None)
        if matched is not None:
            head, tail = StringUtils.split_on(word, matched)
            if self._contains_vowels(tail):
                pieces = self._process(head) + self._process(tail)
            else:
                # A vowel-less remainder (a word like "pror" can arise from
                # elision) is not split; the whole word is processed instead.
                pieces = self._process(word)
            # A prefixed word never reaches the exception lookup below.
            return StringUtils.remove_blank_spaces(pieces)

        exceptions = self.constants.UI_EXCEPTIONS
        if word in exceptions:
            return exceptions[word]

        return StringUtils.remove_blank_spaces(self._process(word))
예제 #3
0
    def get_syllable_count(self, syllables: list) -> int:
        """Count the syllable groups that would remain after elision.

        Callers often keep the original position and separation of syllables so
        that a line can be reconstituted and stresses applied to the original
        word positions. This method gives the syllable count for such a list
        without disturbing it: all work is done on a deep copy of the input.

        >>> syllabifier = Syllabifier()
        >>> print(syllabifier.get_syllable_count([
        ... 'Jām', 'tūm', 'c', 'au', 'sus', 'es', 'u', 'nus', 'I', 'ta', 'lo', 'rum']))
        11

        :param syllables: list of syllable strings.
        :return: number of entries left once solo consonants have been moved
            right and blank entries removed.
        """
        # Work on a private copy so the caller's list is left untouched.
        working = copy.deepcopy(syllables)
        solo_consonants = self._find_solo_consonant(working)
        merged = StringUtils.move_consonant_right(working, solo_consonants)
        remaining = StringUtils.remove_blank_spaces(merged)
        return len(remaining)
예제 #4
0
    def get_syllable_count(self, syllables: list) -> int:
        """Count the syllable groups that would remain after elision.

        Callers often keep the original position and separation of syllables so
        that a line can be reconstituted and stresses applied to the original
        word positions. This method gives the syllable count for such a list
        without disturbing it: all work is done on a deep copy of the input.

        >>> syllabifier = Syllabifier()
        >>> print(syllabifier.get_syllable_count([
        ... 'Jām', 'tūm', 'c', 'au', 'sus', 'es', 'u', 'nus', 'I', 'ta', 'lo', 'rum']))
        11

        :param syllables: list of syllable strings.
        :return: number of entries left once solo consonants have been moved
            right and blank entries removed.
        """
        # Work on a private copy so the caller's list is left untouched.
        working = copy.deepcopy(syllables)
        solo_consonants = self._find_solo_consonant(working)
        merged = StringUtils.move_consonant_right(working, solo_consonants)
        remaining = StringUtils.remove_blank_spaces(merged)
        return len(remaining)
예제 #5
0
    def syllabify(self, words: str) -> list:
        """Parse Latin text into a list of syllable strings.

        Punctuation is removed with ``self.remove_punct_map``. Before the words
        are split, ``qu`` and ``gu`` + vowel are temporarily rewritten as
        ``kw`` / ``gw`` (presumably so the ``u`` is not treated as a vowel of its
        own -- see the ``qui`` and ``lingua`` examples); the original spelling
        is restored in the returned syllables.

        If the text contains a character outside ``self.ACCEPTABLE_CHARS``, an
        error is logged and the whitespace-separated words are returned
        unsyllabified, in their original spelling.

        :param words: a string containing one latin word or many words separated by spaces.
        :return: list of string, each representing a syllable.

        >>> syllabifier = Syllabifier()
        >>> print(syllabifier.syllabify("fuit"))
        ['fu', 'it']
        >>> print(syllabifier.syllabify("libri"))
        ['li', 'bri']
        >>> print(syllabifier.syllabify("contra"))
        ['con', 'tra']
        >>> print(syllabifier.syllabify("iaculum"))
        ['ja', 'cu', 'lum']
        >>> print(syllabifier.syllabify("amo"))
        ['a', 'mo']
        >>> print(syllabifier.syllabify("bracchia"))
        ['brac', 'chi', 'a']
        >>> print(syllabifier.syllabify("deinde"))
        ['dein', 'de']
        >>> print(syllabifier.syllabify("certabant"))
        ['cer', 'ta', 'bant']
        >>> print(syllabifier.syllabify("aere"))
        ['ae', 're']
        >>> print(syllabifier.syllabify("adiungere"))
        ['ad', 'jun', 'ge', 're']
        >>> print(syllabifier.syllabify("mōns"))
        ['mōns']
        >>> print(syllabifier.syllabify("domus"))
        ['do', 'mus']
        >>> print(syllabifier.syllabify("lixa"))
        ['li', 'xa']
        >>> print(syllabifier.syllabify("asper"))
        ['as', 'per']
        >>> #  handle doubles
        >>> print(syllabifier.syllabify("siccus"))
        ['sic', 'cus']
        >>> # handle liquid + liquid
        >>> print(syllabifier.syllabify("almus"))
        ['al', 'mus']
        >>> # handle liquid + mute
        >>> print(syllabifier.syllabify("ambo"))
        ['am', 'bo']
        >>> print(syllabifier.syllabify("anguis"))
        ['an', 'guis']
        >>> print(syllabifier.syllabify("arbor"))
        ['ar', 'bor']
        >>> print(syllabifier.syllabify("pulcher"))
        ['pul', 'cher']
        >>> print(syllabifier.syllabify("ruptus"))
        ['ru', 'ptus']
        >>> print(syllabifier.syllabify("Bīthÿnus"))
        ['Bī', 'thÿ', 'nus']
        >>> print(syllabifier.syllabify("sanguen"))
        ['san', 'guen']
        >>> print(syllabifier.syllabify("unguentum"))
        ['un', 'guen', 'tum']
        >>> print(syllabifier.syllabify("lingua"))
        ['lin', 'gua']
        >>> print(syllabifier.syllabify("linguā"))
        ['lin', 'guā']
        >>> print(syllabifier.syllabify("languidus"))
        ['lan', 'gui', 'dus']

        >>> print(syllabifier.syllabify("suis"))
        ['su', 'is']
        >>> print(syllabifier.syllabify("habui"))
        ['ha', 'bu', 'i']
        >>> print(syllabifier.syllabify("habuit"))
        ['ha', 'bu', 'it']
        >>> print(syllabifier.syllabify("qui"))
        ['qui']
        >>> print(syllabifier.syllabify("quibus"))
        ['qui', 'bus']
        >>> print(syllabifier.syllabify("hui"))
        ['hui']
        >>> print(syllabifier.syllabify("cui"))
        ['cui']
        >>> print(syllabifier.syllabify("huic"))
        ['huic']
        """
        stripped = words.translate(self.remove_punct_map)

        # Encode "qu" and "gu" + vowel with a placeholder "w". Every
        # substitution is length-preserving and leaves spaces alone, so word
        # boundaries are the same in ``stripped`` and ``encoded``.
        encoded = stripped.replace("qu", "kw").replace("Qu", "Kw")
        for vowel in "aeiouāēīōū":
            for onset in "gG":
                encoded = encoded.replace(onset + "u" + vowel, onset + "w" + vowel)

        # Validation runs on the encoded text so that exactly the same inputs
        # are accepted as before; only what is reported/returned on failure
        # uses the caller's spelling instead of the internal placeholders.
        if any(char not in self.ACCEPTABLE_CHARS for char in encoded):
            LOG.error("Unsupported character found in %s ", stripped)
            return stripped.strip().split(" ")

        syllables: list = []
        for item in encoded.strip().split(" "):
            syllables.extend(self._setup(item))

        # Undo the placeholder encoding inside each syllable.
        restored = [
            syl.replace("kw", "qu").replace("Kw", "Qu")
               .replace("gw", "gu").replace("Gw", "Gu")
            for syl in syllables
        ]
        return StringUtils.remove_blank_spaces(restored)
예제 #6
0
    def syllabify(self, words: str) -> list:
        """Parse Latin text into a list of syllable strings.

        Punctuation is removed with ``self.remove_punct_map``. Before the words
        are split, ``qu`` and ``gu`` + vowel are temporarily rewritten as
        ``kw`` / ``gw`` (presumably so the ``u`` is not treated as a vowel of its
        own -- see the ``qui`` and ``lingua`` examples); the original spelling
        is restored in the returned syllables.

        If the text contains a character outside ``self.ACCEPTABLE_CHARS``, an
        error is logged and the whitespace-separated words are returned
        unsyllabified, in their original spelling.

        :param words: a string containing one latin word or many words separated by spaces.
        :return: list of string, each representing a syllable.

        >>> syllabifier = Syllabifier()
        >>> print(syllabifier.syllabify("fuit"))
        ['fu', 'it']
        >>> print(syllabifier.syllabify("libri"))
        ['li', 'bri']
        >>> print(syllabifier.syllabify("contra"))
        ['con', 'tra']
        >>> print(syllabifier.syllabify("iaculum"))
        ['ja', 'cu', 'lum']
        >>> print(syllabifier.syllabify("amo"))
        ['a', 'mo']
        >>> print(syllabifier.syllabify("bracchia"))
        ['brac', 'chi', 'a']
        >>> print(syllabifier.syllabify("deinde"))
        ['dein', 'de']
        >>> print(syllabifier.syllabify("certabant"))
        ['cer', 'ta', 'bant']
        >>> print(syllabifier.syllabify("aere"))
        ['ae', 're']
        >>> print(syllabifier.syllabify("adiungere"))
        ['ad', 'jun', 'ge', 're']
        >>> print(syllabifier.syllabify("mōns"))
        ['mōns']
        >>> print(syllabifier.syllabify("domus"))
        ['do', 'mus']
        >>> print(syllabifier.syllabify("lixa"))
        ['li', 'xa']
        >>> print(syllabifier.syllabify("asper"))
        ['as', 'per']
        >>> #  handle doubles
        >>> print(syllabifier.syllabify("siccus"))
        ['sic', 'cus']
        >>> # handle liquid + liquid
        >>> print(syllabifier.syllabify("almus"))
        ['al', 'mus']
        >>> # handle liquid + mute
        >>> print(syllabifier.syllabify("ambo"))
        ['am', 'bo']
        >>> print(syllabifier.syllabify("anguis"))
        ['an', 'guis']
        >>> print(syllabifier.syllabify("arbor"))
        ['ar', 'bor']
        >>> print(syllabifier.syllabify("pulcher"))
        ['pul', 'cher']
        >>> print(syllabifier.syllabify("ruptus"))
        ['ru', 'ptus']
        >>> print(syllabifier.syllabify("Bīthÿnus"))
        ['Bī', 'thÿ', 'nus']
        >>> print(syllabifier.syllabify("sanguen"))
        ['san', 'guen']
        >>> print(syllabifier.syllabify("unguentum"))
        ['un', 'guen', 'tum']
        >>> print(syllabifier.syllabify("lingua"))
        ['lin', 'gua']
        >>> print(syllabifier.syllabify("linguā"))
        ['lin', 'guā']
        >>> print(syllabifier.syllabify("languidus"))
        ['lan', 'gui', 'dus']

        >>> print(syllabifier.syllabify("suis"))
        ['su', 'is']
        >>> print(syllabifier.syllabify("habui"))
        ['ha', 'bu', 'i']
        >>> print(syllabifier.syllabify("habuit"))
        ['ha', 'bu', 'it']
        >>> print(syllabifier.syllabify("qui"))
        ['qui']
        >>> print(syllabifier.syllabify("quibus"))
        ['qui', 'bus']
        >>> print(syllabifier.syllabify("hui"))
        ['hui']
        >>> print(syllabifier.syllabify("cui"))
        ['cui']
        >>> print(syllabifier.syllabify("huic"))
        ['huic']
        """
        stripped = words.translate(self.remove_punct_map)

        # Encode "qu" and "gu" + vowel with a placeholder "w". Every
        # substitution is length-preserving and leaves spaces alone, so word
        # boundaries are the same in ``stripped`` and ``encoded``.
        encoded = stripped.replace("qu", "kw").replace("Qu", "Kw")
        for vowel in "aeiouāēīōū":
            for onset in "gG":
                encoded = encoded.replace(onset + "u" + vowel, onset + "w" + vowel)

        # Validation runs on the encoded text so that exactly the same inputs
        # are accepted as before; only what is reported/returned on failure
        # uses the caller's spelling instead of the internal placeholders.
        if any(char not in self.ACCEPTABLE_CHARS for char in encoded):
            LOG.error("Unsupported character found in %s ", stripped)
            return stripped.strip().split(" ")

        syllables: list = []
        for item in encoded.strip().split(" "):
            syllables.extend(self._setup(item))

        # Undo the placeholder encoding inside each syllable.
        restored = [
            syl.replace("kw", "qu").replace("Kw", "Qu")
               .replace("gw", "gu").replace("Gw", "Gu")
            for syl in syllables
        ]
        return StringUtils.remove_blank_spaces(restored)