def _setup(self, word) -> list:
    """Prepare a single word for syllable processing.

    The checks run in this order:

    1. A one-character word is returned as its own, single syllable.
    2. If the word starts with one of ``self.constants.PREFIXES``, the two
       parts produced by ``StringUtils.split_on`` are syllabified
       separately, provided the remainder still contains a vowel;
       otherwise the word is syllabified as a whole.
    3. A word listed in ``self.constants.UI_EXCEPTIONS`` is answered with
       the syllable list stored there, matching the other definition of
       this method in the file.
    4. Any other word goes through ``self._process`` unchanged.

    :param word: one word to split into syllables.
    :return: list of syllable strings.
    """
    if len(word) == 1:
        return [word]
    for prefix in self.constants.PREFIXES:
        if word.startswith(prefix):
            (first, rest) = StringUtils.split_on(word, prefix)
            if self._contains_vowels(rest):
                return StringUtils.remove_blank_spaces(
                    self._process(first) + self._process(rest))
            # A word like "pror" can result from elision: what follows the
            # prefix has no vowel, so the word is processed as one unit.
            return StringUtils.remove_blank_spaces(self._process(word))
    if word in self.constants.UI_EXCEPTIONS:
        # Return a copy so callers can never modify the shared table entry.
        return list(self.constants.UI_EXCEPTIONS[word])
    return StringUtils.remove_blank_spaces(self._process(word))
def _setup(self, word) -> list:
    """Prepare a single word for syllable processing.

    A one-character word is its own syllable. A word that starts with one
    of ``self.constants.PREFIXES`` is split with ``StringUtils.split_on``
    and both parts are syllabified separately, as long as the remainder
    still contains a vowel; if it does not, the word is syllabified whole.
    A word with no matching prefix is looked up in
    ``self.constants.UI_EXCEPTIONS`` before the general rules in
    ``self._process`` are applied.

    :param word: one word to split into syllables.
    :return: list of syllable strings; always a list the caller may modify
        freely.
    """
    if len(word) == 1:
        return [word]
    for prefix in self.constants.PREFIXES:
        if word.startswith(prefix):
            (first, rest) = StringUtils.split_on(word, prefix)
            if self._contains_vowels(rest):
                return StringUtils.remove_blank_spaces(
                    self._process(first) + self._process(rest))
            # A word like "pror" can result from elision: what follows the
            # prefix has no vowel, so the word is processed as one unit.
            return StringUtils.remove_blank_spaces(self._process(word))
    if word in self.constants.UI_EXCEPTIONS:
        # Copy the stored list: handing out the table's own list object
        # would let a caller's in-place edit change later lookups.
        return list(self.constants.UI_EXCEPTIONS[word])
    return StringUtils.remove_blank_spaces(self._process(word))
def get_syllable_count(self, syllables: list) -> int:
    """Return the number of syllable groups left once elision is applied.

    Callers often keep the original position and separation of syllables
    so that a line can be reconstituted and stresses applied at the
    original word positions. This method provides the accurate syllable
    count for such a list; it operates on a deep copy, so the list passed
    in is not handed to the helpers.

    :param syllables: list of syllable strings.
    :return: number of entries remaining after solo consonants have been
        moved to the right and blank entries removed.

    >>> syllabifier = Syllabifier()
    >>> print(syllabifier.get_syllable_count([
    ... 'Jām', 'tūm', 'c', 'au', 'sus', 'es', 'u', 'nus', 'I', 'ta', 'lo', 'rum']))
    11
    """
    scratch = copy.deepcopy(syllables)
    solo_consonants = self._find_solo_consonant(scratch)
    shifted = StringUtils.move_consonant_right(scratch, solo_consonants)
    remaining = StringUtils.remove_blank_spaces(shifted)
    return len(remaining)
def get_syllable_count(self, syllables: list) -> int:
    """Count the syllable groups that would occur after elision.

    The syllable list usually preserves the position and separation of
    syllables so a line can be rebuilt and stresses applied to the
    original word positions. To count syllables accurately, a deep copy of
    the list is made, solo consonants are moved to the right, blank
    entries are removed, and the length of what remains is returned.

    :param syllables: list of syllable strings.
    :return: the syllable count as an int.

    >>> syllabifier = Syllabifier()
    >>> print(syllabifier.get_syllable_count([
    ... 'Jām', 'tūm', 'c', 'au', 'sus', 'es', 'u', 'nus', 'I', 'ta', 'lo', 'rum']))
    11
    """
    # The helpers only ever see this private copy of the input.
    working_copy = copy.deepcopy(syllables)
    solo_positions = self._find_solo_consonant(working_copy)
    regrouped = StringUtils.move_consonant_right(working_copy, solo_positions)
    return len(StringUtils.remove_blank_spaces(regrouped))
def syllabify(self, words: str) -> list:
    """Parse Latin text into a list of syllable strings.

    Punctuation is removed with ``self.remove_punct_map``. The ``u`` of
    ``qu`` and of ``gu`` + vowel is then temporarily rewritten as ``w``
    (``qu`` becomes ``kw``), presumably so the syllable rules do not treat
    it as a vowel; the original spelling is restored in the returned
    syllables.

    If the text contains a character that is not in
    ``self.ACCEPTABLE_CHARS``, an error is logged and the
    punctuation-stripped words are returned unsyllabified, in their
    original spelling.

    :param words: a string containing one latin word or many words separated by spaces.
    :return: list of string, each representing a syllable.

    >>> syllabifier = Syllabifier()
    >>> print(syllabifier.syllabify("fuit"))
    ['fu', 'it']
    >>> print(syllabifier.syllabify("libri"))
    ['li', 'bri']
    >>> print(syllabifier.syllabify("contra"))
    ['con', 'tra']
    >>> print(syllabifier.syllabify("iaculum"))
    ['ja', 'cu', 'lum']
    >>> print(syllabifier.syllabify("amo"))
    ['a', 'mo']
    >>> print(syllabifier.syllabify("bracchia"))
    ['brac', 'chi', 'a']
    >>> print(syllabifier.syllabify("deinde"))
    ['dein', 'de']
    >>> print(syllabifier.syllabify("certabant"))
    ['cer', 'ta', 'bant']
    >>> print(syllabifier.syllabify("aere"))
    ['ae', 're']
    >>> print(syllabifier.syllabify("adiungere"))
    ['ad', 'jun', 'ge', 're']
    >>> print(syllabifier.syllabify("mōns"))
    ['mōns']
    >>> print(syllabifier.syllabify("domus"))
    ['do', 'mus']
    >>> print(syllabifier.syllabify("lixa"))
    ['li', 'xa']
    >>> print(syllabifier.syllabify("asper"))
    ['as', 'per']
    >>> # handle doubles
    >>> print(syllabifier.syllabify("siccus"))
    ['sic', 'cus']
    >>> # handle liquid + liquid
    >>> print(syllabifier.syllabify("almus"))
    ['al', 'mus']
    >>> # handle liquid + mute
    >>> print(syllabifier.syllabify("ambo"))
    ['am', 'bo']
    >>> print(syllabifier.syllabify("anguis"))
    ['an', 'guis']
    >>> print(syllabifier.syllabify("arbor"))
    ['ar', 'bor']
    >>> print(syllabifier.syllabify("pulcher"))
    ['pul', 'cher']
    >>> print(syllabifier.syllabify("ruptus"))
    ['ru', 'ptus']
    >>> print(syllabifier.syllabify("Bīthÿnus"))
    ['Bī', 'thÿ', 'nus']
    >>> print(syllabifier.syllabify("sanguen"))
    ['san', 'guen']
    >>> print(syllabifier.syllabify("unguentum"))
    ['un', 'guen', 'tum']
    >>> print(syllabifier.syllabify("lingua"))
    ['lin', 'gua']
    >>> print(syllabifier.syllabify("linguā"))
    ['lin', 'guā']
    >>> print(syllabifier.syllabify("languidus"))
    ['lan', 'gui', 'dus']
    >>> print(syllabifier.syllabify("suis"))
    ['su', 'is']
    >>> print(syllabifier.syllabify("habui"))
    ['ha', 'bu', 'i']
    >>> print(syllabifier.syllabify("habuit"))
    ['ha', 'bu', 'it']
    >>> print(syllabifier.syllabify("qui"))
    ['qui']
    >>> print(syllabifier.syllabify("quibus"))
    ['qui', 'bus']
    >>> print(syllabifier.syllabify("hui"))
    ['hui']
    >>> print(syllabifier.syllabify("cui"))
    ['cui']
    >>> print(syllabifier.syllabify("huic"))
    ['huic']
    """
    stripped = words.translate(self.remove_punct_map)

    # Placeholder encoding: "qu" -> "kw", and "gu" + vowel -> "gw" + vowel,
    # for both lower-case and capitalised initials.
    cleaned = stripped.replace("qu", "kw").replace("Qu", "Kw")
    following_vowels = ("a", "e", "i", "o", "u", "ā", "ē", "ī", "ō", "ū")
    for vowel in following_vowels:
        for initial in ("g", "G"):
            cleaned = cleaned.replace(initial + "u" + vowel,
                                      initial + "w" + vowel)

    # Validation runs on the encoded text, so the set of accepted inputs
    # is unchanged.
    if any(char not in self.ACCEPTABLE_CHARS for char in cleaned):
        # Report and return the un-encoded text: the kw/gw placeholders
        # are an internal detail and must not reach the caller or the log.
        LOG.error("Unsupported character found in %s ", stripped)
        return stripped.strip().split(" ")

    syllables: list = []
    for item in cleaned.strip().split(" "):
        syllables.extend(self._setup(item))

    # Undo the placeholder encoding inside every syllable.
    restored = [
        syl.replace("kw", "qu").replace("Kw", "Qu")
           .replace("gw", "gu").replace("Gw", "Gu")
        for syl in syllables
    ]
    return StringUtils.remove_blank_spaces(restored)
def syllabify(self, words: str) -> list:
    """Parse Latin text into a list of syllable strings.

    Punctuation is removed with ``self.remove_punct_map``. The ``u`` of
    ``qu`` and of ``gu`` + vowel is then temporarily rewritten as ``w``
    (``qu`` becomes ``kw``), presumably so the syllable rules do not treat
    it as a vowel; the original spelling is restored in the returned
    syllables.

    If the text contains a character that is not in
    ``self.ACCEPTABLE_CHARS``, an error is logged and the
    punctuation-stripped words are returned unsyllabified, in their
    original spelling.

    :param words: a string containing one latin word or many words separated by spaces.
    :return: list of string, each representing a syllable.

    >>> syllabifier = Syllabifier()
    >>> print(syllabifier.syllabify("fuit"))
    ['fu', 'it']
    >>> print(syllabifier.syllabify("libri"))
    ['li', 'bri']
    >>> print(syllabifier.syllabify("contra"))
    ['con', 'tra']
    >>> print(syllabifier.syllabify("iaculum"))
    ['ja', 'cu', 'lum']
    >>> print(syllabifier.syllabify("amo"))
    ['a', 'mo']
    >>> print(syllabifier.syllabify("bracchia"))
    ['brac', 'chi', 'a']
    >>> print(syllabifier.syllabify("deinde"))
    ['dein', 'de']
    >>> print(syllabifier.syllabify("certabant"))
    ['cer', 'ta', 'bant']
    >>> print(syllabifier.syllabify("aere"))
    ['ae', 're']
    >>> print(syllabifier.syllabify("adiungere"))
    ['ad', 'jun', 'ge', 're']
    >>> print(syllabifier.syllabify("mōns"))
    ['mōns']
    >>> print(syllabifier.syllabify("domus"))
    ['do', 'mus']
    >>> print(syllabifier.syllabify("lixa"))
    ['li', 'xa']
    >>> print(syllabifier.syllabify("asper"))
    ['as', 'per']
    >>> # handle doubles
    >>> print(syllabifier.syllabify("siccus"))
    ['sic', 'cus']
    >>> # handle liquid + liquid
    >>> print(syllabifier.syllabify("almus"))
    ['al', 'mus']
    >>> # handle liquid + mute
    >>> print(syllabifier.syllabify("ambo"))
    ['am', 'bo']
    >>> print(syllabifier.syllabify("anguis"))
    ['an', 'guis']
    >>> print(syllabifier.syllabify("arbor"))
    ['ar', 'bor']
    >>> print(syllabifier.syllabify("pulcher"))
    ['pul', 'cher']
    >>> print(syllabifier.syllabify("ruptus"))
    ['ru', 'ptus']
    >>> print(syllabifier.syllabify("Bīthÿnus"))
    ['Bī', 'thÿ', 'nus']
    >>> print(syllabifier.syllabify("sanguen"))
    ['san', 'guen']
    >>> print(syllabifier.syllabify("unguentum"))
    ['un', 'guen', 'tum']
    >>> print(syllabifier.syllabify("lingua"))
    ['lin', 'gua']
    >>> print(syllabifier.syllabify("linguā"))
    ['lin', 'guā']
    >>> print(syllabifier.syllabify("languidus"))
    ['lan', 'gui', 'dus']
    >>> print(syllabifier.syllabify("suis"))
    ['su', 'is']
    >>> print(syllabifier.syllabify("habui"))
    ['ha', 'bu', 'i']
    >>> print(syllabifier.syllabify("habuit"))
    ['ha', 'bu', 'it']
    >>> print(syllabifier.syllabify("qui"))
    ['qui']
    >>> print(syllabifier.syllabify("quibus"))
    ['qui', 'bus']
    >>> print(syllabifier.syllabify("hui"))
    ['hui']
    >>> print(syllabifier.syllabify("cui"))
    ['cui']
    >>> print(syllabifier.syllabify("huic"))
    ['huic']
    """
    stripped = words.translate(self.remove_punct_map)

    # Placeholder encoding: "qu" -> "kw", and "gu" + vowel -> "gw" + vowel,
    # for both lower-case and capitalised initials.
    cleaned = stripped.replace("qu", "kw").replace("Qu", "Kw")
    following_vowels = ("a", "e", "i", "o", "u", "ā", "ē", "ī", "ō", "ū")
    for vowel in following_vowels:
        for initial in ("g", "G"):
            cleaned = cleaned.replace(initial + "u" + vowel,
                                      initial + "w" + vowel)

    # Validation runs on the encoded text, so the set of accepted inputs
    # is unchanged.
    if any(char not in self.ACCEPTABLE_CHARS for char in cleaned):
        # Report and return the un-encoded text: the kw/gw placeholders
        # are an internal detail and must not reach the caller or the log.
        LOG.error("Unsupported character found in %s ", stripped)
        return stripped.strip().split(" ")

    syllables: list = []
    for item in cleaned.strip().split(" "):
        syllables.extend(self._setup(item))

    # Undo the placeholder encoding inside every syllable.
    restored = [
        syl.replace("kw", "qu").replace("Kw", "Qu")
           .replace("gw", "gu").replace("Gw", "Gu")
        for syl in syllables
    ]
    return StringUtils.remove_blank_spaces(restored)