Example #1
0
 def produce_scansion(self, stresses: list, syllables_wspaces: list,
                      offset_map: dict) -> str:
     """Create a scansion string that has stressed and unstressed syllable positions in locations
     that correspond with the original text's syllable vowels.
      :param stresses: list of stressed syllable positions
      :param syllables_wspaces: list of syllables with spaces escaped for punctuation or elision
      :param offset_map: dictionary mapping syllable positions to an offset amount, which is the
       number of spaces to skip in the original line before inserting the accent.
      """
     scansion = list(" " * len(StringUtils.flatten(syllables_wspaces)))
     unstresses = StringUtils.get_unstresses(stresses,
                                             len(syllables_wspaces))
     try:
         for idx in unstresses:
             location = offset_map[idx]
             if location is not None:
                 scansion[location] = self.constants.UNSTRESSED
         for idx in stresses:
             location = offset_map[idx]
             if location is not None:
                 scansion[location] = self.constants.STRESSED
     except Exception as e:
         print("problem with syllables; check syllabification %s %s" %
               (syllables_wspaces, e))
     return "".join(scansion)
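
A minimal, self-contained sketch of the placement idea: an offset map from syllable index to the character offset of that syllable's vowel lets the marks land directly over the vowels of the original line. The line, offsets, and the "-"/"U" mark characters below are illustrative stand-ins, not values taken from ScansionConstants.

    # hypothetical data: syllable index -> offset of its vowel in "arma cano"
    line = "arma cano"
    offset_map = {0: 0, 1: 3, 2: 6, 3: 8}
    stresses, unstresses = [0, 3], [1, 2]
    scansion = [" "] * len(line)
    for idx in unstresses:
        scansion[offset_map[idx]] = "U"      # assumed unstressed mark
    for idx in stresses:
        scansion[offset_map[idx]] = "-"      # assumed stressed mark
    print("".join(scansion))                 # '-  U  U -' lines up with a, a, a, o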
Example #2
0
    def __init__(self, constants=ScansionConstants(), syllabifier=Syllabifier(),
                 optional_transform: bool = False, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.constants = constants
        self.remove_punct_map = StringUtils.remove_punctuation_dict()
        self.punctuation_substitutions = StringUtils.punctuation_for_spaces_dict()
        self.metrical_validator = MetricalValidator(constants)
        self.formatter = ScansionFormatter(constants)
        self.syllabifier = syllabifier
        self.optional_transform = optional_transform
        self.inverted_amphibrach_re = re.compile(
            r"{}\s*{}\s*{}".format(self.constants.STRESSED,
                                   self.constants.UNSTRESSED,
                                   self.constants.STRESSED))
        self.syllable_matcher = re.compile(r"[{}]".format(self.constants.VOWELS +
                                                          self.constants.ACCENTED_VOWELS +
                                                          self.constants.LIQUIDS +
                                                          self.constants.MUTES))
        self.SPONDAIC_PENTAMETER = self.constants.SPONDEE + self.constants.SPONDEE + \
                                   self.constants.STRESSED + self.constants.DACTYL + \
                                   self.constants.DACTYL + self.constants.OPTIONAL_ENDING

        self.DACTYLIC_PENTAMETER = self.constants.DACTYL + self.constants.DACTYL + \
                                   self.constants.STRESSED + self.constants.DACTYL + \
                                   self.constants.DACTYL + self.constants.OPTIONAL_ENDING
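
For orientation, a hedged reconstruction of what the two pentameter templates amount to, using the mark characters implied by the doctests elsewhere in this listing ('-' stressed, 'U' unstressed, 'X' optional ending); the authoritative values live in ScansionConstants.

    # assumed stand-ins for the ScansionConstants mark characters
    STRESSED, UNSTRESSED, OPTIONAL_ENDING = "-", "U", "X"
    SPONDEE = STRESSED + STRESSED
    DACTYL = STRESSED + UNSTRESSED + UNSTRESSED
    SPONDAIC_PENTAMETER = SPONDEE + SPONDEE + STRESSED + DACTYL + DACTYL + OPTIONAL_ENDING
    DACTYLIC_PENTAMETER = DACTYL + DACTYL + STRESSED + DACTYL + DACTYL + OPTIONAL_ENDING
    print(SPONDAIC_PENTAMETER)    # ------UU-UUX   (12 positions)
    print(DACTYLIC_PENTAMETER)    # -UU-UU--UU-UUX (14 positions)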
Example #3
0
 def _move_consonant(self, letters: list, positions: list) -> list:
     """Given a list of consonant positions, move the consonants according to certain
     consonant syllable behavioral rules for gathering and grouping."""
     for pos in positions:
         previous_letter = letters[pos - 1]
         consonant = letters[pos]
         next_letter = letters[pos + 1]
         if self._contains_vowels(next_letter) and self._starts_with_vowel(next_letter):
             return StringUtils.move_consonant_right(letters, [pos])
         if self._contains_vowels(previous_letter) and self._ends_with_vowel(
                 previous_letter) and len(previous_letter) == 1:
             return StringUtils.move_consonant_left(letters, [pos])
         if previous_letter + consonant in self.constants.ASPIRATES:
             return StringUtils.move_consonant_left(letters, [pos])
         if consonant + next_letter in self.constants.ASPIRATES:
             return StringUtils.move_consonant_right(letters, [pos])
         if next_letter[0] == consonant:
             return StringUtils.move_consonant_left(letters, [pos])
         if consonant in self.constants.MUTES and next_letter[0] in self.constants.LIQUIDS:
             return StringUtils.move_consonant_right(letters, [pos])
         if consonant in ['k', 'K'] and next_letter[0] in ['w', 'W']:
             return StringUtils.move_consonant_right(letters, [pos])
         if self._contains_consonants(next_letter[0]) and self._starts_with_vowel(
                 previous_letter[-1]):
             return StringUtils.move_consonant_left(letters, [pos])
         # fall through case
         if self._contains_consonants(next_letter[0]):
             return StringUtils.move_consonant_right(letters, [pos])
     return letters
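
A toy illustration of the two basic movements, with hypothetical move_right/move_left helpers standing in for StringUtils.move_consonant_right/left: a consonant before a vowel-initial group is gathered rightward, while one that follows a vowel and precedes a consonant-initial group is gathered leftward.

    # hypothetical helpers: merge the group at pos into a neighbour and blank it out
    def move_right(groups, pos):
        groups[pos + 1] = groups[pos] + groups[pos + 1]
        groups[pos] = ""
        return groups

    def move_left(groups, pos):
        groups[pos - 1] = groups[pos - 1] + groups[pos]
        groups[pos] = ""
        return groups

    print(move_right(["a", "t", "er"], 1))   # ['a', '', 'ter']  -- 't' opens the next syllable
    print(move_left(["a", "n", "bro"], 1))   # ['an', '', 'bro'] -- 'n' closes the previous one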
Example #4
0
 def _move_consonant(self, letters: list, positions: list) -> list:
     """Given a list of consonant positions, move the consonants according to certain
     consonant syllable behavioral rules for gathering and grouping."""
     for pos in positions:
         previous_letter = letters[pos - 1]
         consonant = letters[pos]
         next_letter = letters[pos + 1]
         if self._contains_vowels(next_letter) and self._starts_with_vowel(
                 next_letter):
             return StringUtils.move_consonant_right(letters, [pos])
         if self._contains_vowels(
                 previous_letter) and self._ends_with_vowel(
                     previous_letter) and len(previous_letter) == 1:
             return StringUtils.move_consonant_left(letters, [pos])
         if previous_letter + consonant in self.constants.ASPIRATES:
             return StringUtils.move_consonant_left(letters, [pos])
         if consonant + next_letter in self.constants.ASPIRATES:
             return StringUtils.move_consonant_right(letters, [pos])
         if next_letter[0] == consonant:
             return StringUtils.move_consonant_left(letters, [pos])
         if consonant in self.constants.MUTES and next_letter[
                 0] in self.constants.LIQUIDS:
             return StringUtils.move_consonant_right(letters, [pos])
         if consonant in ['k', 'K'] and next_letter[0] in ['w', 'W']:
             return StringUtils.move_consonant_right(letters, [pos])
         if self._contains_consonants(
                 next_letter[0]) and self._starts_with_vowel(
                     previous_letter[-1]):
             return StringUtils.move_consonant_left(letters, [pos])
         # fall through case
         if self._contains_consonants(next_letter[0]):
             return StringUtils.move_consonant_right(letters, [pos])
     return letters
Example #5
0
 def _setup(self, word) -> list:
     """Prepares a word for syllable processing. If the word starts with a prefix, process it
     separately."""
     if len(word) == 1:
         return [word]
     for prefix in self.constants.PREFIXES:
         if word.startswith(prefix):
             (first, rest) = StringUtils.split_on(word, prefix)
             if self._contains_vowels(rest):
                 return StringUtils.remove_blank_spaces(
                     self._process(first) + self._process(rest))
             # a word like pror can happen from elision
             return StringUtils.remove_blank_spaces(self._process(word))
     return StringUtils.remove_blank_spaces(self._process(word))
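
A simplified sketch of the prefix split, with a toy prefix tuple standing in for self.constants.PREFIXES: the prefix and the remainder are syllabified separately only when the remainder still contains a vowel.

    # toy stand-in for the prefix handling; PREFIXES here is illustrative only
    PREFIXES = ("ab", "ad", "con")
    def split_on_prefix(word):
        for prefix in PREFIXES:
            rest = word[len(prefix):]
            if word.startswith(prefix) and any(v in rest for v in "aeiou"):
                return [prefix, rest]     # each part is then syllabified on its own
        return [word]
    print(split_on_prefix("absum"))       # ['ab', 'sum']
    print(split_on_prefix("mare"))        # ['mare']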
Example #6
0
 def __init__(self, constants=ScansionConstants(), syllabifier=Syllabifier(), **kwargs):
     self.constants = constants
     self.remove_punct_map = StringUtils.remove_punctuation_dict()
     self.punctuation_substitutions = StringUtils.punctuation_for_spaces_dict()
     self.metrical_validator = MetricalValidator(constants)
     self.formatter = ScansionFormatter(constants)
     self.syllabifier = syllabifier
     self.inverted_amphibrach_re = re.compile(
         r"{}\s*{}\s*{}".format(self.constants.STRESSED,
                                self.constants.UNSTRESSED,
                                self.constants.STRESSED))
     self.syllable_matcher = re.compile(r"[{}]".format(self.constants.VOWELS +
                                                       self.constants.ACCENTED_VOWELS +
                                                       self.constants.LIQUIDS +
                                                       self.constants.MUTES))
Example #7
0
    def get_syllable_count(self, syllables: list) -> int:
        """Counts the number of syllable groups that would occur after elision.

        Often we will want to preserve the position and separation of syllables so that they
        can be used to reconstitute a line, and apply stresses to the original word positions.
        However, we also want to be able to count the number of syllables accurately.

        >>> syllabifier = Syllabifier()
        >>> print(syllabifier.get_syllable_count([
        ... 'Jām', 'tūm', 'c', 'au', 'sus', 'es', 'u', 'nus', 'I', 'ta', 'lo', 'rum']))
        11
        """
        tmp_syllables = copy.deepcopy(syllables)
        return len(StringUtils.remove_blank_spaces(
            StringUtils.move_consonant_right(tmp_syllables,
                                             self._find_solo_consonant(tmp_syllables))))
Example #8
0
 def correct_first_two_dactyls(self, scansion: str) -> str:
     """If a hexameter or pentameter starts with a spondee,
     an unstressed syllable in the third position must actually be stressed,
     so we will convert it: - - | U    ->  - - | -
     And/or if the starting pattern is spondee + trochee + stressed, then the unstressed
     trochee can be corrected: - - | - u | -   ->  - - | - - | -
     :param scansion:
     :return:
     >>> print(VerseScanner().correct_first_two_dactyls(
     ... " -   - U   U -  -  U U U U  U U  - -")) # doctest: +NORMALIZE_WHITESPACE
      -   - -   - -  -  U U U U  U U  - -
     """
     mark_list = StringUtils.mark_list(scansion)
     new_line = self.correct_invalid_start(scansion)
     raw_scansion = new_line.replace(" ", "")
     if raw_scansion.startswith(self.constants.SPONDEE +
                                self.constants.TROCHEE +
                                self.constants.STRESSED):
         new_scansion = list(self.constants.SPONDEE +
                             self.constants.SPONDEE +
                             self.constants.STRESSED + raw_scansion[5:])
         corrected = "".join(new_scansion)
         new_sequence = list(" " * len(scansion))
         for idx, car in enumerate(corrected):
             new_sequence[mark_list[idx]] = car
         return "".join(new_sequence)
     return new_line
Example #9
0
    def get_syllable_count(self, syllables: list) -> int:
        """Counts the number of syllable groups that would occur after elision.

        Often we will want to preserve the position and separation of syllables so that they
        can be used to reconstitute a line, and apply stresses to the original word positions.
        However, we also want to be able to count the number of syllables accurately.

        >>> syllabifier = Syllabifier()
        >>> print(syllabifier.get_syllable_count([
        ... 'Jām', 'tūm', 'c', 'au', 'sus', 'es', 'u', 'nus', 'I', 'ta', 'lo', 'rum']))
        11
        """
        tmp_syllables = copy.deepcopy(syllables)
        return len(StringUtils.remove_blank_spaces(
            StringUtils.move_consonant_right(tmp_syllables,
                                             self._find_solo_consonant(tmp_syllables))))
Example #10
0
 def correct_first_two_dactyls(self, scansion: str) -> str:
     """If a hexameter or pentameter starts with a spondee,
     an unstressed syllable in the third position must actually be stressed,
     so we will convert it: - - | U    ->  - - | -
     And/or if the starting pattern is spondee + trochee + stressed, then the unstressed
     trochee can be corrected: - - | - u | -   ->  - - | - - | -
     :param scansion:
     :return:
     >>> print(VerseScanner().correct_first_two_dactyls(
     ... " -   - U   U -  -  U U U U  U U  - -")) # doctest: +NORMALIZE_WHITESPACE
      -   - -   - -  -  U U U U  U U  - -
     """
     mark_list = StringUtils.mark_list(scansion)
     new_line = self.correct_invalid_start(scansion)
     raw_scansion = new_line.replace(" ", "")
     if raw_scansion.startswith(self.constants.SPONDEE + self.constants.TROCHEE +
                                        self.constants.STRESSED):
         new_scansion = list(self.constants.SPONDEE + self.constants.SPONDEE
                             + self.constants.STRESSED + raw_scansion[5:])
         corrected = "".join(new_scansion)
         new_sequence = list(" " * len(scansion))
         for idx, car in enumerate(corrected):
             new_sequence[mark_list[idx]] = car
         return "".join(new_sequence)
     return new_line
Example #11
0
    def accent_by_position(self, verse: str) -> str:
        """:param verse: a line of unaccented hexameter verse
        :return: the same line with vowels accented by position

        >>> print(HexameterScanner().accent_by_position(
        ... "Arma virumque cano, Troiae qui primus ab oris").lstrip())
        Ārma virūmque canō  Trojae quī primus ab oris
        """
        line = verse.translate(self.punctuation_substitutions)
        line = self.transform_i_to_j(line)
        marks = list(line)
        # Vowels followed by 2 consonants
        # The digraphs ch, ph, th, qu and sometimes gu and su count as single consonants.
        # see http://people.virginia.edu/~jdk3t/epicintrog/scansion.htm
        marks = StringUtils.overwrite(
            marks, "[{}][{}][{}]".format(self.constants.VOWELS,
                                         self.constants.CONSONANTS,
                                         self.constants.CONSONANTS_WO_H),
            self.constants.STRESSED)
        # one space (or more, where punctuation was 'dropped') may intervene
        marks = StringUtils.overwrite(
            marks, r"[{}][{}]\s*[{}]".format(self.constants.VOWELS,
                                             self.constants.CONSONANTS,
                                             self.constants.CONSONANTS_WO_H),
            self.constants.STRESSED)
        # ... if both consonants are in the next word, the vowel may be long
        # .... but it could be short if the vowel is not on the thesis/emphatic part of the foot
        # ... see Gildersleeve and Lodge p.446
        marks = StringUtils.overwrite(
            marks, r"[{}]\s*[{}][{}]".format(self.constants.VOWELS,
                                             self.constants.CONSONANTS,
                                             self.constants.CONSONANTS_WO_H),
            self.constants.STRESSED)
        #  x is considered as two letters
        marks = StringUtils.overwrite(marks,
                                      "[{}][xX]".format(self.constants.VOWELS),
                                      self.constants.STRESSED)
        #  z is considered as two letters
        marks = StringUtils.overwrite(
            marks, r"[{}][zZ]".format(self.constants.VOWELS),
            self.constants.STRESSED)
        original_verse = list(line)
        for idx, word in enumerate(original_verse):
            if marks[idx] == self.constants.STRESSED:
                original_verse[idx] = self.constants.VOWELS_TO_ACCENTS[
                    original_verse[idx]]
        return "".join(original_verse)
Example #12
0
 def __init__(self,
              constants=ScansionConstants(),
              syllabifier=Syllabifier()):
     self.constants = constants
     self.remove_punct_map = StringUtils.remove_punctuation_dict()
     self.punctuation_substitutions = StringUtils.punctuation_for_spaces_dict(
     )
     self.metrical_validator = MetricalValidator(constants)
     self.formatter = ScansionFormatter(constants)
     self.syllabifier = syllabifier
     self.inverted_amphibrach_re = re.compile(r"{}\s*{}\s*{}".format(
         self.constants.STRESSED, self.constants.UNSTRESSED,
         self.constants.STRESSED))
     self.syllable_matcher = re.compile(
         r"[{}]".format(self.constants.VOWELS +
                        self.constants.ACCENTED_VOWELS +
                        self.constants.LIQUIDS + self.constants.MUTES))
Example #13
0
 def flag_dipthongs(self, syllables: list) -> list:
     """Return a list of positions of syllables that contain a diphthong."""
     long_positions = []
     for idx, syl in enumerate(syllables):
         for dipthong in self.constants.DIPTHONGS:
             if dipthong in syllables[idx]:
                 if not StringUtils.starts_with_qu(syllables[idx]):
                     long_positions.append(idx)
     return long_positions
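
An illustrative run with a toy diphthong list standing in for constants.DIPTHONGS; as in the method above, a syllable beginning with 'qu' is skipped because its 'u' is not an independent vowel.

    # hypothetical diphthong list and a compressed version of the loop above
    DIPTHONGS = ("ae", "au", "oe")
    def flag(syllables):
        return [idx for idx, syl in enumerate(syllables)
                for d in DIPTHONGS
                if d in syl and not syl.lower().startswith("qu")]
    print(flag(["ar", "ma", "cae", "lo"]))   # [2] -- only 'cae' holds a diphthong
    print(flag(["quae", "rit"]))             # []  -- 'quae' is skipped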
Example #14
0
 def flag_dipthongs(self, syllables: list) -> list:
     """Return a list of positions of syllables that contain a diphthong."""
     long_positions = []
     for idx, syl in enumerate(syllables):
         for dipthong in self.constants.DIPTHONGS:
             if dipthong in syllables[idx]:
                 if not StringUtils.starts_with_qu(syllables[idx]):
                     long_positions.append(idx)
     return long_positions
Example #15
0
    def transform_i_to_j_optional(self, line: str) -> str:
        """Sometimes for the demands of meter a more permissive i to j transformation is warranted.
        :param line:
        :return:

        >>> print(HexameterScanner().transform_i_to_j_optional("Italiam"))
        Italjam
        >>> print(HexameterScanner().transform_i_to_j_optional("Lāvīniaque"))
        Lāvīnjaque
        >>> print(HexameterScanner().transform_i_to_j_optional("omnium"))
        omnjum
        """

        words = line.split(" ")
        space_list = StringUtils.space_list(line)
        corrected_words = []
        for word in words:
            found = False
            for prefix in self.constants.PREFIXES:
                if word.startswith(prefix) and word != prefix:
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(prefix))
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(
                            word[len(prefix):]))
                    found = True
                    break
            if not found:
                corrected_words.append(
                    self.syllabifier.convert_consonantal_i(word))
        new_line = StringUtils.join_syllables_spaces(corrected_words,
                                                     space_list)
        #  the following two may be tunable and subject to improvement
        char_list = StringUtils.overwrite(
            list(new_line),
            "[bcdfgjkmpqrstvwxzBCDFGHJKMPQRSTVWXZ][i][{}]".format(
                self.constants.VOWELS_WO_I), "j", 1)
        char_list = StringUtils.overwrite(
            char_list, "[{}][iI][{}]".format(self.constants.LIQUIDS,
                                             self.constants.VOWELS_WO_I), "j",
            1)
        return "".join(char_list)
Example #16
0
    def _setup(self, word) -> list:
        """Prepares a word for syllable processing.

        If the word starts with a prefix, process it separately.
        """
        if len(word) == 1:
            return [word]
        for prefix in self.constants.PREFIXES:
            if word.startswith(prefix):
                (first, rest) = StringUtils.split_on(word, prefix)
                if self._contains_vowels(rest):
                    return StringUtils.remove_blank_spaces(
                        self._process(first) + self._process(rest))
                # a word like pror can happen from elision
                return StringUtils.remove_blank_spaces(self._process(word))
        if word in self.constants.UI_EXCEPTIONS.keys():
            return self.constants.UI_EXCEPTIONS[word]
        return StringUtils.remove_blank_spaces(self._process(word))
Example #17
0
 def __init__(self, constants=ScansionConstants()):
     self.constants = constants
     self.consonant_matcher = re.compile("[{}]".format(constants.CONSONANTS))
     self.vowel_matcher = re.compile(
         "[{}]".format(constants.VOWELS + constants.ACCENTED_VOWELS))
     self.consonantal_i_matcher = re.compile(
         r"\b[iIīĪ][{}]".format(constants.VOWELS + constants.ACCENTED_VOWELS))
     self.remove_punct_map = StringUtils.remove_punctuation_dict()
     self.kw_matcher = re.compile("[kK][w]")
     self.ACCEPTABLE_CHARS = constants.ACCENTED_VOWELS + constants.VOWELS + ' ' \
                             + constants.CONSONANTS
Example #18
0
 def __init__(self, constants=ScansionConstants()):
     self.constants = constants
     self.consonant_matcher = re.compile("[{}]".format(constants.CONSONANTS))
     self.vowel_matcher = re.compile(
         "[{}]".format(constants.VOWELS + constants.ACCENTED_VOWELS))
     self.consonantal_i_matcher = re.compile(
         r"\b[iIīĪ][{}]".format(constants.VOWELS + constants.ACCENTED_VOWELS))
     self.remove_punct_map = StringUtils.remove_punctuation_dict()
     self.kw_matcher = re.compile("[kK][w]")
     self.ACCEPTABLE_CHARS = constants.ACCENTED_VOWELS + constants.VOWELS + ' ' \
                             + constants.CONSONANTS
     self.diphthongs = [d for d in constants.DIPTHONGS if d not in ["ui", "Ui", "uī"]]
Example #19
0
 def calc_offset(self, syllables_spaces: list) -> dict:
     """Calculate a dictionary of accent positions from a list of syllables with spaces."""
     line = StringUtils.flatten(syllables_spaces)
     mydict = defaultdict(lambda: None)
     for idx, syl in enumerate(syllables_spaces):
         target_syllable = syllables_spaces[idx]
         skip_qu = StringUtils.starts_with_qu(target_syllable)
         matches = list(self.syllable_matcher.finditer(target_syllable))
         for position, possible in enumerate(matches):
             if skip_qu:
                 skip_qu = False
                 continue
             (start, end) = possible.span()
             if target_syllable[start:end] in \
                             self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                 part = line[:len("".join(syllables_spaces[:idx]))]
                 offset = len(part) + start
                 if line[offset] not in self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                     print("Problem at line %s offset %s" % (line, offset))
                 mydict[idx] = offset
     return mydict
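
A simplified offset calculation with plain string arithmetic, recording only the first unaccented vowel of each syllable; the real method uses the compiled syllable_matcher, skips the 'u' of 'qu', and also covers accented vowels.

    # illustrative input: syllables with their trailing spaces preserved
    syllables_spaces = ["ar", "ma ", "ca", "no"]
    offsets = {}
    consumed = 0
    for idx, syl in enumerate(syllables_spaces):
        for pos, ch in enumerate(syl):
            if ch in "aeiou":
                offsets[idx] = consumed + pos   # character offset of the syllable's vowel
                break
        consumed += len(syl)
    print(offsets)   # {0: 0, 1: 3, 2: 6, 3: 8} -- offsets into the flattened 'arma cano'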
Example #20
0
 def calc_offset(self, syllables_spaces: list) -> dict:
     """Calculate a dictionary of accent positions from a list of syllables with spaces."""
     line = StringUtils.flatten(syllables_spaces)
     mydict = defaultdict(lambda: None)
     for idx, syl in enumerate(syllables_spaces):
         target_syllable = syllables_spaces[idx]
         skip_qu = StringUtils.starts_with_qu(target_syllable)
         matches = list(self.syllable_matcher.finditer(target_syllable))
         for position, possible in enumerate(matches):
             if skip_qu:
                 skip_qu = False
                 continue
             (start, end) = possible.span()
             if target_syllable[start:end] in \
                             self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                 part = line[:len("".join(syllables_spaces[:idx]))]
                 offset = len(part) + start
                 if line[offset] not in self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                     LOG.error("Problem at line {} offset {}".format(line, offset))
                 mydict[idx] = offset
     return mydict
Example #21
0
    def transform_i_to_j(self, line: str) -> str:
        """Transform instances of consonantal i to j
        :param line:
        :return:

        >>> print(HexameterScanner().transform_i_to_j("iactātus"))
        jactātus
        >>> print(HexameterScanner().transform_i_to_j("bracchia"))
        bracchia
        """

        words = line.split(" ")
        space_list = StringUtils.space_list(line)
        corrected_words = []
        for word in words:
            found = False
            for prefix in self.constants.PREFIXES:
                if word.startswith(prefix) and word != prefix:
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(prefix))
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(
                            word[len(prefix):]))
                    found = True
                    break
            if not found:
                corrected_words.append(
                    self.syllabifier.convert_consonantal_i(word))
        new_line = StringUtils.join_syllables_spaces(corrected_words,
                                                     space_list)
        char_list = StringUtils.overwrite(
            list(new_line),
            r"\b[iī][{}]".format(self.constants.VOWELS +
                                 self.constants.ACCENTED_VOWELS), "j")
        char_list = StringUtils.overwrite(
            char_list, r"\b[I][{}]".format(self.constants.VOWELS_WO_I), "J")
        char_list = StringUtils.overwrite(
            char_list, r"[{}][i][{}]".format(self.constants.VOWELS_WO_I,
                                             self.constants.VOWELS), "j", 1)
        return "".join(char_list)
Example #22
0
 def _process(self, word: str) -> list:
     """Process a word into a list of strings representing the syllables of the word. This
     method describes rules for consonant grouping behaviors and then iteratively applies those
     rules to the list of letters that comprise the word, until all the letters are grouped into
     appropriate syllable groups."""
     #   if a blank arrives from splitting, just return an empty list
     if len(word.strip()) == 0:
         return []
     word = self.convert_consonantal_i(word)
     my_word = " " + word + " "
     letters = list(my_word)
     positions = []
     for dipth in self.diphthongs:
         if dipth in my_word:
             dipth_matcher = re.compile("{}".format(dipth))
             matches = dipth_matcher.finditer(my_word)
             for match in matches:
                 (start, end) = match.span()
                 positions.append(start)
     matches = self.kw_matcher.finditer(my_word)
     for match in matches:
         (start, end) = match.span()
         positions.append(start)
     letters = StringUtils.merge_next(letters, positions)
     letters = StringUtils.remove_blanks(letters)
     positions.clear()
     if not self._contains_vowels("".join(letters)):
         return ["".join(letters).strip()
                  ]  # occurs when only 'qu' appears by elision
     positions = self._starting_consonants_only(letters)
     while len(positions) > 0:
         letters = StringUtils.move_consonant_right(letters, positions)
         letters = StringUtils.remove_blanks(letters)
         positions = self._starting_consonants_only(letters)
     positions = self._ending_consonants_only(letters)
     while len(positions) > 0:
         letters = StringUtils.move_consonant_left(letters, positions)
         letters = StringUtils.remove_blanks(letters)
         positions = self._ending_consonants_only(letters)
     positions = self._find_solo_consonant(letters)
     while len(positions) > 0:
         letters = self._move_consonant(letters, positions)
         letters = StringUtils.remove_blanks(letters)
         positions = self._find_solo_consonant(letters)
     positions = self._find_consonant_cluster(letters)
     while len(positions) > 0:
         letters = self._move_consonant(letters, positions)
         letters = StringUtils.remove_blanks(letters)
         positions = self._find_consonant_cluster(letters)
     return letters
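
The method is organized as a series of fixed-point loops: find the positions that break a rule, move those groups, drop the blanks, and look again until nothing is left to fix. Below is a toy version of that loop shape, with a single made-up rule (a vowel-less group merges into its right neighbour) rather than the library's actual rules.

    def until_stable(groups, find_positions, move):
        positions = find_positions(groups)
        while positions:
            groups = move(groups, positions)
            groups = [g for g in groups if g]      # like StringUtils.remove_blanks
            positions = find_positions(groups)
        return groups

    def find_vowelless(groups):                    # toy rule, not the library's
        return [i for i, g in enumerate(groups[:-1]) if not any(v in g for v in "aeiou")]

    def move_first_right(groups, positions):
        i = positions[0]
        groups[i + 1] = groups[i] + groups[i + 1]
        groups[i] = ""
        return groups

    print(until_stable(list("strato"), find_vowelless, move_first_right))   # ['stra', 'to']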
Example #23
0
 def _process(self, word: str) -> list:
     """Process a word into a list of strings representing the syllables of the word. This
     method describes rules for consonant grouping behaviors and then iteratively applies those
     rules to the list of letters that comprise the word, until all the letters are grouped into
     appropriate syllable groups."""
     #   if a blank arrives from splitting, just return an empty list
     if len(word.strip()) == 0:
         return []
     my_word = " " + word + " "
     letters = list(my_word)
     positions = []
     for dipth in self.diphthongs:
         if dipth in my_word:
             dipth_matcher = re.compile("{}".format(dipth))
             matches = dipth_matcher.finditer(my_word)
             for match in matches:
                 (start, end) = match.span()
                 positions.append(start)
     matches = self.kw_matcher.finditer(my_word)
     for match in matches:
         (start, end) = match.span()
         positions.append(start)
     letters = StringUtils.merge_next(letters, positions)
     letters = StringUtils.remove_blanks(letters)
     positions.clear()
     if not self._contains_vowels("".join(letters)):
         return ["".join(letters).strip()]  # occurs when only 'qu' appears by elision
     positions = self._starting_consonants_only(letters)
     while len(positions) > 0:
         letters = StringUtils.move_consonant_right(letters, positions)
         letters = StringUtils.remove_blanks(letters)
         positions = self._starting_consonants_only(letters)
     positions = self._ending_consonants_only(letters)
     while len(positions) > 0:
         letters = StringUtils.move_consonant_left(letters, positions)
         letters = StringUtils.remove_blanks(letters)
         positions = self._ending_consonants_only(letters)
     positions = self._find_solo_consonant(letters)
     while len(positions) > 0:
         letters = self._move_consonant(letters, positions)
         letters = StringUtils.remove_blanks(letters)
         positions = self._find_solo_consonant(letters)
     positions = self._find_consonant_cluster(letters)
     while len(positions) > 0:
         letters = self._move_consonant(letters, positions)
         letters = StringUtils.remove_blanks(letters)
         positions = self._find_consonant_cluster(letters)
     return letters
Example #24
0
    def make_dactyls(self, scansion: str) -> str:
        """If a pentameter line has 14 syllables, it starts and ends with double dactyls.

        >>> print(PentameterScanner().make_dactyls("U  U  U  U  U  U  U  U  U  U  U  U  U  U"))
        -  U  U  -  U  U  -  -  U  U  -  U  U  U
        """
        mark_list = StringUtils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        new_vals = self.DACTYLIC_PENTAMETER[:-1] + vals[-1]
        corrected = "".join(new_vals)
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
Example #25
0
    def make_spondaic(self, scansion: str) -> str:
        """If a pentameter line has 12 syllables, then it must start with double spondees.

        >>> print(PentameterScanner().make_spondaic("U  U  U  U  U  U  U  U  U  U  U  U"))
        -  -  -  -  -  -  U  U  -  U  U  U
        """
        mark_list = StringUtils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        new_vals = self.SPONDAIC_PENTAMETER[:-1] + vals[-1]
        corrected = "".join(new_vals)
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
Example #26
0
 def produce_scansion(self, stresses: list, syllables_wspaces: list, offset_map: dict) -> str:
     """Create a scansion string that has stressed and unstressed syllable positions in locations
     that correspond with the original text's syllable vowels.
      :param stresses: list of stressed syllable positions
      :param syllables_wspaces: list of syllables with spaces escaped for punctuation or elision
      :param offset_map: dictionary mapping syllable positions to an offset amount, which is the
       number of spaces to skip in the original line before inserting the accent.
      """
     scansion = list(" " * len(StringUtils.flatten(syllables_wspaces)))
     unstresses = StringUtils.get_unstresses(stresses, len(syllables_wspaces))
     try:
         for idx in unstresses:
             location = offset_map[idx]
             if location is not None:
                 scansion[location] = self.constants.UNSTRESSED
         for idx in stresses:
             location = offset_map[idx]
             if location is not None:
                 scansion[location] = self.constants.STRESSED
     except Exception as e:
         LOG.error("problem with syllables; check syllabification {}, {}".format(
             syllables_wspaces, e))
     return "".join(scansion)
Example #27
0
    def transform_i_to_j_optional(self, line: str) -> str:
        """Sometimes for the demands of meter a more permissive i to j transformation is warranted.
        :param line:
        :return:

        >>> print(VerseScanner().transform_i_to_j_optional("Italiam"))
        Italjam
        >>> print(VerseScanner().transform_i_to_j_optional("Lāvīniaque"))
        Lāvīnjaque
        >>> print(VerseScanner().transform_i_to_j_optional("omnium"))
        omnjum
        """
        words = line.split(" ")
        space_list = StringUtils.space_list(line)
        corrected_words = []
        for word in words:
            found = False
            for prefix in self.constants.PREFIXES:
                if word.startswith(prefix) and word != prefix:
                    corrected_words.append(self.syllabifier.convert_consonantal_i(prefix))
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(word[len(prefix):]))
                    found = True
                    break
            if not found:
                corrected_words.append(self.syllabifier.convert_consonantal_i(word))
        new_line = StringUtils.join_syllables_spaces(corrected_words, space_list)
        #  the following two may be tunable and subject to improvement
        char_list = StringUtils.overwrite(list(new_line),
                                          "[bcdfgjkmpqrstvwxzBCDFGHJKMPQRSTVWXZ][i][{}]".format(
                                              self.constants.VOWELS_WO_I),
                                          "j", 1)
        char_list = StringUtils.overwrite(char_list,
                                          "[{}][iI][{}]".format(self.constants.LIQUIDS,
                                                                self.constants.VOWELS_WO_I),
                                          "j", 1)
        return "".join(char_list)
Example #28
0
    def transform_i_to_j(self, line: str) -> str:
        """Transform instances of consonantal i to j
        :param line:
        :return:

        >>> print(VerseScanner().transform_i_to_j("iactātus"))
        jactātus
        >>> print(VerseScanner().transform_i_to_j("bracchia"))
        bracchia
        """

        words = line.split(" ")
        space_list = StringUtils.space_list(line)
        corrected_words = []
        for word in words:
            found = False
            for prefix in self.constants.PREFIXES:
                if word.startswith(prefix) and word != prefix:
                    corrected_words.append(self.syllabifier.convert_consonantal_i(prefix))
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(word[len(prefix):]))
                    found = True
                    break
            if not found:
                corrected_words.append(self.syllabifier.convert_consonantal_i(word))
        new_line = StringUtils.join_syllables_spaces(corrected_words, space_list)
        char_list = StringUtils.overwrite(list(new_line),
                                          r"\b[iī][{}]".format(
                                              self.constants.VOWELS + self.constants.ACCENTED_VOWELS),
                                          "j")
        char_list = StringUtils.overwrite(char_list,
                                          r"\b[I][{}]".format(self.constants.VOWELS_WO_I),
                                          "J")
        char_list = StringUtils.overwrite(char_list, r"[{}][i][{}]".format(
            self.constants.VOWELS_WO_I, self.constants.VOWELS),
                                          "j", 1)
        return "".join(char_list)
Example #29
0
    def correct_invalid_start(self, scansion: str) -> str:
        """The third syllable of a hendecasyllabic line is long, so we will convert it

        :param scansion:
        :return: scansion string with corrected start
        >>> print(HendecasyllableScanner().correct_invalid_start(
        ... "- U U  U U  - U   -  U - U").strip())
        - U -  U U  - U   -  U - U
        """
        mark_list = StringUtils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        corrected = vals[:2] + [self.constants.STRESSED] + vals[3:]
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
Example #30
0
    def correct_invalid_start(self, scansion: str) -> str:
        """The third syllable of a hendecasyllabic line is long, so we will convert it

        :param scansion:
        :return: scansion string with corrected start
        >>> print(HendecasyllableScanner().correct_invalid_start(
        ... "- U U  U U  - U   -  U - U").strip())
        - U -  U U  - U   -  U - U
        """
        mark_list = StringUtils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        corrected = vals[:2] + [self.constants.STRESSED] + vals[3:]
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
Example #31
0
    def correct_penultimate_dactyl_chain(self, scansion: str) -> str:
        """For pentameter the last two feet of the verse are predictable dactyls,
        and do not regularly allow substitutions.
        :param scansion: scansion line thus far
        :return: corrected line of scansion

        >>> print(PentameterScanner().correct_penultimate_dactyl_chain(
        ... "U  U  U  U  U  U  U  U  U  U  U  U  U  U"))
        U  U  U  U  U  U  U  -  U  U  -  U  U  U
        """
        mark_list = StringUtils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        n_vals = vals[:-7] + [self.constants.DACTYL + self.constants.DACTYL] + [vals[-1]]
        corrected = "".join(n_vals)
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
Example #32
0
 def elide_all(self, line: str) -> str:
     """Given a string of space separated syllables, erase with spaces the syllable portions
     that would disappear according to the rules of elision."""
     marks = list(line.translate(self.remove_punct_map))
     all_vowels = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
     tmp = "".join(marks)
     # Elision rules are compound but not cumulative: we place all elision edits into a list
     #  of candidates, and then merge, taking the least of each section of the line.
     candidates = [tmp, self.elide(tmp, r"[{}][{}]\s+[{}]".format(self.constants.CONSONANTS,
                                                                  all_vowels, all_vowels), 1, 1),
                   self.elide(tmp,
                              r"[{}][{}]\s+[hH]".format(self.constants.CONSONANTS, all_vowels),
                              1, 1), self.elide(tmp, r"[aāuū]m\s+[{}]".format(all_vowels), 2),
                   self.elide(tmp, r"ae\s+[{}]".format(all_vowels), 2),
                   self.elide(tmp, r"[{}]\s+[{}]".format(all_vowels, all_vowels), 1),
                   self.elide(tmp, r"[uū]m\s+h", 2)]
     results = StringUtils.merge_elisions(candidates)
     return results
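
The core move behind each candidate is a regular-expression substitution that blanks the elided vowel with a space, so character offsets into the line are preserved for the later scansion step. A minimal, self-contained sketch of one such rule (final vowel before a vowel-initial word); the real method builds several candidates, including the -am/-um and 'ae' endings and h-initial words, and merges them with StringUtils.merge_elisions.

    import re

    def elide_final_vowel(text):
        # replace the elided vowel with a space, keeping the line the same length
        return re.sub(r"[aeiou](\s+[aeiou])", lambda m: " " + m.group(1), text)

    print(elide_final_vowel("gaudia amore"))   # 'gaudi  amore' -- the final 'a' of gaudia drops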
Example #33
0
 def correct_invalid_start(self, scansion: str) -> str:
     """If a hexameter, hendecasyllable, or pentameter scansion starts with a spondee,
     an unstressed syllable in the third position must actually be stressed,
     so we will convert it: - - | U    ->  - - | -
     :param scansion:
     :return:
     >>> print(VerseScanner().correct_invalid_start(
     ... " -   - U   U -  -  U U U U  U U  - -").strip())
     -   - -   - -  -  U U U U  U U  - -
     """
     mark_list = StringUtils.mark_list(scansion)
     raw_scansion = scansion.replace(" ", "")
     if raw_scansion.startswith(self.constants.SPONDEE + self.constants.UNSTRESSED):
         new_scansion = list(self.constants.SPONDEE + self.constants.SPONDEE + raw_scansion[4:])
         corrected = "".join(new_scansion)
         new_sequence = list(" " * len(scansion))
         for idx, car in enumerate(corrected):
             new_sequence[mark_list[idx]] = car
         return "".join(new_sequence)
     return scansion
Example #34
0
    def correct_antepenult_chain(self, scansion: str) -> str:
        """For hendecasyllables the last three feet of the verse are predictable
        and do not regularly allow substitutions.

        :param scansion: scansion line thus far
        :return: corrected line of scansion

        >>> print(HendecasyllableScanner().correct_antepenult_chain(
        ... "-U -UU UU UU UX").strip())
        -U -UU -U -U -X
        """
        mark_list = StringUtils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        new_vals = vals[:len(vals) - 6] + [self.constants.TROCHEE +
                                           self.constants.TROCHEE +
                                           self.constants.STRESSED] + vals[-1:]
        corrected = "".join(new_vals)
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
Example #35
0
 def correct_invalid_start(self, scansion: str) -> str:
     """If a hexameter, hendecasyllable, or pentameter scansion starts with a spondee,
     an unstressed syllable in the third position must actually be stressed,
     so we will convert it: - - | U    ->  - - | -
     :param scansion:
     :return:
     >>> print(VerseScanner().correct_invalid_start(
     ... " -   - U   U -  -  U U U U  U U  - -").strip())
     -   - -   - -  -  U U U U  U U  - -
     """
     mark_list = StringUtils.mark_list(scansion)
     raw_scansion = scansion.replace(" ", "")
     if raw_scansion.startswith(self.constants.SPONDEE +
                                self.constants.UNSTRESSED):
         new_scansion = list(self.constants.SPONDEE +
                             self.constants.SPONDEE + raw_scansion[4:])
         corrected = "".join(new_scansion)
         new_sequence = list(" " * len(scansion))
         for idx, car in enumerate(corrected):
             new_sequence[mark_list[idx]] = car
         return "".join(new_sequence)
     return scansion
Example #36
0
    def correct_antepenult_chain(self, scansion: str) -> str:
        """For hendecasyllables the last three feet of the verse are predictable
        and do not regularly allow substitutions.

        :param scansion: scansion line thus far
        :return: corrected line of scansion

        >>> print(HendecasyllableScanner().correct_antepenult_chain(
        ... "-U -UU UU UU UX").strip())
        -U -UU -U -U -X
        """
        mark_list = StringUtils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        new_vals = vals[:len(vals) - 6] + [
            self.constants.TROCHEE + self.constants.TROCHEE +
            self.constants.STRESSED
        ] + vals[-1:]
        corrected = "".join(new_vals)
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
Example #37
0
 def elide_all(self, line: str) -> str:
     """Given a string of space separated syllables, erase with spaces the syllable portions
     that would disappear according to the rules of elision."""
     marks = list(line.translate(self.remove_punct_map))
     all_vowels = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
     tmp = "".join(marks)
     # Elision rules are compound but not cumulative: we place all elision edits into a list
     #  of candidates, and then merge, taking the least of each section of the line.
     candidates = [
         tmp,
         self.elide(
             tmp, r"[{}][{}]\s+[{}]".format(self.constants.CONSONANTS,
                                            all_vowels, all_vowels), 1, 1),
         self.elide(
             tmp, r"[{}][{}]\s+[hH]".format(self.constants.CONSONANTS,
                                            all_vowels), 1, 1),
         self.elide(tmp, r"[aāuū]m\s+[{}]".format(all_vowels), 2),
         self.elide(tmp, r"ae\s+[{}]".format(all_vowels), 2),
         self.elide(tmp, r"[{}]\s+[{}]".format(all_vowels, all_vowels), 1),
         self.elide(tmp, r"[uū]m\s+h", 2)
     ]
     results = StringUtils.merge_elisions(candidates)
     return results
Example #38
0
    def scan(self, original_line: str, optional_transform: bool = False) -> Verse:
        """Scan a line of Latin pentameter and produce a scansion pattern, and other data.

        >>> scanner = PentameterScanner()
        >>> print(scanner.scan('ex hoc ingrato gaudia amore tibi.'))
        Verse(original='ex hoc ingrato gaudia amore tibi.', scansion='-   -  -   - -   - U  U - U  U U ', meter='pentameter', valid=True, syllable_count=12, accented='ēx hōc īngrātō gaudia amōre tibi.', scansion_notes=['Spondaic pentameter'], syllables = ['ēx', 'hoc', 'īn', 'gra', 'to', 'gau', 'di', 'a', 'mo', 're', 'ti', 'bi'])
        >>> print(scanner.scan(
        ... "in vento et rapida scribere oportet aqua.").scansion) # doctest: +NORMALIZE_WHITESPACE
        -   -    -   U U -    - U   U -  U  U  U
        """
        verse = Verse(original_line, meter='pentameter')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        # conservative i to j
        line = self.transform_i_to_j(line)
        working_line = self.elide_all(line)
        working_line = self.accent_by_position(working_line)
        syllables = self.syllabifier.syllabify(working_line)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            working_line = self.elide_all(working_line)
            working_line = self.accent_by_position(working_line)
            syllables = self.syllabifier.syllabify(working_line)
            verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
        verse.working_line = working_line
        verse.syllable_count = self.syllabifier.get_syllable_count(syllables)
        verse.syllables = syllables
        if verse.syllable_count < 12:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 12p"]]
            return verse
        stresses = self.flag_dipthongs(syllables)
        syllables_wspaces = StringUtils.to_syllables_with_trailing_spaces(working_line, syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        for idx, syl in enumerate(syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # first syllable is always long in Pentameter
        stresses.append(0)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)

        verse.scansion = self.produce_scansion(stresses,
                                               syllables_wspaces, offset_map)
        if len(StringUtils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
            return verse

        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)

        # identify some obvious and probable choices based on the number of syllables
        if verse.syllable_count == 12:  # produce spondees where possible
            candidate = self.make_spondaic(verse.scansion)
            verse.scansion_notes += [self.constants.NOTE_MAP["12p"]]
            return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 14:  # produce dactyls where possible
            candidate = self.make_dactyls(verse.scansion)
            verse.scansion_notes += [self.constants.NOTE_MAP["14p"]]
            return self.assign_candidate(verse, candidate)
        if verse.syllable_count > 14:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 14"]]
            return verse

        smoothed = self.correct_first_two_dactyls(verse.scansion)

        if distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += StringUtils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_penultimate_dactyl_chain(verse.scansion)

        if distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["penultimate dactyl chain"]]
            verse.scansion = smoothed
            stresses += StringUtils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        candidates = self.metrical_validator.closest_pentameter_patterns(verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(StringUtils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    StringUtils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)

                if self.metrical_validator.is_valid_pentameter(tmp_scansion):
                    verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                    return self.assign_candidate(verse, tmp_scansion)

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        if self.optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True)

        verse.accented = self.formatter.merge_line_scansion(verse.original, verse.scansion)
        return verse
Example #39
0
    def correct_dactyl_chain(self, scansion: str) -> str:
        """Three or more unstressed accents in a row is a broken dactyl chain,
         best detected and processed backwards.
         Since this method takes a Procrustean approach to modifying the scansion pattern,
         it is not used by default in the scan method; however, it is available as an optional
         keyword parameter, and users looking to further automate the generation of scansion
         candidates should consider using this as a fallback.
        :param scansion: scansion with broken dactyl chain; inverted amphibrachs not allowed
        :return: corrected line of scansion

        >>> print(HexameterScanner().correct_dactyl_chain(
        ... "-   U U  -  - U U -  - - U U  - x").strip())
        -   - -  -  - U U -  - - U U  - x
        >>> print(HexameterScanner().correct_dactyl_chain(
        ... "-   U  U U  U -     -   -   -  -   U  U -   U").strip())
        -   -  - U  U -     -   -   -  -   U  U -   U
        """
        mark_list = StringUtils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        #  ignore last two positions, save them
        feet = [vals.pop(), vals.pop()]
        length = len(vals)
        idx = length - 1
        while idx > 0:
            one = vals[idx]
            two = vals[idx - 1]
            if idx > 1:
                three = vals[idx - 2]
            else:
                three = ""
            # Dactyl foot is okay, no corrections
            if one == self.constants.UNSTRESSED and \
                            two == self.constants.UNSTRESSED and \
                            three == self.constants.STRESSED:
                feet += [one]
                feet += [two]
                feet += [three]
                idx -= 3
                continue
            # Spondee foot is okay, no corrections
            if one == self.constants.STRESSED and \
                            two == self.constants.STRESSED:
                feet += [one]
                feet += [two]
                idx -= 2
                continue
            # handle "U U U" foot as "- U U"
            if one == self.constants.UNSTRESSED and \
                            two == self.constants.UNSTRESSED and \
                            three == self.constants.UNSTRESSED:
                feet += [one]
                feet += [two]
                feet += [self.constants.STRESSED]
                idx -= 3
                continue
            # handle "U U -" foot as "- -"
            if one == self.constants.STRESSED and \
                            two == self.constants.UNSTRESSED and \
                            three == self.constants.UNSTRESSED:
                feet += [self.constants.STRESSED]
                feet += [self.constants.STRESSED]
                idx -= 2
                continue
            # handle "-  U" foot as "- -"
            if one == self.constants.UNSTRESSED and \
                            two == self.constants.STRESSED:
                feet += [self.constants.STRESSED]
                feet += [two]
                idx -= 2
                continue

        corrected = "".join(feet[::-1])
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
Example #40
0
    def scan(self, original_line: str, optional_transform: bool = False,
             dactyl_smoothing: bool = False) -> Verse:
        """Scan a line of Latin hexameter and produce a scansion pattern, and other data.

        >>> scanner = HexameterScanner()
        >>> print(scanner.scan("impulerit. Tantaene animis caelestibus irae?"))
        Verse(original='impulerit. Tantaene animis caelestibus irae?', scansion='-  U U -    -   -   U U -    - -  U U  -  - ', meter='hexameter', valid=True, syllable_count=15, accented='īmpulerīt. Tāntaene animīs caelēstibus īrae?', scansion_notes=['Valid by positional stresses.'], syllables = ['īm', 'pu', 'le', 'rīt', 'Tān', 'taen', 'a', 'ni', 'mīs', 'cae', 'lēs', 'ti', 'bus', 'i', 'rae'])
        >>> print(scanner.scan(
        ... "Arma virumque cano, Troiae qui prīmus ab ōrīs").scansion) # doctest: +NORMALIZE_WHITESPACE
        -  U  U -   U  U -    -  -   -   - U  U  - -
        >>> # some hexameters need the optional transformations:
        >>> optional_transform_scanner = HexameterScanner(optional_transform=True)
        >>> print(optional_transform_scanner.scan(
        ... "Ītaliam, fāto profugus, Lāvīniaque vēnit").scansion) # doctest: +NORMALIZE_WHITESPACE
        - -  -    - -   U U -    - -  U  U  - U
        >>> print(HexameterScanner().scan(
        ... "lītora, multum ille et terrīs iactātus et alto").scansion) # doctest: +NORMALIZE_WHITESPACE
        - U U   -     -    -   -  -   -  - U  U  -  U
        >>> print(HexameterScanner().scan(
        ... "vī superum saevae memorem Iūnōnis ob īram;").scansion) # doctest: +NORMALIZE_WHITESPACE
        -  U U -    -  -  U U -   - - U  U  - U
        >>> # handle multiple elisions
        >>> print(scanner.scan("monstrum horrendum, informe, ingens, cui lumen ademptum").scansion) # doctest: +NORMALIZE_WHITESPACE
        -        -  -      -  -     -  -      -  - U  U -   U
        >>> # if we have 17 syllables, create a chain of all dactyls
        >>> print(scanner.scan("quadrupedante putrem sonitu quatit ungula campum"
        ... ).scansion) # doctest: +NORMALIZE_WHITESPACE
        -  U U -  U  U  -   U U -   U U  -  U U  -  U
        >>> # if we have 13 syllables exactly, we'll create a spondaic hexameter
        >>> print(HexameterScanner().scan(
        ... "illi inter sese multa vi bracchia tollunt").scansion)  # doctest: +NORMALIZE_WHITESPACE
        -    -  -   - -  -  -  -   -   UU  -  -
        >>> print(HexameterScanner().scan(
        ... "dat latus; insequitur cumulo praeruptus aquae mons").scansion) # doctest: +NORMALIZE_WHITESPACE
        -   U U   -  U  U -   U U -    - -  U  U   -  -
        >>> print(optional_transform_scanner.scan(
        ... "Non quivis videt inmodulata poëmata iudex").scansion) # doctest: +NORMALIZE_WHITESPACE
        -    - -   U U  -  U U - U  U- U U  - -
        >>> print(HexameterScanner().scan(
        ... "certabant urbem Romam Remoramne vocarent").scansion) # doctest: +NORMALIZE_WHITESPACE
        -  - -   -  -   - -   U U -  U  U - -
        >>> # advanced smoothing is available via keyword flags: dactyl_smoothing
        >>> # print(HexameterScanner().scan(
        #... "his verbis: 'o gnata, tibi sunt ante ferendae",
        #... dactyl_smoothing=True).scansion) # doctest: +NORMALIZE_WHITESPACE
        #-   -  -    -   - U   U -  -   -  U  U -   -
        """
        verse = Verse(original_line, meter='hexameter')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        # conservative i to j
        line = self.transform_i_to_j(line)
        working_line = self.elide_all(line)
        working_line = self.accent_by_position(working_line)
        syllables = self.syllabifier.syllabify(working_line)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            working_line = self.elide_all(working_line)
            working_line = self.accent_by_position(working_line)
            syllables = self.syllabifier.syllabify(working_line)
            verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
        verse.working_line = working_line
        verse.syllable_count = self.syllabifier.get_syllable_count(syllables)
        verse.syllables = syllables
        if verse.syllable_count < 12:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 12"]]
            return verse
        stresses = self.flag_dipthongs(syllables)
        syllables_wspaces = StringUtils.to_syllables_with_trailing_spaces(working_line, syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        for idx, syl in enumerate(syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # first syllable is always long in hexameter
        stresses.append(0)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)

        verse.scansion = self.produce_scansion(stresses,
                                               syllables_wspaces, offset_map)
        if len(StringUtils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
            return verse

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)

        # identify some obvious and probable choices based on the number of syllables
        if verse.syllable_count == 17:  # produce all dactyls
            candidate = self.produce_scansion(
                self.metrical_validator.hexameter_known_stresses(),
                syllables_wspaces, offset_map)
            verse.scansion_notes += [self.constants.NOTE_MAP["17"]]
            if self.metrical_validator.is_valid_hexameter(candidate):
                return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 12:  # create all spondee hexameter
            candidate = self.produce_scansion(list(range(12)), syllables_wspaces, offset_map)
            if self.metrical_validator.is_valid_hexameter(candidate):
                verse.scansion_notes += [self.constants.NOTE_MAP["12"]]
                return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 13:  # create spondee hexameter with a dactyl at 5th foot
            known_unaccents = [9, 10]
            last_syllable_accented = False
            for vowel in self.constants.ACCENTED_VOWELS:
                if vowel in verse.syllables[12]:
                    last_syllable_accented = True
            if not last_syllable_accented:
                known_unaccents.append(12)
            if set(known_unaccents) - set(stresses) != len(known_unaccents):
                verse.scansion = self.produce_scansion([x for x in range(13)
                                                        if x not in known_unaccents],
                                                       syllables_wspaces, offset_map)
                verse.scansion_notes += [self.constants.NOTE_MAP["5th dactyl"]]
                if self.metrical_validator.is_valid_hexameter(verse.scansion):
                    return self.assign_candidate(verse, verse.scansion)
        if verse.syllable_count > 17:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 17"]]
            return verse

        smoothed = self.correct_inverted_amphibrachs(verse.scansion)
        if distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            verse.scansion = smoothed
            stresses += StringUtils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_first_two_dactyls(verse.scansion)
        if distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += StringUtils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_invalid_fifth_foot(verse.scansion)
        if distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid 5th"]]
            verse.scansion = smoothed
            stresses += StringUtils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        feet = self.metrical_validator.hexameter_feet(verse.scansion.replace(" ", ""))
        if feet:
            #  Normal good citizens are unwelcome in the house of hexameter
            invalid_feet_in_hexameter = [self.constants.IAMB, self.constants.TROCHEE]
            current_foot = 0
            ending = feet.pop()  # don't process the ending, a possible trochee, add it back after
            scanned_line = ""
            for foot in feet:
                if foot.replace(" ", "") in invalid_feet_in_hexameter:
                    scanned_line = self.invalid_foot_to_spondee(feet, foot, current_foot)
                    scanned_line = scanned_line + ending
                current_foot += 1
            smoothed = self.produce_scansion(stresses +
                                             StringUtils.stress_positions(
                                                 self.constants.STRESSED, scanned_line),
                                             syllables_wspaces, offset_map)

            if self.metrical_validator.is_valid_hexameter(smoothed):
                verse.scansion_notes += [self.constants.NOTE_MAP["invalid foot"]]
                return self.assign_candidate(verse, smoothed)

        # need to do this again, since the scansion has changed
        smoothed = self.correct_inverted_amphibrachs(verse.scansion)
        if distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            verse.scansion = smoothed
            stresses += StringUtils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        candidates = self.metrical_validator.closest_hexameter_patterns(verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(StringUtils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    StringUtils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if self.metrical_validator.is_valid_hexameter(tmp_scansion):
                    verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                    return self.assign_candidate(verse, tmp_scansion)

        # need to do this again, since the scansion has changed
        smoothed = self.correct_inverted_amphibrachs(smoothed)
        if self.metrical_validator.is_valid_hexameter(smoothed):
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            return self.assign_candidate(verse, smoothed)

        if dactyl_smoothing:
            smoothed = self.correct_dactyl_chain(smoothed)
            if distance(verse.scansion, smoothed) > 0:
                verse.scansion_notes += [self.constants.NOTE_MAP["dactyl smoothing"]]
                verse.scansion = smoothed
            if self.metrical_validator.is_valid_hexameter(verse.scansion):
                return self.assign_candidate(verse, verse.scansion)

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        if self.optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True, dactyl_smoothing=True)
        return verse
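The scan method above repeats the same smooth-then-revalidate step for each correction (correct_inverted_amphibrachs, correct_first_two_dactyls, correct_invalid_fifth_foot): apply the correction, record a note if anything changed, and check validity again. That pattern could be expressed once as data; a minimal sketch with hypothetical names, not the library's method (the real code keeps the steps inline because some of them need extra bookkeeping, such as updating stresses):

def apply_corrections(scansion, corrections, is_valid, distance):
    """Apply (correction_fn, note) pairs in order, recording a note whenever
    a correction changes the scansion, and stop at the first valid result."""
    notes = []
    for correct, note in corrections:
        smoothed = correct(scansion)
        if distance(scansion, smoothed) > 0:
            notes.append(note)
            scansion = smoothed
        if is_valid(scansion):
            break
    return scansion, notes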
Exemple #41
0
    def correct_dactyl_chain(self, scansion: str) -> str:
        """Three or more unstressed accents in a row is a broken dactyl chain,
         best detected and processed backwards.

         Since this method takes a Procrustean approach to modifying the scansion pattern,
         it is not used by default in the scan method; however, it is available as an optional
         keyword parameter, and users looking to further automate the generation of scansion
         candidates should consider using this as a fall back.
        :param scansion: scansion with broken dactyl chain; inverted amphibrachs not allowed
        :return: corrected line of scansion

        >>> print(HexameterScanner().correct_dactyl_chain(
        ... "-   U U  -  - U U -  - - U U  - x"))
        -   - -  -  - U U -  - - U U  - x
        >>> print(HexameterScanner().correct_dactyl_chain(
        ... "-   U  U U  U -     -   -   -  -   U  U -   U")) # doctest: +NORMALIZE_WHITESPACE
        -   -  - U  U -     -   -   -  -   U  U -   U
        """
        mark_list = StringUtils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        #  ignore last two positions, save them
        feet = [vals.pop(), vals.pop()]
        length = len(vals)
        idx = length - 1
        while idx > 0:
            one = vals[idx]
            two = vals[idx - 1]
            if idx > 1:
                three = vals[idx - 2]
            else:
                three = ""
            # Dactyl foot is okay, no corrections
            if one == self.constants.UNSTRESSED and \
                            two == self.constants.UNSTRESSED and \
                            three == self.constants.STRESSED:
                feet += [one]
                feet += [two]
                feet += [three]
                idx -= 3
                continue
            # Spondee foot is okay, no corrections
            if one == self.constants.STRESSED and \
                            two == self.constants.STRESSED:
                feet += [one]
                feet += [two]
                idx -= 2
                continue
            # handle "U U U" foot as "- U U"
            if one == self.constants.UNSTRESSED and \
                            two == self.constants.UNSTRESSED and \
                            three == self.constants.UNSTRESSED:
                feet += [one]
                feet += [two]
                feet += [self.constants.STRESSED]
                idx -= 3
                continue
            # handle "U U -" foot as "- -"
            if one == self.constants.STRESSED and \
                            two == self.constants.UNSTRESSED and \
                            three == self.constants.UNSTRESSED:
                feet += [self.constants.STRESSED]
                feet += [self.constants.STRESSED]
                idx -= 2
                continue
            # handle "-  U" foot as "- -"
            if one == self.constants.UNSTRESSED and \
                            two == self.constants.STRESSED:
                feet += [self.constants.STRESSED]
                feet += [two]
                idx -= 2
                continue
        corrected = "".join(feet[::-1])
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
Exemple #42
0
    def syllabify(self, words: str) -> list:
        """Parse a Latin word into a list of syllable strings.
        :param words: a string containing one Latin word or many words separated by spaces.
        :return: a list of strings, each representing a syllable.

        >>> syllabifier = Syllabifier()
        >>> print(syllabifier.syllabify("fuit"))
        ['fu', 'it']
        >>> print(syllabifier.syllabify("libri"))
        ['li', 'bri']
        >>> print(syllabifier.syllabify("contra"))
        ['con', 'tra']
        >>> print(syllabifier.syllabify("iaculum"))
        ['ja', 'cu', 'lum']
        >>> print(syllabifier.syllabify("amo"))
        ['a', 'mo']
        >>> print(syllabifier.syllabify("bracchia"))
        ['brac', 'chi', 'a']
        >>> print(syllabifier.syllabify("deinde"))
        ['dein', 'de']
        >>> print(syllabifier.syllabify("certabant"))
        ['cer', 'ta', 'bant']
        >>> print(syllabifier.syllabify("aere"))
        ['ae', 're']
        >>> print(syllabifier.syllabify("adiungere"))
        ['ad', 'jun', 'ge', 're']
        >>> print(syllabifier.syllabify("mōns"))
        ['mōns']
        >>> print(syllabifier.syllabify("domus"))
        ['do', 'mus']
        >>> print(syllabifier.syllabify("lixa"))
        ['li', 'xa']
        >>> print(syllabifier.syllabify("asper"))
        ['as', 'per']
        >>> #  handle doubles
        >>> print(syllabifier.syllabify("siccus"))
        ['sic', 'cus']
        >>> # handle liquid + liquid
        >>> print(syllabifier.syllabify("almus"))
        ['al', 'mus']
        >>> # handle liquid + mute
        >>> print(syllabifier.syllabify("ambo"))
        ['am', 'bo']
        >>> print(syllabifier.syllabify("anguis"))
        ['an', 'guis']
        >>> print(syllabifier.syllabify("arbor"))
        ['ar', 'bor']
        >>> print(syllabifier.syllabify("pulcher"))
        ['pul', 'cher']
        >>> print(syllabifier.syllabify("ruptus"))
        ['ru', 'ptus']
        >>> print(syllabifier.syllabify("Bīthÿnus"))
        ['Bī', 'thÿ', 'nus']
        >>> print(syllabifier.syllabify("sanguen"))
        ['san', 'guen']
        >>> print(syllabifier.syllabify("unguentum"))
        ['un', 'guen', 'tum']
        >>> print(syllabifier.syllabify("lingua"))
        ['lin', 'gua']
        >>> print(syllabifier.syllabify("linguā"))
        ['lin', 'guā']
        >>> print(syllabifier.syllabify("languidus"))
        ['lan', 'gui', 'dus']

        >>> print(syllabifier.syllabify("suis"))
        ['su', 'is']
        >>> print(syllabifier.syllabify("habui"))
        ['ha', 'bu', 'i']
        >>> print(syllabifier.syllabify("habuit"))
        ['ha', 'bu', 'it']
        >>> print(syllabifier.syllabify("qui"))
        ['qui']
        >>> print(syllabifier.syllabify("quibus"))
        ['qui', 'bus']
        >>> print(syllabifier.syllabify("hui"))
        ['hui']
        >>> print(syllabifier.syllabify("cui"))
        ['cui']
        >>> print(syllabifier.syllabify("huic"))
        ['huic']
        """
        cleaned = words.translate(self.remove_punct_map)
        cleaned = cleaned.replace("qu", "kw")
        cleaned = cleaned.replace("Qu", "Kw")
        cleaned = cleaned.replace("gua", "gwa")
        cleaned = cleaned.replace("Gua", "Gwa")
        cleaned = cleaned.replace("gue", "gwe")
        cleaned = cleaned.replace("Gue", "Gwe")
        cleaned = cleaned.replace("gui", "gwi")
        cleaned = cleaned.replace("Gui", "Gwi")
        cleaned = cleaned.replace("guo", "gwo")
        cleaned = cleaned.replace("Guo", "Gwo")
        cleaned = cleaned.replace("guu", "gwu")
        cleaned = cleaned.replace("Guu", "Gwu")
        cleaned = cleaned.replace("guā", "gwā")
        cleaned = cleaned.replace("Guā", "Gwā")
        cleaned = cleaned.replace("guē", "gwē")
        cleaned = cleaned.replace("Guē", "Gwē")
        cleaned = cleaned.replace("guī", "gwī")
        cleaned = cleaned.replace("Guī", "Gwī")
        cleaned = cleaned.replace("guō", "gwō")
        cleaned = cleaned.replace("Guō", "Gwō")
        cleaned = cleaned.replace("guū", "gwū")
        cleaned = cleaned.replace("Guū", "Gwū")
        items = cleaned.strip().split(" ")

        for char in cleaned:
            if char not in self.ACCEPTABLE_CHARS:
                LOG.error("Unsupported character found in %s " % cleaned)
                return items
        syllables: list = []
        for item in items:
            syllables += self._setup(item)
        for idx, syl in enumerate(syllables):
            if "kw" in syl:
                syl = syl.replace("kw", "qu")
                syllables[idx] = syl
            if "Kw" in syl:
                syl = syl.replace("Kw", "Qu")
                syllables[idx] = syl
            if "gw" in syl:
                syl = syl.replace("gw", "gu")
                syllables[idx] = syl
            if "Gw" in syl:
                syl = syl.replace("Gw", "Gu")
                syllables[idx] = syl

        return StringUtils.remove_blank_spaces(syllables)
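A detail worth noting in syllabify: the qu and gu digraphs are temporarily rewritten as kw and gw so that their u is not syllabified as a vowel, and the substitution is undone on the resulting syllables. A compressed sketch of that round trip using a regex instead of the explicit replace chain (protect_digraphs and restore_digraphs are hypothetical helpers, not library functions):

import re

def protect_digraphs(text: str) -> str:
    # qu always counts as a single consonant; gu does so only before a vowel
    text = text.replace("qu", "kw").replace("Qu", "Kw")
    return re.sub(r"([Gg])u([aeiouāēīōū])", r"\1w\2", text)

def restore_digraphs(syllable: str) -> str:
    return (syllable.replace("kw", "qu").replace("Kw", "Qu")
                    .replace("gw", "gu").replace("Gw", "Gu"))

print(protect_digraphs("unguentum lingua"))  # 'ungwentum lingwa'
print(restore_digraphs("gwen"))              # 'guen'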
Exemple #43
0
    def scan(self,
             original_line: str,
             optional_transform: bool = False,
             dactyl_smoothing: bool = False) -> Hexameter:
        """Scan a line of Latin hexameter and produce a scansion pattern, and other data.
        >>> scanner = HexameterScanner()
        >>> print(scanner.scan("impulerit. Tantaene animis caelestibus irae?"))
        Hexameter( original='impulerit. Tantaene animis caelestibus irae?', scansion='-  U U -    -   -   U U -    - -  U U  -  - ', valid=True, syllable_count=15, accented='īmpulerīt. Tāntaene animīs caelēstibus īrae?', scansion_notes=['Valid by positional stresses.'], syllables = ['īm, pu, le, rīt, Tān, taen, a, ni, mīs, cae, lēs, ti, bus, i, rae'])
        >>> # Note: doctest is sensitive to leading whitespace, so we strip the responses:
        >>> print(scanner.scan(
        ... "Arma virumque cano, Troiae qui prīmus ab ōrīs").scansion.strip())
        -  U  U -   U  U -    -  -   -   - U  U  - -
        >>> print(scanner.scan(
        ... "Ītaliam, fāto profugus, Lāvīniaque vēnit").scansion.strip())
        - -  -    - -   U U -    - -  U  U  - U
        >>> print(HexameterScanner().scan(
        ... "lītora, multum ille et terrīs iactātus et alto").scansion.strip())
        - U U   -     -    -   -  -   -  - U  U  -  U
        >>> print(HexameterScanner().scan(
        ... "vī superum saevae memorem Iūnōnis ob īram;").scansion.strip())
        -  U U -    -  -  U U -   - - U  U  - U
        >>> # handle multiple elisions
        >>> print(scanner.scan(
        ... "monstrum horrendum, informe, ingens, cui lumen ademptum"
        ... ).scansion.strip())
        -        -  -      -  -     -  -      -  - U  U -   U
        >>> # if we have 17 syllables, create a chain of all dactyls
        >>> print(scanner.scan("quadrupedante putrem sonitu quatit ungula campum"
        ... ).scansion.strip())
        -  U U -  U  U  -   U U -   U U  -  U U  -  U
        >>> print(HexameterScanner().scan(
        ... "illi inter sese multa vi bracchia tollunt").scansion.strip())
        -    -  -   - -  -  -  -   -   UU  -  -
        >>> print( HexameterScanner().scan(
        ... "dat latus; insequitur cumulo praeruptus aquae mons").scansion.strip())
        -   U U   -  U  U -   U U -    - -  U  U   -  -
        >>> print(HexameterScanner().scan(
        ... "Non quivis videt inmodulata poëmata iudex").scansion.strip())
        -    - -   U U  -  U U - U  U- U U  - -
        >>> print( HexameterScanner().scan(
        ... "certabant urbem Romam Remoramne vocarent").scansion.strip())
        -  - -   -  -   - -   U U -  U  U - -
        >>> # advanced smoothing is available via keyword flags
        >>> print(HexameterScanner().scan(
        ... "his verbis: 'o gnata, tibi sunt ante ferendae",
        ... dactyl_smoothing=True).scansion.strip() )
        -   -  -    -   - U   U -  -   -  U  U -   -

        """
        hexameter = Hexameter(original_line)
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        # conservative i to j
        line = self.transform_i_to_j(line)
        working_line = self.elide_all(line)
        working_line = self.accent_by_position(working_line)
        syllables = self.syllabifier.syllabify(working_line)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            working_line = self.elide_all(working_line)
            working_line = self.accent_by_position(working_line)
            syllables = self.syllabifier.syllabify(working_line)
            hexameter.scansion_notes += [
                self.constants.NOTE_MAP["optional i to j"]
            ]
        hexameter.working_line = working_line
        hexameter.syllable_count = len(syllables)
        hexameter.syllables = syllables
        stresses = self.flag_dipthongs(syllables)
        syllables_wspaces = StringUtils.to_syllables_with_trailing_spaces(
            working_line, syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        for idx, syl in enumerate(syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # first syllable is always long
        stresses.append(0)
        # second to last syllable is always long
        stresses.append(hexameter.syllable_count - 2)

        def validate(scansion: str) -> bool:
            """Helper closure for validation."""
            if self.metrical_validator.is_valid_hexameter(scansion):
                hexameter.scansion = scansion
                hexameter.valid = True
                hexameter.accented = self.formatter.merge_line_scansion(
                    hexameter.original, hexameter.scansion)
                return True
            return False

        hexameter.scansion = self.produce_scansion(stresses, syllables_wspaces,
                                                   offset_map)
        if len(StringUtils.stress_positions(self.constants.STRESSED, hexameter.scansion)) != \
                len(set(stresses)):
            hexameter.valid = False
            hexameter.scansion_notes += [
                self.constants.NOTE_MAP["invalid syllables"]
            ]
            return hexameter

        if validate(hexameter.scansion):
            hexameter.scansion_notes += [
                self.constants.NOTE_MAP["positionally"]
            ]
            return hexameter

        smoothed = self.correct_inverted_amphibrachs(hexameter.scansion)

        if distance(hexameter.scansion, smoothed) > 0:
            hexameter.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            hexameter.scansion = smoothed
            stresses += StringUtils.differences(hexameter.scansion, smoothed)

        if validate(hexameter.scansion):
            return hexameter

        smoothed = self.correct_invalid_start(hexameter.scansion)

        if distance(hexameter.scansion, smoothed) > 0:
            hexameter.scansion_notes += [
                self.constants.NOTE_MAP["invalid start"]
            ]
            hexameter.scansion = smoothed
            stresses += StringUtils.differences(hexameter.scansion, smoothed)

        if validate(hexameter.scansion):
            return hexameter

        smoothed = self.correct_invalid_fifth_foot(hexameter.scansion)

        if distance(hexameter.scansion, smoothed) > 0:
            hexameter.scansion_notes += [
                self.constants.NOTE_MAP["invalid 5th"]
            ]
            hexameter.scansion = smoothed
            stresses += StringUtils.differences(hexameter.scansion, smoothed)

        if validate(hexameter.scansion):
            return hexameter

        feet = self.metrical_validator.hexameter_feet(
            hexameter.scansion.replace(" ", ""))
        if feet:
            #  Normal good citizens are unwelcome in the house of hexameter
            invalid_feet_in_hexameter = [
                self.constants.IAMB, self.constants.TROCHEE
            ]
            current_foot = 0
            ending = feet.pop()  # don't process the ending, a possible trochee, add it back after
            scanned_line = ""
            for foot in feet:
                if foot.replace(" ", "") in invalid_feet_in_hexameter:
                    scanned_line = self.invalid_foot_to_spondee(
                        feet, foot, current_foot)
                    scanned_line = scanned_line + ending
                current_foot += 1
            smoothed = self.produce_scansion(
                stresses + StringUtils.stress_positions(
                    self.constants.STRESSED, scanned_line), syllables_wspaces,
                offset_map)
            if validate(smoothed):
                hexameter.scansion_notes += [
                    self.constants.NOTE_MAP["invalid foot"]
                ]
                return hexameter

        # need to do this again, since the scansion has changed
        smoothed = self.correct_inverted_amphibrachs(hexameter.scansion)

        if distance(hexameter.scansion, smoothed) > 0:
            hexameter.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            hexameter.scansion = smoothed
            stresses += StringUtils.differences(hexameter.scansion, smoothed)

        if validate(hexameter.scansion):
            return hexameter

        candidates = self.metrical_validator.closest_hexameter_patterns(
            hexameter.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(hexameter.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(StringUtils.differences(hexameter.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    StringUtils.differences(hexameter.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if validate(tmp_scansion):
                    hexameter.scansion = tmp_scansion
                    hexameter.scansion_notes += [
                        self.constants.NOTE_MAP["closest match"]
                    ]
                    return hexameter

        #  identify some obvious and probable choices based on the number of syllables
        if hexameter.syllable_count == 17:  # produce all dactyls
            candidate = self.produce_scansion(
                self.metrical_validator.hexameter_known_stresses(),
                syllables_wspaces, offset_map)
            hexameter.scansion_notes += [self.constants.NOTE_MAP["17"]]
            if validate(candidate):
                return hexameter
        if hexameter.syllable_count == 12:  # create all spondee hexameter
            if validate(
                    self.produce_scansion(list(range(12)), syllables_wspaces,
                                          offset_map)):
                hexameter.scansion_notes += [self.constants.NOTE_MAP["12"]]
                return hexameter
        if hexameter.syllable_count < 12:
            hexameter.valid = False
            hexameter.scansion_notes += [self.constants.NOTE_MAP["< 12"]]
            return hexameter
        if hexameter.syllable_count == 13:  # create spondee hexameter with a dactyl at 5th foot
            known_unaccents = [9, 10, 12]
            if set(known_unaccents) - set(stresses) != len(known_unaccents):
                hexameter.scansion = self.produce_scansion(
                    [x for x in range(13) if x not in known_unaccents],
                    syllables_wspaces, offset_map)
                hexameter.scansion_notes += [
                    self.constants.NOTE_MAP["5th dactyl"]
                ]
                if validate(hexameter.scansion):
                    return hexameter
        if hexameter.syllable_count > 17:
            hexameter.valid = False
            hexameter.scansion_notes += [self.constants.NOTE_MAP["> 17"]]
            return hexameter

        # need to do this again, since the scansion has changed
        smoothed = self.correct_inverted_amphibrachs(smoothed)
        if validate(smoothed):
            hexameter.scansion = smoothed
            hexameter.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            return hexameter

        if dactyl_smoothing:
            smoothed = self.correct_dactyl_chain(smoothed)
            if distance(hexameter.scansion, smoothed) > 0:
                hexameter.scansion_notes += [
                    self.constants.NOTE_MAP["dactyl smoothing"]
                ]
                hexameter.scansion = smoothed
            if validate(hexameter.scansion):
                return hexameter

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        if not optional_transform and not hexameter.valid:
            return self.scan(original_line,
                             optional_transform=True,
                             dactyl_smoothing=True)

        return hexameter
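This older variant factors the success path into a validate closure, so every branch that finds a working scansion assigns the pattern, marks the line valid, and merges the accented form in one place. The same idea in isolation (result, is_valid, and merge here are hypothetical stand-ins for the library's objects and methods):

def make_validator(result, is_valid, merge):
    """Build a closure that records the first valid scansion on result."""
    def validate(scansion: str) -> bool:
        if is_valid(scansion):
            result.scansion = scansion
            result.valid = True
            result.accented = merge(scansion)
            return True
        return False
    return validate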
Exemple #44
0
    def scan(self,
             original_line: str,
             optional_transform: bool = False) -> Verse:
        """Scan a line of Latin hendecasyllables and produce a scansion pattern, and other data.

        :return: a Verse object

        >>> scanner = HendecasyllableScanner()
        >>> print(scanner.scan("Cui dono lepidum novum libellum"))
        Verse(original='Cui dono lepidum novum libellum', scansion='  -  U -  U U -   U -   U -  U ', meter='hendecasyllable', valid=True, syllable_count=11, accented='Cui donō lepidūm novūm libēllum', scansion_notes=['Corrected invalid start.'], syllables = ['Cui', 'do', 'no', 'le', 'pi', 'dūm', 'no', 'vūm', 'li', 'bēl', 'lum'])
        >>> print(scanner.scan(
        ... "ārida modo pumice expolitum?").scansion)  # doctest: +NORMALIZE_WHITESPACE
        - U -  U U  - U   -  U - U
        """
        verse = Verse(original_line, meter='hendecasyllable')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        # conservative i to j
        line = self.transform_i_to_j(line)
        working_line = self.elide_all(line)
        working_line = self.accent_by_position(working_line)
        syllables = self.syllabifier.syllabify(working_line)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            working_line = self.elide_all(working_line)
            working_line = self.accent_by_position(working_line)
            syllables = self.syllabifier.syllabify(working_line)
            verse.scansion_notes += [
                self.constants.NOTE_MAP["optional i to j"]
            ]
        verse.working_line = working_line
        verse.syllable_count = self.syllabifier.get_syllable_count(syllables)
        verse.syllables = syllables
        # identify some obvious and probable choices based on the number of syllables
        if verse.syllable_count > 11:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 11"]]
            return verse
        if verse.syllable_count < 11:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 11"]]
            return verse

        stresses = self.flag_dipthongs(syllables)
        syllables_wspaces = StringUtils.to_syllables_with_trailing_spaces(
            working_line, syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        for idx, syl in enumerate(syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)

        verse.scansion = self.produce_scansion(stresses, syllables_wspaces,
                                               offset_map)
        if len(StringUtils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [
                self.constants.NOTE_MAP["invalid syllables"]
            ]
            return verse

        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_invalid_start(verse.scansion)

        if distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += StringUtils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_antepenult_chain(verse.scansion)

        if distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [
                self.constants.NOTE_MAP["antepenult chain"]
            ]
            verse.scansion = smoothed
            stresses += StringUtils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        candidates = self.metrical_validator.closest_hendecasyllable_patterns(
            verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(StringUtils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    StringUtils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if self.metrical_validator.is_valid_hendecasyllables(
                        tmp_scansion):
                    verse.scansion_notes += [
                        self.constants.NOTE_MAP["closest match"]
                    ]
                    return self.assign_candidate(verse, tmp_scansion)

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        if self.optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True)

        verse.accented = self.formatter.merge_line_scansion(
            verse.original, verse.scansion)
        return verse
Exemple #46
0
    def syllabify(self, words: str) -> list:
        """Parse a Latin word into a list of syllable strings.
        :param words: a string containing one Latin word or many words separated by spaces.
        :return: a list of strings, each representing a syllable.

        >>> syllabifier = Syllabifier()
        >>> print(syllabifier.syllabify("fuit"))
        ['fu', 'it']
        >>> print(syllabifier.syllabify("libri"))
        ['li', 'bri']
        >>> print(syllabifier.syllabify("contra"))
        ['con', 'tra']
        >>> print(syllabifier.syllabify("iaculum"))
        ['ja', 'cu', 'lum']
        >>> print(syllabifier.syllabify("amo"))
        ['a', 'mo']
        >>> print(syllabifier.syllabify("bracchia"))
        ['brac', 'chi', 'a']
        >>> print(syllabifier.syllabify("deinde"))
        ['dein', 'de']
        >>> print(syllabifier.syllabify("certabant"))
        ['cer', 'ta', 'bant']
        >>> print(syllabifier.syllabify("aere"))
        ['ae', 're']
        >>> print(syllabifier.syllabify("adiungere"))
        ['ad', 'jun', 'ge', 're']
        >>> print(syllabifier.syllabify("mōns"))
        ['mōns']
        >>> print(syllabifier.syllabify("domus"))
        ['do', 'mus']
        >>> print(syllabifier.syllabify("lixa"))
        ['li', 'xa']
        >>> print(syllabifier.syllabify("asper"))
        ['as', 'per']
        >>> #  handle doubles
        >>> print(syllabifier.syllabify("siccus"))
        ['sic', 'cus']
        >>> # handle liquid + liquid
        >>> print(syllabifier.syllabify("almus"))
        ['al', 'mus']
        >>> # handle liquid + mute
        >>> print(syllabifier.syllabify("ambo"))
        ['am', 'bo']
        >>> print(syllabifier.syllabify("anguis"))
        ['an', 'guis']
        >>> print(syllabifier.syllabify("arbor"))
        ['ar', 'bor']
        >>> print(syllabifier.syllabify("pulcher"))
        ['pul', 'cher']
        >>> print(syllabifier.syllabify("ruptus"))
        ['ru', 'ptus']
        >>> print(syllabifier.syllabify("Bīthÿnus"))
        ['Bī', 'thÿ', 'nus']
        >>> print(syllabifier.syllabify("sanguen"))
        ['san', 'guen']
        >>> print(syllabifier.syllabify("unguentum"))
        ['un', 'guen', 'tum']
        >>> print(syllabifier.syllabify("lingua"))
        ['lin', 'gua']
        >>> print(syllabifier.syllabify("linguā"))
        ['lin', 'guā']
        >>> print(syllabifier.syllabify("languidus"))
        ['lan', 'gui', 'dus']

        >>> print(syllabifier.syllabify("suis"))
        ['su', 'is']
        >>> print(syllabifier.syllabify("habui"))
        ['ha', 'bu', 'i']
        >>> print(syllabifier.syllabify("habuit"))
        ['ha', 'bu', 'it']
        >>> print(syllabifier.syllabify("qui"))
        ['qui']
        >>> print(syllabifier.syllabify("quibus"))
        ['qui', 'bus']
        >>> print(syllabifier.syllabify("hui"))
        ['hui']
        >>> print(syllabifier.syllabify("cui"))
        ['cui']
        >>> print(syllabifier.syllabify("huic"))
        ['huic']
        """
        cleaned = words.translate(self.remove_punct_map)
        cleaned = cleaned.replace("qu", "kw")
        cleaned = cleaned.replace("Qu", "Kw")
        cleaned = cleaned.replace("gua", "gwa")
        cleaned = cleaned.replace("Gua", "Gwa")
        cleaned = cleaned.replace("gue", "gwe")
        cleaned = cleaned.replace("Gue", "Gwe")
        cleaned = cleaned.replace("gui", "gwi")
        cleaned = cleaned.replace("Gui", "Gwi")
        cleaned = cleaned.replace("guo", "gwo")
        cleaned = cleaned.replace("Guo", "Gwo")
        cleaned = cleaned.replace("guu", "gwu")
        cleaned = cleaned.replace("Guu", "Gwu")
        cleaned = cleaned.replace("guā", "gwā")
        cleaned = cleaned.replace("Guā", "Gwā")
        cleaned = cleaned.replace("guē", "gwē")
        cleaned = cleaned.replace("Guē", "Gwē")
        cleaned = cleaned.replace("guī", "gwī")
        cleaned = cleaned.replace("Guī", "Gwī")
        cleaned = cleaned.replace("guō", "gwō")
        cleaned = cleaned.replace("Guō", "Gwō")
        cleaned = cleaned.replace("guū", "gwū")
        cleaned = cleaned.replace("Guū", "Gwū")
        items = cleaned.strip().split(" ")

        for char in cleaned:
            if char not in self.ACCEPTABLE_CHARS:
                LOG.error("Unsupported character found in %s " % cleaned)
                return items
        syllables: list = []
        for item in items:
            syllables += self._setup(item)
        for idx, syl in enumerate(syllables):
            if "kw" in syl:
                syl = syl.replace("kw", "qu")
                syllables[idx] = syl
            if "Kw" in syl:
                syl = syl.replace("Kw", "Qu")
                syllables[idx] = syl
            if "gw" in syl:
                syl = syl.replace("gw", "gu")
                syllables[idx] = syl
            if "Gw" in syl:
                syl = syl.replace("Gw", "Gu")
                syllables[idx] = syl

        return StringUtils.remove_blank_spaces(syllables)
Exemple #47
0
    def accent_by_position(self, verse_line: str) -> str:
        """Accent vowels according to the rules of scansion.

        :param verse_line: a line of unaccented verse
        :return: the same line with vowels accented by position

        >>> print(VerseScanner().accent_by_position(
        ... "Arma virumque cano, Troiae qui primus ab oris").lstrip())
        Ārma virūmque canō  Trojae qui primus ab oris
        """
        line = verse_line.translate(self.punctuation_substitutions)
        line = self.transform_i_to_j(line)
        marks = list(line)

        # locate and save dipthong positions since we don't want them being accented
        dipthong_positions = []
        for dipth in self.constants.DIPTHONGS:
            if dipth in line:
                dipthong_positions.append(line.find(dipth))

        # Vowels followed by 2 consonants
        # The digraphs ch, ph, th, qu and sometimes gu and su count as single consonants.
        # see http://people.virginia.edu/~jdk3t/epicintrog/scansion.htm
        marks = StringUtils.overwrite(marks, "[{}][{}][{}]".format(
            self.constants.VOWELS,
            self.constants.CONSONANTS,
            self.constants.CONSONANTS_WO_H),
                                      self.constants.STRESSED)
        # one space (or more, where punctuation was 'dropped') may intervene
        marks = StringUtils.overwrite(marks,
                                      r"[{}][{}]\s*[{}]".format(
                                          self.constants.VOWELS,
                                          self.constants.CONSONANTS,
                                          self.constants.CONSONANTS_WO_H),
                                      self.constants.STRESSED)
        # ... if both consonants are in the next word, the vowel may be long
        # .... but it could be short if the vowel is not on the thesis/emphatic part of the foot
        # ... see Gildersleeve and Lodge p.446
        marks = StringUtils.overwrite(marks,
                                      r"[{}]\s*[{}][{}]".format(
                                          self.constants.VOWELS,
                                          self.constants.CONSONANTS,
                                          self.constants.CONSONANTS_WO_H),
                                      self.constants.STRESSED)
        #  x is considered as two letters
        marks = StringUtils.overwrite(marks,
                                      "[{}][xX]".format(self.constants.VOWELS),
                                      self.constants.STRESSED)
        #  z is considered as two letters
        marks = StringUtils.overwrite(marks,
                                      r"[{}][zZ]".format(self.constants.VOWELS),
                                      self.constants.STRESSED)
        original_verse = list(line)
        for idx, word in enumerate(original_verse):
            if marks[idx] == self.constants.STRESSED:
                original_verse[idx] = self.constants.VOWELS_TO_ACCENTS[original_verse[idx]]
        # make sure dipthongs aren't accented
        for idx in dipthong_positions:
            if original_verse[idx + 1] in self.constants.ACCENTS_TO_VOWELS:
                original_verse[idx + 1] = self.constants.ACCENTS_TO_VOWELS[original_verse[idx + 1]]

        return "".join(original_verse)