Python Syllabifier Exemples, cltk.prosody.latin.syllabifier.Syllabifier Python Exemples

Exemple #1

0

Afficher le fichier

    def __init__(self, constants=None, syllabifier=None,
                 optional_transform: bool = False, *args, **kwargs) -> None:
        """
        Wire up the helper objects, regular expressions and pentameter patterns.

        :param constants: None, or an object providing the ScansionConstants attributes;
            a fresh ScansionConstants() is built when None
        :param syllabifier: None, or an object providing the Syllabifier methods;
            a fresh Syllabifier() is built when None
        :param optional_transform: boolean, whether or not to apply aggressive verse
            transformations; stored unchanged on the instance
        :param args: positional arguments passed on to the parent initializer
        :param kwargs: keyword arguments passed on to the parent initializer
        """
        super().__init__(*args, **kwargs)
        if constants is None:
            constants = ScansionConstants()
        self.constants = constants
        if syllabifier is None:
            syllabifier = Syllabifier()
        self.syllabifier = syllabifier
        self.remove_punct_map = string_utils.remove_punctuation_dict()
        self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict()
        consts = self.constants
        self.metrical_validator = MetricalValidator(consts)
        self.formatter = ScansionFormatter(consts)
        self.optional_transform = optional_transform
        # stressed, unstressed, stressed -- each pair optionally separated by whitespace
        amphibrach_pattern = r"{}\s*{}\s*{}".format(
            consts.STRESSED, consts.UNSTRESSED, consts.STRESSED)
        self.inverted_amphibrach_re = re.compile(amphibrach_pattern)
        # a single character class covering vowels, accented vowels, liquids and mutes
        syllable_chars = (consts.VOWELS + consts.ACCENTED_VOWELS +
                          consts.LIQUIDS + consts.MUTES)
        self.syllable_matcher = re.compile(r"[{}]".format(syllable_chars))
        # both pentameter patterns share everything after their first two feet
        shared_tail = (consts.STRESSED + consts.DACTYL +
                       consts.DACTYL + consts.OPTIONAL_ENDING)
        self.SPONDAIC_PENTAMETER = consts.SPONDEE + consts.SPONDEE + shared_tail
        self.DACTYLIC_PENTAMETER = consts.DACTYL + consts.DACTYL + shared_tail

Exemple #2

0

Afficher le fichier

 def __init__(self, punctuation=None, clausula_length=13, elide=True):
     """
     Store the configuration values and create the Syllabifier used by this object.

     :param punctuation: list of punctuation marks; when None the list
         [".", "?", "!", ";", ":"] is used
     :param clausula_length: stored unchanged as ``self.clausula_length``
     :param elide: stored unchanged as ``self.elide``
     """
     default_marks = [".", "?", "!", ";", ":"]
     self.punctuation = default_marks if punctuation is None else punctuation
     self.clausula_length = clausula_length
     self.elide = elide
     self.syllabifier = Syllabifier()

Exemple #3

0

Afficher le fichier

Fichier : test_scansion.py Projet : cltk/cltk

 def test_syllabifier(self):
     """Check Syllabifier.syllabify on one word, several words, and unsupported characters."""
     syllabifier = Syllabifier()

     # a single word is broken into its syllables
     single_word = syllabifier.syllabify("Bīthÿnus")
     self.assertEqual(single_word, ['Bī', 'thÿ', 'nus'])

     # a group of words yields one flat list of syllables
     several_words = syllabifier.syllabify("arbor pulcher ruptus")
     self.assertEqual(several_words,
                      ['ar', 'bor', 'pul', 'cher', 'ru', 'ptus'])

     # character sets not specified by the ScansionConstants class injected into the
     # constructor are not processed; the whole group is then returned word by word
     unsupported = syllabifier.syllabify("Platonis Ψυχη")
     self.assertEqual(unsupported, ['Platonis', 'Ψυχη'])

Exemple #4

0

Afficher le fichier

 def test_syllabifier(self):
     """Run Syllabifier.syllabify over a table of inputs and expected syllable lists."""
     syllabifier = Syllabifier()
     expectations = [
         # break a word into syllables
         ("Bīthÿnus", ['Bī', 'thÿ', 'nus']),
         # break a group of words into a group of syllables
         ("arbor pulcher ruptus", ['ar', 'bor', 'pul', 'cher', 'ru', 'ptus']),
         # character sets that have not been specified by the ScansionConstants class
         # injected into the constructor are not processed; the whole group is rejected
         ("Platonis Ψυχη", ['Platonis', 'Ψυχη']),
     ]
     for text, expected in expectations:
         self.assertEqual(syllabifier.syllabify(text), expected)

Exemple #5

0

Afficher le fichier

    def __init__(self,
                 constants=None,
                 syllabifier=None,
                 optional_transform: bool = False,
                 *args,
                 **kwargs):
        """
        Set up the helper objects, regular expressions and pentameter patterns.

        The ``constants`` and ``syllabifier`` defaults are created here, per instance,
        instead of in the signature: a default written as ``ScansionConstants()`` in the
        signature is evaluated once at definition time and then shared by every instance.

        :param constants: None, or an object providing the ScansionConstants attributes;
            a fresh ScansionConstants() is built when None
        :param syllabifier: None, or an object providing the Syllabifier methods;
            a fresh Syllabifier() is built when None
        :param optional_transform: stored unchanged as ``self.optional_transform``
        :param args: positional arguments passed on to the parent initializer
        :param kwargs: keyword arguments passed on to the parent initializer
        """
        super().__init__(*args, **kwargs)
        self.constants = ScansionConstants() if constants is None else constants
        self.remove_punct_map = StringUtils.remove_punctuation_dict()
        self.punctuation_substitutions = StringUtils.punctuation_for_spaces_dict(
        )
        self.metrical_validator = MetricalValidator(self.constants)
        self.formatter = ScansionFormatter(self.constants)
        self.syllabifier = Syllabifier() if syllabifier is None else syllabifier
        self.optional_transform = optional_transform
        # stressed, unstressed, stressed -- each pair optionally separated by whitespace
        self.inverted_amphibrach_re = re.compile(r"{}\s*{}\s*{}".format(
            self.constants.STRESSED, self.constants.UNSTRESSED,
            self.constants.STRESSED))
        # a single character class covering vowels, accented vowels, liquids and mutes
        self.syllable_matcher = re.compile(
            r"[{}]".format(self.constants.VOWELS +
                           self.constants.ACCENTED_VOWELS +
                           self.constants.LIQUIDS + self.constants.MUTES))
        self.SPONDAIC_PENTAMETER = self.constants.SPONDEE + self.constants.SPONDEE + \
                                   self.constants.STRESSED + self.constants.DACTYL + \
                                   self.constants.DACTYL + self.constants.OPTIONAL_ENDING

        self.DACTYLIC_PENTAMETER = self.constants.DACTYL + self.constants.DACTYL + \
                                   self.constants.STRESSED + self.constants.DACTYL + \
                                   self.constants.DACTYL + self.constants.OPTIONAL_ENDING

Exemple #6

0

Afficher le fichier

 def __init__(self, constants=None, syllabifier=None, **kwargs):
     """
     Set up the helper objects and the two compiled regular expressions.

     :param constants: None, or an object providing the ScansionConstants attributes;
         a fresh ScansionConstants() is built when None
     :param syllabifier: None, or an object providing the Syllabifier methods;
         a fresh Syllabifier() is built when None
     :param kwargs: accepted but not used by this initializer
     """
     self.constants = constants if constants is not None else ScansionConstants()
     self.syllabifier = syllabifier if syllabifier is not None else Syllabifier()
     self.remove_punct_map = string_utils.remove_punctuation_dict()
     self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict()
     self.metrical_validator = MetricalValidator(self.constants)
     self.formatter = ScansionFormatter(self.constants)
     # stressed, unstressed, stressed -- each pair optionally separated by whitespace
     stressed = self.constants.STRESSED
     unstressed = self.constants.UNSTRESSED
     self.inverted_amphibrach_re = re.compile(
         r"{}\s*{}\s*{}".format(stressed, unstressed, stressed))
     # a single character class covering vowels, accented vowels, liquids and mutes
     letters = (self.constants.VOWELS + self.constants.ACCENTED_VOWELS +
                self.constants.LIQUIDS + self.constants.MUTES)
     self.syllable_matcher = re.compile(r"[{}]".format(letters))

Exemple #7

0

Afficher le fichier

 def __init__(self,
              constants=None,
              syllabifier=None,
              **kwargs):
     """
     Set up the helper objects and the two compiled regular expressions.

     The ``constants`` and ``syllabifier`` defaults are created here, per instance,
     instead of in the signature: a default written as ``ScansionConstants()`` in the
     signature is evaluated once at definition time and then shared by every instance.

     :param constants: None, or an object providing the ScansionConstants attributes;
         a fresh ScansionConstants() is built when None
     :param syllabifier: None, or an object providing the Syllabifier methods;
         a fresh Syllabifier() is built when None
     :param kwargs: accepted but not used by this initializer
     """
     self.constants = ScansionConstants() if constants is None else constants
     self.remove_punct_map = string_utils.remove_punctuation_dict()
     self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict(
     )
     self.metrical_validator = MetricalValidator(self.constants)
     self.formatter = ScansionFormatter(self.constants)
     self.syllabifier = Syllabifier() if syllabifier is None else syllabifier
     # stressed, unstressed, stressed -- each pair optionally separated by whitespace
     self.inverted_amphibrach_re = re.compile(r"{}\s*{}\s*{}".format(
         self.constants.STRESSED, self.constants.UNSTRESSED,
         self.constants.STRESSED))
     # a single character class covering vowels, accented vowels, liquids and mutes
     self.syllable_matcher = re.compile(
         r"[{}]".format(self.constants.VOWELS +
                        self.constants.ACCENTED_VOWELS +
                        self.constants.LIQUIDS + self.constants.MUTES))

Exemple #8

0

Afficher le fichier

class VerseScanner:
    """
    The scansion symbols used can be configured by passing a suitable constants class to
    the constructor.
    """
    def __init__(self, constants=None, syllabifier=None, **kwargs):
        """
        Build the scanner's collaborators and compile its regular expressions.

        :param constants: None, or an object providing the ScansionConstants attributes;
            a fresh ScansionConstants() is built when None
        :param syllabifier: None, or an object providing the Syllabifier methods;
            a fresh Syllabifier() is built when None
        :param kwargs: accepted but not used by this initializer
        """
        if constants is None:
            constants = ScansionConstants()
        self.constants = constants
        if syllabifier is None:
            syllabifier = Syllabifier()
        self.syllabifier = syllabifier
        self.remove_punct_map = string_utils.remove_punctuation_dict()
        self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict()
        consts = self.constants
        self.metrical_validator = MetricalValidator(consts)
        self.formatter = ScansionFormatter(consts)
        # stressed, unstressed, stressed -- each pair optionally separated by whitespace
        amphibrach_pattern = r"{}\s*{}\s*{}".format(
            consts.STRESSED, consts.UNSTRESSED, consts.STRESSED)
        self.inverted_amphibrach_re = re.compile(amphibrach_pattern)
        # a single character class covering vowels, accented vowels, liquids and mutes
        syllable_chars = (consts.VOWELS + consts.ACCENTED_VOWELS +
                          consts.LIQUIDS + consts.MUTES)
        self.syllable_matcher = re.compile(r"[{}]".format(syllable_chars))

    def transform_i_to_j(self, line: str) -> str:
        """
        Transform instances of consonantal i to j.

        The line is split on single spaces. A word that starts with one of
        ``self.constants.PREFIXES`` (and is not just that prefix) is converted in two
        pieces, prefix and remainder; every other word is converted whole. The pieces are
        rejoined with ``string_utils.join_syllables_spaces`` and then passed through three
        ``string_utils.overwrite`` substitutions.

        :param line: the text to transform
        :return: the transformed text

        >>> print(VerseScanner().transform_i_to_j("iactātus"))
        jactātus
        >>> print(VerseScanner().transform_i_to_j("bracchia"))
        bracchia
        """
        convert = self.syllabifier.convert_consonantal_i
        gaps = string_utils.space_list(line)
        pieces = []
        for word in line.split(" "):
            for prefix in self.constants.PREFIXES:
                if word != prefix and word.startswith(prefix):
                    # only the first matching prefix is used
                    pieces.append(convert(prefix))
                    pieces.append(convert(word[len(prefix):]))
                    break
            else:
                # no prefix matched: convert the word as a whole
                pieces.append(convert(word))
        rejoined = string_utils.join_syllables_spaces(pieces, gaps)

        vowels = self.constants.VOWELS
        vowels_wo_i = self.constants.VOWELS_WO_I
        # i or ī at a word boundary, followed by a vowel
        chars = string_utils.overwrite(
            list(rejoined),
            r"\b[iī][{}]".format(vowels + self.constants.ACCENTED_VOWELS),
            "j")
        # capital I at a word boundary, followed by a vowel other than i
        chars = string_utils.overwrite(
            chars, r"\b[I][{}]".format(vowels_wo_i), "J")
        # i between two vowels; the trailing 1 is passed through to overwrite as-is
        chars = string_utils.overwrite(
            chars, r"[{}][i][{}]".format(vowels_wo_i, vowels), "j", 1)
        return "".join(chars)

    def transform_i_to_j_optional(self, line: str) -> str:
        """
        Sometimes for the demands of meter a more permissive i to j transformation is warranted.

        Words are converted exactly as in the stricter transformation (prefix and remainder
        separately when a ``self.constants.PREFIXES`` entry matches); the two
        ``string_utils.overwrite`` passes at the end are the permissive part.

        :param line: the text to transform
        :return: the transformed text

        >>> print(VerseScanner().transform_i_to_j_optional("Italiam"))
        Italjam
        >>> print(VerseScanner().transform_i_to_j_optional("Lāvīniaque"))
        Lāvīnjaque
        >>> print(VerseScanner().transform_i_to_j_optional("omnium"))
        omnjum
        """
        gaps = string_utils.space_list(line)
        converted = []
        for word in line.split(" "):
            # first prefix that starts the word without being the whole word, if any
            prefix = next(
                (candidate for candidate in self.constants.PREFIXES
                 if word.startswith(candidate) and word != candidate),
                None)
            if prefix is None:
                converted.append(self.syllabifier.convert_consonantal_i(word))
                continue
            converted.append(self.syllabifier.convert_consonantal_i(prefix))
            converted.append(
                self.syllabifier.convert_consonantal_i(word[len(prefix):]))
        rejoined = string_utils.join_syllables_spaces(converted, gaps)
        #  the following two may be tunable and subject to improvement
        after_consonant = "[bcdfgjkmpqrstvwxzBCDFGHJKMPQRSTVWXZ][i][{}]".format(
            self.constants.VOWELS_WO_I)
        chars = string_utils.overwrite(list(rejoined), after_consonant, "j", 1)
        after_liquid = "[{}][iI][{}]".format(self.constants.LIQUIDS,
                                             self.constants.VOWELS_WO_I)
        chars = string_utils.overwrite(chars, after_liquid, "j", 1)
        return "".join(chars)

    def accent_by_position(self, verse_line: str) -> str:
        """
        Accent vowels according to the rules of scansion.

        Punctuation is first replaced via ``self.punctuation_substitutions`` and consonantal
        i is converted with ``transform_i_to_j``. Positions flagged as stressed by the
        ``string_utils.overwrite`` passes are then replaced with their accented form from
        ``self.constants.VOWELS_TO_ACCENTS``. Finally the second letter of every dipthong
        occurrence is restored to its unaccented form.

        :param verse_line: a line of unaccented verse
        :return: the same line with vowels accented by position

        >>> print(VerseScanner().accent_by_position(
        ... "Arma virumque cano, Troiae qui primus ab oris").lstrip())
        Ārma virūmque canō  Trojae qui primus ab oris
        """
        line = verse_line.translate(self.punctuation_substitutions)
        line = self.transform_i_to_j(line)
        marks = list(line)

        # locate and save EVERY dipthong position since we don't want them being accented;
        # str.find alone would only report the first occurrence of each dipthong
        dipthong_positions = []
        for dipth in self.constants.DIPTHONGS:
            found_at = line.find(dipth)
            while found_at != -1:
                dipthong_positions.append(found_at)
                found_at = line.find(dipth, found_at + 1)

        # Vowels followed by 2 consonants
        # The digraphs ch, ph, th, qu and sometimes gu and su count as single consonants.
        # see http://people.virginia.edu/~jdk3t/epicintrog/scansion.htm
        marks = string_utils.overwrite(
            marks, "[{}][{}][{}]".format(self.constants.VOWELS,
                                         self.constants.CONSONANTS,
                                         self.constants.CONSONANTS_WO_H),
            self.constants.STRESSED)
        # one space (or more for 'dropped' punctuation may intervene)
        marks = string_utils.overwrite(
            marks, r"[{}][{}]\s*[{}]".format(self.constants.VOWELS,
                                             self.constants.CONSONANTS,
                                             self.constants.CONSONANTS_WO_H),
            self.constants.STRESSED)
        # ... if both consonants are in the next word, the vowel may be long
        # .... but it could be short if the vowel is not on the thesis/emphatic part of the foot
        # ... see Gildersleeve and Lodge p.446
        marks = string_utils.overwrite(
            marks, r"[{}]\s*[{}][{}]".format(self.constants.VOWELS,
                                             self.constants.CONSONANTS,
                                             self.constants.CONSONANTS_WO_H),
            self.constants.STRESSED)
        #  x is considered as two letters
        marks = string_utils.overwrite(
            marks, "[{}][xX]".format(self.constants.VOWELS),
            self.constants.STRESSED)
        #  z is considered as two letters
        marks = string_utils.overwrite(
            marks, r"[{}][zZ]".format(self.constants.VOWELS),
            self.constants.STRESSED)
        original_verse = list(line)
        # wherever a stress mark was written, swap the vowel for its accented form
        for idx, char in enumerate(original_verse):
            if marks[idx] == self.constants.STRESSED:
                original_verse[idx] = self.constants.VOWELS_TO_ACCENTS[char]
        # make sure dipthongs aren't accented: un-accent the letter after each start position
        for idx in dipthong_positions:
            second = idx + 1
            if second < len(original_verse) and \
                    original_verse[second] in self.constants.ACCENTS_TO_VOWELS:
                original_verse[second] = self.constants.ACCENTS_TO_VOWELS[
                    original_verse[second]]

        return "".join(original_verse)

    def elide_all(self, line: str) -> str:
        """
        Given a string of space separated syllables, erase with spaces the syllable portions
        that would disappear according to the rules of elision.

        Each rule is applied independently to the punctuation-stripped line through
        ``self.elide``; the untouched line plus every rule's result are then handed to
        ``string_utils.merge_elisions``.

        :param line: the line to process
        :return: the value returned by ``string_utils.merge_elisions``
        """
        stripped = line.translate(self.remove_punct_map)
        consonants = self.constants.CONSONANTS
        all_vowels = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
        # Elision rules are compound but not cumulative: every rule produces its own
        # candidate from the same starting text. Each entry is (regexp, quantity[, offset]).
        rules = [
            (r"[{}][{}]\s+[{}]".format(consonants, all_vowels, all_vowels), 1, 1),
            (r"[{}][{}]\s+[hH]".format(consonants, all_vowels), 1, 1),
            (r"[aāuū]m\s+[{}]".format(all_vowels), 2),
            (r"ae\s+[{}]".format(all_vowels), 2),
            (r"[{}]\s+[{}]".format(all_vowels, all_vowels), 1),
            (r"[uū]m\s+h", 2),
        ]
        candidates = [stripped]
        candidates.extend(self.elide(stripped, *rule) for rule in rules)
        return string_utils.merge_elisions(candidates)

    def calc_offset(self, syllables_spaces: List[str]) -> Dict[int, int]:
        """
        Calculate a dictionary of accent positions from a list of syllables with spaces.

        For each syllable the matches of ``self.syllable_matcher`` are examined; when the
        syllable starts with "qu" (per ``string_utils.starts_with_qu``) its first match is
        skipped. Every match that is a vowel records an offset into the flattened line, so
        the last vowel match of a syllable is the one that is kept.

        :param syllables_spaces: list of syllables, spaces included
        :return: mapping of syllable index to the offset of its vowel in the flattened line
        """
        line = string_utils.flatten(syllables_spaces)
        vowel_chars = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
        offsets = {}  # type: Dict[int, int]
        for idx, target_syllable in enumerate(syllables_spaces):
            skip_qu = string_utils.starts_with_qu(target_syllable)
            for possible in self.syllable_matcher.finditer(target_syllable):
                if skip_qu:
                    # ignore the first match of a syllable that begins with "qu"
                    skip_qu = False
                    continue
                start, end = possible.span()
                if target_syllable[start:end] not in vowel_chars:
                    continue
                # text of the line that precedes this syllable
                preceding = line[:len("".join(syllables_spaces[:idx]))]
                offset = len(preceding) + start
                if line[offset] not in vowel_chars:
                    LOG.error("Problem at line {} offset {}".format(
                        line, offset))
                offsets[idx] = offset
        return offsets

    def produce_scansion(self, stresses: list, syllables_wspaces: List[str],
                         offset_map: Dict[int, int]) -> str:
        """
        Create a scansion string that has stressed and unstressed syllable positions in
        locations that correspond with the original text's syllable vowels.

        The result starts as blanks, as long as the flattened syllables. Unstressed marks
        are written first, then stressed marks, so a stressed mark wins when both target
        the same location. Any exception raised while writing marks is logged and the
        partially filled string is still returned.

        :param stresses: list of syllable positions that are stressed
        :param syllables_wspaces: list of syllables with spaces escaped for punctuation
            or elision
        :param offset_map: dictionary of syllable positions, and an offset amount which is
            the number of spaces to skip in the original line before inserting the accent
        :return: the scansion string
        """
        width = len(string_utils.flatten(syllables_wspaces))
        scansion = [" "] * width
        unstresses = string_utils.get_unstresses(stresses,
                                                 len(syllables_wspaces))
        try:
            for idx in unstresses:
                spot = offset_map.get(idx)
                if spot is None:
                    # syllable has no recorded offset: nothing to mark
                    continue
                scansion[spot] = self.constants.UNSTRESSED
            for idx in stresses:
                spot = offset_map.get(idx)
                if spot is None:
                    continue
                scansion[spot] = self.constants.STRESSED
        except Exception as e:
            LOG.error(
                "problem with syllables; check syllabification {}, {}".format(
                    syllables_wspaces, e))
        return "".join(scansion)

    def flag_dipthongs(self, syllables: List[str]) -> List[int]:
        """
        Return the indexes of the syllables that contain a dipthong.

        A syllable is skipped when ``string_utils.starts_with_qu`` is true for it. An index
        is emitted once per matching entry of ``self.constants.DIPTHONGS``, so it can
        appear more than once when a syllable contains several of them.

        :param syllables: list of syllables
        :return: list of syllable indexes
        """
        return [
            idx
            for idx, syllable in enumerate(syllables)
            for dipthong in self.constants.DIPTHONGS
            if dipthong in syllable
            and not string_utils.starts_with_qu(syllable)
        ]

    def elide(self,
              line: str,
              regexp: str,
              quantity: int = 1,
              offset: int = 0) -> str:
        """
        Erase a section of a line, matching on a regex, pushing in a quantity of blank spaces,
        and jumping forward with an offset if necessary.
        If the elided vowel was strong, the vowel merged with takes on the stress.

        :param line:
        :param regexp:
        :param quantity:
        :param offset:
        :return:

        >>> print(VerseScanner().elide("uvae avaritia", r"[e]\s*[a]"))
        uv   āvaritia
        >>> print(VerseScanner().elide("mare avaritia", r"[e]\s*[a]"))
        mar  avaritia
        """
        matcher = re.compile(regexp)
        positions = matcher.finditer(line)
        new_line = line
        for match in positions:
            (start, end) = match.span()  # pylint: disable=unused-variable
            if (start > 0) and new_line[start - 1:start +
                                        1] in self.constants.DIPTHONGS:
                vowel_to_coerce = new_line[end - 1]
                new_line = new_line[:(start - 1) + offset] + (" " * (quantity + 2)) + \
                           self.constants.stress_accent_dict[vowel_to_coerce] + new_line[end:]
            else:
                new_line = new_line[:start + offset] + \
                           (" " * quantity) + new_line[start + quantity + offset:]
        return new_line

    def correct_invalid_start(self, scansion: str) -> str:
        """
        If a hexameter, hendecasyllables, or pentameter scansion starts with spondee,
        an unstressed syllable in the third position must actually be stressed,
        so we will convert it: - - | U    ->  - - | -

        When the space-stripped scansion starts with a spondee followed by an unstressed
        mark, its first four marks are replaced by two spondees and every mark is written
        back at the position ``string_utils.mark_list`` reported for it. Otherwise the
        scansion is returned unchanged.

        :param scansion: the scansion pattern, spaces included
        :return: the corrected scansion, same length as the input

        >>> print(VerseScanner().correct_invalid_start(
        ... " -   - U   U -  -  U U U U  U U  - -").strip())
        -   - -   - -  -  U U U U  U U  - -
        """
        positions = string_utils.mark_list(scansion)
        compact = scansion.replace(" ", "")
        spondee = self.constants.SPONDEE
        if not compact.startswith(spondee + self.constants.UNSTRESSED):
            return scansion
        corrected = spondee + spondee + compact[4:]
        rebuilt = [" "] * len(scansion)
        for idx, mark in enumerate(corrected):
            rebuilt[positions[idx]] = mark
        return "".join(rebuilt)

    def correct_first_two_dactyls(self, scansion: str) -> str:
        """
        If a hexameter or pentameter starts with spondee,
        an unstressed syllable in the third position must actually be stressed,
        so we will convert it: - - | U    ->  - - | -
        And/or if the starting pattern is spondee + trochee + stressed, then the unstressed
        trochee can be corrected: - - | - u | -   ->  - - | - -| -

        The first correction is delegated to ``correct_invalid_start``; the second is
        applied to its result, with marks written back at the positions that
        ``string_utils.mark_list`` reported for the original scansion.

        :param scansion: the scansion pattern, spaces included
        :return: the corrected scansion

        >>> print(VerseScanner().correct_first_two_dactyls(
        ... " -   - U   U -  -  U U U U  U U  - -")) # doctest: +NORMALIZE_WHITESPACE
         -   - -   - -  -  U U U U  U U  - -
        """
        positions = string_utils.mark_list(scansion)
        new_line = self.correct_invalid_start(scansion)
        compact = new_line.replace(" ", "")
        spondee = self.constants.SPONDEE
        stressed = self.constants.STRESSED
        if not compact.startswith(spondee + self.constants.TROCHEE + stressed):
            return new_line
        # replace the first five marks with spondee, spondee, stressed
        corrected = spondee + spondee + stressed + compact[5:]
        rebuilt = [" "] * len(scansion)
        for idx, mark in enumerate(corrected):
            rebuilt[positions[idx]] = mark
        return "".join(rebuilt)

    def assign_candidate(self, verse: Verse, candidate: str) -> Verse:
        """
        Helper method; make sure that the verse object is properly packaged.

        :param verse:
        :param candidate:
        :return:
        """
        verse.scansion = candidate
        verse.valid = True
        verse.accented = self.formatter.merge_line_scansion(
            verse.original, verse.scansion)
        return verse

Exemple #9

0

Afficher le fichier

    def ProcessLine(self, givenLine, df):
        """
        Add one row per syllable of ``givenLine`` to ``df`` and return the new frame.

        Every ``word`` element of the line is lower-cased and syllabified. Its ``sy``
        attribute is cut into two-character chunks, one per syllable: the first character
        is the foot and the second the position within the foot (``feet_pos``). A syllable
        without a complete chunk gets empty ``foot``/``feet_pos`` and length -1.

        :param givenLine: element supporting ``find_all('word')`` and ``['name']``
            (presumably a BeautifulSoup tag -- verify against the caller)
        :param df: DataFrame the rows are added to; it is not modified in place
        :return: ``df`` followed by the new rows, re-indexed from 0; ``df`` itself when
            the line yields no syllables
        """
        # Local import keeps this method self-contained; DataFrame.append was removed in
        # pandas 2.0, so rows are collected and concatenated once at the end.
        import pandas as pd

        syllabifier = Syllabifier()
        # translation table that strips punctuation; built once, not once per syllable
        punctuation_table = str.maketrans('', '', string.punctuation)
        # feet_pos code -> length; '' means no scansion is available for the syllable
        length_by_feet_pos = {'A': 1, 'T': 1, 'b': 0, 'c': 0, '': -1}

        words = givenLine.find_all('word')
        line = givenLine['name']

        rows = []
        for word in words:
            # NOTE(review): word.string can be None for some elements -- confirm input
            mySyllables = syllabifier.syllabify(word.string.lower())
            # We now want to split every syllable to match its scansion.
            item = word['sy']
            n = 2
            myScansions = [item[i:i + n] for i in range(0, len(item), n)]

            for i, mySyllable in enumerate(mySyllables):
                # To remove punctuation.
                mySyllable = mySyllable.translate(punctuation_table)

                # A missing or truncated chunk means no scansion for this syllable.
                # (Previously this path hit an undefined name and raised NameError.)
                myScansion = myScansions[i] if i < len(myScansions) else ''
                if len(myScansion) >= 2:
                    foot = myScansion[0]
                    feet_pos = myScansion[1]
                else:
                    foot = feet_pos = ''

                length = length_by_feet_pos.get(feet_pos)
                if length is None:
                    print('Error occured determining feet_pos of syllable')
                    # unknown code: record it as "no length" instead of leaving
                    # length unbound or stale from the previous syllable
                    length = -1

                # Now, fill the dataframe: TODO: split length in foot and length
                rows.append({
                    'author': self.author,
                    'text': self.title,
                    'line': line,
                    'syllable': mySyllable,
                    'foot': foot,
                    'feet_pos': feet_pos,
                    'length': length
                })

        if rows:
            df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

        return df

Exemple #10

0

Afficher le fichier

class Scansion:
    """
    Preprocesses Latin text for prose rhythm analysis.
    """

    # Vowel letters written without a macron.
    SHORT_VOWELS = ["a", "e", "i", "o", "u", "y"]
    # Vowel letters written with a macron.
    LONG_VOWELS = ["ā", "ē", "ī", "ō", "ū"]
    VOWELS = SHORT_VOWELS + LONG_VOWELS
    # Together with LONG_VOWELS these mark a syllable as long by nature in
    # _tokenize_syllables.
    DIPHTHONGS = ["ae", "au", "ei", "oe", "ui"]

    SINGLE_CONSONANTS = [
        "b", "c", "d", "g", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v",
        "f", "j"
    ]
    # A single one of these letters is enough to make the preceding syllable
    # long by position (see _tokenize_syllables / _tokenize_words).
    DOUBLE_CONSONANTS = ["x", "z"]
    CONSONANTS = SINGLE_CONSONANTS + DOUBLE_CONSONANTS
    # NOTE(review): not referenced by the methods of this class.
    DIGRAPHS = ["ch", "ph", "th", "qu"]
    # A mute followed by a liquid is reported as (False, "mute+liquid")
    # instead of lengthening the preceding syllable.
    LIQUIDS = ["r", "l"]
    MUTES = ["b", "p", "d", "t", "c", "g"]
    # Mute+liquid pairs that _tokenize_syllables excludes from that rule.
    MUTE_LIQUID_EXCEPTIONS = ["gl", "bl"]
    # NOTE(review): not referenced by the methods of this class.
    NASALS = ["m", "n"]
    # Clusters looked for in the first syllable of a word by _tokenize_words;
    # a hit tags the previous word's final syllable as (False, "sest").
    SESTS = ["sc", "sm", "sp", "st", "z"]

    def __init__(self, punctuation=None, clausula_length=13, elide=True):
        """
        Store the scanner configuration and create a syllabifier.

        :param punctuation: list of punctuation marks kept on the instance;
            when None a fresh list of ".", "?", "!", ";" and ":" is used
        :param clausula_length: stored as ``self.clausula_length``; scan_text
            emits at most this many symbols per sentence
        :param elide: stored as ``self.elide``; when true, scan_text skips
            syllables flagged as elided
        """
        self.punctuation = (
            [".", "?", "!", ";", ":"] if punctuation is None else punctuation
        )
        self.clausula_length = clausula_length
        self.elide = elide
        self.syllabifier = Syllabifier()

    def _tokenize_syllables(self, word: str) -> List[Dict[str, Any]]:
        """
        Tokenize syllables for word.
        "mihi" -> [{"syllable": "mi", index: 0, ... } ... ]
        Syllable properties:
            syllable: string -> syllable
            index: int -> postion in word
            long_by_nature: bool -> is syllable long by nature
            accented: bool -> does receive accent
            long_by_position: bool -> is syllable long by position
        :param word: string
        :return: list

        >>> Scansion()._tokenize_syllables("mihi")
        [{'syllable': 'mi', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'hi', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("ivi")
        [{'syllable': 'i', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'vi', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("audītū")
        [{'syllable': 'au', 'index': 0, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'dī', 'index': 1, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'tū', 'index': 2, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("ā")
        [{'syllable': 'ā', 'index': 0, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': True}]
        >>> Scansion()._tokenize_syllables("conjiciō")
        [{'syllable': 'con', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': False}, {'syllable': 'ji', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'ci', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'ō', 'index': 3, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("lingua")
        [{'syllable': 'lin', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'gua', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("abrante")
        [{'syllable': 'ab', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, 'mute+liquid'), 'accented': False}, {'syllable': 'ran', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'te', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("redemptor")
        [{'syllable': 'red', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'em', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'ptor', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("nagrante")
        [{'syllable': 'na', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, 'mute+liquid'), 'accented': False}, {'syllable': 'gran', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'te', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        """
        syllable_tokens = []
        syllables = self.syllabifier.syllabify(word)

        longs = self.LONG_VOWELS + self.DIPHTHONGS

        for i, _ in enumerate(syllables):
            # basic properties
            syllable_dict = {
                "syllable": syllables[i],
                "index": i,
                "elide": (False, None)
            }

            # is long by nature
            if any(long in syllables[i] for long in longs):
                if syllables[i][:3] != "qui":
                    syllable_dict["long_by_nature"] = True
                else:
                    syllable_dict["long_by_nature"] = False
            else:
                syllable_dict["long_by_nature"] = False

            # long by position intra word
            if i < len(syllables) - 1 and \
                    syllable_dict["syllable"][-1] in self.CONSONANTS:
                if syllable_dict["syllable"][-1] in self.MUTES and syllables[i + \
                    1][0] in self.LIQUIDS and syllable_dict["syllable"][-1] + syllables[i + 1][0] not in self.MUTE_LIQUID_EXCEPTIONS:
                    syllable_dict["long_by_position"] = \
                        (False, "mute+liquid")
                elif syllable_dict["syllable"][-1] in self.DOUBLE_CONSONANTS or \
                        syllables[i + 1][0] in self.CONSONANTS:
                    syllable_dict["long_by_position"] = (True, None)
                else:
                    syllable_dict["long_by_position"] = (False, None)
            elif i < len(syllables) - 1 and syllable_dict["syllable"][-1] in \
                    self.VOWELS and len(syllables[i + 1]) > 1:
                if syllables[i + 1][0] in self.MUTES and syllables[i + 1][1] in self.LIQUIDS and syllables[i + \
                    1][0] + syllables[i + 1][1] not in self.MUTE_LIQUID_EXCEPTIONS:
                    syllable_dict["long_by_position"] = \
                        (False, "mute+liquid")
                elif syllables[i + 1][0] in self.CONSONANTS and syllables[i + 1][1] in \
                        self.CONSONANTS or syllables[i + 1][0] in self.DOUBLE_CONSONANTS:
                    syllable_dict["long_by_position"] = (True, None)
                else:
                    syllable_dict["long_by_position"] = (False, None)
            elif len(syllable_dict["syllable"]) > 2 and syllable_dict["syllable"][-1] in self.CONSONANTS and \
                    syllable_dict["syllable"][-2] in self.CONSONANTS and syllable_dict["syllable"][-3] in self.VOWELS:
                syllable_dict["long_by_position"] = (True, None)
            else:
                syllable_dict["long_by_position"] = (False, None)

            syllable_tokens.append(syllable_dict)

            # is accented
            if len(syllables) > 2 and i == len(syllables) - 2:
                if syllable_dict["long_by_nature"] or syllable_dict[
                        "long_by_position"][0]:
                    syllable_dict["accented"] = True
                else:
                    syllable_tokens[i - 1]["accented"] = True
            elif len(syllables) == 2 and i == 0 or len(syllables) == 1:
                syllable_dict["accented"] = True

            syllable_dict[
                "accented"] = False if "accented" not in syllable_dict else True

        return syllable_tokens

    def _tokenize_words(self, sentence: str) -> List[Dict[str, Any]]:
        """
        Tokenize words for sentence.
        "Puella bona est" -> [{word: puella, index: 0, ... }, ... ]
        Word properties:
            word: string -> word
            index: int -> position in sentence
            syllables: list -> list of syllable objects
            syllables_count: int -> number of syllables in word
        :param sentence: string
        :return: list

        >>> Scansion()._tokenize_words('dedērunt te miror antōnī quorum.')
        [{'word': 'dedērunt', 'index': 0, 'syllables': [{'syllable': 'de', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'dē', 'index': 1, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'runt', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': False}], 'syllables_count': 3}, {'word': 'te', 'index': 1, 'syllables': [{'syllable': 'te', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}, {'word': 'miror', 'index': 2, 'syllables': [{'syllable': 'mi', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'ror', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}, {'word': 'antōnī', 'index': 3, 'syllables': [{'syllable': 'an', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': False}, {'syllable': 'tō', 'index': 1, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'nī', 'index': 2, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 3}, {'word': 'quorum.', 'index': 4, 'syllables': [{'syllable': 'quo', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'rum', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}]
        >>> Scansion()._tokenize_words('a spes co i no xe cta.')
        [{'word': 'a', 'index': 0, 'syllables': [{'syllable': 'a', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, 'sest'), 'accented': True}], 'syllables_count': 1}, {'word': 'spes', 'index': 1, 'syllables': [{'syllable': 'spes', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}, {'word': 'co', 'index': 2, 'syllables': [{'syllable': 'co', 'index': 0, 'elide': (True, 'weak'), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}, {'word': 'i', 'index': 3, 'syllables': [{'syllable': 'i', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}, {'word': 'no', 'index': 4, 'syllables': [{'syllable': 'no', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}, {'word': 'xe', 'index': 5, 'syllables': [{'syllable': 'xe', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}, {'word': 'cta.', 'index': 6, 'syllables': [{'syllable': 'cta', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}]
        >>> Scansion()._tokenize_words('x')
        []
        >>> Scansion()._tokenize_words('atae amo.')
        [{'word': 'atae', 'index': 0, 'syllables': [{'syllable': 'a', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'tae', 'index': 1, 'elide': (True, 'strong'), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}, {'word': 'amo.', 'index': 1, 'syllables': [{'syllable': 'a', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'mo', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}]
        >>> Scansion()._tokenize_words('bar rid.')
        [{'word': 'bar', 'index': 0, 'syllables': [{'syllable': 'bar', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}, {'word': 'rid.', 'index': 1, 'syllables': [{'syllable': 'rid', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}]
        >>> Scansion()._tokenize_words('ba brid.')
        [{'word': 'ba', 'index': 0, 'syllables': [{'syllable': 'ba', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, 'mute+liquid'), 'accented': True}], 'syllables_count': 1}, {'word': 'brid.', 'index': 1, 'syllables': [{'syllable': 'brid', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}]
        """
        tokens = []
        split_sent = [word for word in sentence.split(" ") if word != '']
        for i, word in enumerate(split_sent):
            if len(word) == 1 and word not in self.VOWELS:
                break
            # basic properties
            word_dict = {"word": split_sent[i], "index": i}

            # syllables and syllables count
            word_dict["syllables"] = self._tokenize_syllables(split_sent[i])
            word_dict["syllables_count"] = len(word_dict["syllables"])
            if i != 0 and word_dict["syllables"][0]["syllable"][0] in \
                    self.VOWELS or i != 0 and \
                    word_dict["syllables"][0]["syllable"][0] == "h":
                last_syll_prev_word = tokens[i - 1]["syllables"][-1]
                if last_syll_prev_word["syllable"][-1] in \
                        self.LONG_VOWELS or \
                        last_syll_prev_word["syllable"][-1] == "m":
                    last_syll_prev_word["elide"] = (True, "strong")
                elif len(last_syll_prev_word["syllable"]) > 1 and \
                        last_syll_prev_word["syllable"][-2:] in self.DIPHTHONGS:
                    last_syll_prev_word["elide"] = (True, "strong")
                elif last_syll_prev_word["syllable"][-1] in self.SHORT_VOWELS:
                    last_syll_prev_word["elide"] = (True, "weak")
            # long by position inter word
            if i > 0 and tokens[i - 1]["syllables"][-1]["syllable"][-1] in \
                    self.CONSONANTS and \
                    word_dict["syllables"][0]["syllable"][0] in self.CONSONANTS:
                # previous word ends in consonant and current word begins with
                # consonant
                tokens[i - 1]["syllables"][-1]["long_by_position"] = (True,
                                                                      None)
            elif i > 0 and tokens[i - 1]["syllables"][-1]["syllable"][-1] in \
                    self.VOWELS and \
                    word_dict["syllables"][0]["syllable"][0] in self.CONSONANTS:
                # previous word ends in vowel and current word begins in
                # consonant
                if any(sest in word_dict["syllables"][0]["syllable"]
                       for sest in self.SESTS):
                    # current word begins with sest
                    tokens[i - \
                        1]["syllables"][-1]["long_by_position"] = (False, "sest")
                elif word_dict["syllables"][0]["syllable"][0] in self.MUTES and \
                        word_dict["syllables"][0]["syllable"][1] in self.LIQUIDS:
                    # current word begins with mute + liquid
                    tokens[i - \
                        1]["syllables"][-1]["long_by_position"] = (False, "mute+liquid")
                elif word_dict["syllables"][0]["syllable"][0] in \
                        self.DOUBLE_CONSONANTS or\
                        word_dict["syllables"][0]["syllable"][1] in self.CONSONANTS:
                    # current word begins 2 consonants
                    tokens[i - \
                        1]["syllables"][-1]["long_by_position"] = (True, None)

            tokens.append(word_dict)

        return tokens

    def tokenize(self, text: str) -> List[Dict[str, Any]]:
        """
        Tokenize text on supplied characters.
        "Puella bona est. Puer malus est." ->
        [ [{word: puella, syllables: [...], index: 0}, ... ], ... ]
        :return:list

        >>> Scansion().tokenize('puella bona est. puer malus est.')
        [{'plain_text_sentence': 'puella bona est', 'structured_sentence': [{'word': 'puella', 'index': 0, 'syllables': [{'syllable': 'pu', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'el', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'la', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 3}, {'word': 'bona', 'index': 1, 'syllables': [{'syllable': 'bo', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'na', 'index': 1, 'elide': (True, 'weak'), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}, {'word': 'est', 'index': 2, 'syllables': [{'syllable': 'est', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}]}, {'plain_text_sentence': ' puer malus est', 'structured_sentence': [{'word': 'puer', 'index': 0, 'syllables': [{'syllable': 'pu', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'er', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': False}], 'syllables_count': 2}, {'word': 'malus', 'index': 1, 'syllables': [{'syllable': 'ma', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'lus', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}, {'word': 'est', 'index': 2, 'syllables': [{'syllable': 'est', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': 
True}], 'syllables_count': 1}]}, {'plain_text_sentence': '', 'structured_sentence': []}]
        """

        tokenized_sentences = text.split('.')

        tokenized_text = []
        for sentence in tokenized_sentences:
            sentence_dict = {}  # type: ignore
            sentence_dict["plain_text_sentence"] = sentence
            sentence_dict["structured_sentence"] = self._tokenize_words(
                sentence)  # type: ignore
            tokenized_text.append(sentence_dict)

        return tokenized_text

    def scan_text(self, text: str) -> List[str]:
        """
        Return a flat list of rhythms.
        Desired clausula length is passed as a parameter. Clausula shorter than the specified
        length can be exluded.
        :return:

        >>> Scansion().scan_text('dedērunt te miror antōnī quorum. sī quid est in mē ingenī jūdicēs quod sentiō.')
        ['u--uuu---ux', 'u---u--u---ux']
        """
        tokens = self.tokenize(text)
        clausulae = []
        for sentence in tokens:
            sentence_clausula = []  # type: List[str]
            syllables = [
                word['syllables'] for word in sentence['structured_sentence']
            ]
            flat_syllables = [
                syllable for word in syllables for syllable in word
            ]
            if self.elide:
                flat_syllables = [
                    syll for syll in flat_syllables if not syll['elide'][0]
                ][:-1][::-1]
            for syllable in flat_syllables:
                if len(sentence_clausula) < self.clausula_length - 1:
                    if syllable['long_by_nature'] or syllable[
                            'long_by_position'][0]:
                        sentence_clausula.append('-')
                    else:
                        sentence_clausula.append('u')
            sentence_clausula = sentence_clausula[::-1]
            sentence_clausula.append('x')
            clausulae.append(''.join(sentence_clausula))
        clausulae = clausulae[:-1]
        return clausulae

Exemple #11

0

Afficher le fichier

class PentameterScanner(VerseScanner):
    """The scansion symbols used can be configured by passing a suitable constants class to
    the constructor."""

    def __init__(self, constants=None, syllabifier=None,
                 optional_transform: bool = False, *args, **kwargs) -> None:
        """
        Set up the helpers, regular expressions and reference patterns used
        when scanning.

        :param constants: None or a class that implements ScansionConstants
        :param syllabifier: None or a class that implements Syllabifier methods
        :param optional_transform: boolean, whether or not to apply aggressive verse transformations.
        :param args: positional arguments forwarded to the parent constructor
        :param kwargs: keyword arguments forwarded to the parent constructor
        """
        super().__init__(*args, **kwargs)

        if constants is None:
            constants = ScansionConstants()
        self.constants = constants
        if syllabifier is None:
            syllabifier = Syllabifier()
        self.syllabifier = syllabifier

        self.remove_punct_map = string_utils.remove_punctuation_dict()
        self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict()
        self.metrical_validator = MetricalValidator(self.constants)
        self.formatter = ScansionFormatter(self.constants)
        self.optional_transform = optional_transform

        symbols = self.constants
        # stressed, unstressed, stressed, with optional whitespace in between
        self.inverted_amphibrach_re = re.compile(
            r"{}\s*{}\s*{}".format(
                symbols.STRESSED, symbols.UNSTRESSED, symbols.STRESSED))
        # character class built from vowels, accented vowels, liquids, mutes
        matchable = symbols.VOWELS + symbols.ACCENTED_VOWELS + \
            symbols.LIQUIDS + symbols.MUTES
        self.syllable_matcher = re.compile(r"[{}]".format(matchable))

        # Both reference patterns differ only in their first two feet.
        self.SPONDAIC_PENTAMETER = symbols.SPONDEE + symbols.SPONDEE + \
            symbols.STRESSED + symbols.DACTYL + \
            symbols.DACTYL + symbols.OPTIONAL_ENDING
        self.DACTYLIC_PENTAMETER = symbols.DACTYL + symbols.DACTYL + \
            symbols.STRESSED + symbols.DACTYL + \
            symbols.DACTYL + symbols.OPTIONAL_ENDING

    def scan(self, original_line: str, optional_transform: bool = False) -> Verse:
        """
        Scan a line of Latin pentameter and produce a scansion pattern, and other data.

        The line is normalised (punctuation to spaces, i-to-j transform,
        elision, accenting by position), syllabified, and a first scansion is
        produced from the collected stress positions. If that scansion is not
        a valid pentameter, a series of corrections is tried in order; the
        first one that validates is returned through ``assign_candidate``.
        Lines with fewer than 12 or more than 14 syllables are returned with
        ``valid`` set to False.

        :param original_line: the original line of Latin verse
        :param optional_transform: whether or not to perform i to j transform for syllabification
        :return: a Verse object

        >>> scanner = PentameterScanner()
        >>> print(scanner.scan('ex hoc ingrato gaudia amore tibi.'))
        Verse(original='ex hoc ingrato gaudia amore tibi.', scansion='-   -  -   - -   - U  U - U  U U ', meter='pentameter', valid=True, syllable_count=12, accented='ēx hōc īngrātō gaudia amōre tibi.', scansion_notes=['Spondaic pentameter'], syllables = ['ēx', 'hoc', 'īn', 'gra', 'to', 'gau', 'di', 'a', 'mo', 're', 'ti', 'bi'])
        >>> print(scanner.scan(
        ... "in vento et rapida scribere oportet aqua.").scansion) # doctest: +NORMALIZE_WHITESPACE
        -   -    -   U U -    - U   U -  U  U  U
        """
        verse = Verse(original_line, meter='pentameter')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
        else:
            working_line = self.transform_i_to_j(line) # conservative i to j
        working_line = self.elide_all(working_line)
        verse.working_line = self.accent_by_position(working_line)
        verse.syllables = self.syllabifier.syllabify(verse.working_line)
        verse.syllable_count = self.syllabifier.get_syllable_count(verse.syllables)
        # too few syllables: give up early
        if verse.syllable_count < 12:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 12p"]]
            return verse
        # collect the indices of syllables to be marked as stressed
        stresses = self.flag_dipthongs(verse.syllables)
        syllables_wspaces = string_utils.to_syllables_with_trailing_spaces(verse.working_line, verse.syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        for idx, syl in enumerate(verse.syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # first syllable is always long in Pentameter
        stresses.append(0)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)

        verse.scansion = self.produce_scansion(stresses,
                                               syllables_wspaces, offset_map)
        # the number of stress marks in the scansion must equal the number of
        # distinct stressed indices, otherwise the syllables are unusable
        if len(string_utils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
            return verse

        # already valid from positional information alone
        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)

        # identify some obvious and probably choices based on number of syllables
        if verse.syllable_count == 12:  # produce spondees where possible
            candidate = self.make_spondaic(verse.scansion)
            verse.scansion_notes += [self.constants.NOTE_MAP["12p"]]
            return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 14:  # produce dactyls where possible
            candidate = self.make_dactyls(verse.scansion)
            verse.scansion_notes += [self.constants.NOTE_MAP["14p"]]
            return self.assign_candidate(verse, candidate)
        if verse.syllable_count > 14:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 14"]]
            return verse

        # only 13-syllable lines reach this point
        smoothed = self.correct_first_two_dactyls(verse.scansion)

        # a non-zero distance means the correction changed the scansion
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            # NOTE(review): verse.scansion was overwritten on the previous
            # line, so both arguments are now the same string and this
            # presumably adds nothing to ``stresses``; ``stresses`` is not
            # read again below either. Confirm the intended order.
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_penultimate_dactyl_chain(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["penultimate dactyl chain"]]
            verse.scansion = smoothed
            # NOTE(review): same ordering concern as above.
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        # last resort: accept a single closest pattern that has the same
        # number of marks and differs in exactly one position
        candidates = self.metrical_validator.closest_pentameter_patterns(verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(string_utils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    string_utils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)

                if self.metrical_validator.is_valid_pentameter(tmp_scansion):
                    verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                    return self.assign_candidate(verse, tmp_scansion)

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        if self.optional_transform and not optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True)

        # nothing validated: return the best-effort scansion merged into the line
        verse.accented = self.formatter.merge_line_scansion(verse.original, verse.scansion)
        return verse

    def make_spondaic(self, scansion: str) -> str:
        """
        Overwrite a scansion with the spondaic pentameter pattern.

        A pentameter line of 12 syllables must start with two spondees. All
        marks are replaced by ``self.SPONDAIC_PENTAMETER`` except the last
        one, which is kept from the input; spacing is preserved.

        :param scansion: a string of scansion patterns
        :return: a scansion pattern string starting with two spondees

        >>> print(PentameterScanner().make_spondaic("U  U  U  U  U  U  U  U  U  U  U  U"))
        -  -  -  -  -  -  U  U  -  U  U  U
        """
        positions = string_utils.mark_list(scansion)
        marks_only = scansion.replace(" ", "")
        # reference pattern without its ending, followed by the original final mark
        pattern = "".join(self.SPONDAIC_PENTAMETER[:-1] + marks_only[-1])
        rebuilt = [" "] * len(scansion)
        for n, symbol in enumerate(pattern):
            # write each symbol back at the column of the n-th original mark
            rebuilt[positions[n]] = symbol
        return "".join(rebuilt)

    def make_dactyls(self, scansion: str) -> str:
        """
        Overwrite a scansion with the dactylic pentameter pattern.

        A pentameter line of 14 syllables starts and ends with double
        dactyls. All marks are replaced by ``self.DACTYLIC_PENTAMETER``
        except the last one, which is kept from the input; spacing is
        preserved.

        :param scansion: a string of scansion patterns
        :return: a scansion pattern string starting and ending with double dactyls

        >>> print(PentameterScanner().make_dactyls("U  U  U  U  U  U  U  U  U  U  U  U  U  U"))
        -  U  U  -  U  U  -  -  U  U  -  U  U  U
        """
        positions = string_utils.mark_list(scansion)
        marks_only = scansion.replace(" ", "")
        # reference pattern without its ending, followed by the original final mark
        pattern = "".join(self.DACTYLIC_PENTAMETER[:-1] + marks_only[-1])
        rebuilt = [" "] * len(scansion)
        for n, symbol in enumerate(pattern):
            # write each symbol back at the column of the n-th original mark
            rebuilt[positions[n]] = symbol
        return "".join(rebuilt)

    def correct_penultimate_dactyl_chain(self, scansion: str) -> str:
        """
        Force two dactyls in front of the final mark.

        For pentameter the last two feet of the verse are predictable dactyls,
        and do not regularly allow substitutions. The six marks before the
        final one are replaced by two dactyls; everything else, including the
        spacing, is left untouched.

        :param scansion: scansion line thus far
        :return: corrected line of scansion

        >>> print(PentameterScanner().correct_penultimate_dactyl_chain(
        ... "U  U  U  U  U  U  U  U  U  U  U  U  U  U"))
        U  U  U  U  U  U  U  -  U  U  -  U  U  U
        """
        positions = string_utils.mark_list(scansion)
        marks = list(scansion.replace(" ", ""))
        # head of the line + two dactyls + the original final mark
        pattern = "".join(
            marks[:-7]
            + [self.constants.DACTYL + self.constants.DACTYL]
            + [marks[-1]]
        )
        rebuilt = [" "] * len(scansion)
        for n, symbol in enumerate(pattern):
            # write each symbol back at the column of the n-th original mark
            rebuilt[positions[n]] = symbol
        return "".join(rebuilt)

Exemple #12

0

Afficher le fichier

Fichier : hendecasyllable_scanner.py Projet : gymnosophist/pharr_format

class HendecasyllableScanner(VerseScanner):
    """
    Scanner for Latin hendecasyllables (eleven-syllable lines).

    The scansion symbols used can be configured by passing a suitable constants class to
    the constructor.
    """
    def __init__(self,
                 constants=None,
                 syllabifier=None,
                 optional_tranform: bool = False,
                 *args,
                 **kwargs):
        """
        :param constants: None or a class that implements ScansionConstants
        :param syllabifier: None or a class that implements Syllabifier methods
        :param optional_tranform: boolean, whether or not to apply aggressive verse
            transformations. The misspelled name is kept for existing callers; the
            keyword ``optional_transform`` is accepted as well and, when given, wins.
        :param args: positional arguments passed on to the parent initializer
        :param kwargs: keyword arguments passed on to the parent initializer
        """
        # The hexameter and pentameter scanners spell this option
        # ``optional_transform``. Consume that spelling here so it configures this
        # scanner instead of being forwarded to the parent inside **kwargs.
        if "optional_transform" in kwargs:
            optional_tranform = kwargs.pop("optional_transform")
        super().__init__(*args, **kwargs)
        self.constants = ScansionConstants(
        ) if constants is None else constants
        self.syllabifier = Syllabifier(
        ) if syllabifier is None else syllabifier
        self.remove_punct_map = string_utils.remove_punctuation_dict()
        self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict(
        )
        self.metrical_validator = MetricalValidator(self.constants)
        self.formatter = ScansionFormatter(self.constants)
        # stressed, unstressed, stressed -- with any amount of whitespace in between
        self.inverted_amphibrach_re = re.compile(r"{}\s*{}\s*{}".format(
            self.constants.STRESSED, self.constants.UNSTRESSED,
            self.constants.STRESSED))
        self.syllable_matcher = re.compile(
            r"[{}]".format(self.constants.VOWELS +
                           self.constants.ACCENTED_VOWELS +
                           self.constants.LIQUIDS + self.constants.MUTES))
        self.optional_transform = optional_tranform

    def scan(self,
             original_line: str,
             optional_transform: bool = False) -> Verse:
        """
        Scan a line of Latin hendecasyllables and produce a scansion pattern, and other data.

        The line is normalized (punctuation, i/j, elision, accents by position),
        syllabified, and rejected outright unless it has exactly eleven syllables.
        The positional scansion is then validated; if it is not valid, the start and
        the closing feet are corrected in turn, and finally a single closest pattern
        is tried.

        :param original_line: the original line of Latin verse
        :param optional_transform: whether or not to perform i to j transform for syllabification
        :return: a Verse object

        >>> scanner = HendecasyllableScanner()
        >>> print(scanner.scan("Cui dono lepidum novum libellum"))
        Verse(original='Cui dono lepidum novum libellum', scansion='  -  U -  U U -   U -   U -  U ', meter='hendecasyllable', valid=True, syllable_count=11, accented='Cui donō lepidūm novūm libēllum', scansion_notes=['Corrected invalid start.'], syllables = ['Cui', 'do', 'no', 'le', 'pi', 'dūm', 'no', 'vūm', 'li', 'bēl', 'lum'])
        >>> print(scanner.scan(
        ... "ārida modo pumice expolitum?").scansion)  # doctest: +NORMALIZE_WHITESPACE
        - U -  U U  - U   -  U - U
        """
        verse = Verse(original_line, meter='hendecasyllable')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            verse.scansion_notes += [
                self.constants.NOTE_MAP["optional i to j"]
            ]
        else:
            working_line = self.transform_i_to_j(line)  # conservative i to j
        working_line = self.elide_all(working_line)
        verse.working_line = self.accent_by_position(working_line)
        verse.syllables = self.syllabifier.syllabify(verse.working_line)
        verse.syllable_count = self.syllabifier.get_syllable_count(
            verse.syllables)
        # a hendecasyllable has exactly eleven syllables; anything else is rejected
        if verse.syllable_count > 11:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 11"]]
            return verse
        if verse.syllable_count < 11:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 11"]]
            return verse

        stresses = self.flag_dipthongs(verse.syllables)
        syllables_wspaces = string_utils.to_syllables_with_trailing_spaces(
            verse.working_line, verse.syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        # every syllable holding an accented vowel counts as stressed
        for idx, syl in enumerate(verse.syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)

        verse.scansion = self.produce_scansion(stresses, syllables_wspaces,
                                               offset_map)
        # the rendered scansion must carry one stress mark per distinct stressed index
        if len(string_utils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [
                self.constants.NOTE_MAP["invalid syllables"]
            ]
            return verse

        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_invalid_start(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            # record what changed *before* overwriting the scansion; comparing
            # afterwards would compare the corrected string with itself
            stresses += string_utils.differences(verse.scansion, smoothed)
            verse.scansion = smoothed

        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_antepenult_chain(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [
                self.constants.NOTE_MAP["antepenult chain"]
            ]
            # same ordering concern as above: diff first, then overwrite
            stresses += string_utils.differences(verse.scansion, smoothed)
            verse.scansion = smoothed

        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        candidates = self.metrical_validator.closest_hendecasyllable_patterns(
            verse.scansion)
        if candidates is not None:
            # only accept an unambiguous candidate that differs in a single position
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(string_utils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    string_utils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if self.metrical_validator.is_valid_hendecasyllables(
                        tmp_scansion):
                    verse.scansion_notes += [
                        self.constants.NOTE_MAP["closest match"]
                    ]
                    return self.assign_candidate(verse, tmp_scansion)

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        if self.optional_transform and not optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True)

        verse.accented = self.formatter.merge_line_scansion(
            verse.original, verse.scansion)
        return verse

    def correct_invalid_start(self, scansion: str) -> str:
        """
        The third syllable of a hendecasyllabic line is long, so we will convert it.

        A scansion with fewer than three marks has no third syllable and is returned
        unchanged.

        :param scansion: scansion string
        :return: scansion string with corrected start

        >>> print(HendecasyllableScanner().correct_invalid_start(
        ... "- U U  U U  - U   -  U - U").strip())
        - U -  U U  - U   -  U - U
        """
        mark_list = string_utils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        if len(vals) < 3:
            return scansion
        corrected = vals[:2] + [self.constants.STRESSED] + vals[3:]
        # write the marks back at their original columns so spacing is preserved
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)

    def correct_antepenult_chain(self, scansion: str) -> str:
        """
        For hendecasyllables the last three feet of the verse are predictable
        and do not regularly allow substitutions.

        The five marks before the final one are replaced with trochee, trochee,
        stressed; the final mark is kept. A scansion with fewer than six marks is
        returned unchanged.

        :param scansion: scansion line thus far
        :return: corrected line of scansion

        >>> print(HendecasyllableScanner().correct_antepenult_chain(
        ... "-U -UU UU UU UX").strip())
        -U -UU -U -U -X
        """
        mark_list = string_utils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        if len(vals) < 6:
            return scansion
        new_vals = vals[:len(vals) - 6] + [
            self.constants.TROCHEE + self.constants.TROCHEE +
            self.constants.STRESSED
        ] + vals[-1:]
        corrected = "".join(new_vals)
        # write the marks back at their original columns so spacing is preserved
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)

Exemple #13

0

Afficher le fichier

Fichier : hexameter_scanner.py Projet : gymnosophist/pharr_format

class HexameterScanner(VerseScanner):
    """The scansion symbols used can be configured by passing a suitable constants class to
    the constructor."""
    def __init__(self,
                 constants=None,
                 syllabifier=None,
                 optional_transform: bool = False,
                 *args,
                 **kwargs):
        """
        Set up the helpers and regular expressions used while scanning hexameters.

        :param constants: None or a class that implements ScansionConstants
        :param syllabifier: None or a class that implements Syllabifier methods
        :param optional_transform: boolean, whether or not to apply aggressive verse transformations.
        :param args: positional arguments passed on to the parent initializer
        :param kwargs: keyword arguments passed on to the parent initializer
        """
        super().__init__(*args, **kwargs)

        # fall back to the default implementations when none are supplied
        if constants is None:
            constants = ScansionConstants()
        self.constants = constants
        if syllabifier is None:
            syllabifier = Syllabifier()
        self.syllabifier = syllabifier

        self.remove_punct_map = string_utils.remove_punctuation_dict()
        self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict()
        self.metrical_validator = MetricalValidator(self.constants)
        self.formatter = ScansionFormatter(self.constants)

        # stressed, unstressed, stressed -- with any amount of whitespace in between
        stressed = self.constants.STRESSED
        unstressed = self.constants.UNSTRESSED
        self.inverted_amphibrach_re = re.compile(
            r"{}\s*{}\s*{}".format(stressed, unstressed, stressed))

        # character class covering vowels, accented vowels, liquids and mutes
        syllable_chars = (self.constants.VOWELS +
                          self.constants.ACCENTED_VOWELS +
                          self.constants.LIQUIDS +
                          self.constants.MUTES)
        self.syllable_matcher = re.compile(r"[{}]".format(syllable_chars))

        self.optional_transform = optional_transform

    def scan(self,
             original_line: str,
             optional_transform: bool = False,
             dactyl_smoothing: bool = False) -> Verse:
        """
        Scan a line of Latin hexameter and produce a scansion pattern, and other data.

        The line is normalized (punctuation, i/j, elision, accents by position) and
        syllabified, then a positional scansion is produced and validated. When that
        scansion is not a valid hexameter, a series of corrections is tried in order
        (syllable-count shortcuts, inverted amphibrachs, first two dactyls, fifth
        foot, invalid feet, closest pattern, optional dactyl smoothing); the first
        one that validates is returned. If nothing validates and the scanner was
        built with ``optional_transform=True``, the scan is repeated once with the
        optional i-to-j transform and dactyl smoothing switched on.

        :param original_line: the original line of Latin verse
        :param optional_transform: whether or not to perform i to j transform for syllabification
        :param dactyl_smoothing: whether or not to perform dactyl smoothing
        :return: a Verse object

        >>> scanner = HexameterScanner()

        >>> print(HexameterScanner().scan(
        ... "ēxiguām sedēm pariturae tērra negavit").scansion) # doctest: +NORMALIZE_WHITESPACE
        - -  -   - -   U U -  -  -  U  U - U

        >>> print(scanner.scan("impulerit. Tantaene animis caelestibus irae?"))
        Verse(original='impulerit. Tantaene animis caelestibus irae?', scansion='-  U U -    -   -   U U -    - -  U U  -  - ', meter='hexameter', valid=True, syllable_count=15, accented='īmpulerīt. Tāntaene animīs caelēstibus īrae?', scansion_notes=['Valid by positional stresses.'], syllables = ['īm', 'pu', 'le', 'rīt', 'Tān', 'taen', 'a', 'ni', 'mīs', 'cae', 'lēs', 'ti', 'bus', 'i', 'rae'])

        >>> print(scanner.scan(
        ... "Arma virumque cano, Troiae qui prīmus ab ōrīs").scansion) # doctest: +NORMALIZE_WHITESPACE
        -  U  U -   U  U -    -  -   -   - U  U  - -

        >>> # some hexameters need the optional transformations:
        >>> optional_transform_scanner = HexameterScanner(optional_transform=True)
        >>> print(optional_transform_scanner.scan(
        ... "Ītaliam, fāto profugus, Lāvīniaque vēnit").scansion) # doctest: +NORMALIZE_WHITESPACE
        - -  -    - -   U U -    - -  U  U  - U

        >>> print(HexameterScanner().scan(
        ... "lītora, multum ille et terrīs iactātus et alto").scansion) # doctest: +NORMALIZE_WHITESPACE
        - U U   -     -    -   -  -   -  - U  U  -  U

        >>> print(HexameterScanner().scan(
        ... "vī superum saevae memorem Iūnōnis ob īram;").scansion) # doctest: +NORMALIZE_WHITESPACE
        -  U U -    -  -  U U -   - - U  U  - U

        >>> # handle multiple elisions
        >>> print(scanner.scan("monstrum horrendum, informe, ingens, cui lumen ademptum").scansion) # doctest: +NORMALIZE_WHITESPACE
        -        -  -      -  -     -  -      -  - U  U -   U

        >>> # if we have 17 syllables, create a chain of all dactyls
        >>> print(scanner.scan("quadrupedante putrem sonitu quatit ungula campum"
        ... ).scansion) # doctest: +NORMALIZE_WHITESPACE
        -  U U -  U  U  -   U U -   U U  -  U U  -  U

        >>> # if we have 13 syllables exactly, we'll create a spondaic hexameter
        >>> print(HexameterScanner().scan(
        ... "illi inter sese multa vi bracchia tollunt").scansion)  # doctest: +NORMALIZE_WHITESPACE
        -    -  -   - -  -  -  -   -   UU  -  -

        >>> print(HexameterScanner().scan(
        ... "dat latus; insequitur cumulo praeruptus aquae mons").scansion) # doctest: +NORMALIZE_WHITESPACE
        -   U U   -  U  U -   U U -    - -  U  U   -  -

        >>> print(optional_transform_scanner.scan(
        ... "Non quivis videt inmodulata poëmata iudex").scansion) # doctest: +NORMALIZE_WHITESPACE
        -    - -   U U  -  U U - U  U- U U  - -

        >>> print(HexameterScanner().scan(
        ... "certabant urbem Romam Remoramne vocarent").scansion) # doctest: +NORMALIZE_WHITESPACE
        -  - -   -  -   - -   U U -  U  U - -

        >>> # advanced smoothing is available via keyword flags: dactyl_smoothing
        >>> print(HexameterScanner().scan(
        ... "his verbis: 'o gnata, tibi sunt ante ferendae",
        ... dactyl_smoothing=True).scansion) # doctest: +NORMALIZE_WHITESPACE
        -   -  -    -   - U   U -  -   -  U  U -   -

        >>> HexameterScanner().scan('Italiam non sponte sequor.')
        Verse(original='Italiam non sponte sequor.', scansion='', meter='hexameter', valid=False, syllable_count=9, accented='', scansion_notes=['Incomplete hexameter; not enough syllables.'], syllables = ['I', 'ta', 'li', 'ām', 'nōn', 'spōn', 'te', 'se', 'quor'])

        >>> HexameterScanner().scan('Phaselus ille, quem videtis, hospites')
        Verse(original='Phaselus ille, quem videtis, hospites', scansion='  - U U  -  -    -   U U U    -  - U ', meter='hexameter', valid=False, syllable_count=12, accented='', scansion_notes=['Inverted amphibrachs corrected.'], syllables = ['Pha', 'se', 'lus', 'īl', 'le', 'quēm', 'vi', 'de', 'tis', 'hōs', 'pi', 'tes'])

        """
        verse = Verse(original_line, meter='hexameter')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            verse.scansion_notes += [
                self.constants.NOTE_MAP["optional i to j"]
            ]
        else:
            working_line = self.transform_i_to_j(line)  # conservative i to j
        working_line = self.elide_all(working_line)
        verse.working_line = self.accent_by_position(working_line)
        verse.syllables = self.syllabifier.syllabify(verse.working_line)
        verse.syllable_count = self.syllabifier.get_syllable_count(
            verse.syllables)
        # fewer than twelve syllables cannot fill a hexameter; stop here
        if verse.syllable_count < 12:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 12"]]
            return verse
        # presumably the indexes of syllables containing diphthongs -- confirm
        # against flag_dipthongs; the indexes appended below are added to this list
        stresses = self.flag_dipthongs(verse.syllables)
        syllables_wspaces = string_utils.to_syllables_with_trailing_spaces(
            verse.working_line, verse.syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        # every syllable holding an accented vowel counts as stressed
        for idx, syl in enumerate(verse.syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # first syllable is always long in hexameter
        stresses.append(0)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)

        verse.scansion = self.produce_scansion(stresses, syllables_wspaces,
                                               offset_map)
        # compare the number of stress marks in the rendered scansion with the number
        # of distinct stressed indexes; a mismatch is reported as invalid syllables
        if len(string_utils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [
                self.constants.NOTE_MAP["invalid syllables"]
            ]
            return verse

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)

        # identify some obvious and probable choices based on number of syllables
        if verse.syllable_count == 17:  # produce all dactyls
            candidate = self.produce_scansion(
                self.metrical_validator.hexameter_known_stresses(),
                syllables_wspaces, offset_map)
            # the note is recorded even if the candidate below turns out to be invalid
            verse.scansion_notes += [self.constants.NOTE_MAP["17"]]
            if self.metrical_validator.is_valid_hexameter(candidate):
                return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 12:  # create all spondee hexameter
            candidate = self.produce_scansion(list(range(12)),
                                              syllables_wspaces, offset_map)
            # NOTE(review): this validates verse.scansion, not candidate. verse.scansion
            # already failed is_valid_hexameter above, so this branch looks unreachable;
            # the final doctest (a 12-syllable line reported invalid) depends on that.
            # Confirm the intended behaviour before changing it.
            if self.metrical_validator.is_valid_hexameter(verse.scansion):
                verse.scansion_notes += [self.constants.NOTE_MAP["12"]]
                return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 13:  # create spondee hexameter with a dactyl at 5th foot
            known_unaccents = [9, 10]
            last_syllable_accented = False
            for vowel in self.constants.ACCENTED_VOWELS:
                if vowel in verse.syllables[12]:
                    last_syllable_accented = True
            if not last_syllable_accented:
                known_unaccents.append(12)
            # NOTE(review): the left side is a set and the right side an int, so this
            # comparison is always True and the branch always runs; presumably
            # len(...) of the set difference was intended -- confirm.
            if set(known_unaccents) - set(stresses) != len(known_unaccents):
                verse.scansion = self.produce_scansion(
                    [x for x in range(13) if x not in known_unaccents],
                    syllables_wspaces, offset_map)
                verse.scansion_notes += [self.constants.NOTE_MAP["5th dactyl"]]
                if self.metrical_validator.is_valid_hexameter(verse.scansion):
                    return self.assign_candidate(verse, verse.scansion)
        if verse.syllable_count > 17:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 17"]]
            return verse

        # correction 1: stressed-unstressed-stressed runs
        smoothed = self.correct_inverted_amphibrachs(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            verse.scansion = smoothed
            # NOTE(review): verse.scansion was just set to smoothed, so both arguments
            # are the same string here (and in the two corrections below); presumably
            # differences() then returns nothing -- confirm the intended order.
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        # correction 2: the opening feet
        smoothed = self.correct_first_two_dactyls(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        # correction 3: the fifth foot
        smoothed = self.correct_invalid_fifth_foot(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid 5th"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        # correction 4: split into feet and turn iambs / trochees into spondees
        feet = self.metrical_validator.hexameter_feet(
            verse.scansion.replace(" ", ""))
        if feet:
            #  Normal good citizens are unwelcome in the house of hexameter
            invalid_feet_in_hexameter = [
                self.constants.IAMB, self.constants.TROCHEE
            ]
            current_foot = 0
            ending = feet.pop(
            )  # don't process the ending, a possible trochee, add it back after
            scanned_line = ""
            # invalid_foot_to_spondee rewrites feet[current_foot] in place, so each
            # later call joins a list that already holds the earlier corrections
            for foot in feet:
                if foot.replace(" ", "") in invalid_feet_in_hexameter:
                    scanned_line = self.invalid_foot_to_spondee(
                        feet, foot, current_foot)
                    scanned_line = scanned_line + ending
                current_foot += 1
            smoothed = self.produce_scansion(
                stresses + string_utils.stress_positions(
                    self.constants.STRESSED, scanned_line), syllables_wspaces,
                offset_map)

            if self.metrical_validator.is_valid_hexameter(smoothed):
                verse.scansion_notes += [
                    self.constants.NOTE_MAP["invalid foot"]
                ]
                return self.assign_candidate(verse, smoothed)

        # need to do this again, since the scansion has changed
        smoothed = self.correct_inverted_amphibrachs(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        # correction 5: accept a single closest pattern that differs in one position
        candidates = self.metrical_validator.closest_hexameter_patterns(
            verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(string_utils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    string_utils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if self.metrical_validator.is_valid_hexameter(tmp_scansion):
                    verse.scansion_notes += [
                        self.constants.NOTE_MAP["closest match"]
                    ]
                    return self.assign_candidate(verse, tmp_scansion)

        # need to do this again, since the scansion has changed
        # NOTE(review): smoothed still holds the result of the amphibrach pass a few
        # lines above, so this repeat pass looks redundant -- confirm.
        smoothed = self.correct_inverted_amphibrachs(smoothed)
        if self.metrical_validator.is_valid_hexameter(smoothed):
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            return self.assign_candidate(verse, smoothed)

        # correction 6 (opt-in): rewrite broken dactyl chains
        if dactyl_smoothing:
            smoothed = self.correct_dactyl_chain(smoothed)
            if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
                verse.scansion_notes += [
                    self.constants.NOTE_MAP["dactyl smoothing"]
                ]
                verse.scansion = smoothed
            if self.metrical_validator.is_valid_hexameter(verse.scansion):
                return self.assign_candidate(verse, verse.scansion)

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        # The retry also switches dactyl smoothing on, and it happens at most once
        # because the recursive call passes optional_transform=True.
        if self.optional_transform and not optional_transform and not verse.valid:
            return self.scan(original_line,
                             optional_transform=True,
                             dactyl_smoothing=True)
        return verse

    def correct_invalid_fifth_foot(self, scansion: str) -> str:
        """
        Repair a line that closes with a dactyl followed by an iamb ("- U U U - x").

        The marks are inspected with spaces removed and the last mark treated as the
        optional ending. When they finish with dactyl + iamb + ending, the first
        unstressed mark of the closing "- U U U -" run is promoted to stressed,
        giving "- - U U - x". Any other scansion is returned unchanged.

        :param scansion: the scansion pattern
        :return corrected scansion: the corrected scansion pattern

        >>> print(HexameterScanner().correct_invalid_fifth_foot(
        ... " -   - -   U U  -  U U U -  - U U U  - x")) # doctest: +NORMALIZE_WHITESPACE
        -   - -   U U  -  U U U -  - - U U  - x
        """
        marks = scansion.replace(" ", "")[:-1] + self.constants.OPTIONAL_ENDING
        bad_ending = (self.constants.DACTYL + self.constants.IAMB +
                      self.constants.OPTIONAL_ENDING)
        if not marks.endswith(bad_ending):
            return scansion
        # Wrap the run in a lookahead so that overlapping occurrences are all
        # reported. A plain finditer is non-overlapping: on "- U U U - U U U - x" it
        # consumes the first run and never sees the closing one, which made the
        # correction land on the wrong syllable.
        run_re = re.compile(r"(?={}\s*{}\s*{}\s*{}\s*{})".format(
            self.constants.STRESSED, self.constants.UNSTRESSED,
            self.constants.UNSTRESSED, self.constants.UNSTRESSED,
            self.constants.STRESSED))
        starts = [found.start() for found in run_re.finditer(scansion)]
        # the last occurrence is the run sitting right before the final mark
        unstressed_idx = scansion.index(self.constants.UNSTRESSED, starts[-1])
        return scansion[:unstressed_idx] + self.constants.STRESSED \
            + scansion[unstressed_idx + 1:]

    def invalid_foot_to_spondee(self, feet: list, foot: str, idx: int) -> str:
        """
        Promote every unstressed mark of one foot to stressed and rejoin the line.

        The corrected foot is stored back into ``feet`` at position ``idx`` -- the
        list passed in is modified in place -- and the feet are then concatenated
        into a single string.

        :param feet: list of string representations of metrical feet
        :param foot: the bad foot to correct
        :param idx: the index of the foot to correct
        :return: corrected scansion

        >>> print(HexameterScanner().invalid_foot_to_spondee(
        ... ['-UU', '--', '-U', 'U-', '--', '-UU'],'-U', 2))  # doctest: +NORMALIZE_WHITESPACE
        -UU----U----UU
        """
        unstressed, stressed = self.constants.UNSTRESSED, self.constants.STRESSED
        feet[idx] = foot.replace(unstressed, stressed)
        return "".join(feet)

    def correct_dactyl_chain(self, scansion: str) -> str:
        """
        Three or more unstressed accents in a row is a broken dactyl chain, best detected and
        processed backwards.

        Since this method takes a Procrustean approach to modifying the scansion pattern,
        it is not used by default in the scan method; however, it is available as an optional
        keyword parameter, and users looking to further automate the generation of scansion
        candidates should consider using this as a fall back.

        The last two marks are never rewritten. The remaining marks are walked from
        right to left: dactyls and spondees are kept, "U U U" becomes "- U U",
        "U U -" and "- U" become "- -". A mark that fits none of these shapes is
        copied through unchanged, so the walk always terminates, and the first mark
        of the line is always kept. A scansion with fewer than two marks is returned
        unchanged. Spacing is preserved: corrected marks are written back at the
        columns of the original non-space characters.

        :param scansion: scansion with broken dactyl chain; inverted amphibrachs not allowed
        :return: corrected line of scansion

        >>> print(HexameterScanner().correct_dactyl_chain(
        ... "-   U U  -  - U U -  - - U U  - x"))
        -   - -  -  - U U -  - - U U  - x
        >>> print(HexameterScanner().correct_dactyl_chain(
        ... "-   U  U U  U -     -   -   -  -   U  U -   U")) # doctest: +NORMALIZE_WHITESPACE
        -   -  - U  U -     -   -   -  -   U  U -   U
        """
        stressed = self.constants.STRESSED
        unstressed = self.constants.UNSTRESSED
        # columns of the marks, i.e. of every character that is not a space
        positions = [col for col, char in enumerate(scansion) if char != " "]
        vals = [scansion[col] for col in positions]
        if len(vals) < 2:
            return scansion
        #  ignore last two positions, save them
        feet = [vals.pop(), vals.pop()]
        idx = len(vals) - 1
        while idx > 0:
            one = vals[idx]
            two = vals[idx - 1]
            three = vals[idx - 2] if idx > 1 else ""
            if one == unstressed and two == unstressed and three == stressed:
                # Dactyl foot is okay, no corrections
                feet += [one, two, three]
                idx -= 3
            elif one == stressed and two == stressed:
                # Spondee foot is okay, no corrections
                feet += [one, two]
                idx -= 2
            elif one == unstressed and two == unstressed and three == unstressed:
                # handle "U U U" foot as "- U U"
                feet += [one, two, stressed]
                idx -= 3
            elif one == stressed and two == unstressed and three == unstressed:
                # handle "U U -" foot as "- -"; only two marks are consumed
                feet += [stressed, stressed]
                idx -= 2
            elif one == unstressed and two == stressed:
                # handle "-  U" foot as "- -"
                feet += [stressed, two]
                idx -= 2
            else:
                # Unrecognized shape (e.g. "- U -" or a stray symbol): keep the mark
                # and step one position, otherwise the loop would never advance.
                feet.append(one)
                idx -= 1
        if idx == 0:
            # the walk stopped on the first mark; keep it so the line is not shortened
            feet.append(vals[0])
        corrected = feet[::-1]
        new_line = list(" " * len(scansion))
        for slot, car in enumerate(corrected):
            new_line[positions[slot]] = car
        return "".join(new_line)

    def correct_inverted_amphibrachs(self, scansion: str) -> str:
        """
        Replace every 'inverted amphibrach' (stressed, unstressed, stressed) with three
        stressed marks:  - U - -> - - -

        The pattern is invalid in hexameters. Matching and replacing is repeated until
        no such run is left, because one correction can expose another.

        :param scansion: the scansion stress pattern
        :return: a string with the corrected scansion pattern

        >>> print(HexameterScanner().correct_inverted_amphibrachs(
        ... " -   U -   - U  -  U U U U  - U  - x")) # doctest: +NORMALIZE_WHITESPACE
        -   - -   - -  -  U U U U  - -  - x
        >>> print(HexameterScanner().correct_inverted_amphibrachs(
        ... " -   - -   U -  -  U U U U  U- - U  - x")) # doctest: +NORMALIZE_WHITESPACE
        -   - -   - -  -  U U U U  U- - -  - x
        >>> print(HexameterScanner().correct_inverted_amphibrachs(
        ... "-  - -   -  -   U -   U U -  U  U - -")) # doctest: +NORMALIZE_WHITESPACE
        -  - -   -  -   - -   U U -  U  U - -
        >>> print(HexameterScanner().correct_inverted_amphibrachs(
        ... "- UU-   U -   U -  -   U   U U   U-   U")) # doctest: +NORMALIZE_WHITESPACE
        - UU-   - -   - -  -   U   U U   U-   U
        """
        pattern = self.inverted_amphibrach_re
        corrected = scansion
        found = list(pattern.finditer(corrected))
        while found:
            for hit in found:
                # the first unstressed mark at or after the start of the hit is its middle
                middle = corrected.index(self.constants.UNSTRESSED, hit.start())
                corrected = "".join((corrected[:middle],
                                     self.constants.STRESSED,
                                     corrected[middle + 1:]))
            found = list(pattern.finditer(corrected))
        return corrected