コード例 #1
0
ファイル: test_text_reuse.py プロジェクト: willismonroe/cltk
 def test_damerau_levenshtein_distance(self):
     """Check the Damerau-Levenshtein distance between two verse lines."""
     lev = Levenshtein()
     distance = lev.Damerau_Levenshtein_Distance(
         "all haile whose solempne glorious concepcioun",
         "fresche floure in quhom the hevinlie dewe doun fell",
     )
     self.assertEqual(distance, 35)
コード例 #2
0
ファイル: test_text_reuse.py プロジェクト: willismonroe/cltk
 def test_levenshtein_distance(self):
     """Check the plain Levenshtein distance between two verse lines."""
     lev = Levenshtein()
     distance = lev.Levenshtein_Distance(
         "now grete glorious god through grace of himselven",
         "and the precious prayer of his pris moder",
     )
     self.assertEqual(distance, 36)
コード例 #3
0
 def levenshtein_distance(self, other_word: str) -> int:
     """
     Return the Levenshtein distance between this word and another.

     :param other_word: the word to compare against ``self.word``
     :return: the edit distance as an integer
     """
     distance = Levenshtein.Levenshtein_Distance(self.word, other_word)
     return distance
コード例 #4
0
    def _calculate_ratios(self, list_a, list_b):
        """
        Build a matrix of pairwise string comparisons for two input lists.

        :param list_a: list [object]
        :param list_b: list [object]
        :return: list [[Comparison]]
        """
        lev = Levenshtein()
        comparisons = []

        # Compare every string in list_a against every string in list_b,
        # producing one row of Comparison objects per entry of list_a.
        for str_a in list_a:
            row = []
            for str_b in list_b:

                # The similarity ratio is taken over the sanitized text when
                # the sanitize_input flag is set, otherwise over the raw text.
                if self.sanitize_input:
                    similarity = lev.ratio(str_a['sanitized'], str_b['sanitized'])
                else:
                    similarity = lev.ratio(str_a['text'], str_b['text'])

                comparison = Comparison(str_a['text'], str_b['text'], similarity)

                # Attach any text metadata configured on this instance to the
                # freshly built comparison.
                if self.text_ref_a:
                    comparison.set_ref_a(self.text_ref_a)
                if self.text_ref_b:
                    comparison.set_ref_b(self.text_ref_b)

                row.append(comparison)

            comparisons.append(row)

        return comparisons
コード例 #5
0
def compute_distance_matrix(proper_nouns: List[str]):
    """
    Distance matrix with Levenshtein distance.

    Levenshtein distance is symmetric and zero on the diagonal, so only the
    upper triangle is computed and mirrored into the lower one, halving the
    number of distance calls relative to the naive double loop.

    :param proper_nouns: Items of proper_nouns must be unique
    :return: an (n, n) numpy array of pairwise distances
    """
    levenshtein = Levenshtein()
    size = len(proper_nouns)

    # We try to keep regroup different forms of a lemma
    distance_matrix = np.zeros((size, size))

    for i in range(size):
        for j in range(i + 1, size):
            distance = levenshtein.Levenshtein_Distance(proper_nouns[i], proper_nouns[j])
            # mirror: d(a, b) == d(b, a); the diagonal stays 0 from np.zeros
            distance_matrix[i, j] = distance
            distance_matrix[j, i] = distance

    return distance_matrix
コード例 #6
0
    def _closest_patterns(self, patterns: List[str],
                          scansion: str) -> List[str]:
        """
        Find the closest group of matching valid patterns.

        :patterns: a list of patterns
        :scansion: the scansion pattern thus far
        :return: list of the closest valid patterns; only candidates with a matching
        length/number of syllables are considered.
        """
        # strip spaces and foot separators so only metrical symbols remain
        compact = scansion.replace(" ", "").replace(self.constants.FOOT_SEPARATOR, "")
        ending = compact[-1]
        # swap the final symbol for the optional-ending marker before matching
        candidate = compact[:-1] + self.constants.OPTIONAL_ENDING
        scored = []
        for pat in patterns:
            if len(pat) == len(candidate):
                scored.append((Levenshtein.levenshtein_distance(candidate, pat), pat))
        if not scored:
            return []
        scored.sort(key=lambda pair: pair[0])
        best = scored[0][0]
        # restore the original ending on every pattern tied for the best score
        return [pat[:-1] + ending for score, pat in scored if score == best]
コード例 #7
0
ファイル: test_text_reuse.py プロジェクト: nodage/cltk
 def test_distance_ratio(self):
     """Check the Levenshtein similarity ratio of two strings."""
     lev = Levenshtein()
     computed = lev.ratio("dique deaeque omnes, studium quibus arua tueri,",
                          "dique deaeque omnes, quibus est tutela per agros,")
     self.assertEqual(computed, 0.71)
コード例 #8
0
ファイル: test_text_reuse.py プロジェクト: TylerKirby/cltk
 def test_distance_ratio(self):
     """Check the Levenshtein similarity ratio of two strings."""
     first = "dique deaeque omnes, studium quibus arua tueri,"
     second = "dique deaeque omnes, quibus est tutela per agros,"
     self.assertEqual(Levenshtein().ratio(first, second), 0.71)
コード例 #9
0
ファイル: cltk_doc.py プロジェクト: thePortus/arakhne
 def compare_levenshtein(self, other_text):
     """Return the Levenshtein similarity ratio between this document's data and ``other_text``."""
     lev = Levenshtein()
     return lev.ratio(self.data, other_text)
コード例 #10
0
    def scan(self, original_line: str, optional_transform: bool = False) -> Verse:
        """
        Scan a line of Latin pentameter and produce a scansion pattern, and other data.

        :param original_line: the original line of Latin verse
        :param optional_transform: whether or not to perform i to j transform for syllabification
        :return: a Verse object

        >>> scanner = PentameterScanner()
        >>> print(scanner.scan('ex hoc ingrato gaudia amore tibi.'))
        Verse(original='ex hoc ingrato gaudia amore tibi.', scansion='-   -  -   - -   - U  U - U  U U ', meter='pentameter', valid=True, syllable_count=12, accented='ēx hōc īngrātō gaudia amōre tibi.', scansion_notes=['Spondaic pentameter'], syllables = ['ēx', 'hoc', 'īn', 'gra', 'to', 'gau', 'di', 'a', 'mo', 're', 'ti', 'bi'])
        >>> print(scanner.scan(
        ... "in vento et rapida scribere oportet aqua.").scansion) # doctest: +NORMALIZE_WHITESPACE
        -   -    -   U U -    - U   U -  U  U  U
        """
        verse = Verse(original_line, meter='pentameter')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
        else:
            working_line = self.transform_i_to_j(line) # conservative i to j
        working_line = self.elide_all(working_line)
        verse.working_line = self.accent_by_position(working_line)
        verse.syllables = self.syllabifier.syllabify(verse.working_line)
        verse.syllable_count = self.syllabifier.get_syllable_count(verse.syllables)
        # a pentameter line needs at least 12 syllables; bail out early otherwise
        if verse.syllable_count < 12:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 12p"]]
            return verse
        stresses = self.flag_dipthongs(verse.syllables)
        syllables_wspaces = string_utils.to_syllables_with_trailing_spaces(verse.working_line, verse.syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        # any syllable containing an accented (macron) vowel counts as stressed
        for idx, syl in enumerate(verse.syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # first syllable is always long in Pentameter
        stresses.append(0)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)

        verse.scansion = self.produce_scansion(stresses,
                                               syllables_wspaces, offset_map)
        # if the rendered scansion does not carry exactly the stresses we
        # computed, the syllabification cannot be trusted
        if len(string_utils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
            return verse

        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)

        # identify some obvious and probable choices based on number of syllables
        if verse.syllable_count == 12:  # produce spondees where possible
            candidate = self.make_spondaic(verse.scansion)
            verse.scansion_notes += [self.constants.NOTE_MAP["12p"]]
            return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 14:  # produce dactyls where possible
            candidate = self.make_dactyls(verse.scansion)
            verse.scansion_notes += [self.constants.NOTE_MAP["14p"]]
            return self.assign_candidate(verse, candidate)
        if verse.syllable_count > 14:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 14"]]
            return verse

        # successive smoothing passes: apply a correction, and if it changed
        # anything, record the note and re-validate
        smoothed = self.correct_first_two_dactyls(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_penultimate_dactyl_chain(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["penultimate dactyl chain"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        # last resort: if exactly one known pattern is one change away,
        # try that pattern
        candidates = self.metrical_validator.closest_pentameter_patterns(verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(string_utils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    string_utils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)

                if self.metrical_validator.is_valid_pentameter(tmp_scansion):
                    verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                    return self.assign_candidate(verse, tmp_scansion)

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        if self.optional_transform and not optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True)

        verse.accented = self.formatter.merge_line_scansion(verse.original, verse.scansion)
        return verse
コード例 #11
0
    def scan(self,
             original_line: str,
             optional_transform: bool = False) -> Verse:
        """
        Scan a line of Latin hendecasyllables and produce a scansion pattern, and other data.

        :param original_line: the original line of Latin verse
        :param optional_transform: whether or not to perform i to j transform for syllabification
        :return: a Verse object

        >>> scanner = HendecasyllableScanner()
        >>> print(scanner.scan("Cui dono lepidum novum libellum"))
        Verse(original='Cui dono lepidum novum libellum', scansion='  -  U -  U U -   U -   U -  U ', meter='hendecasyllable', valid=True, syllable_count=11, accented='Cui donō lepidūm novūm libēllum', scansion_notes=['Corrected invalid start.'], syllables = ['Cui', 'do', 'no', 'le', 'pi', 'dūm', 'no', 'vūm', 'li', 'bēl', 'lum'])
        >>> print(scanner.scan(
        ... "ārida modo pumice expolitum?").scansion)  # doctest: +NORMALIZE_WHITESPACE
        - U -  U U  - U   -  U - U
        """
        verse = Verse(original_line, meter='hendecasyllable')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            verse.scansion_notes += [
                self.constants.NOTE_MAP["optional i to j"]
            ]
        else:
            working_line = self.transform_i_to_j(line)  # conservative i to j
        working_line = self.elide_all(working_line)
        verse.working_line = self.accent_by_position(working_line)
        verse.syllables = self.syllabifier.syllabify(verse.working_line)
        verse.syllable_count = self.syllabifier.get_syllable_count(
            verse.syllables)
        # a hendecasyllable must have exactly 11 syllables; reject otherwise
        if verse.syllable_count > 11:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 11"]]
            return verse
        if verse.syllable_count < 11:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 11"]]
            return verse

        stresses = self.flag_dipthongs(verse.syllables)
        syllables_wspaces = string_utils.to_syllables_with_trailing_spaces(
            verse.working_line, verse.syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        # any syllable containing an accented (macron) vowel counts as stressed
        for idx, syl in enumerate(verse.syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)

        verse.scansion = self.produce_scansion(stresses, syllables_wspaces,
                                               offset_map)
        # if the rendered scansion does not carry exactly the stresses we
        # computed, the syllabification cannot be trusted
        if len(string_utils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [
                self.constants.NOTE_MAP["invalid syllables"]
            ]
            return verse

        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)

        # successive smoothing passes: apply a correction, and if it changed
        # anything, record the note and re-validate
        smoothed = self.correct_invalid_start(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_antepenult_chain(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [
                self.constants.NOTE_MAP["antepenult chain"]
            ]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        # last resort: if exactly one known pattern is one change away,
        # try that pattern
        candidates = self.metrical_validator.closest_hendecasyllable_patterns(
            verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(string_utils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    string_utils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if self.metrical_validator.is_valid_hendecasyllables(
                        tmp_scansion):
                    verse.scansion_notes += [
                        self.constants.NOTE_MAP["closest match"]
                    ]
                    return self.assign_candidate(verse, tmp_scansion)

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        if self.optional_transform and not optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True)

        verse.accented = self.formatter.merge_line_scansion(
            verse.original, verse.scansion)
        return verse
コード例 #12
0
    def scan(self,
             original_line: str,
             optional_transform: bool = False,
             dactyl_smoothing: bool = False) -> Verse:
        """
        Scan a line of Latin hexameter and produce a scansion pattern, and other data.

        :param original_line: the original line of Latin verse
        :param optional_transform: whether or not to perform i to j transform for syllabification
        :param dactyl_smoothing: whether or not to perform dactyl smoothing
        :return: a Verse object

        >>> scanner = HexameterScanner()

        >>> print(HexameterScanner().scan(
        ... "ēxiguām sedēm pariturae tērra negavit").scansion) # doctest: +NORMALIZE_WHITESPACE
        - -  -   - -   U U -  -  -  U  U - U

        >>> print(scanner.scan("impulerit. Tantaene animis caelestibus irae?"))
        Verse(original='impulerit. Tantaene animis caelestibus irae?', scansion='-  U U -    -   -   U U -    - -  U U  -  - ', meter='hexameter', valid=True, syllable_count=15, accented='īmpulerīt. Tāntaene animīs caelēstibus īrae?', scansion_notes=['Valid by positional stresses.'], syllables = ['īm', 'pu', 'le', 'rīt', 'Tān', 'taen', 'a', 'ni', 'mīs', 'cae', 'lēs', 'ti', 'bus', 'i', 'rae'])

        >>> print(scanner.scan(
        ... "Arma virumque cano, Troiae qui prīmus ab ōrīs").scansion) # doctest: +NORMALIZE_WHITESPACE
        -  U  U -   U  U -    -  -   -   - U  U  - -

        >>> # some hexameters need the optional transformations:
        >>> optional_transform_scanner = HexameterScanner(optional_transform=True)
        >>> print(optional_transform_scanner.scan(
        ... "Ītaliam, fāto profugus, Lāvīniaque vēnit").scansion) # doctest: +NORMALIZE_WHITESPACE
        - -  -    - -   U U -    - -  U  U  - U

        >>> print(HexameterScanner().scan(
        ... "lītora, multum ille et terrīs iactātus et alto").scansion) # doctest: +NORMALIZE_WHITESPACE
        - U U   -     -    -   -  -   -  - U  U  -  U

        >>> print(HexameterScanner().scan(
        ... "vī superum saevae memorem Iūnōnis ob īram;").scansion) # doctest: +NORMALIZE_WHITESPACE
        -  U U -    -  -  U U -   - - U  U  - U

        >>> # handle multiple elisions
        >>> print(scanner.scan("monstrum horrendum, informe, ingens, cui lumen ademptum").scansion) # doctest: +NORMALIZE_WHITESPACE
        -        -  -      -  -     -  -      -  - U  U -   U

        >>> # if we have 17 syllables, create a chain of all dactyls
        >>> print(scanner.scan("quadrupedante putrem sonitu quatit ungula campum"
        ... ).scansion) # doctest: +NORMALIZE_WHITESPACE
        -  U U -  U  U  -   U U -   U U  -  U U  -  U

        >>> # if we have 13 syllables exactly, we'll create a spondaic hexameter
        >>> print(HexameterScanner().scan(
        ... "illi inter sese multa vi bracchia tollunt").scansion)  # doctest: +NORMALIZE_WHITESPACE
        -    -  -   - -  -  -  -   -   UU  -  -

        >>> print(HexameterScanner().scan(
        ... "dat latus; insequitur cumulo praeruptus aquae mons").scansion) # doctest: +NORMALIZE_WHITESPACE
        -   U U   -  U  U -   U U -    - -  U  U   -  -

        >>> print(optional_transform_scanner.scan(
        ... "Non quivis videt inmodulata poëmata iudex").scansion) # doctest: +NORMALIZE_WHITESPACE
        -    - -   U U  -  U U - U  U- U U  - -

        >>> print(HexameterScanner().scan(
        ... "certabant urbem Romam Remoramne vocarent").scansion) # doctest: +NORMALIZE_WHITESPACE
        -  - -   -  -   - -   U U -  U  U - -

        >>> # advanced smoothing is available via keyword flags: dactyl_smoothing
        >>> print(HexameterScanner().scan(
        ... "his verbis: 'o gnata, tibi sunt ante ferendae",
        ... dactyl_smoothing=True).scansion) # doctest: +NORMALIZE_WHITESPACE
        -   -  -    -   - U   U -  -   -  U  U -   -

        >>> HexameterScanner().scan('Italiam non sponte sequor.')
        Verse(original='Italiam non sponte sequor.', scansion='', meter='hexameter', valid=False, syllable_count=9, accented='', scansion_notes=['Incomplete hexameter; not enough syllables.'], syllables = ['I', 'ta', 'li', 'ām', 'nōn', 'spōn', 'te', 'se', 'quor'])

        >>> HexameterScanner().scan('Phaselus ille, quem videtis, hospites')
        Verse(original='Phaselus ille, quem videtis, hospites', scansion='  - U U  -  -    -   U U U    -  - U ', meter='hexameter', valid=False, syllable_count=12, accented='', scansion_notes=['Inverted amphibrachs corrected.'], syllables = ['Pha', 'se', 'lus', 'īl', 'le', 'quēm', 'vi', 'de', 'tis', 'hōs', 'pi', 'tes'])

        """
        verse = Verse(original_line, meter='hexameter')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            verse.scansion_notes += [
                self.constants.NOTE_MAP["optional i to j"]
            ]
        else:
            working_line = self.transform_i_to_j(line)  # conservative i to j
        working_line = self.elide_all(working_line)
        verse.working_line = self.accent_by_position(working_line)
        verse.syllables = self.syllabifier.syllabify(verse.working_line)
        verse.syllable_count = self.syllabifier.get_syllable_count(
            verse.syllables)
        # a hexameter line needs at least 12 syllables; bail out early otherwise
        if verse.syllable_count < 12:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 12"]]
            return verse
        stresses = self.flag_dipthongs(verse.syllables)
        syllables_wspaces = string_utils.to_syllables_with_trailing_spaces(
            verse.working_line, verse.syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        # any syllable containing an accented (macron) vowel counts as stressed
        for idx, syl in enumerate(verse.syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # first syllable is always long in hexameter
        stresses.append(0)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)

        verse.scansion = self.produce_scansion(stresses, syllables_wspaces,
                                               offset_map)
        # if the rendered scansion does not carry exactly the stresses we
        # computed, the syllabification cannot be trusted
        if len(string_utils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [
                self.constants.NOTE_MAP["invalid syllables"]
            ]
            return verse

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)

        # identify some obvious and probable choices based on number of syllables
        if verse.syllable_count == 17:  # produce all dactyls
            candidate = self.produce_scansion(
                self.metrical_validator.hexameter_known_stresses(),
                syllables_wspaces, offset_map)
            verse.scansion_notes += [self.constants.NOTE_MAP["17"]]
            if self.metrical_validator.is_valid_hexameter(candidate):
                return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 12:  # create all spondee hexameter
            candidate = self.produce_scansion(list(range(12)),
                                              syllables_wspaces, offset_map)
            # NOTE(review): this validates verse.scansion but assigns candidate;
            # possibly is_valid_hexameter(candidate) was intended — confirm
            if self.metrical_validator.is_valid_hexameter(verse.scansion):
                verse.scansion_notes += [self.constants.NOTE_MAP["12"]]
                return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 13:  # create spondee hexameter with a dactyl at 5th foot
            known_unaccents = [9, 10]
            last_syllable_accented = False
            for vowel in self.constants.ACCENTED_VOWELS:
                if vowel in verse.syllables[12]:
                    last_syllable_accented = True
            if not last_syllable_accented:
                known_unaccents.append(12)
            # NOTE(review): this compares a set against an int, so the condition
            # is always True; likely len(set(...) - set(stresses)) was intended —
            # confirm before changing, as downstream doctests depend on behavior
            if set(known_unaccents) - set(stresses) != len(known_unaccents):
                verse.scansion = self.produce_scansion(
                    [x for x in range(13) if x not in known_unaccents],
                    syllables_wspaces, offset_map)
                verse.scansion_notes += [self.constants.NOTE_MAP["5th dactyl"]]
                if self.metrical_validator.is_valid_hexameter(verse.scansion):
                    return self.assign_candidate(verse, verse.scansion)
        if verse.syllable_count > 17:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 17"]]
            return verse

        # successive smoothing passes: apply a correction, and if it changed
        # anything, record the note and re-validate
        smoothed = self.correct_inverted_amphibrachs(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_first_two_dactyls(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        smoothed = self.correct_invalid_fifth_foot(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid 5th"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        # foot-level repair: replace iambs/trochees (invalid in hexameter
        # except as a final trochee) with spondees and re-render
        feet = self.metrical_validator.hexameter_feet(
            verse.scansion.replace(" ", ""))
        if feet:
            #  Normal good citizens are unwelcome in the house of hexameter
            invalid_feet_in_hexameter = [
                self.constants.IAMB, self.constants.TROCHEE
            ]
            current_foot = 0
            ending = feet.pop(
            )  # don't process the ending, a possible trochee, add it back after
            scanned_line = ""
            for foot in feet:
                if foot.replace(" ", "") in invalid_feet_in_hexameter:
                    scanned_line = self.invalid_foot_to_spondee(
                        feet, foot, current_foot)
                    scanned_line = scanned_line + ending
                current_foot += 1
            smoothed = self.produce_scansion(
                stresses + string_utils.stress_positions(
                    self.constants.STRESSED, scanned_line), syllables_wspaces,
                offset_map)

            if self.metrical_validator.is_valid_hexameter(smoothed):
                verse.scansion_notes += [
                    self.constants.NOTE_MAP["invalid foot"]
                ]
                return self.assign_candidate(verse, smoothed)

        # need to do this again, since the scansion has changed
        smoothed = self.correct_inverted_amphibrachs(verse.scansion)

        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)

        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)

        # last resort: if exactly one known pattern is one change away,
        # try that pattern
        candidates = self.metrical_validator.closest_hexameter_patterns(
            verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(string_utils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    string_utils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if self.metrical_validator.is_valid_hexameter(tmp_scansion):
                    verse.scansion_notes += [
                        self.constants.NOTE_MAP["closest match"]
                    ]
                    return self.assign_candidate(verse, tmp_scansion)

        # need to do this again, since the scansion has changed
        smoothed = self.correct_inverted_amphibrachs(smoothed)
        if self.metrical_validator.is_valid_hexameter(smoothed):
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            return self.assign_candidate(verse, smoothed)

        if dactyl_smoothing:
            smoothed = self.correct_dactyl_chain(smoothed)
            if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
                verse.scansion_notes += [
                    self.constants.NOTE_MAP["dactyl smoothing"]
                ]
                verse.scansion = smoothed
            if self.metrical_validator.is_valid_hexameter(verse.scansion):
                return self.assign_candidate(verse, verse.scansion)

        # if the line doesn't scan "as is", it may scan if the optional i to j transformations
        # are made, so here we set them and try again.
        if self.optional_transform and not optional_transform and not verse.valid:
            return self.scan(original_line,
                             optional_transform=True,
                             dactyl_smoothing=True)
        return verse