Esempi in Python per levenshtein

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: smewt.base.textutils

Metodo/funzione: levenshtein

Esempi su hotexamples.com: 4

levenshtein in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per smewt.base.textutils.levenshtein, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: tvdbmetadataprovider.py Progetto: EQ4/smewt

    def startEpisode(self, episode):
        """Select the candidate series and language that best match *episode*.

        Sets ``self.tmdb.lang`` from the GUI language, fetches the candidate
        series for the episode's series title via ``self.getSeries`` and picks
        one of them, preferring the episode's own language(s) and then
        English.

        Each candidate is indexed positionally: item 0 is converted with
        ``int()`` and used as the selected series value, item 1 is compared
        against the series title, item 2 is compared against the wanted
        languages.  (Presumably id / title / language -- confirm against
        ``getSeries``.)

        NOTE(review): the visible part of this method ends once ``series``
        and ``language`` are chosen; how they are used afterwards is not
        shown here.

        Raises:
            SmewtException: if the episode has no 'series' field.
            IndexError: if ``getSeries`` returned no candidate at all.
        """
        self.tmdb.lang = guiLanguage().alpha2

        if episode.get('series') is None:
            # Exceptions do not interpolate logging-style arguments, so the
            # message has to be formatted explicitly.
            raise SmewtException("TVDBMetadataProvider: Episode doesn't contain 'series' field: %s" % (episode,))

        name = episode.series.title
        name = name.replace(',', ' ')

        matching_series = self.getSeries(name)

        # Try first with the languages from guessit, and then with english
        languages = tolist(episode.get('language', [])) + ['en']

        # Order candidates by name distance to the requested title, breaking
        # ties with the numeric id (heuristic: the most popular series might
        # have been added sooner to the db, and the db id follows the
        # insertion order).
        # TODO: we should do something smarter, like also comparing episodes
        #       count and/or episodes names
        matching_series.sort(key=lambda x: (textutils.levenshtein(x[1], name), int(x[0])))

        # The language column does not change inside the loop: build it once.
        # A list also keeps this working where zip() is not subscriptable.
        series_languages = [candidate[2] for candidate in matching_series]

        series = None
        language = 'en'
        for lang in languages:
            language = lang
            try:
                ind = series_languages.index(lang)
            except ValueError:
                # No candidate in this language: fall back to the best-ranked
                # candidate, but keep looking in the remaining languages.
                language = matching_series[0][2]
                series = matching_series[0][0]
            else:
                series = matching_series[ind][0]
                break

Esempio n. 2

Mostra file

def fuzzyMatch2(baseGuess, md):
    """Tell whether the unique keys of *baseGuess* and *md* loosely agree.

    The components of both ``unique_key()`` results are walked in lockstep
    (pairing stops at the shorter key).  For each pair, depending on the
    first component:

    * exact ``str`` / ``unicode`` values are lowercased and rejected when
      their ``levenshtein`` result is above 80;
    * ``Metadata`` instances are compared recursively;
    * anything else must compare equal.

    Returns False on the first pair that fails, True otherwise.
    """
    for left, right in zip(baseGuess.unique_key(), md.unique_key()):
        kind = type(left)

        if kind == str or kind == unicode:
            # TODO: levenshtein doesn't cut it here, we need a better string distance
            distance = levenshtein(left.lower(), right.lower())
            if distance > 80:
                return False
            continue

        if isinstance(left, Metadata):
            if not fuzzyMatch2(left, right):
                return False
            continue

        if left != right:
            return False

    return True

Esempio n. 3

Mostra file

File: simplesolver.py Progetto: robmcmullen/smewt

def fuzzyMatch2(baseGuess, md):
    """Check that two objects have approximately matching unique keys.

    Components of ``baseGuess.unique_key()`` and ``md.unique_key()`` are
    paired with ``zip`` (so extra trailing components of the longer key are
    ignored) and each pair is checked according to the first component:

    * exact ``str`` / ``unicode``: lowercase both sides and fail when the
      ``levenshtein`` result exceeds 80;
    * ``Metadata`` instance: recurse on the pair;
    * otherwise: require equality.

    Returns True only when no pair fails.
    """
    for left, right in zip(baseGuess.unique_key(), md.unique_key()):
        kind = type(left)

        if kind == str or kind == unicode:
            # TODO: levenshtein doesn't cut it here, we need a better string distance
            distance = levenshtein(left.lower(), right.lower())
            if distance > 80:
                return False
            continue

        if isinstance(left, Metadata):
            if not fuzzyMatch2(left, right):
                return False
            continue

        if left != right:
            return False

    return True

Esempio n. 4

Mostra file

    def startEpisode(self, episode):
        """Select the candidate series and language that best match *episode*.

        Sets ``self.tmdb.lang`` from the GUI language, fetches the candidate
        series for the episode's series title via ``self.getSeries`` and picks
        one of them, preferring the episode's own language(s) and then
        English.

        Each candidate is indexed positionally: item 0 is converted with
        ``int()`` and used as the selected series value, item 1 is compared
        against the series title, item 2 is compared against the wanted
        languages.  (Presumably id / title / language -- confirm against
        ``getSeries``.)

        NOTE(review): the visible part of this method ends once ``series``
        and ``language`` are chosen; how they are used afterwards is not
        shown here.

        Raises:
            SmewtException: if the episode has no 'series' field.
            IndexError: if ``getSeries`` returned no candidate at all.
        """
        self.tmdb.lang = guiLanguage().alpha2

        if episode.get('series') is None:
            # Exceptions do not interpolate logging-style arguments, so the
            # message has to be formatted explicitly.
            raise SmewtException(
                "TVDBMetadataProvider: Episode doesn't contain 'series' field: %s"
                % (episode,))

        name = episode.series.title
        name = name.replace(',', ' ')

        matching_series = self.getSeries(name)

        # Try first with the languages from guessit, and then with english
        languages = tolist(episode.get('language', [])) + ['en']

        # Order candidates by name distance to the requested title, breaking
        # ties with the numeric id (heuristic: the most popular series might
        # have been added sooner to the db, and the db id follows the
        # insertion order).
        # TODO: we should do something smarter, like also comparing episodes
        #       count and/or episodes names
        matching_series.sort(
            key=lambda x: (textutils.levenshtein(x[1], name), int(x[0])))

        # The language column does not change inside the loop: build it once.
        # A list also keeps this working where zip() is not subscriptable.
        series_languages = [candidate[2] for candidate in matching_series]

        series = None
        language = 'en'
        for lang in languages:
            language = lang
            try:
                ind = series_languages.index(lang)
            except ValueError:
                # No candidate in this language: fall back to the best-ranked
                # candidate, but keep looking in the remaining languages.
                language = matching_series[0][2]
                series = matching_series[0][0]
            else:
                series = matching_series[ind][0]
                break