def startEpisode(self, episode):
    """Pick the series entry (and its language) that best matches *episode*.

    Sets ``self.tmdb.lang`` to the GUI language, looks up candidate series
    for the episode's series title via ``self.getSeries`` and ranks them.
    The first language in ``episode['language']`` (then ``'en'``) for which
    a candidate exists wins; if none matches, the best-ranked candidate is
    used whatever its language.

    Candidates are indexed as ``[0]`` id, ``[1]`` title, ``[2]`` language
    (inferred from how the fields are used below -- confirm against
    ``getSeries``).

    Raises:
        SmewtException: if *episode* has no 'series' field.

    NOTE(review): the fallback indexes ``matching_series[0]``, so an empty
    result raises IndexError -- presumably ``getSeries`` never returns an
    empty list; confirm.
    NOTE(review): the visible body ends once ``series`` and ``language``
    are selected; nothing is returned here.
    """
    self.tmdb.lang = guiLanguage().alpha2

    if episode.get('series') is None:
        # Interpolate the episode into the message; wrapping it in a
        # one-element tuple keeps '%' safe even if episode is tuple-like.
        raise SmewtException(
            "TVDBMetadataProvider: Episode doesn't contain 'series' field: %s"
            % (episode,))

    name = episode.series.title
    name = name.replace(',', ' ')
    matching_series = self.getSeries(name)

    # Try first with the languages from guessit, and then with english
    languages = tolist(episode.get('language', [])) + ['en']

    # Rank candidates by title distance to the requested name, breaking ties
    # with the numeric id (heuristic: popular series were probably added to
    # the db sooner, and ids follow insertion order).
    # TODO: we should do something smarter like comparing series name distance,
    #       episodes count and/or episodes names
    matching_series.sort(
        key=lambda x: (textutils.levenshtein(x[1], name), int(x[0])))

    # The language column does not change inside the loop, so build it once.
    series_languages = [candidate[2] for candidate in matching_series]

    series = None
    language = 'en'
    for lang in languages:
        language = lang
        try:
            ind = series_languages.index(lang)
        except ValueError:
            # No candidate in this language: provisionally fall back to the
            # best-ranked one, then keep trying the remaining languages.
            language = matching_series[0][2]
            series = matching_series[0][0]
        else:
            series = matching_series[ind][0]
            break
def fuzzyMatch2(baseGuess, md):
    """Tell whether the unique keys of *baseGuess* and *md* match loosely.

    The two ``unique_key()`` sequences are walked in lockstep (stopping at
    the shorter one). Each pair is compared according to the type of the
    component coming from *baseGuess*:

    * exact ``str`` / ``unicode``: lowercased, and rejected when their
      ``levenshtein`` value exceeds 80;
    * ``Metadata`` instance: compared recursively with this function;
    * anything else: must be equal.

    Returns False at the first pair that fails, True otherwise.
    """
    key_pairs = zip(baseGuess.unique_key(), md.unique_key())

    for left, right in key_pairs:
        left_type = type(left)

        # Exact type comparison (not isinstance), so subclasses of the
        # string types are not treated as text.
        if left_type == str or left_type == unicode:
            # TODO: levenshtein doesn't cut it here, we need a better string distance
            distance = levenshtein(left.lower(), right.lower())
            if distance > 80:
                return False
            continue

        if isinstance(left, Metadata):
            nested_ok = fuzzyMatch2(left, right)
            if not nested_ok:
                return False
            continue

        if left != right:
            return False

    return True
def startEpisode(self, episode):
    """Choose the series and language to use for *episode*.

    Sets ``self.tmdb.lang`` to the GUI language, fetches candidate series
    for the episode's series title through ``self.getSeries``, sorts them,
    and then prefers the first language from ``episode['language']``
    (followed by ``'en'``) that has a candidate. When no preferred language
    has one, the top-sorted candidate is used with its own language.

    Candidate fields are read as ``[0]`` id, ``[1]`` title, ``[2]``
    language (inferred from usage in this body -- confirm against
    ``getSeries``).

    Raises:
        SmewtException: if *episode* has no 'series' field.

    NOTE(review): an empty ``matching_series`` makes the fallback raise
    IndexError -- presumably ``getSeries`` guards against that; confirm.
    NOTE(review): the visible body stops after ``series`` / ``language``
    are chosen and returns nothing.
    """
    self.tmdb.lang = guiLanguage().alpha2

    if episode.get('series') is None:
        # Format the message here; the one-element tuple keeps '%' safe
        # even if episode is tuple-like.
        raise SmewtException(
            "TVDBMetadataProvider: Episode doesn't contain 'series' field: %s"
            % (episode,))

    name = episode.series.title
    name = name.replace(',', ' ')
    matching_series = self.getSeries(name)

    # Try first with the languages from guessit, and then with english
    languages = tolist(episode.get('language', [])) + ['en']

    # Sort by title distance to the requested name first, then by numeric id
    # (heuristic: popular series were probably added to the db sooner, and
    # ids follow insertion order).
    # TODO: we should do something smarter like comparing series name distance,
    #       episodes count and/or episodes names
    matching_series.sort(
        key=lambda x: (textutils.levenshtein(x[1], name), int(x[0])))

    # Extract the language of every candidate once, outside the loop.
    candidate_languages = [entry[2] for entry in matching_series]

    series = None
    language = 'en'
    for lang in languages:
        language = lang
        try:
            ind = candidate_languages.index(lang)
        except ValueError:
            # Nothing in this language: tentatively take the top-sorted
            # candidate and move on to the next preferred language.
            language = matching_series[0][2]
            series = matching_series[0][0]
        else:
            series = matching_series[ind][0]
            break