def _scan_country(self, country, strict=False):
    """
    Find a country if it is at the start or end of country string

    The string is lower-cased and split into words with ``iter_words``.
    Words are first accumulated from the start of the string (concatenated
    without any separator) and each accumulated prefix is tried with
    ``Country.fromguessit``.  If no prefix matches, the same is done from the
    end of the string, growing a suffix one word at a time.  As a last
    resort the whole, unmodified ``country`` string is tried.

    :param country: text to scan for a country
    :param strict: not used by this method; kept so existing callers that
        pass it keep working
    :return: a tuple ``(country, (start, end))`` where ``(start, end)`` are
        the offsets in ``country`` covered by the matched words.  In the
        last-resort case the offsets are those of the first word start and
        last word end, or ``(None, None)`` when no word was found.
    :raises: whatever ``Country.fromguessit`` raises for the whole string
        when nothing matched (presumably ``babelfish.Error`` -- the final
        lookup is deliberately not guarded)
    """
    words_match = list(iter_words(country.lower()))

    # Pass 1: grow a prefix word by word.
    s = ""
    start = None
    for word_match in words_match:
        # Compare against None explicitly: the first word usually starts at
        # offset 0, which is falsy and used to be mistaken for "not set",
        # shifting the reported span start to a later word.
        if start is None:
            start = word_match.start(0)
        s += word_match.group(0)
        try:
            return Country.fromguessit(s), (start, word_match.end(0))
        except babelfish.Error:
            continue

    # Pass 2: grow a suffix word by word, walking the words backwards.
    s = ""
    end = None
    for word_match in reversed(words_match):
        if end is None:
            end = word_match.end(0)
        s = word_match.group(0) + s
        try:
            return Country.fromguessit(s), (word_match.start(0), end)
        except babelfish.Error:
            continue

    # Last resort: try the raw input; an unknown country propagates the error.
    return Country.fromguessit(country), (start, end)
def process(self, mtree, options=None):
    """
    Look for countries in the match tree.

    First runs ``self.guess_country`` over the unidentified leaves through a
    ``GuessFinder``.  Then, for every leaf that contains a ``language``
    guess whose cleaned, lower-cased value is listed in
    ``self.replace_language``, the language is reset to ``None`` and, when
    the value resolves to a country accepted by ``self.is_valid_country``,
    a country guess (confidence 0.9) is registered on that leaf instead.

    :param mtree: match tree to process
    :param options: options forwarded to ``GuessFinder`` and
        ``self.is_valid_country``
    """
    finder = GuessFinder(self.guess_country, None, self.log, options)
    finder.process_nodes(mtree.unidentified_leaves())

    for leaf in mtree.leaves_containing('language'):
        cleaned = leaf.clean_value.lower()
        if cleaned not in self.replace_language:
            continue

        # this value is to be treated as a country rather than a language
        leaf.guess.set('language', None)
        try:
            candidate = Country.fromguessit(cleaned)
            if self.is_valid_country(candidate, options):
                country_guess = Guess(country=candidate,
                                      confidence=0.9,
                                      input=leaf.value,
                                      span=leaf.span)
                found_guess(leaf, country_guess, logger=log)
        except babelfish.Error:
            # not a known country: the language stays cleared, nothing added
            pass
def refine(video, **kwargs):
    """Refine a video by searching `TheTVDB <http://thetvdb.com/>`_.

    .. note::

        This refiner only works for instances of
        :class:`~subliminal.video.Episode`.

    The series is searched by name, the results are narrowed down to the
    ones whose name or alias matches the video's series exactly (after
    sanitizing, and without conflicting year or country), and the video is
    only updated when exactly one result remains.

    Several attributes can be found:

      * :attr:`~subliminal.video.Episode.series`
      * :attr:`~subliminal.video.Episode.year`
      * :attr:`~subliminal.video.Episode.series_imdb_id`
      * :attr:`~subliminal.video.Episode.series_tvdb_id`
      * :attr:`~subliminal.video.Episode.title`
      * :attr:`~subliminal.video.Video.imdb_id`
      * :attr:`~subliminal.video.Episode.tvdb_id`

    """
    # anything that is not an Episode is out of scope
    if not isinstance(video, Episode):
        logger.error('Cannot refine episodes')
        return

    # both TVDB identifiers already known: nothing left to look up
    if video.series_tvdb_id and video.tvdb_id:
        logger.debug('No need to search')
        return

    # look the series up by name
    logger.info('Searching series %r', video.series)
    results = search_series(video.series.lower())
    if not results:
        logger.warning('No results for series')
        return
    logger.debug('Found %d results', len(results))

    # keep only the results that match the video exactly
    matching_results = []
    for result in results:
        # candidate names: the main name first, then every alias
        candidate_names = [result['seriesName']] + list(result['aliases'])

        # split the main name into its series / year / country parts
        main_parts = series_re.match(result['seriesName']).groupdict()

        # year the series first aired, when TheTVDB knows it
        first_aired = result['firstAired']
        series_year = datetime.strptime(first_aired, '%Y-%m-%d').year if first_aired else None

        # a known, different airing year rules the whole result out
        if video.year and series_year and video.year != series_year:
            logger.debug('Discarding series %r mismatch on year %d', result['seriesName'], series_year)
            continue

        # first candidate name that matches wins
        match_info = None
        for candidate in candidate_names:
            name, name_year, name_country = series_re.match(candidate).groups()
            if name_year:
                name_year = int(name_year)
            if name_country:
                name_country = Country.fromguessit(name_country)

            # a year in the name conflicts with an original series or a different year
            if name_year and (video.original_series or video.year != name_year):
                logger.debug('Discarding series name %r mismatch on year %d', name, name_year)
                continue

            # the video's country, when set, must agree with the name's country
            if video.country and video.country != name_country:
                logger.debug('Discarding series name %r mismatch on country %r', name, name_country)
                continue

            # compare the sanitized names
            if sanitize(name) == sanitize(video.series):
                logger.debug('Found exact match on series %r', candidate)
                match_info = {
                    'series': main_parts['series'],
                    'year': series_year or name_year,
                    'country': name_country,
                    'original_series': main_parts['year'] is None and name_country is None,
                }
                break

        # remember the result together with what was matched
        if match_info is not None:
            matching_results.append({'match': match_info, 'data': result})

    # only an unambiguous single match is acceptable
    if not matching_results:
        logger.error('No matching series found')
        return
    if len(matching_results) > 1:
        logger.error('Multiple matches found')
        return

    # fetch the full series record
    chosen = matching_results[0]
    matched = chosen['match']
    series_data = get_series(chosen['data']['id'])

    # copy the series information onto the video
    logger.debug('Found series %r', series_data)
    video.series = matched['series']
    video.alternative_series.extend(series_data['aliases'])
    video.year = matched['year']
    video.country = matched['country']
    video.original_series = matched['original_series']
    video.series_tvdb_id = series_data['id']
    video.series_imdb_id = series_data['imdbId'] or None

    # fetch the episode
    logger.info('Getting series episode %dx%d', video.season, video.episode)
    episode_data = get_series_episode(video.series_tvdb_id, video.season, video.episode)
    if not episode_data:
        logger.warning('No results for episode')
        return

    # copy the episode information onto the video
    logger.debug('Found episode %r', episode_data)
    video.tvdb_id = episode_data['id']
    video.title = episode_data['episodeName'] or None
    video.imdb_id = episode_data['imdbId'] or None