def get_matches(self, video):
    """Collect the properties of `video` that this subtitle matches.

    For a :class:`Movie` the title, year and IMDb id are compared; a frame
    rate mismatch is only reported through a warning. Extra matches are then
    guessed from the subtitle notes. The result is stored on ``self.matches``
    before being returned.

    :param video: the video to compare with.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    if isinstance(video, Movie):
        # title
        same_title = video.title and sanitize(self.title) == sanitize(video.title)
        if same_title:
            found.add('title')

        # year
        same_year = video.year and self.year == video.year
        if same_year:
            found.add('year')

        # imdb id
        same_imdb = video.imdb_id and self.imdb_id == video.imdb_id
        if same_imdb:
            found.add('imdb_id')

        # fps: nothing is added to the matches, a mismatch is only logged
        if video.fps and self.fps:
            if not framerate_equal(video.fps, self.fps):
                logger.warning("nekur: Wrong FPS (expected: %s, got: %s)", video.fps, self.fps)

    # guess additional info from notes
    notes_guess = guessit(self.notes, {'type': 'movie'})
    found |= guess_matches(video, notes_guess, partial=True)

    self.matches = found
    return found
def _search_show_id(self, series):
    """Search the show id from the `series`.

    Queries the ``shows/search`` endpoint, then walks the result pages by
    following the ``data/next`` link until a show whose sanitized name equals
    the sanitized `series` is found or there is no further page.

    :param str series: series of the episode.
    :return: the show id, if found.
    :rtype: int or None
    :raises: whatever ``raise_for_status`` raises on a failed HTTP request.
    """
    # build the param
    params = {'apikey': self.apikey, 'q': series}

    # make the search
    logger.info('Searching show ids with %r', params)
    r = self.session.get(self.server_url + 'shows/search', params=params, timeout=10)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    if int(root.find('data/count').text) == 0:
        logger.warning('Show id not found: no suggestion')
        return None

    # the wanted name does not change from page to page
    wanted = sanitize(series.lower())

    while True:
        # Looking for show in the current page
        for show in root.findall('data/shows/show'):
            name = show.find('name').text
            if name is None:  # pragma: no cover
                # nameless entries cannot match (same guard as _get_show_ids)
                continue
            if sanitize(name).lower() == wanted:
                show_id = int(show.find('id').text)
                logger.debug('Found show id %d', show_id)
                return show_id

        # Not in this page of result try next (if any); the element itself
        # may be absent, not only its text
        next_page = root.find('data/next')
        if next_page is None or next_page.text is None:
            break

        r = self.session.get(next_page.text, timeout=10)
        r.raise_for_status()
        root = etree.fromstring(r.content)
        logger.info('Loading suggestion page %r', root.find('data/page').text)

    # No matches found
    logger.warning('Show id not found: suggestions does not match')
    return None
def _search_show_id(self, series):
    """Search the show id from the `series`.

    The ``shows/search`` endpoint is queried first; if the show is not in the
    first page of suggestions, the following pages (linked through
    ``data/next``) are loaded one at a time until a match is found or the
    pages run out.

    :param str series: series of the episode.
    :return: the show id, if found.
    :rtype: int or None
    :raises: whatever ``raise_for_status`` raises on a failed HTTP request.
    """
    wanted = sanitize(series.lower())

    def find_in_page(page_root):
        """Return the id of the show matching `wanted` in a page, else None."""
        for show in page_root.findall('data/shows/show'):
            name = show.find('name').text
            # a show without a name cannot match; skipping it avoids calling
            # ``.lower()`` on a non-string
            if name is not None and sanitize(name).lower() == wanted:
                return int(show.find('id').text)
        return None

    # build the param
    params = {'apikey': self.apikey, 'q': series}

    # make the search
    logger.info('Searching show ids with %r', params)
    r = self.session.get(self.server_url + 'shows/search', params=params, timeout=10)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    if int(root.find('data/count').text) == 0:
        logger.warning('Show id not found: no suggestion')
        return None

    # Looking for show in first page
    show_id = find_in_page(root)

    # Not in the first page of result try next (if any)
    while show_id is None:
        next_page = root.find('data/next')
        # stop when the link element is missing as well as when it is empty
        if next_page is None or next_page.text is None:
            break

        r = self.session.get(next_page.text, timeout=10)
        r.raise_for_status()
        root = etree.fromstring(r.content)
        logger.info('Loading suggestion page %r', root.find('data/page').text)

        # Looking for show in following pages
        show_id = find_in_page(root)

    if show_id is None:
        # No matches found
        logger.warning('Show id not found: suggestions does not match')
        return None

    logger.debug('Found show id %d', show_id)
    return show_id
def get_matches(self, video):
    """Collect the properties of `video` that this subtitle matches.

    Only :class:`Movie` videos are compared, on title, year and IMDb id.
    The result is stored on ``self.matches`` before being returned.

    :param video: the video to compare with.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    if isinstance(video, Movie):
        # title
        if video.title:
            if sanitize(self.title) == sanitize(video.title):
                found.add('title')

        # year
        if video.year:
            if self.year == video.year:
                found.add('year')

        # imdb id
        if video.imdb_id:
            if self.imdb_id == video.imdb_id:
                found.add('imdb_id')

    self.matches = found
    return found
def get_show_id(self, series, country_code=None):
    """Get the best matching show id for `series`.

    The lookup is done in three steps, stopping at the first one that yields
    a truthy id: the result of :meth:`_get_show_ids` keyed by
    ``"<series> <country>"`` (only when `country_code` is given), the same
    result keyed by the series alone, and finally a search with
    :meth:`_search_show_id`.

    :param str series: series of the episode.
    :param str country_code: the country in which the show is aired.
    :return: the show id, if found.
    :rtype: int or None
    """
    lookup_key = sanitize(series).lower()
    known_ids = self._get_show_ids()

    # attempt with country
    show_id = None
    if country_code:
        logger.debug('Getting show id with country')
        keyed_with_country = '{0} {1}'.format(lookup_key, country_code.lower())
        show_id = known_ids.get(keyed_with_country)

    # attempt clean
    if not show_id:
        logger.debug('Getting show id')
        show_id = known_ids.get(lookup_key)

    # search as last resort
    if not show_id:
        logger.warning('Series not found in show ids')
        show_id = self._search_show_id(series)

    return show_id
def _get_show_ids(self):
    """Build the mapping of show ids per series from the `shows` page.

    Shows whose name element has no text are left out.

    :return: show id per series, keyed by the sanitized, lower-cased name.
    :rtype: dict
    """
    logger.info('Getting show ids')

    # fetch and parse the show page
    response = self.session.get(self.server_url + 'shows', timeout=10, params={'apikey': self.apikey})
    response.raise_for_status()
    tree = etree.fromstring(response.content)

    # populate the show ids
    ids_by_name = {}
    for node in tree.findall('data/shows/show'):
        raw_name = node.find('name').text
        if raw_name is None:  # pragma: no cover
            continue
        ids_by_name[sanitize(raw_name).lower()] = int(node.find('id').text)

    logger.debug('Found %d show ids', len(ids_by_name))
    return ids_by_name
def get_matches(self, video, hearing_impaired=False):
    """Collect the properties of `video` that this subtitle matches.

    Episodes are compared on series (including alternative names), year and
    series IMDb id; movies on title (including alternative titles), year and
    IMDb id. In both cases the subtitle type must agree with the video type.
    Matches guessed from the subtitle name are always added.

    :param video: the video to compare with.
    :param bool hearing_impaired: not used by this implementation.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    if isinstance(video, Episode) and self.type == 'episode':
        # series
        if video.series:
            own_title = sanitize(self.title)
            series_names = [video.series] + video.alternative_series
            if any(own_title == sanitize(candidate) for candidate in series_names):
                found.add('series')

        # year: a missing year counts for an original series
        if (video.original_series and self.year is None) or (video.year and video.year == self.year):
            found.add('year')

        # imdb_id
        if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
            found.add('series_imdb_id')

    elif isinstance(video, Movie) and self.type == 'movie':
        # title
        if video.title:
            own_title = sanitize(self.title)
            movie_titles = [video.title] + video.alternative_titles
            if any(own_title == sanitize(candidate) for candidate in movie_titles):
                found.add('title')

        # year
        if video.year and self.year == video.year:
            found.add('year')

        # imdb_id
        if video.imdb_id and self.imdb_id == video.imdb_id:
            found.add('imdb_id')

    # name
    name_guess = guessit(self.name, {'type': self.type})
    found |= guess_matches(video, name_guess)

    return found
def get_matches(self, video, hearing_impaired=False):
    """Collect the properties of `video` that this subtitle matches.

    Series, season, episode, format, year and series TVDB id are compared
    directly; further matches are guessed (as a partial guess) from
    ``self.full_data``.

    :param video: the video to compare with.
    :param bool hearing_impaired: not used by this implementation.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    # series
    if video.series:
        if sanitize(self.series) == sanitize(video.series):
            found.add('series')

    # season
    if video.season:
        if self.season == video.season:
            found.add('season')

    # episode
    if video.episode:
        if self.episode == video.episode:
            found.add('episode')

    # format: the video format only has to appear inside the subtitle format
    if video.format:
        if video.format.lower() in self.format.lower():
            found.add('format')

    # year
    if video.year:
        if self.year == video.year:
            found.add('year')

    # tvdb id
    if video.series_tvdb_id:
        if self.tvdb_id == video.series_tvdb_id:
            found.add('series_tvdb_id')

    # other properties
    data_guess = guessit(self.full_data)
    found |= guess_matches(video, data_guess, partial=True)

    return found
def _get_show_ids(self):
    """Query the `shows` page and return the show id of every named series.

    :return: show id per series; keys are the sanitized, lower-cased names.
    :rtype: dict
    """
    # get the show page
    logger.info('Getting show ids')
    query = {'apikey': self.apikey}
    page = self.session.get(self.server_url + 'shows', timeout=10, params=query)
    page.raise_for_status()
    document = etree.fromstring(page.content)

    # populate the show ids, ignoring entries without a name
    collected = {}
    for entry in document.findall('data/shows/show'):
        label = entry.find('name').text
        if label is not None:
            collected[sanitize(label).lower()] = int(entry.find('id').text)

    logger.debug('Found %d show ids', len(collected))
    return collected
def is_valid_title(title, title_id, sanitized_title, season, year):
    """Check if is a valid title.

    The sanitized ``title['title']`` must equal `sanitized_title`. When
    `season` is truthy the title must be of type ``'episode'`` with that exact
    season; otherwise it must be of type ``'movie'`` and, when both `year` and
    ``title['year']`` are available, the years must agree. Every rejection is
    reported through a debug log.

    :param dict title: the title to check.
    :param int title_id: id of the title, used in log messages.
    :param str sanitized_title: the sanitized title that is expected.
    :param season: the expected season, falsy for a movie.
    :param year: the expected movie year, or None to skip the year check.
    :return: True when the title is valid, None otherwise.
    """
    found_title = sanitize(title['title'])
    if found_title != sanitized_title:
        logger.debug("Mismatched title, discarding title %d (%s)", title_id, found_title)
        return

    kind = title['type']

    # movie type
    if not season:
        # discard mismatches on type
        if kind != 'movie':
            logger.debug("Mismatched 'movie' type, discarding title %d (%s)", title_id, found_title)
            return

        # discard mismatches on year
        if year is not None and 'year' in title and title['year'] != year:
            logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, found_title)
            return

        return True

    # episode type
    # discard mismatches on type
    if kind != 'episode':
        logger.debug("Mismatched 'episode' type, discarding title %d (%s)", title_id, found_title)
        return

    # discard mismatches on season
    if 'season' not in title or title['season'] != season:
        logger.debug('Mismatched season %s, discarding title %d (%s)',
                     title.get('season'), title_id, found_title)
        return

    return True
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    If a guess is `partial`, the absence information won't be counted as a match.

    Patch: add multiple release group and formats handling

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """
    matches = set()
    if isinstance(video, Episode):
        # series
        if video.series and 'title' in guess and sanitize(guess['title']) == sanitize(video.series):
            matches.add('series')
        # title
        if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title):
            matches.add('title')
        # season
        if video.season and 'season' in guess and guess['season'] == video.season:
            matches.add('season')
        # episode
        # Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
        # Most providers only support single-ep, so make sure it contains only 1 episode
        # In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
        if video.episode and 'episode' in guess:
            episode_guess = guess['episode']
            episode = min(episode_guess) if episode_guess and isinstance(episode_guess, list) else episode_guess
            if episode == video.episode:
                matches.add('episode')
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            matches.add('year')
    elif isinstance(video, Movie):
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # title
        if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
            matches.add('title')

    # release_group
    if 'release_group' in guess:
        release_groups = guess["release_group"]
        # ``types.ListType`` only exists on Python 2; the builtin ``list`` is
        # the same type there and also works on Python 3
        if not isinstance(release_groups, list):
            release_groups = [release_groups]

        if video.release_group:
            # the equivalents of the video's group do not depend on the guess
            equivalent_groups = get_equivalent_release_groups(sanitize_release_group(video.release_group))
            for release_group in release_groups:
                if sanitize_release_group(release_group) in equivalent_groups:
                    matches.add('release_group')
                    break

    # resolution
    if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution:
        matches.add('resolution')

    # format
    if 'format' in guess:
        formats = guess["format"]
        if not isinstance(formats, list):
            formats = [formats]

        if video.format:
            video_format = video.format
            if video_format in ("HDTV", "SDTV", "TV"):
                video_format = "TV"
                logger.debug("Treating HDTV/SDTV the same")

            for frmt in formats:
                # normalise the guessed format the same way as the video's
                if frmt in ("HDTV", "SDTV"):
                    frmt = "TV"

                if frmt.lower() == video_format.lower():
                    matches.add('format')
                    break

    # video_codec
    if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
        matches.add('video_codec')
    # audio_codec
    if video.audio_codec and 'audio_codec' in guess and guess['audio_codec'] == video.audio_codec:
        matches.add('audio_codec')

    return matches
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    If a guess is `partial`, the absence information won't be counted as a match.

    Patch: add multiple release group and formats handling

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """
    matches = set()
    if isinstance(video, Episode):
        # series
        if video.series and 'title' in guess and sanitize(guess['title']) == sanitize(video.series):
            matches.add('series')
        # title
        if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title):
            matches.add('title')
        # season
        if video.season and 'season' in guess and guess['season'] == video.season:
            matches.add('season')
        # episode
        if video.episode and 'episode' in guess and guess['episode'] == video.episode:
            matches.add('episode')
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            matches.add('year')
    elif isinstance(video, Movie):
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # title
        if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
            matches.add('title')

    # release_group
    if 'release_group' in guess:
        release_groups = guess["release_group"]
        # ``types.ListType`` was removed in Python 3; ``list`` is the very
        # same type on Python 2, so this check is portable
        if not isinstance(release_groups, list):
            release_groups = [release_groups]

        if video.release_group:
            # computed once: it only depends on the video, not on the guess
            equivalent_groups = get_equivalent_release_groups(sanitize_release_group(video.release_group))
            for release_group in release_groups:
                if sanitize_release_group(release_group) in equivalent_groups:
                    matches.add('release_group')
                    break

    # resolution
    if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution:
        matches.add('resolution')

    # format
    if 'format' in guess:
        formats = guess["format"]
        if not isinstance(formats, list):
            formats = [formats]

        if video.format:
            for frmt in formats:
                if frmt.lower() == video.format.lower():
                    matches.add('format')
                    break

    # video_codec
    if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
        matches.add('video_codec')
    # audio_codec
    if video.audio_codec and 'audio_codec' in guess and guess['audio_codec'] == video.audio_codec:
        matches.add('audio_codec')

    return matches
def query(self, language, title, season=None, episode=None, year=None):
    """Search the subtitles available for a title.

    Titles are searched with the sanitized `title` (and once more keeping
    quotes and dots when `title` contains any). Titles that do not agree on
    name, type, season or year are discarded, then the archives of each
    remaining title are listed. The release names inside an archive are taken
    from the cache when possible, otherwise the archive is downloaded and its
    subtitle files are listed and cached.

    :param language: language of the subtitles; its ``legendastv`` attribute
        is passed to :meth:`get_archives`.
    :param str title: title to search for.
    :param season: season of the episode, if any.
    :param episode: episode number, if any.
    :param year: year of the movie, if any.
    :return: the subtitles found.
    :rtype: list
    """
    # search for titles
    wanted_title = sanitize(title)
    candidates = self.search_titles(wanted_title, season)

    # search for titles with the quote or dot character
    ignore_characters = {'\'', '.'}
    if any(c in title for c in ignore_characters):
        candidates.update(self.search_titles(sanitize(title, ignore_characters=ignore_characters), season))

    # an episode is only looked for when both season and episode are given
    want_episode = bool(season and episode)

    subtitles = []
    # iterate over titles
    for title_id, t in candidates.items():
        # discard mismatches on title
        found_title = sanitize(t['title'])
        if found_title != wanted_title:
            logger.debug("Mismatched title, discarding title %d (%s)", title_id, found_title)
            continue

        if want_episode:
            # discard mismatches on type
            if t['type'] != 'episode':
                logger.debug("Mismatched 'episode' type, discarding title %d (%s)", title_id, found_title)
                continue

            # discard mismatches on season
            if 'season' not in t or t['season'] != season:
                logger.debug('Mismatched season %s, discarding title %d (%s)',
                             t.get('season'), title_id, found_title)
                continue
        else:
            # discard mismatches on type
            if t['type'] != 'movie':
                logger.debug("Mismatched 'movie' type, discarding title %d (%s)", title_id, found_title)
                continue

            # discard mismatches on year
            if year is not None and 'year' in t and t['year'] != year:
                logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, found_title)
                continue

        # iterate over title's archives
        for archive in self.get_archives(title_id, language.legendastv):
            # clean name of path separators and pack flags
            clean_name = archive.name.replace('/', '-')
            if archive.pack and clean_name.startswith('(p)'):
                clean_name = clean_name[3:]

            # guess from name
            guess = guessit(clean_name, {'type': t['type']})

            # discard mismatches on episode in non-pack archives
            if want_episode and not archive.pack:
                if 'episode' in guess and guess['episode'] != episode:
                    logger.debug('Mismatched episode %s, discarding archive: %s',
                                 guess['episode'], archive.name)
                    continue

            # compute an expiration time based on the archive timestamp
            now = datetime.utcnow().replace(tzinfo=pytz.utc)
            expiration_time = (now - archive.timestamp).total_seconds()

            # attempt to get the releases from the cache
            cache_item = releases_key.format(archive_id=archive.id, archive_name=archive.name)
            releases = region.get(cache_item, expiration_time=expiration_time)

            # the releases are not in cache or cache is expired
            if releases == NO_VALUE:
                logger.info('Releases not found in cache')

                # download archive
                self.download_archive(archive)

                # extract the releases: skip the legendastv file, hidden
                # files and anything that is not a subtitle
                releases = [
                    name for name in archive.content.namelist()
                    if not name.startswith('Legendas.tv')
                    and not os.path.split(name)[-1].startswith('.')
                    and name.lower().endswith(SUBTITLE_EXTENSIONS)
                ]

                # cache the releases
                region.set(cache_item, releases)

            # iterate over releases
            for release in releases:
                subtitle = LegendasTVSubtitle(language, t['type'], t['title'], t.get('year'),
                                              t.get('imdb_id'), t.get('season'), archive, release)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

    return subtitles
def search_titles(self, title, season, title_year):
    """Search for titles matching the `title`.

    For episodes, each season has it own title. The suggestion endpoint is
    queried with the sanitized title and, when `title` contains a quote or a
    dot, once more with those characters kept. Each result is turned into a
    dict with its type, title and, when available, IMDb id, season and year;
    only results accepted by :meth:`is_valid_title` are kept.

    :param str title: title to search for.
    :param int season: season of the title
    :param int title_year: year of the title
    :return: found titles, keyed by title id.
    :rtype: dict
    """
    found = {}

    queries = [sanitize(title)]
    ignore_characters = {'\'', '.'}
    if any(c in title for c in ignore_characters):
        queries.append(sanitize(title, ignore_characters=ignore_characters))

    for query in queries:
        # make the query
        if season:
            logger.info('Searching episode title %r for season %r', query, season)
        else:
            logger.info('Searching movie title %r', query)

        r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(query), timeout=10)
        raise_for_status(r)

        # loop over results
        for result in json.loads(r.text):
            source = result['_source']

            # extract id
            title_id = int(source['id_filme'])

            # extract type
            entry = {'type': type_map[source['tipo']]}

            # extract title, year and country (the country is not used)
            name, year, _country = title_re.match(source['dsc_nome']).groups()
            entry['title'] = name

            # extract imdb_id, adding the 'tt' prefix when it is missing
            imdb = source['id_imdb']
            if imdb != '0':
                entry['imdb_id'] = imdb if imdb.startswith('tt') else 'tt' + imdb.zfill(7)

            # extract season
            if entry['type'] == 'episode':
                raw_season = source['temporada']
                if raw_season and raw_season.isdigit():
                    entry['season'] = int(raw_season)
                else:
                    match = season_re.search(source['dsc_nome_br'])
                    if match:
                        entry['season'] = int(match.group('season'))
                    else:
                        logger.warning('No season detected for title %d (%s)', title_id, name)

            # extract year
            if year:
                entry['year'] = int(year)
            elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
                # year is based on season air date hence the adjustment
                entry['year'] = int(source['dsc_data_lancamento']) - entry.get('season', 1) + 1

            # add title only if is valid
            # Check against title without ignored chars
            if self.is_valid_title(entry, title_id, queries[0], season, title_year):
                found[title_id] = entry

        logger.debug('Found %d titles', len(found))

    return found