Beispiel #1
0
    def get_matches(self, video):
        """Compute the properties of `video` that this subtitle matches.

        Only movies are compared; for any other kind of video the result is an
        empty set. The result is also stored on ``self.matches``.

        :param video: the video to compare against.
        :return: names of the matching properties.
        :rtype: set

        """
        found = set()

        if isinstance(video, Movie):
            # direct comparisons against the subtitle's own metadata
            if video.title and sanitize(self.title) == sanitize(video.title):
                found.add('title')
            if video.year and self.year == video.year:
                found.add('year')
            if video.imdb_id and self.imdb_id == video.imdb_id:
                found.add('imdb_id')

            # a frame rate mismatch is only reported, it never changes the
            # set of matches
            both_fps_known = video.fps and self.fps
            if both_fps_known and not framerate_equal(video.fps, self.fps):
                logger.warning("nekur: Wrong FPS (expected: %s, got: %s)",
                               video.fps, self.fps)

            # whatever else can be guessed from the free-form notes
            notes_guess = guessit(self.notes, {'type': 'movie'})
            found |= guess_matches(video, notes_guess, partial=True)

        self.matches = found
        return found
Beispiel #2
0
    def _search_show_id(self, series):
        """Search the show id from the `series`

        Queries the ``shows/search`` endpoint and scans the result pages (the
        first one, then every page referenced by ``data/next``) until a show
        whose sanitized name equals the sanitized `series` is found.

        Shows without a name are skipped, and a response that has no
        ``data/next`` element is treated as the last page.

        :param str series: series of the episode.
        :return: the show id, if found.
        :rtype: int or None

        """
        # build the param
        params = {'apikey': self.apikey, 'q': series}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'shows/search',
                             params=params,
                             timeout=10)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        if int(root.find('data/count').text) == 0:
            logger.warning('Show id not found: no suggestion')
            return None

        # the wanted name is the same for every candidate: compute it once
        wanted = sanitize(series.lower())

        def scan_page(page_root):
            """Return ``(show id or None, next page URL or None)``."""
            for show in page_root.findall('data/shows/show'):
                name = show.find('name').text
                # an unnamed entry can neither be sanitized nor match
                if name is not None and sanitize(name).lower() == wanted:
                    return int(show.find('id').text), None
            # ``find`` returns None when the element is absent altogether
            next_page = page_root.find('data/next')
            return None, (None if next_page is None else next_page.text)

        # Looking for show in first page
        show_id, next_url = scan_page(root)

        # Not in the first page of result try next (if any)
        while show_id is None and next_url is not None:  # pragma: no cover
            r = self.session.get(next_url, timeout=10)
            r.raise_for_status()
            root = etree.fromstring(r.content)

            logger.info('Loading suggestion page %r',
                        root.find('data/page').text)

            # Looking for show in following pages
            show_id, next_url = scan_page(root)

        if show_id is None:
            # No matches found
            logger.warning('Show id not found: suggestions does not match')
            return None

        logger.debug('Found show id %d', show_id)
        return show_id
Beispiel #3
0
    def _search_show_id(self, series):
        """Search the show id from the `series`

        Queries the ``shows/search`` endpoint and scans the result pages (the first one, then every
        page referenced by ``data/next``) until a show whose sanitized name equals the sanitized
        `series` is found.

        Shows without a name are skipped, and a response that has no ``data/next`` element is
        treated as the last page.

        :param str series: series of the episode.
        :return: the show id, if found.
        :rtype: int or None

        """
        # build the param
        params = {'apikey': self.apikey, 'q': series}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'shows/search', params=params, timeout=10)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        if int(root.find('data/count').text) == 0:
            logger.warning('Show id not found: no suggestion')
            return None

        # the wanted name is the same for every candidate: compute it once
        wanted = sanitize(series.lower())

        def scan_page(page_root):
            """Return ``(show id or None, next page URL or None)`` for one result page."""
            for show in page_root.findall('data/shows/show'):
                name = show.find('name').text
                # an unnamed entry can neither be sanitized nor match
                if name is not None and sanitize(name).lower() == wanted:
                    return int(show.find('id').text), None
            # ``find`` returns None when the element is absent altogether
            next_page = page_root.find('data/next')
            return None, (None if next_page is None else next_page.text)

        # Looking for show in first page
        show_id, next_url = scan_page(root)

        # Not in the first page of result try next (if any)
        while show_id is None and next_url is not None:  # pragma: no cover
            r = self.session.get(next_url, timeout=10)
            r.raise_for_status()
            root = etree.fromstring(r.content)

            logger.info('Loading suggestion page %r', root.find('data/page').text)

            # Looking for show in following pages
            show_id, next_url = scan_page(root)

        if show_id is None:
            # No matches found
            logger.warning('Show id not found: suggestions does not match')
            return None

        logger.debug('Found show id %d', show_id)
        return show_id
Beispiel #4
0
 def get_matches(self, video):
     """Return the set of properties on which this subtitle matches `video`.

     Only movies are compared: title (after sanitizing both sides), year and
     IMDb id. The resulting set is also kept on ``self.matches``.

     :param video: the video to compare against.
     :return: names of the matching properties.
     :rtype: set

     """
     matched = set()

     # nothing is compared for anything that is not a movie
     if not isinstance(video, Movie):
         self.matches = matched
         return matched

     if video.title and sanitize(self.title) == sanitize(video.title):
         matched.add('title')
     if video.year and self.year == video.year:
         matched.add('year')
     if video.imdb_id and self.imdb_id == video.imdb_id:
         matched.add('imdb_id')

     self.matches = matched
     return matched
Beispiel #5
0
    def get_show_id(self, series, country_code=None):
        """Resolve the show id that best matches `series`.

        Lookup order: the mapping returned by :meth:`_get_show_ids`, keyed by
        the sanitized name followed by the country code (when one is given),
        then the same mapping keyed by the sanitized name alone, and finally a
        search through :meth:`_search_show_id`.

        :param str series: series of the episode.
        :param str country_code: the country in which the show is aired.
        :return: the show id, if found.
        :rtype: int or None

        """
        name_key = sanitize(series).lower()
        known_ids = self._get_show_ids()
        found = None

        # attempt with country
        if country_code:
            logger.debug('Getting show id with country')
            country_key = '{0} {1}'.format(name_key, country_code.lower())
            found = known_ids.get(country_key)

        # attempt clean
        if not found:
            logger.debug('Getting show id')
            found = known_ids.get(name_key)

        # search as last resort
        if not found:
            logger.warning('Series not found in show ids')
            found = self._search_show_id(series)

        return found
Beispiel #6
0
    def _get_show_ids(self):
        """Build the ``dict`` of show ids per series from the `shows` page.

        Keys are the sanitized, lower-cased show names; shows without a name
        are left out.

        :return: show id per series.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        query = {'apikey': self.apikey}
        response = self.session.get(self.server_url + 'shows',
                                    timeout=10,
                                    params=query)
        response.raise_for_status()
        tree = etree.fromstring(response.content)

        # populate the show ids
        ids_by_name = {}
        for node in tree.findall('data/shows/show'):
            name = node.find('name').text
            if name is None:  # pragma: no cover
                continue
            ids_by_name[sanitize(name).lower()] = int(node.find('id').text)
        logger.debug('Found %d show ids', len(ids_by_name))

        return ids_by_name
Beispiel #7
0
    def get_show_id(self, series, country_code=None):
        """Find the show id that best matches `series`.

        The ids returned by :meth:`_get_show_ids` are tried first, with the country code appended to
        the sanitized name when one is given and then with the sanitized name alone. If neither
        yields an id, :meth:`_search_show_id` is used as a fallback.

        :param str series: series of the episode.
        :param str country_code: the country in which the show is aired.
        :return: the show id, if found.
        :rtype: int or None

        """
        wanted = sanitize(series).lower()
        ids = self._get_show_ids()
        result = None

        # attempt with country
        if country_code:
            logger.debug('Getting show id with country')
            result = ids.get('{0} {1}'.format(wanted, country_code.lower()))

        # attempt clean
        if not result:
            logger.debug('Getting show id')
            result = ids.get(wanted)

        # everything found so far wins over the (slower) search
        if result:
            return result

        # search as last resort
        logger.warning('Series not found in show ids')
        return self._search_show_id(series)
Beispiel #8
0
    def get_matches(self, video, hearing_impaired=False):
        """List the properties of `video` matched by this subtitle.

        Metadata is compared only when the kind of `video` agrees with
        ``self.type`` (episode or movie); the guess made from ``self.name`` is
        always added.

        :param video: the video to compare against.
        :param bool hearing_impaired: not used by this implementation.
        :return: names of the matching properties.
        :rtype: set

        """
        found = set()

        if isinstance(video, Episode) and self.type == 'episode':
            # series: any of the known names of the series may match
            if video.series:
                own_title = sanitize(self.title)
                names = [video.series] + video.alternative_series
                if any(sanitize(name) == own_title for name in names):
                    found.add('series')

            # year: an original series without a year on the subtitle counts
            # as a match, as does an identical year
            undated_original = video.original_series and self.year is None
            if undated_original or (video.year and video.year == self.year):
                found.add('year')

            # imdb_id
            if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
                found.add('series_imdb_id')

        elif isinstance(video, Movie) and self.type == 'movie':
            # title: any of the known titles of the movie may match
            if video.title:
                own_title = sanitize(self.title)
                names = [video.title] + video.alternative_titles
                if any(sanitize(name) == own_title for name in names):
                    found.add('title')

            # year
            if video.year and self.year == video.year:
                found.add('year')

            # imdb_id
            if video.imdb_id and self.imdb_id == video.imdb_id:
                found.add('imdb_id')

        # name
        name_guess = guessit(self.name, {'type': self.type})
        found |= guess_matches(video, name_guess)

        return found
Beispiel #9
0
    def get_matches(self, video, hearing_impaired=False):
        """Compare this subtitle with `video` and list what matches.

        Series, season, episode, format, year and TVDB id are compared
        directly; further properties come from a partial guess on
        ``self.full_data``.

        :param video: the video to compare against.
        :param bool hearing_impaired: not used by this implementation.
        :return: names of the matching properties.
        :rtype: set

        """
        found = set()

        # series
        if video.series and sanitize(self.series) == sanitize(video.series):
            found.add('series')

        # season and episode: plain equality on the attribute of the same name
        for attribute in ('season', 'episode'):
            wanted = getattr(video, attribute)
            if wanted and getattr(self, attribute) == wanted:
                found.add(attribute)

        # format: the video's format only has to appear in the subtitle's one
        if video.format and video.format.lower() in self.format.lower():
            found.add('format')

        # year
        if video.year and self.year == video.year:
            found.add('year')

        # tvdb id
        if video.series_tvdb_id and self.tvdb_id == video.series_tvdb_id:
            found.add('series_tvdb_id')

        # other properties
        full_guess = guessit(self.full_data)
        found |= guess_matches(video, full_guess, partial=True)

        return found
Beispiel #10
0
    def get_matches(self, video, hearing_impaired=False):
        """Return the names of the `video` properties this subtitle agrees with.

        A property of the video is only compared when it is set. Besides the
        direct comparisons, the matches of a partial guess made from
        ``self.full_data`` are included.

        :param video: the video to compare against.
        :param bool hearing_impaired: not used by this implementation.
        :return: names of the matching properties.
        :rtype: set

        """
        result = set()

        # series
        if video.series:
            if sanitize(self.series) == sanitize(video.series):
                result.add('series')
        # season
        if video.season:
            if self.season == video.season:
                result.add('season')
        # episode
        if video.episode:
            if self.episode == video.episode:
                result.add('episode')
        # format (substring test, case-insensitive)
        if video.format:
            if video.format.lower() in self.format.lower():
                result.add('format')
        # year
        if video.year:
            if self.year == video.year:
                result.add('year')
        # tvdb id of the series
        if video.series_tvdb_id:
            if self.tvdb_id == video.series_tvdb_id:
                result.add('series_tvdb_id')

        # other properties
        guessed = guess_matches(video, guessit(self.full_data), partial=True)
        result |= guessed

        return result
Beispiel #11
0
    def _get_show_ids(self):
        """Query the `shows` page and map every named show to its id.

        Keys are the sanitized, lower-cased show names; shows without a name are skipped.

        :return: show id per series.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        params = {'apikey': self.apikey}
        url = self.server_url + 'shows'
        page = self.session.get(url, timeout=10, params=params)
        page.raise_for_status()
        document = etree.fromstring(page.content)

        # populate the show ids
        mapping = {}
        for entry in document.findall('data/shows/show'):
            show_name = entry.find('name').text
            if show_name is None:  # pragma: no cover
                continue
            mapping[sanitize(show_name).lower()] = int(entry.find('id').text)
        logger.debug('Found %d show ids', len(mapping))

        return mapping
Beispiel #12
0
    def is_valid_title(title, title_id, sanitized_title, season, year):
        """Tell whether a search result is the title being looked for.

        The sanitized name must equal `sanitized_title`. With a `season`, the
        result must be an episode of that season; without one, it must be a
        movie whose year, when both years are known, equals `year`.

        :param dict title: the result to check (``title``, ``type`` and
            optionally ``season`` / ``year`` keys are read).
        :param int title_id: id of the result, used in the log messages.
        :param str sanitized_title: the sanitized title searched for.
        :param season: the season searched for; falsy for a movie.
        :param year: the year searched for, or ``None``.
        :return: ``True`` when the title is accepted, ``None`` otherwise.

        """
        candidate = sanitize(title['title'])
        if candidate != sanitized_title:
            logger.debug("Mismatched title, discarding title %d (%s)",
                         title_id, candidate)
            return None

        if not season:
            # movie type: discard mismatches on type
            if title['type'] != 'movie':
                logger.debug(
                    "Mismatched 'movie' type, discarding title %d (%s)",
                    title_id, candidate)
                return None

            # discard mismatches on year, only when both years are known
            year_known = year is not None and 'year' in title
            if year_known and title['year'] != year:
                logger.debug("Mismatched movie year, discarding title %d (%s)",
                             title_id, candidate)
                return None

            return True

        # episode type: discard mismatches on type
        if title['type'] != 'episode':
            logger.debug(
                "Mismatched 'episode' type, discarding title %d (%s)",
                title_id, candidate)
            return None

        # discard mismatches on season (a missing season is a mismatch too)
        if 'season' not in title or title['season'] != season:
            logger.debug('Mismatched season %s, discarding title %d (%s)',
                         title.get('season'), title_id, candidate)
            return None

        return True
Beispiel #13
0
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    If a guess is `partial`, the absence information won't be counted as a match.

    Patch: add multiple release group and formats handling

    The ``release_group`` and ``format`` entries of the guess may each be a
    single value or a list of values; a match on any one of them counts.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """

    matches = set()
    if isinstance(video, Episode):
        # series
        if video.series and 'title' in guess and sanitize(
                guess['title']) == sanitize(video.series):
            matches.add('series')
        # title
        if video.title and 'episode_title' in guess and sanitize(
                guess['episode_title']) == sanitize(video.title):
            matches.add('title')
        # season
        if video.season and 'season' in guess and guess[
                'season'] == video.season:
            matches.add('season')
        # episode
        # Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
        # Most providers only support single-ep, so make sure it contains only 1 episode
        # In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
        if video.episode and 'episode' in guess:
            episode_guess = guess['episode']
            episode = min(episode_guess) if episode_guess and isinstance(
                episode_guess, list) else episode_guess
            if episode == video.episode:
                matches.add('episode')
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            matches.add('year')
    elif isinstance(video, Movie):
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # title
        if video.title and 'title' in guess and sanitize(
                guess['title']) == sanitize(video.title):
            matches.add('title')

    # release_group
    if 'release_group' in guess:
        release_groups = guess["release_group"]
        # plain ``list`` check: ``types.ListType`` only exists on Python 2
        if not isinstance(release_groups, list):
            release_groups = [release_groups]

        if video.release_group:
            for release_group in release_groups:
                if (sanitize_release_group(release_group)
                        in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group))):
                    matches.add('release_group')
                    break

    # resolution
    if video.resolution and 'screen_size' in guess and guess[
            'screen_size'] == video.resolution:
        matches.add('resolution')

    # format
    if 'format' in guess:
        formats = guess["format"]
        # plain ``list`` check: ``types.ListType`` only exists on Python 2
        if not isinstance(formats, list):
            formats = [formats]

        if video.format:
            # HDTV, SDTV and TV are all compared as "TV"
            video_format = video.format
            if video_format in ("HDTV", "SDTV", "TV"):
                video_format = "TV"
                logger.debug("Treating HDTV/SDTV the same")

            for frmt in formats:
                if frmt in ("HDTV", "SDTV"):
                    frmt = "TV"

                if frmt.lower() == video_format.lower():
                    matches.add('format')
                    break
    # video_codec
    if video.video_codec and 'video_codec' in guess and guess[
            'video_codec'] == video.video_codec:
        matches.add('video_codec')
    # audio_codec
    if video.audio_codec and 'audio_codec' in guess and guess[
            'audio_codec'] == video.audio_codec:
        matches.add('audio_codec')

    return matches
Beispiel #14
0
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    If a guess is `partial`, the absence information won't be counted as a match.

    Patch: add multiple release group and formats handling

    The ``release_group`` and ``format`` entries of the guess may each be a single value or a list
    of values; a match on any one of them counts.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """

    matches = set()
    if isinstance(video, Episode):
        # series
        if video.series and 'title' in guess and sanitize(guess['title']) == sanitize(video.series):
            matches.add('series')
        # title
        if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title):
            matches.add('title')
        # season
        if video.season and 'season' in guess and guess['season'] == video.season:
            matches.add('season')
        # episode
        if video.episode and 'episode' in guess and guess['episode'] == video.episode:
            matches.add('episode')
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            matches.add('year')
    elif isinstance(video, Movie):
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # title
        if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
            matches.add('title')

    # release_group
    if 'release_group' in guess:
        release_groups = guess["release_group"]
        # plain ``list`` check: ``types.ListType`` only exists on Python 2
        if not isinstance(release_groups, list):
            release_groups = [release_groups]

        if video.release_group:
            for release_group in release_groups:
                if (sanitize_release_group(release_group) in
                        get_equivalent_release_groups(sanitize_release_group(video.release_group))):
                    matches.add('release_group')
                    break

    # resolution
    if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution:
        matches.add('resolution')

    # format
    if 'format' in guess:
        formats = guess["format"]
        # plain ``list`` check: ``types.ListType`` only exists on Python 2
        if not isinstance(formats, list):
            formats = [formats]

        if video.format:
            # case-insensitive comparison, the first matching format is enough
            for frmt in formats:
                if frmt.lower() == video.format.lower():
                    matches.add('format')
                    break
    # video_codec
    if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
        matches.add('video_codec')
    # audio_codec
    if video.audio_codec and 'audio_codec' in guess and guess['audio_codec'] == video.audio_codec:
        matches.add('audio_codec')

    return matches
Beispiel #15
0
    def query(self, language, title, season=None, episode=None, year=None):
        """Collect the subtitles available for a title.

        Titles returned by :meth:`search_titles` are filtered on their sanitized name, on their type
        and on the season (episodes) or the year (movies). For every archive of each remaining
        title, the subtitle files it contains are listed, either from the cache or by downloading
        the archive, and one :class:`LegendasTVSubtitle` is created per file.

        A title is handled as an episode only when both `season` and `episode` are given, and as a
        movie otherwise.

        :param language: language of the subtitles; its ``legendastv`` attribute is passed to
            :meth:`get_archives`.
        :param str title: title to search for.
        :param season: season of the episode, if any.
        :param episode: number of the episode, if any.
        :param year: year of the movie, if any.
        :return: the found subtitles.
        :rtype: list

        """
        # search for titles
        sanitized_title = sanitize(title)
        titles = self.search_titles(sanitized_title, season)

        # search for titles with the quote or dot character
        ignore_characters = {'\'', '.'}
        if any(c in title for c in ignore_characters):
            titles.update(self.search_titles(sanitize(title, ignore_characters=ignore_characters), season))

        subtitles = []
        # iterate over titles
        for title_id, t in titles.items():
            # discard mismatches on title
            sanitized_result = sanitize(t['title'])
            if sanitized_result != sanitized_title:
                logger.debug("Mismatched title, discarding title %d (%s)",
                             title_id, sanitized_result)
                continue

            # episode
            if season and episode:
                # discard mismatches on type
                if t['type'] != 'episode':
                    logger.debug("Mismatched 'episode' type, discarding title %d (%s)", title_id, sanitized_result)
                    continue

                # discard mismatches on season
                if 'season' not in t or t['season'] != season:
                    logger.debug('Mismatched season %s, discarding title %d (%s)',
                                 t.get('season'), title_id, sanitized_result)
                    continue
            # movie
            else:
                # discard mismatches on type
                if t['type'] != 'movie':
                    logger.debug("Mismatched 'movie' type, discarding title %d (%s)", title_id, sanitized_result)
                    continue

                # discard mismatches on year
                # (a title without a year, or a search without one, is never discarded here)
                if year is not None and 'year' in t and t['year'] != year:
                    logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, sanitized_result)
                    continue

            # iterate over title's archives
            for a in self.get_archives(title_id, language.legendastv):
                # clean name of path separators and pack flags
                clean_name = a.name.replace('/', '-')
                if a.pack and clean_name.startswith('(p)'):
                    clean_name = clean_name[3:]

                # guess from name
                guess = guessit(clean_name, {'type': t['type']})

                # episode
                if season and episode:
                    # discard mismatches on episode in non-pack archives
                    # (an archive whose name yields no episode guess is kept)
                    if not a.pack and 'episode' in guess and guess['episode'] != episode:
                        logger.debug('Mismatched episode %s, discarding archive: %s',
                                     guess['episode'], a.name)
                        continue

                # compute an expiration time based on the archive timestamp
                # (the value is the archive's age in seconds; a.timestamp has to be timezone-aware
                # because it is subtracted from an aware UTC datetime)
                expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds()

                # attempt to get the releases from the cache
                cache_item = releases_key.format(archive_id=a.id, archive_name=a.name)
                releases = region.get(cache_item, expiration_time=expiration_time)

                # the releases are not in cache or cache is expired
                if releases == NO_VALUE:
                    logger.info('Releases not found in cache')

                    # download archive
                    self.download_archive(a)

                    # extract the releases
                    releases = []
                    for name in a.content.namelist():
                        # discard the legendastv file
                        if name.startswith('Legendas.tv'):
                            continue

                        # discard hidden files
                        if os.path.split(name)[-1].startswith('.'):
                            continue

                        # discard non-subtitle files
                        if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                            continue

                        releases.append(name)

                    # cache the releases
                    region.set(cache_item, releases)

                # iterate over releases
                for r in releases:
                    subtitle = LegendasTVSubtitle(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
                                                  t.get('season'), a, r)
                    logger.debug('Found subtitle %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles
Beispiel #16
0
    def search_titles(self, title, season, title_year):
        """Search for titles matching the `title`.

        For episodes, each season has it own title. The search is run for the
        sanitized `title` and, when `title` contains a quote or a dot, a second
        time for the variant sanitized with those characters ignored. Every
        result is validated with :meth:`is_valid_title` against the first
        sanitized form before being kept.

        :param str title: title to search for.
        :param int season: season of the title
        :param int title_year: year of the title
        :return: found titles, keyed by title id.
        :rtype: dict

        """
        found = {}
        ignore_characters = {'\'', '.'}
        queries = [sanitize(title)]
        if any(c in title for c in ignore_characters):
            queries.append(
                sanitize(title, ignore_characters=ignore_characters))

        for query in queries:
            # make the query
            if season:
                logger.info('Searching episode title %r for season %r',
                            query, season)
            else:
                logger.info('Searching movie title %r', query)

            url = self.server_url + 'legenda/sugestao/{}'.format(query)
            r = self.session.get(url, timeout=10)
            raise_for_status(r)

            # loop over results
            for result in json.loads(r.text):
                source = result['_source']

                # extract id and type
                title_id = int(source['id_filme'])
                entry = {'type': type_map[source['tipo']]}

                # extract title, year and country
                name, year, _country = title_re.match(
                    source['dsc_nome']).groups()
                entry['title'] = name

                # extract imdb_id, adding the 'tt' prefix when it is missing
                imdb_id = source['id_imdb']
                if imdb_id != '0':
                    if imdb_id.startswith('tt'):
                        entry['imdb_id'] = imdb_id
                    else:
                        entry['imdb_id'] = 'tt' + imdb_id.zfill(7)

                # extract season
                if entry['type'] == 'episode':
                    season_text = source['temporada']
                    if season_text and season_text.isdigit():
                        entry['season'] = int(season_text)
                    else:
                        # fall back on the season found in the localized name
                        match = season_re.search(source['dsc_nome_br'])
                        if match:
                            entry['season'] = int(match.group('season'))
                        else:
                            logger.warning(
                                'No season detected for title %d (%s)',
                                title_id, name)

                # extract year
                if year:
                    entry['year'] = int(year)
                elif source['dsc_data_lancamento'] and source[
                        'dsc_data_lancamento'].isdigit():
                    # year is based on season air date hence the adjustment
                    air_year = int(source['dsc_data_lancamento'])
                    entry['year'] = air_year - entry.get('season', 1) + 1

                # add title only if is valid
                # Check against title without ignored chars
                if self.is_valid_title(entry, title_id, queries[0],
                                       season, title_year):
                    found[title_id] = entry

            logger.debug('Found %d titles', len(found))

        return found