Example #1
0
    def get_matches(self, video):
        """Work out which properties of `video` this subtitle agrees with.

        Episodes are compared on series name (main and alternative names),
        season, episode and series IMDb id; movies on title (main and
        alternative titles). In both cases the result of running `guessit`
        on ``self.release`` is merged in through `guess_matches`.

        :param video: the video to compare with; a video that is neither an
            `Episode` nor a `Movie` yields an empty set.
        :return: names of the matched properties.
        :rtype: set
        """
        found = set()

        if isinstance(video, Episode):
            # series: the subtitle title may equal the main or any alternative name
            if video.series:
                own_title = sanitize(self.title)
                series_names = [video.series] + video.alternative_series
                if any(own_title == sanitize(candidate)
                       for candidate in series_names):
                    found.add("series")

            # numeric properties only count when the video actually knows them
            if video.season and self.season == video.season:
                found.add("season")
            if video.episode and self.episode == video.episode:
                found.add("episode")
            if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
                found.add("series_imdb_id")

            # everything guessit can extract from the release name
            release_guess = guessit(self.release, {"type": "episode"})
            found |= guess_matches(video, release_guess)
            return found

        if isinstance(video, Movie):
            # everything guessit can extract from the release name
            release_guess = guessit(self.release, {"type": "movie"})
            found |= guess_matches(video, release_guess)

            # title: the subtitle title may equal the main or any alternative title
            if video.title:
                own_title = sanitize(self.title)
                movie_titles = [video.title] + video.alternative_titles
                if any(own_title == sanitize(candidate)
                       for candidate in movie_titles):
                    found.add("title")

        return found
Example #2
0
    def get_matches(self, video, hearing_impaired=False):
        """Extend the parent class's matches with provider-specific checks.

        On top of the parent's ``get_matches(video)`` this merges `guessit`
        results for ``self.movie_release_name`` and ``self.filename``, compares
        the series name / movie title, checks the frame rate, treats a tag
        match as a hash match and treats an identical IMDb id as a year match.

        :param video: the video to compare with.
        :param bool hearing_impaired: not read by this method.
        :return: names of the matched properties; an empty set when the frame
            rates differ and ``self.skip_wrong_fps`` is set.
        :rtype: set
        """
        matches = super(OpenSubtitlesSubtitle, self).get_matches(video)

        type_ = "episode" if isinstance(video, Episode) else "movie"
        matches |= guess_matches(
            video, guessit(self.movie_release_name, {'type': type_}))
        matches |= guess_matches(video, guessit(self.filename,
                                                {'type': type_}))

        # episode
        if type_ == "episode" and self.movie_kind == "episode":
            # series
            series = fix_tv_naming(video.series)
            if series and (sanitize(self.series_name) in (
                    sanitize(name)
                    for name in [series] + video.alternative_series)):
                matches.add('series')
        # movie
        elif type_ == "movie" and self.movie_kind == "movie":
            # title
            title = fix_movie_naming(video.title)
            if title and (sanitize(self.movie_name) in (
                    sanitize(name)
                    for name in [title] + video.alternative_titles)):
                matches.add('title')

        sub_fps = None
        try:
            sub_fps = float(self.fps)
        except (TypeError, ValueError):
            # float(None) raises TypeError, float("n/a") raises ValueError;
            # either way the subtitle's frame rate is unknown and the check
            # below is skipped instead of aborting the whole matching
            pass

        # video has fps info, sub also, and sub's fps is non-zero
        if video.fps and sub_fps and not framerate_equal(video.fps, self.fps):
            self.wrong_fps = True

            if self.skip_wrong_fps:
                logger.debug(
                    "Wrong FPS (expected: %s, got: %s, lowering score massively)",
                    video.fps, self.fps)
                # fixme: may be too harsh
                return set()
            else:
                logger.debug("Wrong FPS (expected: %s, got: %s, continuing)",
                             video.fps, self.fps)

        # matched by tag?
        if self.matched_by == "tag":
            # treat a tag match equally to a hash match
            logger.debug(
                "Subtitle matched by tag, treating it as a hash-match. Tag: '%s'",
                self.query_parameters.get("tag", None))
            matches.add("hash")

        # imdb_id match so we'll consider year as matching
        if self.movie_imdb_id and video.imdb_id and (self.movie_imdb_id
                                                     == video.imdb_id):
            matches.add("year")

        return matches
    def get_matches(self, video):
        """Work out which properties of `video` this subtitle agrees with.

        The instance ``__dict__`` and the resulting matches are both written
        to the debug log. ``self.series`` holds the name that is compared with
        the series of an episode or with the title(s) of a movie.

        :param video: the video to compare with; a video that is neither an
            `Episode` nor a `Movie` yields an empty set.
        :return: names of the matched properties.
        :rtype: set
        """
        found = set()
        logger.debug("--ScrewZiraSubtitle--\n{}".format(self.__dict__))

        if isinstance(video, Episode):
            # series: only the main series name is compared here
            if video.series:
                if sanitize(self.series) == sanitize(video.series):
                    found.add('series')
            # season and episode numbers
            if video.season and self.season == video.season:
                found.add('season')
            if video.episode and self.episode == video.episode:
                found.add('episode')
            # whatever guessit extracts from the release name
            episode_guess = guessit(self.release, {'type': 'episode'})
            found |= guess_matches(video, episode_guess)
        elif isinstance(video, Movie):
            # title: main title or any alternative title
            if video.title:
                own_name = sanitize(self.series)
                titles = [video.title] + video.alternative_titles
                if any(own_name == sanitize(candidate) for candidate in titles):
                    found.add('title')
            # year
            if video.year and self.year == video.year:
                found.add('year')
            # whatever guessit extracts from the release name
            movie_guess = guessit(self.release, {'type': 'movie'})
            found |= guess_matches(video, movie_guess)

        logger.debug("ScrewZira subtitle criteria match:\n{}".format(found))
        return found
Example #4
0
    def query(self, language, video):
        """Search subs.sab.bz for `video` and collect subtitles from the results.

        Episodes are searched as "<series> <season> <episode>" (two-digit
        numbers); movies by sanitized title plus year. Only the first 20
        result rows are followed.

        :param language: requested language; ``'en'`` / ``'eng'`` select
            language code 1, anything else keeps the default ``'2'``.
        :param video: the `Episode` or movie to search for.
        :return: the subtitles produced by
            ``download_archive_and_add_subtitle_files`` for every followed
            link, each tagged with the uploader name when one is found.
        :rtype: list
        """
        if isinstance(video, Episode):
            year_term = ''
            movie_term = "%s %02d %02d" % (sanitize(
                fix_tv_naming(video.series),
                {'\''}), video.season, video.episode)
        else:
            year_term = video.year
            movie_term = sanitize(video.title, {'\''})

        wants_english = language == 'en' or language == 'eng'

        params = {
            'act': 'search',
            'movie': movie_term,
            'select-language': 1 if wants_english else '2',
            'upldr': '',
            'yr': year_term,
            'release': ''
        }

        logger.info('Searching subtitle %r', params)
        response = self.session.post('http://subs.sab.bz/index.php?',
                                     params=params,
                                     allow_redirects=False,
                                     timeout=10,
                                     headers={
                                         'Referer': 'http://subs.sab.bz/',
                                     })
        response.raise_for_status()

        collected = []

        # redirects are not followed, so a non-200 answer carries no results
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return collected

        soup = BeautifulSoup(response.content, 'lxml')

        # Search on first 20 rows only
        for row in soup.findAll('tr', {'class': 'subs-row'})[:20]:
            link_cell = row.find('td', {'class': 'c2field'})
            if not link_cell:
                continue
            anchor = link_cell.find('a')
            if not anchor:
                continue

            link = anchor.get('href')
            uploader_anchor = row.find('a', href=re.compile(r'.*showuser=.*'))
            uploader = uploader_anchor.get_text() if uploader_anchor else None
            logger.info('Found subtitle link %r', link)

            downloaded = self.download_archive_and_add_subtitle_files(
                link, language, video)
            for subtitle in downloaded:
                subtitle.uploader = uploader
            collected = collected + downloaded

        return collected
Example #5
0
    def get_matches(self, video):
        """Work out which properties of `video` this subtitle agrees with.

        Episodes are compared on series name, year, season and episode; movies
        on title and year. Release group, resolution and source are looked up
        in ``self.releases`` for both, and the `guessit` result for
        ``self.releases`` is merged in. The result is also stored on
        ``self.matches``.

        :param video: the video to compare with.
        :return: names of the matched properties.
        :rtype: set
        """
        matches = set()

        # handle movies and series separately
        if isinstance(video, Episode):
            # series: the name comparison must only run when the video has a
            # series name (`and` binds tighter than `or`, so without the
            # nesting the alt_title comparison ran against an empty name too)
            if video.series:
                series_name = fix_inconsistent_naming(video.series)
                if (sanitize(self.title) == series_name
                        or sanitize(self.alt_title) == series_name):
                    matches.add('series')
            # year: original series with a year-less subtitle, or equal years
            if ((video.original_series and self.year is None)
                    or (video.year and video.year == self.year)):
                matches.add('year')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
        # movie
        elif isinstance(video, Movie):
            # title: same guard as for the series name above
            if video.title:
                movie_title = fix_inconsistent_naming(video.title)
                if (sanitize(self.title) == movie_title
                        or sanitize(self.alt_title) == movie_title):
                    matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')

        # rest is same for both groups

        # release_group
        if (video.release_group and self.releases
                and any(r in sanitize_release_group(self.releases)
                        for r in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group)))):
            matches.add('release_group')
        # resolution
        if (video.resolution and self.releases
                and video.resolution in self.releases.lower()):
            matches.add('resolution')
        # source
        if (video.source and self.releases
                and video.source.lower() in self.releases.lower()):
            matches.add('source')
        # other properties
        matches |= guess_matches(video, guessit(self.releases))

        self.matches = matches

        return matches
Example #6
0
    def get_show_id(self, series, year=None, country_code=None, ignore_cache=False):
        """Get the best matching show id for `series`, `year` and `country_code`.

        Looks the series up in the result of :meth:`_get_show_ids` under several sanitized spellings. When nothing is
        found and the last recorded fetch is more than a day old, the show ids are refreshed and the lookup is retried
        once. The fallback on :meth:`_search_show_id` is currently commented out.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :param country_code: country code of the series, if any.
        :type country_code: str
        :param bool ignore_cache: refresh the show ids before looking up and never trigger a further retry.
        :return: the show id, if found.
        :rtype: int
        """
        show_id = None
        # candidate keys: the sanitized name as is, with dots removed, and with "&" spelled out as "and"
        ids_to_look_for = {sanitize(series).lower(), sanitize(series.replace(".", "")).lower(),
                           sanitize(series.replace("&", "and")).lower()}
        show_ids = self._get_show_ids()
        # NOTE(review): `.refresh` presumably comes from a caching decorator on `_get_show_ids` - confirm
        if ignore_cache or not show_ids:
            show_ids = self._get_show_ids.refresh(self)

        logger.debug("Trying show ids: %s", ids_to_look_for)
        # every attempt below is guarded by `not show_id`, so the first hit is kept for the remaining candidates
        for series_sanitized in ids_to_look_for:
            # attempt with country
            if not show_id and country_code:
                logger.debug('Getting show id with country')
                show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))

            # attempt with year
            if not show_id and year:
                logger.debug('Getting show id with year')
                show_id = show_ids.get('%s %d' % (series_sanitized, year))

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show_ids.get(series_sanitized)

                if not show_id:
                    now = datetime.datetime.now()
                    last_fetch = region.get(self.last_show_ids_fetch_key)

                    # re-fetch show ids once per day if any show ID not found
                    # (the recursive call passes ignore_cache=True, so it cannot recurse again)
                    if not ignore_cache and last_fetch != NO_VALUE and last_fetch + datetime.timedelta(days=1) < now:
                        logger.info("Show id not found; re-fetching show ids")
                        return self.get_show_id(series, year=year, country_code=country_code, ignore_cache=True)
                    logger.debug("Not refreshing show ids, as the last fetch has been too recent")

            # search as last resort
            # broken right now
            # if not show_id:
            #     logger.warning('Series %s not found in show ids', series)
            #     show_id = self._search_show_id(series)

        return show_id
Example #7
0
    def query(self, language, video):
        """Search yavka.net for `video` and collect subtitles from the results.

        Episodes are searched as "<series> sXXeYY"; movies by sanitized title
        plus year. Only the first 20 result rows are followed.

        :param language: requested language; the two- and three-letter codes
            for English, Russian, Spanish and Italian select the matching site
            code, anything else keeps the default ``'BG'``.
        :param video: the `Episode` or movie to search for.
        :return: the subtitles produced by
            ``download_archive_and_add_subtitle_files`` for every followed
            link, each tagged with the uploader name when one is found.
        :rtype: list
        """
        params = {'s': '', 'y': '', 'u': '', 'l': 'BG', 'i': ''}

        if isinstance(video, Episode):
            params['s'] = "%s s%02de%02d" % (sanitize(
                video.series, {'\''}), video.season, video.episode)
        else:
            params['y'] = video.year
            params['s'] = sanitize(video.title, {'\''})

        # (alpha2, alpha3, site code); the first matching entry wins
        site_languages = (
            ('en', 'eng', 'EN'),
            ('ru', 'rus', 'RU'),
            ('es', 'spa', 'ES'),
            ('it', 'ita', 'IT'),
        )
        for alpha2, alpha3, site_code in site_languages:
            if language == alpha2 or language == alpha3:
                params['l'] = site_code
                break

        logger.info('Searching subtitle %r', params)
        response = self.session.get('http://yavka.net/subtitles.php',
                                    params=params,
                                    allow_redirects=False,
                                    timeout=10,
                                    headers={
                                        'Referer': 'http://yavka.net/',
                                    })
        response.raise_for_status()

        collected = []

        # redirects are not followed, so a non-200 answer carries no results
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return collected

        soup = BeautifulSoup(response.content, 'lxml')

        # Search on first 20 rows only
        for row in soup.findAll('tr', {'class': 'info'})[:20]:
            anchor = row.find('a', {'class': 'selector'})
            if not anchor:
                continue

            link = anchor.get('href')
            uploader_anchor = row.find('a', {'class': 'click'})
            uploader = uploader_anchor.get_text() if uploader_anchor else None
            logger.info('Found subtitle link %r', link)

            downloaded = self.download_archive_and_add_subtitle_files(
                'http://yavka.net/' + link, language, video)
            for subtitle in downloaded:
                subtitle.uploader = uploader
            collected = collected + downloaded

        return collected
Example #8
0
    def get_matches(self, video):
        """Work out which properties of `video` this subtitle agrees with.

        patch: `guessit` is run on ``self.version`` with ``single_value`` set.

        For episodes, 'series' is added whenever the video has a series name;
        no name comparison is made here. The result is also stored on
        ``self.matches``.

        :param video: the video to compare with.
        :return: names of the matched properties.
        :rtype: set
        """
        found = set()

        if isinstance(video, Episode):
            # series: counted as soon as the video carries a series name
            if video.series:
                found.add('series')
            # year: original series with a year-less subtitle, or equal years
            year_less_original = video.original_series and self.year is None
            if year_less_original or (video.year and video.year == self.year):
                found.add('year')
            # season and episode numbers
            if video.season and self.season == video.season:
                found.add('season')
            if video.episode and self.episode == video.episode:
                found.add('episode')
            # guess
            guess_options = {'type': 'episode', "single_value": True}
            found |= guess_matches(video, guessit(self.version, guess_options))
        elif isinstance(video, Movie):
            # title: main title or any alternative title
            if video.title:
                own_title = sanitize(self.title)
                titles = [video.title] + video.alternative_titles
                if any(own_title == sanitize(candidate) for candidate in titles):
                    found.add('title')
            # year
            if video.year and self.year == video.year:
                found.add('year')
            # guess
            guess_options = {'type': 'movie', "single_value": True}
            found |= guess_matches(video, guessit(self.version, guess_options))

        self.matches = found

        return found
Example #9
0
    def get_matches(self, video):
        """Work out which properties of `video` this subtitle agrees with.

        Year and release group are checked for every video. Movies are then
        compared on title and IMDb id; every other video is treated as an
        episode and compared on series name (with a trailing
        " - Sezonul <n>" removed from the subtitle title), series IMDb id and
        the season mentioned in ``self.comments``. The `guessit` result for
        ``self.comments`` is merged in, and the result is also stored on
        ``self.matches``.

        :param video: the video to compare with.
        :return: names of the matched properties.
        :rtype: set
        """
        found = set()

        # checks shared by movies and episodes
        if video.year and self.year == video.year:
            found.add('year')
        if video.release_group and video.release_group in self.comments:
            found.add('release_group')

        if isinstance(video, Movie):
            guess_type = "movie"

            # title
            if video.title:
                if sanitize(self.title) == fix_inconsistent_naming(video.title):
                    found.add('title')

            # imdb
            if video.imdb_id and self.imdb_id == video.imdb_id:
                found.add('imdb_id')
        else:
            guess_type = "episode"

            # title, without the season suffix the subtitle title may carry
            stripped_title = self.title.rstrip()
            seasonless_title = re.sub(r'\s-\sSezonul\s\d+$', '', stripped_title)
            if video.series:
                if fix_inconsistent_naming(video.series) == sanitize(seasonless_title):
                    found.add('series')

            # imdb
            if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
                found.add('imdb_id')

            # season
            season_label = f"Sezonul {video.season}"
            if season_label in self.comments:
                found.add('season')

            # episode: granted once both the IMDb id and the season matched
            if "imdb_id" in found and "season" in found:
                found.add('episode')

        # guess match others
        found |= guess_matches(video, guessit(self.comments, {"type": guess_type}))

        self.matches = found

        return found
Example #10
0
    def _search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        The first suggestion on the search page is accepted only when its
        sanitized text equals the sanitized "<series> <year>" search string.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int
        :raises TooManyRequests: when the server answers with status 304.

        """
        # addic7ed doesn't support search with quotes
        series = series.replace('\'', ' ')

        # build the params
        series_year = '%s %d' % (series, year) if year is not None else series
        params = {'search': series_year, 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'search.php',
                             params=params,
                             timeout=10)
        r.raise_for_status()
        if r.status_code == 304:
            raise TooManyRequests()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get the suggestion
        try:
            suggestion = soup.select('span.titulo > a[href^="/show/"]')
            if not suggestion:
                logger.warning('Show id not found: no suggestion')
                return None
            if not sanitize(suggestion[0].i.text.replace('\'', ' '),
                            default_characters=self.sanitize_characters) == \
                    sanitize(series_year, default_characters=self.sanitize_characters):
                logger.warning('Show id not found: suggestion does not match')
                return None
            show_id = int(suggestion[0]['href'][6:])
            logger.debug('Found show id %d', show_id)

            return show_id
        finally:
            # `select` returns a list of tags, which has no `decompose` of its
            # own; calling it raised AttributeError and replaced the result
            # whenever a suggestion existed. Decomposing the soup releases the
            # whole tree, the selected tags included.
            soup.decompose()
            soup = None
Example #11
0
    def get_movie_id(self, movie, year=None):
        """Get the best matching movie id for `movie`, `year`.

        The search page is scanned for links of the form ``movie/<id>`` whose
        text reads "<name> (<year>)"; a link matches when the sanitized name
        equals the sanitized `movie` and the year equals ``str(year)``. When
        several rows match, the last one wins.

        :param str movie: movie.
        :param year: year of the movie, if any.
        :type year: int
        :return: the movie id, if found.
        :rtype: str
        """
        movie_id = None

        # get the movie id
        logger.info('Getting movie id')

        r = self.session.get(self.server_url + 'search.php?search=' + quote_plus(movie), timeout=10)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        try:
            # a page without the results table simply means "nothing found"
            movies_table = soup.find('table', {'class': 'tabel'})
            rows = movies_table.find_all('tr') if movies_table is not None else []

            # loop invariants
            wanted_name = sanitize(movie.lower())
            wanted_year = str(year)

            # populate the movie id
            for item in rows:
                link = item.find('a', href=True)
                if not link or not link['href'].startswith('movie/'):
                    continue
                splitted_uri = link['href'].split('/')
                if len(splitted_uri) != 2:
                    continue
                media_id = splitted_uri[1]

                match = re.search(r'(.+)\s\((\d{4})\)$', link.text)
                if not match:
                    continue
                media_name, media_year = match.group(1), match.group(2)
                if sanitize(media_name.lower()) == wanted_name and media_year == wanted_year:
                    movie_id = media_id
        finally:
            # release the parse tree even when scanning fails
            soup.decompose()
            soup = None

        logger.debug('Found this movie id: %s', movie_id)

        if not movie_id:
            logger.debug("Addic7ed: Cannot find this movie with guessed year %s: %s", year, movie)

        return movie_id
Example #12
0
    def get_matches(self, video):
        """Work out which properties of `video` this subtitle agrees with.

        A `Movie` is compared on title and year; every other video is treated
        as an episode and compared on series name, year, season and episode.
        The release group is looked up in ``self.releases`` for both, and the
        `guessit` result for ``self.releases`` is merged in. The result is
        also stored on ``self.matches``.

        :param video: the video to compare with.
        :return: names of the matched properties.
        :rtype: set
        """
        matches = set()
        type_ = "movie" if isinstance(video, Movie) else "episode"
        # handle movies and series separately
        if type_ == "episode":
            # series: the name comparison must only run when the video has a
            # series name (`and` binds tighter than `or`, so without the
            # nesting the alt_title comparison ran against an empty name too)
            if video.series:
                series_name = fix_inconsistent_naming(video.series)
                if (sanitize(self.title) == series_name
                        or sanitize(self.alt_title) == series_name):
                    matches.add('series')
            # year: original series with a year-less subtitle, or equal years
            if ((video.original_series and self.year is None)
                    or (video.year and video.year == self.year)):
                matches.add('year')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
        # movie
        else:
            # title: same guard as for the series name above
            if video.title:
                movie_title = fix_inconsistent_naming(video.title)
                if (sanitize(self.title) == movie_title
                        or sanitize(self.alt_title) == movie_title):
                    matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')

        # rest is same for both groups

        # release_group
        if (video.release_group and self.releases
                and any(r in sanitize_release_group(self.releases)
                        for r in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group)))):
            matches.add('release_group')

        matches |= guess_matches(video, guessit(self.releases,
                                                {"type": type_}))

        self.matches = matches

        return matches
Example #13
0
    def _get_show_ids(self):
        """Build the mapping of sanitized series name to show id from `shows.php`.

        Names are sanitized with ``self.sanitize_characters`` (patch: add
        punctuation cleaning). When `series_year_re` finds a year in a name,
        the year-less name is registered too unless it is already present.

        :return: show id per sanitized series name.
        :rtype: dict
        """
        # get the show page
        logger.info('Getting show ids')
        response = self.session.get(self.server_url + 'shows.php', timeout=10)
        response.raise_for_status()
        soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        anchors = soup.select('td.version > h3 > a[href^="/show/"]')
        for anchor in anchors:
            name = sanitize(anchor.text,
                            default_characters=self.sanitize_characters)
            try:
                # the href reads "/show/<id>"; skip the 6-character prefix
                number = int(anchor['href'][6:])
            except ValueError:
                continue

            show_ids[name] = number

            with_year = series_year_re.match(name)
            if not with_year or not with_year.group(2):
                continue
            # year found, also add it without year
            bare_name = with_year.group(1)
            if bare_name not in show_ids:
                show_ids[bare_name] = number

        soup.decompose()
        soup = None

        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
Example #14
0
    def get_matches(self, video):
        """Work out which properties of `video` this subtitle agrees with.

        Episodes are compared on series name, year, season and episode; movies
        on title and year. Release group, resolution and format are looked up
        in ``self.releases`` for both, and the `guessit` result for
        ``self.releases`` is merged in. The result is also stored on
        ``self.matches``.

        :param video: the video to compare with.
        :return: names of the matched properties.
        :rtype: set
        """
        matches = set()

        # handle movies and series separately
        if isinstance(video, Episode):
            # series: the name comparison must only run when the video has a series name (`and` binds tighter
            # than `or`, so without the nesting the alt_title comparison ran against an empty name too)
            if video.series:
                series_name = fix_inconsistent_naming(video.series)
                if sanitize(self.title) == series_name or sanitize(self.alt_title) == series_name:
                    matches.add('series')
            # year: original series with a year-less subtitle, or equal years
            if (video.original_series and self.year is None) or (video.year and video.year == self.year):
                matches.add('year')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
        # movie
        elif isinstance(video, Movie):
            # title: same guard as for the series name above
            if video.title:
                movie_title = fix_inconsistent_naming(video.title)
                if sanitize(self.title) == movie_title or sanitize(self.alt_title) == movie_title:
                    matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')

        # rest is same for both groups

        # release_group
        if (video.release_group and self.releases and
                any(r in sanitize_release_group(self.releases)
                    for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
            matches.add('release_group')
        # resolution
        if video.resolution and self.releases and video.resolution in self.releases.lower():
            matches.add('resolution')
        # format
        if video.format and self.releases and video.format.lower() in self.releases.lower():
            matches.add('format')
        # other properties
        matches |= guess_matches(video, guessit(self.releases))

        self.matches = matches

        return matches
Example #15
0
    def get_matches(self, video, hearing_impaired=False):
        """Extend the parent class's matches with provider-specific checks.

        On top of the parent's ``get_matches(video)`` this compares the series
        name / movie title, checks the frame rate and treats a tag match as a
        hash match.

        :param video: the video to compare with.
        :param bool hearing_impaired: not read by this method.
        :return: names of the matched properties; an empty set when the frame
            rates differ and ``self.skip_wrong_fps`` is set.
        :rtype: set
        """
        matches = super(OpenSubtitlesSubtitle, self).get_matches(video)

        # episode
        if isinstance(video, Episode) and self.movie_kind == 'episode':
            # series
            if video.series and (sanitize(self.series_name) in (
                    sanitize(name)
                    for name in [video.series] + video.alternative_series)):
                matches.add('series')
        # movie
        elif isinstance(video, Movie) and self.movie_kind == 'movie':
            # title
            if video.title and (sanitize(self.movie_name) in (
                    sanitize(name)
                    for name in [video.title] + video.alternative_titles)):
                matches.add('title')

        sub_fps = None
        try:
            sub_fps = float(self.fps)
        except (TypeError, ValueError):
            # float(None) raises TypeError, float("n/a") raises ValueError;
            # either way the subtitle's frame rate is unknown and the check
            # below is skipped instead of aborting the whole matching
            pass

        # video has fps info, sub also, and sub's fps is non-zero
        if video.fps and sub_fps and not framerate_equal(video.fps, self.fps):
            self.wrong_fps = True

            if self.skip_wrong_fps:
                logger.debug(
                    "Wrong FPS (expected: %s, got: %s, lowering score massively)",
                    video.fps, self.fps)
                # fixme: may be too harsh
                return set()
            else:
                logger.debug("Wrong FPS (expected: %s, got: %s, continuing)",
                             video.fps, self.fps)

        # matched by tag?
        if self.matched_by == "tag":
            # treat a tag match equally to a hash match
            logger.debug(
                "Subtitle matched by tag, treating it as a hash-match. Tag: '%s'",
                self.query_parameters.get("tag", None))
            matches.add("hash")

        return matches
Example #16
0
    def query(self, language, video):
        """Search subsunacs.net for `video` and collect subtitles from the results.

        Episodes are searched as "<series> <season> <episode>" (two-digit
        numbers) using the sanitized series name; movies by their title as is
        plus year. Only the first 10 result cells are followed.

        :param language: requested language; ``'en'`` / ``'eng'`` select
            language code 1, anything else keeps the default 0.
        :param video: the `Episode` or movie to search for.
        :return: the subtitles produced by
            ``download_archive_and_add_subtitle_files`` for every followed link.
        :rtype: list
        """
        if isinstance(video, Episode):
            year_term = ''
            title_term = "%s %02d %02d" % (sanitize(
                video.series), video.season, video.episode)
        else:
            year_term = video.year
            title_term = video.title

        wants_english = language == 'en' or language == 'eng'

        params = {
            'm': title_term,
            'l': 1 if wants_english else 0,
            'c': '',
            'y': year_term,
            'action': "   Търси   ",
            'a': '',
            'd': '',
            'u': '',
            'g': '',
            't': '',
            'imdbcheck': 1
        }

        logger.info('Searching subtitle %r', params)
        response = self.session.post('https://subsunacs.net/search.php',
                                     params=params,
                                     allow_redirects=False,
                                     timeout=10,
                                     headers={
                                         'Referer':
                                         'https://subsunacs.net/index.php',
                                     })
        response.raise_for_status()

        collected = []

        # redirects are not followed, so a non-200 answer carries no results
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return collected

        soup = BeautifulSoup(response.content, 'html.parser')

        # Search on first 10 rows only
        for cell in soup.findAll('td', {'class': 'tdMovie'})[:10]:
            anchor = cell.find('a', {'class': 'tooltip'})
            if not anchor:
                continue
            link = anchor.get('href')
            logger.info('Found subtitle link %r', link)
            downloaded = self.download_archive_and_add_subtitle_files(
                'https://subsunacs.net' + link, language, video)
            collected = collected + downloaded

        return collected
Example #17
0
    def get_matches(self, video, hearing_impaired=False):
        """Extend the parent class's matches with provider-specific checks.

        On top of the parent's ``get_matches(video)`` this compares the series name / movie title, checks the frame
        rate and treats a tag match as a hash match.

        :param video: the video to compare with.
        :param bool hearing_impaired: not read by this method.
        :return: names of the matched properties; an empty set when the frame rates differ and
            ``self.skip_wrong_fps`` is set.
        :rtype: set
        """
        matches = super(OpenSubtitlesSubtitle, self).get_matches(video)

        # episode
        if isinstance(video, Episode) and self.movie_kind == 'episode':
            # series
            if video.series and (sanitize(self.series_name) in (
                    sanitize(name) for name in [video.series] + video.alternative_series)):
                matches.add('series')
        # movie
        elif isinstance(video, Movie) and self.movie_kind == 'movie':
            # title
            if video.title and (sanitize(self.movie_name) in (
                    sanitize(name) for name in [video.title] + video.alternative_titles)):
                matches.add('title')

        sub_fps = None
        try:
            sub_fps = float(self.fps)
        except (TypeError, ValueError):
            # float(None) raises TypeError, float("n/a") raises ValueError; either way the subtitle's frame rate is
            # unknown and the check below is skipped instead of aborting the whole matching
            pass

        # video has fps info, sub also, and sub's fps is non-zero
        if video.fps and sub_fps and not framerate_equal(video.fps, self.fps):
            self.wrong_fps = True

            if self.skip_wrong_fps:
                logger.debug("Wrong FPS (expected: %s, got: %s, lowering score massively)", video.fps, self.fps)
                # fixme: may be too harsh
                return set()
            else:
                logger.debug("Wrong FPS (expected: %s, got: %s, continuing)", video.fps, self.fps)

        # matched by tag?
        if self.matched_by == "tag":
            # treat a tag match equally to a hash match
            logger.debug("Subtitle matched by tag, treating it as a hash-match. Tag: '%s'",
                         self.query_parameters.get("tag", None))
            matches.add("hash")

        return matches
Example #18
0
    def _get_show_ids(self):
        """Build the mapping of sanitized series name to show id from `shows.php`.

        The time of the fetch is recorded under ``self.last_show_ids_fetch_key``
        before the request is made. Names are sanitized with
        ``self.sanitize_characters`` (patch: add punctuation cleaning). When
        `series_year_re` finds a year in a name, the year-less name is
        registered too unless it is already present.

        :return: show id per sanitized series name.
        :rtype: dict
        :raises Exception: when no show id at all could be extracted.
        """
        # get the show page
        logger.info('Getting show ids')
        region.set(self.last_show_ids_fetch_key, datetime.datetime.now())

        response = self.session.get(self.server_url + 'shows.php', timeout=10)
        response.raise_for_status()

        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
        cells = re.findall(show_cells_re, response.content)
        if not cells:
            # If RegEx fails, fall back to original r.text and use 'html.parser'
            soup = ParserBeautifulSoup(response.text, ['html.parser'])
        else:
            markup = b''.join(cells).decode('utf-8', 'ignore')
            soup = ParserBeautifulSoup(markup, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        for anchor in soup.select('td > h3 > a[href^="/show/"]'):
            name = sanitize(anchor.text,
                            default_characters=self.sanitize_characters)
            try:
                # the href reads "/show/<id>"; skip the 6-character prefix
                number = int(anchor['href'][6:])
            except ValueError:
                continue

            show_ids[name] = number

            with_year = series_year_re.match(name)
            if not with_year or not with_year.group(2):
                continue
            # year found, also add it without year
            bare_name = with_year.group(1)
            if bare_name not in show_ids:
                show_ids[bare_name] = number

        soup.decompose()
        soup = None

        logger.debug('Found %d show ids', len(show_ids))

        if not show_ids:
            raise Exception("Addic7ed: No show IDs found!")

        return show_ids
Example #19
0
    def get_matches(self, video):
        """Return the set of properties on which this subtitle matches `video`.

        For an episode the series name, season, episode number and series IMDb
        id are compared directly; for a movie only the title is. In both cases
        the matches guessed from the release name (as a partial guess) are
        merged in.

        :param video: the video to compare against.
        :return: the names of the matching properties.
        :rtype: set

        """
        found = set()

        if isinstance(video, Episode):
            # series: compare against the main name and every alternative
            if video.series:
                wanted = sanitize(self.title)
                if any(wanted == sanitize(name)
                       for name in [video.series] + video.alternative_series):
                    found.add('series')
            # season
            if video.season and self.season == video.season:
                found.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                found.add('episode')
            # imdb_id
            if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
                found.add('series_imdb_id')
            # whatever can be guessed from the release name
            release_guess = guessit(self.release, {'type': 'episode'})
            found.update(guess_matches(video, release_guess, partial=True))
        elif isinstance(video, Movie):
            # whatever can be guessed from the release name
            release_guess = guessit(self.release, {'type': 'movie'})
            found.update(guess_matches(video, release_guess, partial=True))

            # title: compare against the main title and every alternative
            if video.title:
                wanted = sanitize(self.title)
                if any(wanted == sanitize(name)
                       for name in [video.title] + video.alternative_titles):
                    found.add('title')

        return found
Example #20
0
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying ``self.server_url``.

        The ids are read from the ``value`` attribute of the children of the
        element whose id is ``qsShow``; children with a value of ``0`` are
        ignored. The time of the fetch is stored in the cache region first.

        :return: show id per sanitized series name.
        :rtype: dict
        :raises Exception: if no show id could be extracted from the page.

        # patch: add punctuation cleaning
        """
        # get the show page
        logger.info('Getting show ids')
        region.set(self.last_show_ids_fetch_key, datetime.datetime.now())

        r = self.session.get(self.server_url, timeout=10)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        shows = soup.find(id='qsShow')
        if shows is None:
            # element missing from the page: fall through to the "no ids" error
            # below instead of failing with a TypeError while iterating None
            shows = []

        for show in shows:
            # text nodes between the tags have no attributes; skip them
            if not hasattr(show, 'attrs'):
                continue

            try:
                show_id = int(show.attrs['value'])
            except (KeyError, ValueError):
                # child without a value, or with a non-numeric one
                continue

            if show_id == 0:
                continue

            show_clean = sanitize(show.text, default_characters=self.sanitize_characters)

            show_ids[show_clean] = show_id
            match = series_year_re.match(show_clean)
            if match and match.group(2) and match.group(1) not in show_ids:
                # year found, also add it without year
                show_ids[match.group(1)] = show_id

        soup.decompose()
        soup = None

        logger.debug('Found %d show ids', len(show_ids))

        if not show_ids:
            raise Exception("Addic7ed: No show IDs found!")

        return show_ids
Example #21
0
    def _get_show_ids(self):
        """Build the mapping of sanitized show name to show id from `shows.php`.

        When ``series_year_re`` captures a year for a name, the name without
        the year is registered too, unless it is already present.

        :return: show id per sanitized series name (possibly empty).
        :rtype: dict

        """
        logger.info('Getting show ids')
        response = self.session.get(self.server_url + 'shows.php', timeout=10)
        response.raise_for_status()

        # lxml chokes on the site's markup (3.6.4 was the last release known to
        # cope), so only the show cells are handed to the parser when the regex
        # manages to isolate them; otherwise the raw content goes to
        # 'html.parser' alone.
        cells = re.findall(show_cells_re, response.content)
        if cells:
            markup, parsers = b''.join(cells), ['lxml', 'html.parser']
        else:
            markup, parsers = response.content, ['html.parser']
        soup = ParserBeautifulSoup(markup, parsers)

        show_ids = {}
        anchors = soup.select('td > h3 > a[href^="/show/"]')
        for anchor in anchors:
            name = sanitize(anchor.text, default_characters=self.sanitize_characters)
            try:
                # the href starts with "/show/"; the rest is the numeric id
                number = int(anchor['href'][6:])
            except ValueError:
                continue

            show_ids[name] = number
            year_match = series_year_re.match(name)
            if year_match and year_match.group(2) and year_match.group(1) not in show_ids:
                # a year was captured: register the year-less name as well
                show_ids[year_match.group(1)] = number

        soup.decompose()
        soup = None

        logger.debug('Found %d show ids', len(show_ids))
        return show_ids
Example #22
0
    def get_show_id(self, series, year=None, country_code=None):
        """Look up the best matching show id for `series`, `year` and `country_code`.

        The ids returned by :meth:`_get_show_ids` are probed with, in order, the
        name plus country code, the name plus year, and the bare name. The
        first truthy hit wins. The fallback on :meth:`_search_show_id` is
        currently disabled.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :param country_code: country code of the series, if any.
        :type country_code: str
        :return: the show id, if found.
        :rtype: int

        """
        wanted = sanitize(series).lower()
        known = self._get_show_ids()

        # attempt with country
        if country_code:
            logger.debug('Getting show id with country')
            hit = known.get('%s %s' % (wanted, country_code.lower()))
            if hit:
                return hit

        # attempt with year
        if year:
            logger.debug('Getting show id with year')
            hit = known.get('%s %d' % (wanted, year))
            if hit:
                return hit

        # attempt clean
        logger.debug('Getting show id')

        # NOTE: searching via self._search_show_id(series) used to be the last
        # resort here; it is switched off because it is broken right now.
        return known.get(wanted)
Example #23
0
    def get_matches(self, video):
        """Return the properties on which this subtitle matches a movie `video`.

        Title, year and IMDb id are compared with the video's values; release
        group and (lower-cased) resolution count as matches when they occur in
        ``self.comments``. Anything that is not a ``Movie`` yields no matches.
        The result is also stored on ``self.matches``.

        :param video: the video to compare against.
        :return: the names of the matching properties.
        :rtype: set

        """
        found = set()

        if isinstance(video, Movie):
            checks = (
                ('title',
                 video.title and sanitize(self.title) == fix_inconsistent_naming(video.title)),
                ('year',
                 video.year and self.year == video.year),
                ('imdb_id',
                 video.imdb_id and self.imdb_id == video.imdb_id),
                ('release_group',
                 video.release_group and video.release_group in self.comments),
                ('resolution',
                 video.resolution and video.resolution.lower() in self.comments),
            )
            found = {key for key, matched in checks if matched}

        self.matches = found
        return found
Example #24
0
    def query(self, language, video):
        """Search subsunacs.net for subtitles of `video` in `language`.

        Episodes are searched by "<series> <season> <episode>", movies by title
        and year. Only the first 20 result rows are followed; every archive
        found is downloaded and its subtitles are tagged with the uploader name
        (``None`` when no uploader link is present in the row).

        :param language: requested language; 'en'/'eng' selects language id 1,
            anything else keeps the default id 0.
        :param video: the video to search subtitles for.
        :return: the subtitles found (possibly empty).
        :rtype: list

        """
        subtitles = []

        params = {
            'm': '',
            'l': 0,
            'c': '',
            'y': '',
            'action': "   Търси   ",
            'a': '',
            'd': '',
            'u': '',
            'g': '',
            't': '',
            'imdbcheck': 1
        }

        if isinstance(video, Episode):
            series_name = sanitize(fix_tv_naming(video.series), {'\''})
            params['m'] = "%s %02d %02d" % (series_name, video.season, video.episode)
        else:
            params['y'] = video.year
            params['m'] = sanitize(video.title, {'\''})

        if language == 'en' or language == 'eng':
            params['l'] = 1

        logger.info('Searching subtitle %r', params)
        request_headers = {'Referer': 'https://subsunacs.net/index.php'}
        response = self.session.post('https://subsunacs.net/search.php',
                                     params=params,
                                     allow_redirects=False,
                                     timeout=10,
                                     headers=request_headers)
        response.raise_for_status()

        # redirects are not followed, so a non-200 answer carries no results
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        uploader_href = re.compile(r'.*/search\.php\?t=1\&(memid|u)=.*')

        # Search on first 20 rows only
        for row in soup.findAll('tr', onmouseover=True)[:20]:
            movie_cell = row.find('td', {'class': 'tdMovie'})
            if not movie_cell:
                continue
            anchor = movie_cell.find('a', {'class': 'tooltip'})
            if not anchor:
                continue

            link = anchor.get('href')
            uploader_anchor = row.find('a', href=uploader_href)
            if uploader_anchor:
                uploader = uploader_anchor.get_text()
            else:
                uploader = None

            logger.info('Found subtitle link %r', link)
            found = self.download_archive_and_add_subtitle_files(
                'https://subsunacs.net' + link, language, video)
            for subtitle in found:
                subtitle.uploader = uploader
            subtitles = subtitles + found

        return subtitles
Example #25
0
    def get_matches(self, video):
        """Return the set of properties on which this subtitle matches `video`.

        Besides the direct comparisons and the matches guessed from the release
        name, the lower-cased release name is scanned for the video's release
        group, resolution, source and video codec (including the common
        ``web*`` and ``x264``/``x265`` aliases).

        :param video: the video to compare against.
        :return: the names of the matching properties.
        :rtype: set

        """
        matches = set()
        subtitle_filename = self.release.lower()

        # episode
        if isinstance(video, Episode):
            # series
            if video.series and (sanitize(self.title) in (
                    sanitize(name) for name in [video.series] + video.alternative_series)):
                matches.add('series')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
            # imdb_id
            if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
                matches.add('series_imdb_id')
            # guess
            matches |= guess_matches(video, guessit(self.release, {'type': 'episode'}), partial=True)
        # movie
        elif isinstance(video, Movie):
            matches |= guess_matches(video, guessit(self.release, {'type': 'movie'}), partial=True)

            # title
            if video.title and (sanitize(self.title) in (
                    sanitize(name) for name in [video.title] + video.alternative_titles)):
                matches.add('title')

        # release_group
        if video.release_group and video.release_group.lower() in subtitle_filename:
            matches.add('release_group')

        # resolution
        if video.resolution and video.resolution.lower() in subtitle_filename:
            matches.add('resolution')

        # source
        if video.source:
            formats = [video.source.lower()]
            if formats[0] == "web":
                # release names spell a web source in several ways
                formats.extend(("webdl", "webrip", "web "))
            if any(frmt in subtitle_filename for frmt in formats):
                matches.add('source')

        # video_codec
        if video.video_codec:
            # Check the codec names themselves; the source formats collected
            # above must not be able to trigger a video_codec match.
            video_codecs = [video.video_codec.lower()]
            if video_codecs[0] == "h.264":
                video_codecs.append("x264")
            elif video_codecs[0] == "h.265":
                video_codecs.append("x265")
            if any(vc in subtitle_filename for vc in video_codecs):
                matches.add('video_codec')

        return matches
Example #26
0
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    If a guess is `partial`, the absence of information won't be counted as a match.

    Patch: add multiple release group and formats handling

    Note that `guess` is modified in place: a ``resolution`` key holding the
    value of ``screen_size`` (or ``None``) is added to it.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """
    matches = set()
    if isinstance(video, Episode):
        # series
        if video.series and 'title' in guess:
            # the guessed title may be a single value or a list of candidates
            titles = guess["title"]
            if not isinstance(titles, list):
                titles = [titles]

            for title in titles:
                if sanitize(title) in (sanitize(name)
                                       for name in [video.series] +
                                       video.alternative_series):
                    matches.add('series')

        # title
        if video.title and 'episode_title' in guess and sanitize(
                guess['episode_title']) == sanitize(video.title):
            matches.add('title')

        # season
        if video.season and 'season' in guess and guess[
                'season'] == video.season:
            matches.add('season')

        # episode
        # Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
        # Most providers only support single-ep, so make sure it contains only 1 episode
        # In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
        if video.episode and 'episode' in guess:
            episode_guess = guess['episode']
            episode = min(episode_guess) if episode_guess and isinstance(
                episode_guess, list) else episode_guess
            if episode == video.episode:
                matches.add('episode')

        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')

        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            matches.add('year')

    elif isinstance(video, Movie):
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # title
        if video.title and 'title' in guess and sanitize(guess['title']) in (
                sanitize(name)
                for name in [video.title] + video.alternative_titles):
            matches.add('title')

    # release_group
    if 'release_group' in guess:
        # the guessed release group may be a single value or a list
        release_groups = guess["release_group"]
        if not isinstance(release_groups, list):
            release_groups = [release_groups]

        if video.release_group:
            for release_group in release_groups:
                if (sanitize_release_group(release_group)
                        in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group))):
                    matches.add('release_group')
                    break
    # source
    if 'source' in guess:
        # the guessed source may be a single value or a list
        formats = guess["source"]
        if not isinstance(formats, list):
            formats = [formats]

        if video.source:
            video_format = video.source.lower()
            # None when the video's source is not a key of MERGED_FORMATS_REV
            _video_gen_format = MERGED_FORMATS_REV.get(video_format)
            matched = False
            for frmt in formats:
                _guess_gen_frmt = MERGED_FORMATS_REV.get(frmt.lower())
                # We don't want to match a singleton
                if _guess_gen_frmt is None:  # If the source is not in MERGED_FORMATS
                    # use the raw guess value so that two unknown sources
                    # (None on both sides) do not compare equal
                    _guess_gen_frmt = guess["source"]

                if _guess_gen_frmt == _video_gen_format:
                    matched = True
                    matches.add('source')
                    break

            logger.debug("Source match found? %s: %s -> %s", matched,
                         video.source, formats)

        # Only reached when the guess carries a source: a release group match
        # without a matching source is discarded.
        if "release_group" in matches and "source" not in matches:
            logger.info(
                "Release group matched but source didn't. Removing release group match."
            )
            matches.remove("release_group")

    # expose the guessed screen size under the "resolution" key checked below
    # (this mutates the caller's dict)
    guess.update({"resolution": guess.get("screen_size")})

    # Solve match keys for potential lists
    # NOTE(review): _has_match is defined elsewhere; presumably it compares
    # video.<key> with guess[key] and copes with list values - confirm.
    for key in ("video_codec", "audio_codec", "edition", "streaming_service",
                "resolution"):
        if _has_match(video, guess, key):
            matches.add(key)

    # Add streaming service match for non-web sources
    if video.source and video.source != "Web":
        matches.add("streaming_service")

    # As edition tags are rare, add edition match if the video doesn't have an edition
    if not video.edition:
        matches.add("edition")

    return matches
Example #27
0
    def _search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        The search is first sent to ``srch.php`` and, when that yields nothing
        usable, repeated against ``search.php`` after a 4 second pause. The
        first suggestion is accepted only if its sanitized name equals the
        sanitized search string.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int
        :raises TooManyRequests: if the last response has status code 304.

        """
        # addic7ed doesn't support search with quotes
        series = series.replace('\'', ' ')

        # build the params
        series_year = '%s %d' % (series, year) if year is not None else series
        params = {'search': series_year, 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)

        # currently addic7ed searches via srch.php from the front page, then a re-search is needed which calls
        # search.php
        for endpoint in ("srch.php", "search.php",):
            headers = None
            if endpoint == "search.php":
                headers = {
                    "referer": self.server_url + "srch.php"
                }
            r = self.session.get(self.server_url + endpoint, params=params, timeout=10, headers=headers)
            r.raise_for_status()

            # Compare text with text: r.content is bytes on Python 3, where a
            # str-in-bytes membership test raises TypeError.
            if r.text and "Sorry, your search" not in r.text:
                break

            time.sleep(4)

        if r.status_code == 304:
            raise TooManyRequests()

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        suggestion = None

        # get the suggestion
        try:
            suggestion = soup.select('span.titulo > a[href^="/show/"]')
            if not suggestion:
                logger.warning('Show id not found: no suggestion')
                return None
            if not sanitize(suggestion[0].i.text.replace('\'', ' '),
                            default_characters=self.sanitize_characters) == \
                    sanitize(series_year, default_characters=self.sanitize_characters):
                logger.warning('Show id not found: suggestion does not match')
                return None
            # the href starts with "/show/"; the rest is the numeric id
            show_id = int(suggestion[0]['href'][6:])
            logger.debug('Found show id %d', show_id)

            return show_id
        finally:
            # always release the parse tree, whichever way we leave
            soup.decompose()
            soup = None
Example #28
0
    def _search_show_id(self, series, year=None):
        """Find the show id for `series` (and `year`) through the site's search.

        The query goes to ``srch.php`` first and, if that gives nothing usable,
        is repeated against ``search.php`` after a 4 second pause. The first
        suggestion is accepted only if its sanitized name equals the sanitized
        search string.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int
        :raises TooManyRequests: if the last response has status code 304.

        """
        # addic7ed doesn't support search with quotes
        series = series.replace('\'', ' ')

        if year is None:
            series_year = series
        else:
            series_year = '%s %d' % (series, year)
        params = {'search': series_year, 'Submit': 'Search'}

        logger.info('Searching show ids with %r', params)

        # The front page searches through srch.php; a follow-up search is then
        # needed, which goes to search.php with srch.php as the referer.
        for endpoint in ("srch.php", "search.php"):
            if endpoint == "search.php":
                extra_headers = {"referer": self.server_url + "srch.php"}
            else:
                extra_headers = None

            r = self.session.get(self.server_url + endpoint,
                                 params=params,
                                 timeout=10,
                                 headers=extra_headers)
            r.raise_for_status()

            body = r.text
            if body and "Sorry, your search" not in body:
                break

            time.sleep(4)

        if r.status_code == 304:
            raise TooManyRequests()

        soup = ParserBeautifulSoup(r.text, ['lxml', 'html.parser'])
        try:
            candidates = soup.select('span.titulo > a[href^="/show/"]')
            if not candidates:
                logger.warning('Show id not found: no suggestion')
                return None

            first = candidates[0]
            suggested = sanitize(first.i.text.replace('\'', ' '),
                                 default_characters=self.sanitize_characters)
            expected = sanitize(series_year,
                                default_characters=self.sanitize_characters)
            if suggested != expected:
                logger.warning('Show id not found: suggestion does not match')
                return None

            # the href starts with "/show/"; the rest is the numeric id
            show_id = int(first['href'][6:])
            logger.debug('Found show id %d', show_id)
            return show_id
        finally:
            # always release the parse tree, whichever way we leave
            soup.decompose()
Example #29
0
    def query(self, language, video):
        """Search subs.sab.bz for subtitles of `video` in `language`.

        Episodes are searched by "<series> <season> <episode>", movies by title
        and year. Only the first 25 result rows are followed. Year, number of
        CDs, fps, uploader and IMDb id are read from the row on a best-effort
        basis and left as ``None`` when they cannot be parsed.

        :param language: requested language; 'en'/'eng' selects language 1,
            anything else keeps the default '2'.
        :param video: the video to search subtitles for.
        :return: the subtitles found (possibly empty).
        :rtype: list

        """
        subtitles = []
        isEpisode = isinstance(video, Episode)

        params = {
            'act': 'search',
            'movie': '',
            'select-language': '2',
            'upldr': '',
            'yr': '',
            'release': ''
        }

        if isEpisode:
            params['movie'] = "%s %02d %02d" % (sanitize(
                fix_tv_naming(video.series),
                {'\''}), video.season, video.episode)
        else:
            params['yr'] = video.year
            params['movie'] = sanitize(video.title, {'\''})

        if language == 'en' or language == 'eng':
            params['select-language'] = 1

        logger.info('Searching subtitle %r', params)
        response = self.session.post('http://subs.sab.bz/index.php?',
                                     params=params,
                                     allow_redirects=False,
                                     timeout=10,
                                     headers={
                                         'Referer': 'http://subs.sab.bz/',
                                     })

        response.raise_for_status()

        # redirects are not followed, so a non-200 answer carries no results
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        rows = soup.findAll('tr', {'class': 'subs-row'})

        # What a missing cell, a missing tag or an unparsable value can raise
        # while scraping. Listed explicitly so that KeyboardInterrupt and
        # SystemExit are no longer swallowed by the best-effort parsing below.
        scrape_errors = (AttributeError, IndexError, TypeError, ValueError)

        # Search on first 25 rows only
        for row in rows[:25]:
            a_element_wrapper = row.find('td', {'class': 'c2field'})
            if not a_element_wrapper:
                continue
            element = a_element_wrapper.find('a')
            if not element:
                continue

            link = element.get('href')
            notes = element.get('onmouseover')
            title = element.get_text()

            try:
                # the year follows the link as text such as " (2010)"
                year = int(str(element.next_sibling).strip(' ()'))
            except scrape_errors:
                year = None

            td = row.findAll('td')

            try:
                num_cds = int(td[6].get_text())
            except scrape_errors:
                num_cds = None

            try:
                fps = float(td[7].get_text())
            except scrape_errors:
                fps = None

            try:
                uploader = td[8].get_text()
            except scrape_errors:
                uploader = None

            try:
                imdb_id = re.findall(r'imdb.com/title/(tt\d+)/?$',
                                     td[9].find('a').get('href'))[0]
            except scrape_errors:
                imdb_id = None

            logger.info('Found subtitle link %r', link)
            sub = self.download_archive_and_add_subtitle_files(
                link, language, video, fps, num_cds)
            # a lone subtitle from a single-CD release
            single_file = len(sub) == 1 and num_cds == 1
            for s in sub:
                s.title = title
                s.notes = notes
                s.year = year
                s.uploader = uploader
                s.imdb_id = imdb_id
                s.single_file = single_file
            subtitles.extend(sub)
        return subtitles
Example #30
0
    def query(self, language, video):
        """Search yavka.net for subtitles of `video` in `language`.

        Episodes are searched by "<series> sXXeYY", movies by title and year.
        Only the first 25 table rows are inspected. Notes, year, fps and
        uploader are read from the row on a best-effort basis and left as
        ``None`` when they are missing or cannot be parsed.

        :param language: requested language; en/ru/es/it (2- or 3-letter
            codes) are mapped to the site's codes, anything else keeps 'BG'.
        :param video: the video to search subtitles for.
        :return: the subtitles found (possibly empty).
        :rtype: list

        """
        subtitles = []
        isEpisode = isinstance(video, Episode)
        params = {'s': '', 'y': '', 'u': '', 'l': 'BG', 'i': ''}

        if isEpisode:
            params['s'] = "%s s%02de%02d" % (sanitize(
                video.series, {'\''}), video.season, video.episode)
        else:
            params['y'] = video.year
            params['s'] = sanitize(video.title, {'\''})

        if language == 'en' or language == 'eng':
            params['l'] = 'EN'
        elif language == 'ru' or language == 'rus':
            params['l'] = 'RU'
        elif language == 'es' or language == 'spa':
            params['l'] = 'ES'
        elif language == 'it' or language == 'ita':
            params['l'] = 'IT'

        logger.info('Searching subtitle %r', params)
        response = self.session.get('http://yavka.net/subtitles.php',
                                    params=params,
                                    allow_redirects=False,
                                    timeout=10,
                                    headers={
                                        'Referer': 'http://yavka.net/',
                                    })

        response.raise_for_status()

        # redirects are not followed, so a non-200 answer carries no results
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        rows = soup.findAll('tr')

        # What a missing tag or an unparsable value can raise while scraping.
        # Listed explicitly so that KeyboardInterrupt and SystemExit are no
        # longer swallowed by the best-effort parsing below.
        scrape_errors = (AttributeError, TypeError, ValueError)

        # Search on first 25 rows only
        for row in rows[:25]:
            element = row.find('a', {'class': 'selector'})
            if not element:
                continue

            link = element.get('href')

            # The notes are the text of the <p> held in the link's "content"
            # attribute; a link without that attribute simply has no notes
            # (re.sub would raise TypeError on None).
            content = element.get('content')
            notes = None
            if content is not None:
                notes = re.sub(r'(?s)<p.*><img [A-z0-9=\'/\. :;#]*>(.*)</p>',
                               r"\1", content)
            title = element.get_text()

            try:
                year = int(
                    element.find_next_sibling('span').text.strip('()'))
            except scrape_errors:
                year = None

            try:
                fps = float(
                    row.find('span', {
                        'title': 'Кадри в секунда'
                    }).text.strip())
            except scrape_errors:
                fps = None

            element = row.find('a', {'class': 'click'})
            uploader = element.get_text() if element else None
            logger.info('Found subtitle link %r', link)
            sub = self.download_archive_and_add_subtitle_files(
                'http://yavka.net/' + link, language, video, fps)
            single_file = len(sub) == 1
            for s in sub:
                s.title = title
                s.notes = notes
                s.year = year
                s.uploader = uploader
                s.single_file = single_file
            subtitles.extend(sub)
        return subtitles
Example #31
0
def _listify(value):
    """Return `value` unchanged when it is a list, else wrap it in a one-item list."""
    return value if isinstance(value, list) else [value]


def guess_matches(video, guess, partial=False):
    """Compute which properties of a `guess` agree with a `video`.

    The returned set may contain ``series``, ``title``, ``season``,
    ``episode``, ``year``, ``release_group``, ``resolution``, ``format``,
    ``video_codec`` and ``audio_codec``.

    When the guess is not `partial`, an episode guess without a year is
    counted as a ``year`` match for videos flagged as ``original_series``.

    Patch: the guessed title (episodes only), release group and format may
    each be a single value or a list of values.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """
    found = set()

    if isinstance(video, Episode):
        # series: any guessed title may equal the series name or an alternative
        if video.series and 'title' in guess:
            for guessed_title in _listify(guess["title"]):
                wanted = sanitize(guessed_title)
                known_names = [video.series] + video.alternative_series
                if wanted in (sanitize(known) for known in known_names):
                    found.add('series')

        # episode title
        if video.title and 'episode_title' in guess:
            if sanitize(guess['episode_title']) == sanitize(video.title):
                found.add('title')

        # season
        if video.season and 'season' in guess:
            if guess['season'] == video.season:
                found.add('season')

        # episode: a multi-episode guess arrives as a list of ints; only the
        # lowest number is compared, since only single episodes are supported
        if video.episode and 'episode' in guess:
            guessed_episode = guess['episode']
            if guessed_episode and isinstance(guessed_episode, list):
                guessed_episode = min(guessed_episode)
            if guessed_episode == video.episode:
                found.add('episode')

        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            found.add('year')
        # a missing year is itself information for an original series
        if not partial and video.original_series and 'year' not in guess:
            found.add('year')

    elif isinstance(video, Movie):
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            found.add('year')

        # title: compared against the main title and every alternative title
        if video.title and 'title' in guess:
            wanted = sanitize(guess['title'])
            known_titles = [video.title] + video.alternative_titles
            if wanted in (sanitize(known) for known in known_titles):
                found.add('title')

    # release_group: one equivalent group among the guessed ones is enough
    if 'release_group' in guess:
        guessed_groups = _listify(guess["release_group"])
        if video.release_group and any(
                sanitize_release_group(group) in get_equivalent_release_groups(
                    sanitize_release_group(video.release_group))
                for group in guessed_groups):
            found.add('release_group')

    # resolution
    if video.resolution and 'screen_size' in guess:
        if guess['screen_size'] == video.resolution:
            found.add('resolution')

    # format: HDTV, SDTV and TV are folded into "TV" before a
    # case-insensitive comparison
    if 'format' in guess:
        guessed_formats = _listify(guess["format"])
        if video.format:
            tv_aliases = ("HDTV", "SDTV")
            wanted_format = video.format
            if wanted_format in tv_aliases + ("TV",):
                wanted_format = "TV"
                logger.debug("Treating HDTV/SDTV the same")

            for guessed_format in guessed_formats:
                if guessed_format in tv_aliases:
                    guessed_format = "TV"
                if guessed_format.lower() == wanted_format.lower():
                    found.add('format')
                    break

    # video_codec / audio_codec: same key name on the video and in the guess
    for prop in ('video_codec', 'audio_codec'):
        expected = getattr(video, prop)
        if expected and prop in guess and guess[prop] == expected:
            found.add(prop)

    return found
Example #32
0
    def query(self, language, video):
        """Search subsunacs.net for subtitles of `video` and download them.

        The search form is posted with the sanitized series name plus
        zero-padded season and episode numbers for an
        :class:`Episode`, or with the sanitized title and the year for any
        other video. At most the first 20 result rows are processed. For each
        row the linked archive is handed to
        ``self.download_archive_and_add_subtitle_files`` and every subtitle it
        returns is annotated with the row's title, notes, year, rating and
        uploader.

        :param language: language of the wanted subtitles; the values ``'en'``
            and ``'eng'`` select the site's filter ``l=1``, anything else
            leaves ``l=0``.
        :param video: the video to search subtitles for.
        :return: the subtitles collected from all processed rows; empty when
            the response status is not 200.
        :rtype: list
        :raises: whatever ``response.raise_for_status()`` raises for the
            search request.

        """
        subtitles = []
        is_episode = isinstance(video, Episode)

        # Search form fields. Only 'm' (search text), 'y' (year) and 'l'
        # (language filter) are filled in below; the rest keep these defaults.
        params = {
            'm': '',
            'l': 0,
            'c': '',
            'y': '',
            'action': "   Търси   ",
            'a': '',
            'd': '',
            'u': '',
            'g': '',
            't': '',
            'imdbcheck': 1
        }

        if is_episode:
            params['m'] = "%s %02d %02d" % (sanitize(
                fix_tv_naming(video.series),
                {'\''}), video.season, video.episode)
        else:
            params['y'] = video.year
            params['m'] = sanitize(fix_movie_naming(video.title), {'\''})

        if language == 'en' or language == 'eng':
            params['l'] = 1

        logger.info('Searching subtitle %r', params)
        response = self.session.post('https://subsunacs.net/search.php',
                                     params=params,
                                     allow_redirects=False,
                                     timeout=10,
                                     headers={
                                         'Referer':
                                         'https://subsunacs.net/index.php',
                                     })

        response.raise_for_status()

        # NOTE(review): redirects are not followed, so a non-200 answer that
        # did not raise above (presumably a redirect) means "no results".
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        rows = soup.findAll('tr', onmouseover=True)

        # Errors expected while scraping an optional cell: missing element
        # (AttributeError), missing column (IndexError), missing attribute
        # passed to int()/float() (TypeError) or non-numeric text (ValueError).
        # Anything else, including KeyboardInterrupt, is left to propagate.
        parse_errors = (AttributeError, IndexError, TypeError, ValueError)

        # Search on first 20 rows only
        for row in rows[:20]:
            movie_cell = row.find('td', {'class': 'tdMovie'})
            if not movie_cell:
                continue

            element = movie_cell.find('a', {'class': 'tooltip'})
            if not element:
                continue

            link = element.get('href')
            if not link:
                # Without a target there is nothing to download for this row.
                continue

            # The tooltip text carries an <img ...jpg"> tag that is stripped
            # out; a missing title attribute yields empty notes instead of
            # aborting the whole search.
            notes = re.sub(r'(<img.*)(src=")(/)(.*.jpg">)', r"",
                           element.get('title') or '')
            title = element.get_text()

            try:
                year = int(
                    element.find_next_sibling('span', {
                        'class': 'smGray'
                    }).text.strip('\xa0()'))
            except parse_errors:
                year = None

            cells = row.findAll('td')

            try:
                num_cds = int(cells[1].get_text())
            except parse_errors:
                num_cds = None

            try:
                fps = float(cells[2].get_text())
            except parse_errors:
                fps = None

            try:
                rating = float(cells[3].find('img').get('title'))
            except parse_errors:
                rating = None

            try:
                uploader = cells[5].get_text()
            except parse_errors:
                uploader = None

            logger.info('Found subtitle link %r', link)
            sub = self.download_archive_and_add_subtitle_files(
                'https://subsunacs.net' + link, language, video, fps,
                num_cds)

            # Same value for every subtitle of this archive, so compute once.
            single_file = len(sub) == 1 and num_cds == 1
            for s in sub:
                s.title = title
                s.notes = notes
                s.year = year
                s.rating = rating
                s.uploader = uploader
                s.single_file = single_file
            subtitles.extend(sub)

        return subtitles
Example #33
0
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    If a guess is `partial`, the absence information won't be counted as a match.

    The returned set may contain ``series``, ``title``, ``season``,
    ``episode``, ``year``, ``release_group``, ``resolution``, ``format``,
    ``video_codec`` and ``audio_codec``.

    Patch: add multiple release group and formats handling

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """

    matches = set()
    if isinstance(video, Episode):
        # series
        if video.series and 'title' in guess:
            titles = guess["title"]
            # `list` instead of `types.ListType`: the latter only exists on
            # Python 2, and the episode check below already uses `list`.
            if not isinstance(titles, list):
                titles = [titles]

            for title in titles:
                if sanitize(title) in (sanitize(name) for name in [video.series] + video.alternative_series):
                    matches.add('series')
                    # one matching title is enough
                    break
        # title
        if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title):
            matches.add('title')
        # season
        if video.season and 'season' in guess and guess['season'] == video.season:
            matches.add('season')
        # episode
        # Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
        # Most providers only support single-ep, so make sure it contains only 1 episode
        # In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
        if video.episode and 'episode' in guess:
            episode_guess = guess['episode']
            episode = min(episode_guess) if episode_guess and isinstance(episode_guess, list) else episode_guess
            if episode == video.episode:
                matches.add('episode')
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            matches.add('year')
    elif isinstance(video, Movie):
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # title
        if video.title and 'title' in guess and sanitize(guess['title']) in (
                    sanitize(name) for name in [video.title] + video.alternative_titles):
            matches.add('title')

    # release_group
    if 'release_group' in guess:
        release_groups = guess["release_group"]
        if not isinstance(release_groups, list):
            release_groups = [release_groups]

        if video.release_group:
            # a single equivalent group among the guessed ones is enough
            for release_group in release_groups:
                if (sanitize_release_group(release_group) in
                        get_equivalent_release_groups(sanitize_release_group(video.release_group))):
                    matches.add('release_group')
                    break

    # resolution
    if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution:
        matches.add('resolution')

    # format
    if 'format' in guess:
        formats = guess["format"]
        if not isinstance(formats, list):
            formats = [formats]

        if video.format:
            # HDTV, SDTV and TV are all folded into "TV" on both sides
            # before the case-insensitive comparison
            video_format = video.format
            if video_format in ("HDTV", "SDTV", "TV"):
                video_format = "TV"
                logger.debug("Treating HDTV/SDTV the same")

            for frmt in formats:
                if frmt in ("HDTV", "SDTV"):
                    frmt = "TV"

                if frmt.lower() == video_format.lower():
                    matches.add('format')
                    break
    # video_codec
    if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
        matches.add('video_codec')
    # audio_codec
    if video.audio_codec and 'audio_codec' in guess and guess['audio_codec'] == video.audio_codec:
        matches.add('audio_codec')

    return matches