Example #1
0
    def get_matches(self, video):
        matches = set()

        # episode
        if isinstance(video, Episode):
            # series name
            if video.series and sanitize(self.series) in (
                    sanitize(name)
                    for name in [video.series] + video.alternative_series):
                matches.add('series')
            # year
            if video.original_series and self.year is None or video.year and video.year == self.year:
                matches.add('year')

        # release_group
        if (video.release_group and self.version
                and any(r in sanitize_release_group(self.version)
                        for r in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group)))):
            matches.add('release_group')
        # other properties
        matches |= guess_matches(video,
                                 guessit(self.version, {'type': 'episode'}),
                                 partial=True)

        return matches
Example #2
0
    def get_matches(self, video: Episode):
        matches = set()

        # series
        if video.series and sanitize(self.series) == sanitize(video.series):
            matches.add('series')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        # title
        if video.title and sanitize(self.title) == sanitize(video.title):
            matches.add('title')
        # year
        if video.original_series and self.year is None or video.year and video.year == self.year:
            matches.add('year')
        # release_group
        if (video.release_group and self.version and
                any(r in sanitize_release_group(self.version)
                    for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
            matches.add('release_group')
        # resolution
        if video.resolution and self.version and video.resolution in self.version.lower():
            matches.add('resolution')
        # source
        if video.source and self.version and video.source.lower() in self.version.lower():
            matches.add('source')
        # other properties
        matches |= guess_matches(video, guessit(self.version), partial=True)

        return matches
Example #3
0
    def get_matches(self, video):
        matches = set()
        # series
        if video.series and ( sanitize(self.series) == sanitize(fix_inconsistent_naming(video.series)) or sanitize(self.series) == sanitize(video.series)):
            matches.add('series')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        # year
        if ('series' in matches and video.original_series and self.year is None or
           video.year and video.year == self.year):
            matches.add('year')

        # release_group
        if (video.release_group and self.version and
                any(r in sanitize_release_group(self.version)
                    for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
            matches.add('release_group')
        # resolution
        if video.resolution and self.version and video.resolution in self.version.lower():
            matches.add('resolution')
        # format
        if video.format and self.version and video.format.lower() in self.version.lower():
            matches.add('format')
        # other properties
        matches |= guess_matches(video, guessit(self.release_info.encode("utf-8")))

        return matches
Example #4
0
    def get_matches(self, video):
        type_ = "movie" if isinstance(video, Movie) else "episode"
        matches = guess_matches(video,
                                guessit(self.release_info, {"type": type_}))

        # episode
        if isinstance(video, Episode):
            # series
            if video.series and sanitize(self.series) == sanitize(
                    video.series):
                matches.add('series')
            # imdb_id
            if video.series_imdb_id and self.imdb_id and str(
                    self.imdb_id) == str(video.series_imdb_id):
                matches.add('series_imdb_id')
                matches.add('series')
                matches.add('year')
            # year
            if 'year' not in matches and 'series' in matches and video.original_series and self.year is None:
                matches.add('year')
        # movie
        elif isinstance(video, Movie):
            # title
            if video.title and (sanitize(self.series) in (
                    sanitize(name)
                    for name in [video.title] + video.alternative_titles)):
                matches.add('title')
            # imdb_id
            if video.imdb_id and self.imdb_id == video.imdb_id:
                matches.add('imdb_id')
                matches.add('title')
                matches.add('year')
            # year
            if video.year and self.year == video.year:
                matches.add('year')

        # release_group
        if video.release_group and self.releases:
            video_release_groups = get_equivalent_release_groups(
                sanitize_release_group(video.release_group))
            for release in self.releases:

                if any(r in sanitize_release_group(release)
                       for r in video_release_groups):
                    matches.add('release_group')

                    if video.resolution and video.resolution in release.lower(
                    ):
                        matches.add('resolution')

                    if video.source and video.source in release.lower():
                        matches.add('source')

                # We don't have to continue in case it is a perfect match
                if all(m in matches
                       for m in ['release_group', 'resolution', 'source']):
                    break

        self.matches = matches
        return matches
Example #5
0
    def get_matches(self, video):
        matches = set()

        # episode
        if isinstance(video, Episode):
            # series
            if video.series and sanitize(self.series) == sanitize(
                    video.series):
                matches.add("series")
            # season
            if video.season and self.season == video.season:
                matches.add("season")
            # episode
            if video.episode and self.episode == video.episode:
                matches.add("episode")
            # guess
            for release in self.releases:
                matches |= guess_matches(video,
                                         guessit(release, {"type": "episode"}))
        # movie
        elif isinstance(video, Movie):
            # guess
            for release in self.releases:
                matches |= guess_matches(video,
                                         guessit(release, {"type": "movie"}))

        # title
        if video.title and sanitize(self.title) == sanitize(video.title):
            matches.add("title")

        return matches
Example #6
0
    def get_matches(self, video):
        matches = set()

        # movie
        if isinstance(video, Movie):
            # title
            if video.title and (sanitize(self.title) in (
                    sanitize(name)
                    for name in [video.title] + video.alternative_titles)):
                matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')

        # release_group
        if (video.release_group and self.version
                and any(r in sanitize_release_group(self.version)
                        for r in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group)))):
            matches.add('release_group')
        # other properties
        matches |= guess_matches(video,
                                 guessit(self.version, {'type': 'movie'}),
                                 partial=True)

        return matches
Example #7
0
    def get_matches(self, video):
        matches = set()
        # series
        if video.series and (sanitize(self.series) == sanitize(
                fix_inconsistent_naming(video.series)) or sanitize(self.series)
                             == sanitize(video.series)):
            matches.add('series')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        # year
        if ('series' in matches and video.original_series and self.year is None
                or video.year and video.year == self.year):
            matches.add('year')

        logger.debug("Matches: %s", matches)

        # release_group
        if (video.release_group and self.version
                and any(r in sanitize_release_group(self.version)
                        for r in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group)))):
            matches.add('release_group')

        matches |= guess_matches(video, guessit(self.release_info),
                                 {"type": "episode"})

        return matches
Example #8
0
    def get_show_links(self, title, year=None):
        """Get the matching show links for `title` and `year`.

        First search in the result of :meth:`_get_show_suggestions`.

        :param title: show title.
        :param year: year of the show, if any.
        :type year: int
        :return: the show links, if found.
        :rtype: list of str

        """
        title = sanitize(title)
        suggestions = self._get_suggestions(title)

        show_links = []
        for suggestion in suggestions:
            show_title = sanitize(suggestion['title'])

            if show_title == title or (year and show_title
                                       == '{title} {year:d}'.format(
                                           title=title, year=year)):
                logger.debug('Getting show id')
                show_links.append(suggestion['link'].split('?p=')[-1])

        return show_links
Example #9
0
    def get_matches(self, video):
        matches = set()

        # series name
        if video.series and sanitize(self.series) in (
                sanitize(name) for name in [video.series] + video.alternative_series):
            matches.add('series')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        # title of the episode
        if video.title and sanitize(self.title) == sanitize(video.title):
            matches.add('title')
        # year
        if video.original_series and self.year is None or video.year and video.year == self.year:
            matches.add('year')
        # release_group
        if (video.release_group and self.version and
                any(r in sanitize_release_group(self.version)
                    for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
            matches.add('release_group')
        # resolution
        if video.resolution and self.version and video.resolution in self.version.lower():
            matches.add('resolution')
        # source
        if video.source and self.version and video.source.lower() in self.version.lower():
            matches.add('source')
        # other properties
        matches |= guess_matches(video, guessit(self.version), partial=True)

        return matches
Example #10
0
    def _search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int

        """
        # addic7ed doesn't support search with quotes
        series = series.replace('\'', ' ')

        # build the params
        series_year = '%s %d' % (series, year) if year is not None else series
        params = {'search': series_year, 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get the suggestion
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None
        if not sanitize(suggestion[0].i.text.replace('\'', ' ')) == sanitize(series_year):
            logger.warning('Show id not found: suggestion does not match')
            return None
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id
Example #11
0
    def get_matches(self, video):
        matches = guess_matches(video,
                                guessit(self.release_info.encode("utf-8")))

        # episode
        if isinstance(video, Episode):
            # series
            if video.series and sanitize(self.series) == sanitize(
                    video.series):
                matches.add('series')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
            # imdb_id
            if video.series_imdb_id and self.imdb_id and str(
                    self.imdb_id) == str(video.series_imdb_id):
                matches.add('series_imdb_id')
                matches.add('series')
                matches.add('year')
            # year
            if ('series' in matches and video.original_series
                    and self.year is None
                    or video.year and video.year == self.year):
                matches.add('year')
        # movie
        elif isinstance(video, Movie):
            # title
            if video.title and (sanitize(self.series) in (
                    sanitize(name)
                    for name in [video.title] + video.alternative_titles)):
                matches.add('title')
            # imdb_id
            if video.imdb_id and self.imdb_id == video.imdb_id:
                matches.add('imdb_id')
                matches.add('title')
                matches.add('year')
            # year
            if video.year and self.year == video.year:
                matches.add('year')

        # release_group
        if (video.release_group and self.version
                and any(r in sanitize_release_group(self.version)
                        for r in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group)))):
            matches.add('release_group')
        # resolution
        if video.resolution and self.version and video.resolution in self.version.lower(
        ):
            matches.add('resolution')
        # format
        if video.format and self.version and video.format.lower(
        ) in self.version.lower():
            matches.add('format')

        self.matches = matches
        return matches
Example #12
0
    def get_matches(self, video):
        matches = set()

        # episode
        if isinstance(video, Episode):
            # series
            if video.series and sanitize(self.series) == sanitize(video.series):
                matches.add('series')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
            # imdb_id
            if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
                matches.add('series_imdb_id')
            # guess
            for release in self.releases:
                matches |= guess_matches(video, guessit(release, {'type': 'episode'}))
        # movie
        elif isinstance(video, Movie):
            # guess
            for release in self.releases:
                matches |= guess_matches(video, guessit(release, {'type': 'movie'}))

        # title
        if video.title and sanitize(self.title) == sanitize(video.title):
            matches.add('title')

        return matches
Example #13
0
    def get_matches(self, video):
        matches = set()
        # series
        if video.series and ( sanitize(self.series) == sanitize(fix_inconsistent_naming(video.series)) or sanitize(self.series) == sanitize(video.series)):
            matches.add('series')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        # year
        if ('series' in matches and video.original_series and self.year is None or
           video.year and video.year == self.year):
            matches.add('year')

        # release_group
        if (video.release_group and self.version and
                any(r in sanitize_release_group(self.version)
                    for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
            matches.add('release_group')
        # resolution
        if video.resolution and self.version and video.resolution in self.version.lower():
            matches.add('resolution')
        # format
        if video.format and self.version and video.format.lower() in self.version.lower():
            matches.add('format')
        # other properties
        matches |= guess_matches(video, guessit(self.release_info))

        return matches
Example #14
0
    def get_matches(self, video):
        matches = set()

        # episode
        if isinstance(video, Episode):
            # series
            if video.series and sanitize(self.series) == sanitize(
                    video.series):
                matches.add('series')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
            # guess
            for release in self.releases:
                matches |= guess_matches(video,
                                         guessit(release, {'type': 'episode'}))
        # movie
        elif isinstance(video, Movie):
            # guess
            for release in self.releases:
                matches |= guess_matches(video,
                                         guessit(release, {'type': 'movie'}))

        # title
        if video.title and sanitize(self.title) == sanitize(video.title):
            matches.add('title')

        return matches
Example #15
0
    def find_id(self, series, year, original_title):
        """
        We need to find the id of the series at the following url:
        https://www.feliratok.info/index.php?term=SERIESNAME&nyelv=0&action=autoname
        Where SERIESNAME is a searchable string.
        The result will be something like this:
        [{"name":"DC\u2019s Legends of Tomorrow (2016)","ID":"3725"},{"name":"Miles from Tomorrowland (2015)",
        "ID":"3789"},{"name":"No Tomorrow (2016)","ID":"4179"}]

        """

        # Search for exact name
        url = self.server_url + "index.php?term=" + series + "&nyelv=0&action=autoname"
        # url = self.server_url + "index.php?term=" + "fla"+ "&nyelv=0&action=autoname"
        logger.info('Get series id from URL %s', url)
        r = self.session.get(url, timeout=10)

        # r is something like this:
        # [{"name":"DC\u2019s Legends of Tomorrow (2016)","ID":"3725"},{"name":"Miles from Tomorrowland (2015)",
        # "ID":"3789"},{"name":"No Tomorrow (2016)","ID":"4179"}]

        results = r.json()

        # check all of the results:
        for result in results:
            try:
                # "name":"Miles from Tomorrowland (2015)","ID":"3789"
                result_year = re.search(r"(?<=\()\d\d\d\d(?=\))",
                                        result['name'])
                result_year = result_year.group() if result_year else ''
            except IndexError:
                result_year = ""

            try:
                # "name":"Miles from Tomorrowland (2015)","ID":"3789"
                result_title = re.search(r".*(?=\(\d\d\d\d\))", result['name'])
                result_title = result_title.group() if result_title else ''
                result_id = result['ID']
            except IndexError:
                continue

            result_title = result_title.strip().replace("�",
                                                        "").replace(" ", ".")
            if not result_title:
                continue

            guessable = result_title.strip() + ".s01e01." + result_year
            guess = guessit(guessable, {'type': "episode"})

            if sanitize(original_title) == sanitize(guess['title']) and year and guess['year'] and \
                    year == guess['year']:
                # Return the founded id
                return result_id
            elif sanitize(original_title) == sanitize(
                    guess['title']) and not year:
                # Return the founded id
                return result_id

        return None
Example #16
0
    def _search_show_id(self, series):
        """Search the show id from the `series`

        :param str series: series of the episode.
        :return: the show id, if found.
        :rtype: int or None

        """
        # build the param
        params = {'apikey': self.apikey, 'q': series}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'shows/search',
                             params=params,
                             timeout=10)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        if int(root.find('data/count').text) == 0:
            logger.warning('Show id not found: no suggestion')
            return None

        # Looking for show in first page
        for show in root.findall('data/shows/show'):
            if sanitize(show.find('name').text).lower() == sanitize(
                    series.lower()):
                series_id = int(show.find('id').text)
                logger.debug('Found show id %d', series_id)

                return series_id

        # Not in the first page of result try next (if any)
        next_page = root.find('data/next')
        while next_page.text is not None:  # pragma: no cover

            r = self.session.get(next_page.text, timeout=10)
            r.raise_for_status()
            root = etree.fromstring(r.content)

            logger.info('Loading suggestion page %r',
                        root.find('data/page').text)

            # Looking for show in following pages
            for show in root.findall('data/shows/show'):
                if sanitize(show.find('name').text).lower() == sanitize(
                        series.lower()):
                    series_id = int(show.find('id').text)
                    logger.debug('Found show id %d', series_id)

                    return series_id

            next_page = root.find('data/next')

        # No matches found
        logger.warning('Show id not found: suggestions does not match')

        return None
Example #17
0
    def _search_url_titles(self, series, season, episode, year=None):
        """Search the URL titles by kind for the given `title`, `season` and `episode`.

        :param str series: series to search for.
        :param int season: season to search for.
        :param int episode: episode to search for.
        :param int year: year to search for.
        :return: the episode URL.
        :rtype: str

        """
        # make the search
        logger.info('Searching episode url for %s, season %d, episode %d',
                    series, season, episode)
        episode_url = None

        search = '{} {}x{}'.format(series, season, episode)
        r = self.session.get(self.search_url,
                             headers={'Referer': self.server_url},
                             params={'q': search},
                             timeout=10)
        r.raise_for_status()

        if r.status_code != 200:
            logger.warning('Error getting episode url')
            raise ProviderError('%s: Error getting episode url',
                                self.__class__.__name__.upper())

        results = json.loads(r.text)

        for result in results:
            title = sanitize(result['name'])

            # attempt series with year
            if sanitize('{} ({})'.format(series, year)) in title:
                for episode_data in result['episodes']:
                    if season == episode_data[
                            'season'] and episode == episode_data['number']:
                        episode_url = self.server_url + 'episodes/{}'.format(
                            episode_data['id'])
                        logger.info('Episode url found with year %s',
                                    episode_url)
                        return episode_url
            # attempt series without year
            elif sanitize(series) in title:
                for episode_data in result['episodes']:
                    if season == episode_data[
                            'season'] and episode == episode_data['number']:
                        episode_url = self.server_url + 'episodes/{}'.format(
                            episode_data['id'])
                        logger.info('Episode url found without year %s',
                                    episode_url)
                        return episode_url

        return episode_url
Example #18
0
    def get_matches(self, video):
        """
        patch: set guessit to single_value
        :param video:
        :return:
        """
        matches = set()

        # episode
        if isinstance(video, Episode):
            # series
            if video.series and (sanitize(self.title) in (
                    sanitize(name)
                    for name in [video.series] + video.alternative_series)):
                matches.add('series')
            # year
            if video.original_series and self.year is None or video.year and video.year == self.year:
                matches.add('year')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
            # guess
            for release in self.releases:
                matches |= guess_matches(
                    video,
                    guessit(release, {
                        'type': 'episode',
                        "single_value": True
                    }))
        # movie
        elif isinstance(video, Movie):
            # title
            if video.title and (sanitize(self.title) in (
                    sanitize(name)
                    for name in [video.title] + video.alternative_titles)):
                matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')
            # guess
            for release in self.releases:
                matches |= guess_matches(
                    video,
                    guessit(release, {
                        'type': 'movie',
                        "single_value": True
                    }))

        self.matches = matches

        return matches
Example #19
0
    def get_show_ids(self,
                     title,
                     year=None,
                     is_episode=True,
                     country_code=None):
        """Get the best matching show id for `series`, `year` and `country_code`.

        First search in the result of :meth:`_get_show_suggestions`.

        :param title: show title.
        :param year: year of the show, if any.
        :type year: int
        :param is_episode: if the search is for episode.
        :type is_episode: bool
        :param country_code: country code of the show, if any.
        :type country_code: str
        :return: the show id, if found.
        :rtype: str

        """
        title_sanitized = sanitize(title).lower()
        show_ids = self._get_suggestions(title, is_episode)

        matched_show_ids = []
        for show in show_ids:
            show_id = None
            # attempt with country
            if not show_id and country_code:
                logger.debug('Getting show id with country')
                if sanitize(show['title']) == text_type(
                        '{title} {country}').format(
                            title=title_sanitized,
                            country=country_code.lower()):
                    show_id = show['link'].split('/')[-1]

            # attempt with year
            if not show_id and year:
                logger.debug('Getting show id with year')
                if sanitize(
                        show['title']) == text_type('{title} {year}').format(
                            title=title_sanitized, year=year):
                    show_id = show['link'].split('/')[-1]

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show['link'].split('/')[-1] if sanitize(
                    show['title']) == title_sanitized else None

            if show_id:
                matched_show_ids.append(show_id)

        return matched_show_ids
Example #20
0
    def search_ids(self, title, **kwargs):
        query = title
        titles = kwargs.get("titles") or []

        is_episode = False
        if kwargs.get("season") and kwargs.get("episode"):
            is_episode = True
            query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}"

        logger.info(f"Searching ID (episode: {is_episode}) for {query}")

        r = self.session.get(API_URL + "search", params={"q": query}, timeout=10)
        r.raise_for_status()

        results = r.json()
        match_ids = []
        if results["total"] >= 1:
            for result in results["results"]:
                if (result["type"] == "episode" and not is_episode) or (
                    result["type"] == "movie" and is_episode
                ):
                    continue

                # shortcut in case of matching imdb id (don't match NoneType)
                if not is_episode and f"tt{result.get('imdb', 'n/a')}" == kwargs.get(
                    "imdb_id"
                ):
                    logger.debug(f"Movie matched by IMDB ID, taking shortcut")
                    match_ids = [result["id"]]
                    break

                # advanced title check in case of multiple movie results
                if results["total"] > 1:
                    if not is_episode and kwargs.get("year"):
                        if result["title"] and not (
                            sanitize(result["title"])
                            in (
                                "%s %s" % (sanitize(name), kwargs.get("year"))
                                for name in titles
                            )
                        ):
                            continue

                match_ids.append(result["id"])
        else:
            logger.error(f"No episode ID found for {query}")

        if match_ids:
            logger.debug(
                f"Found matching IDs: {', '.join(str(id) for id in match_ids)}"
            )

        return match_ids
Example #21
0
    def _search_url_titles(self, series, season, episode, year=None):
        """Search the URL titles by kind for the given `title`, `season` and `episode`.

        :param str series: series to search for.
        :param int season: season to search for.
        :param int episode: episode to search for.
        :param int year: year to search for.
        :return: the episode URL.
        :rtype: str

        """
        # make the search
        logger.info("Searching episode url for %s, season %d, episode %d",
                    series, season, episode)
        episode_url = None

        search = "{} {}x{}".format(series, season, episode)
        r = self.session.get(self.search_url,
                             headers={"Referer": self.server_url},
                             params={"q": search},
                             timeout=10)
        r.raise_for_status()

        if r.status_code != 200:
            logger.error("Error getting episode url")
            raise ProviderError("Error getting episode url")

        results = json.loads(r.text)

        for result in results:
            title = sanitize(result["name"])

            # attempt series with year
            if sanitize("{} ({})".format(series, year)) in title:
                for episode_data in result["episodes"]:
                    if season == episode_data[
                            "season"] and episode == episode_data["number"]:
                        episode_url = self.server_url + "episodes/{}".format(
                            episode_data["id"])
                        return episode_url
            # attempt series without year
            elif sanitize(series) in title:
                for episode_data in result["episodes"]:
                    if season == episode_data[
                            "season"] and episode == episode_data["number"]:
                        episode_url = self.server_url + "episodes/{}".format(
                            episode_data["id"])
                        return episode_url

        return episode_url
Example #22
0
    def get_episode_url(self, show_id, series, season, episode, year=None):
        """Get the url best matching show id for `series`, `season`, `episode` and `year`.

        :param int show_id: show id of the series
        :param str series: serie of the episode.
        :param int season: season of the episode.
        :param int episode: number of the episode.
        :param int year: year of the series.
        :return: the episode url, if found.
        :rtype: str

        """
        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id,
                    season)

        series_sanitized = sanitize(series)
        episode_url = None

        r = self.session.get(self.subtitles_url,
                             params={
                                 'show': show_id,
                                 'season': season
                             },
                             timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over episodes rows
        for html_episode in soup.select('td > a[href*="/episodes/"]'):
            title = sanitize(html_episode.get_text())

            # attempt series with year
            if sanitize('{} {} {}x{:02d}'.format(series_sanitized, year,
                                                 season, episode)) in title:
                episode_url = 'https://' + html_episode['href'][2:]
                logger.debug(
                    'Subtitle found for %s, season: %d, episode: %d. URL: %s',
                    series, season, episode, episode_url)
                break
            elif sanitize('{} {}x{:02d}'.format(series_sanitized, season,
                                                episode)) in title:
                episode_url = 'https://' + html_episode['href'][2:]
                logger.debug(
                    'Subtitle found for %s, season: %d, episode: %d. URL: %s',
                    series, season, episode, episode_url)
                break

        return episode_url
Example #23
0
    def list_subtitles(self, video, languages):
        # lookup show_id
        titles = [video.title] + video.alternative_titles if isinstance(
            video, Movie) else []

        show_links = None
        for title in titles:
            show_links = self.get_show_links(title, video.year)
            if show_links:
                break

        subtitles = []
        # query for subtitles with the show_id
        if show_links:
            for show_link in show_links:
                subtitles += [
                    s for s in self.query(show_link, video.title, video.year)
                    if s.language in languages
                ]
        else:
            subtitles += [
                s for s in self.query(None, sanitize(video.title), video.year)
                if s.language in languages
            ]

        return subtitles
Example #24
0
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        params = {'apikey': self.apikey}
        r = self.session.get(self.server_url + 'shows',
                             timeout=10,
                             params=params)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        # populate the show ids
        show_ids = {}
        for show in root.findall('data/shows/show'):
            if show.find('name').text is None:  # pragma: no cover
                continue
            show_ids[sanitize(show.find('name').text).lower()] = int(
                show.find('id').text)
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
Example #25
0
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict
        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + 'shows.php',
                             timeout=20,
                             cookies=self.cookies)
        r.raise_for_status()

        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
        show_cells = re.findall(show_cells_re, r.content)
        if show_cells:
            soup = ParserBeautifulSoup(b''.join(show_cells),
                                       ['lxml', 'html.parser'])
        else:
            # If RegEx fails, fall back to original r.content and use 'html.parser'
            soup = ParserBeautifulSoup(r.content, ['html.parser'])

        # populate the show ids
        show_ids = {}
        for show in soup.select('td.vr > h3 > a[href^="/show/"]'):
            show_ids[sanitize(show.text)] = int(show['href'][6:])
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
Example #26
0
    def get_show_id(self, series_names, year=None):
        series_sanitized_names = []
        for name in series_names:
            sanitized_name = sanitize(name)
            series_sanitized_names.append(sanitized_name)
            alternative_name = _get_alternative_name(sanitized_name)
            if alternative_name:
                series_sanitized_names.append(alternative_name)

        show_ids = self._get_show_ids()
        show_id = None

        for series_sanitized in series_sanitized_names:
            # attempt with year
            if year:
                logger.debug('Getting show id with year')
                show_id = show_ids.get('{series} {year:d}'.format(
                    series=series_sanitized, year=year))

            # attempt with article at the end
            if not show_id and year:
                logger.debug('Getting show id with year in brackets')
                show_id = show_ids.get('{series} [{year:d}]'.format(
                    series=series_sanitized, year=year))

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show_ids.get(series_sanitized)

            if show_id:
                break

        return int(show_id) if show_id else None
Example #27
0
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + 'shows.php', timeout=10)
        r.raise_for_status()

        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
        show_cells = re.findall(show_cells_re, r.content)
        if show_cells:
            soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
        else:
            # If RegEx fails, fall back to original r.content and use 'html.parser'
            soup = ParserBeautifulSoup(r.content, ['html.parser'])

        # populate the show ids
        show_ids = {}
        for show in soup.select('td.version > h3 > a[href^="/show/"]'):
            show_ids[sanitize(show.text)] = int(show['href'][6:])
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
Example #28
0
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `series.php` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.series_url, timeout=10)
        r.raise_for_status()

        if r.status_code != 200:
            logger.error('Error getting show ids')
            raise ProviderError('Error getting show ids')

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        for show in soup.select('td > a[href^="/show/"]'):
            show_ids[sanitize(show.get_text())] = int(show['href'][6:])
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
Example #29
0
    def _get_show_ids(self):
        # get the shows page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + self.all_series_url, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        for show_category in soup.findAll('seriesl'):
            if show_category.attrs['category'] == u'Σειρές':
                for show in show_category.findAll('series'):
                    series = show.text
                    series_match = series_sanitize_re.match(series)
                    if series_match:
                        series = series_match.group(1)
                    show_ids[sanitize(series)] = int(show['srsid'])
                break
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
Example #30
0
    def search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.
        :param string series: series of the episode.
        :param year: year of the series, if any.
        :type year: int or None
        :return: the show id, if any.
        :rtype: int or None
        """
        # make the search
        logger.info('Searching show id for %r', series)
        r = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10)
        r.raise_for_status()

        # get the series out of the suggestions
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        show_id = None
        for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'):
            match = link_re.match(suggestion.text)
            if not match:
                logger.error('Failed to match %s', suggestion.text)
                continue

            if sanitize(match.group('series')).lower() == series.lower():
                if year is not None and int(match.group('first_year')) != year:
                    logger.debug('Year does not match')
                    continue
                show_id = int(suggestion['href'][8:-5])
                logger.debug('Found show id %d', show_id)
                break

        soup.decompose()
        soup = None

        return show_id
Example #31
0
    def get_show_id(self, series, year=None):
        """Get the best matching show id for `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int

        """
        series_sanitized = sanitize(series)
        show_ids = self._get_show_ids()
        show_id = None

        # attempt with year
        if not show_id and year:
            logger.debug('Getting show id with year')
            show_id = show_ids.get('%s %d' % (series_sanitized, year))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(series_sanitized)

        return show_id
Example #32
0
    def get_show_id(self, series, country_code=None):
        """Get the best matching show id for `series`.

        First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`

        :param str series: series of the episode.
        :param str country_code: the country in which teh show is aired.
        :return: the show id, if found.
        :rtype: int or None

        """
        series_sanitized = sanitize(series).lower()
        show_ids = self._get_show_ids()
        series_id = None

        # attempt with country
        if not series_id and country_code:
            logger.debug('Getting show id with country')
            series_id = show_ids.get('{0} {1}'.format(series_sanitized,
                                                      country_code.lower()))

        # attempt clean
        if not series_id:
            logger.debug('Getting show id')
            series_id = show_ids.get(series_sanitized)

        # search as last resort
        if not series_id:
            logger.warning('Series not found in show ids')
            series_id = self._search_show_id(series)

        return series_id
Example #33
0
    def search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.
        :param string series: series of the episode.
        :param year: year of the series, if any.
        :type year: int or None
        :return: the show id, if any.
        :rtype: int or None
        """
        # make the search
        logger.info('Searching show id for %r', series)
        r = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10)
        r.raise_for_status()

        # get the series out of the suggestions
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        show_id = None
        for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'):
            match = link_re.match(suggestion.text)
            if not match:
                logger.error('Failed to match %s', suggestion.text)
                continue

            if sanitize(match.group('series')).lower() == series.lower():
                if year is not None and int(match.group('first_year')) != year:
                    logger.debug('Year does not match')
                    continue
                show_id = int(suggestion['href'][8:-5])
                logger.debug('Found show id %d', show_id)
                break

        soup.decompose()
        soup = None

        return show_id
Example #34
0
    def query(self, series, season, episode, year=None):
        # get the show id
        show_id = self.get_show_id(series, year)
        if show_id is None:
            logger.error('No show id found for %s (%r)', series, year)
            return []

        # get the episode url
        episode_url = self.get_episode_url(show_id, series, season, episode, year)
        if episode_url is None:
            logger.error('No episode url found for %s, season %d, episode %d', series, season, episode)
            return []

        # get the page of the episode of the show
        r = self.session.get(episode_url, timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get episode title
        title_pattern = re.compile('Subt.+tulos de {}(.+){}x{:02d} - (.+)'.format(series, season, episode).lower())
        title = title_pattern.search(soup.select('#cabecera-subtitulo')[0].get_text().strip().lower()).group(2)

        # loop over subtitle rows
        subtitles = []

        for sub in soup.find_all('div', attrs={'id': re.compile('version([0-9]+)')}):
            # read the release subtitle
            release = sanitize_release_group(release_pattern.search(sub.find('p', class_='title-sub')
                                                                    .contents[2]).group(1))

            for html_language in sub.select('ul.sslist'):
                language = Language.fromtusubtitulo(html_language.find_next('b').get_text().strip())
                hearing_impaired = False

                # modify spanish latino subtitle language to only spanish and set hearing_impaired = True
                # because if exists spanish and spanish latino subtitle for the same episode, the score will be
                # higher with spanish subtitle. Spanish subtitle takes priority.
                if language == Language('spa', 'MX'):
                    language = Language('spa')
                    hearing_impaired = True

                # ignore incomplete subtitles
                status = sanitize(html_language.find_next('li', class_=re.compile('li-estado')).get_text())
                if status != 'completado':
                    logger.debug('Ignoring subtitle with status %s', status)
                    continue

                # get the most updated version of the subtitle and if it doesn't exist get the original version
                html_status = html_language.select('a[href^="updated/"]')
                if len(html_status) == 0:
                    html_status = html_language.select('a[href^="original/"]')

                subtitle_url = self.server_url + html_status[0]['href']
                subtitle = TuSubtituloSubtitle(language, hearing_impaired, episode_url, series, season, episode, title,
                                               year, release, subtitle_url)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

        return subtitles
Example #35
0
    def query(self, series, season, episode, year=None):
        # get the show id
        show_id = self.get_show_id(series, year)
        if show_id is None:
            logger.error("No show id found for %s (%r)", series, year)
            return []

        # get the episode url
        episode_url = self.get_episode_url(show_id, series, season, episode, year)
        if episode_url is None:
            logger.info(f"[{self.provider_name}]: No episode url found for {series}, season {season}, episode {episode}")
            return []

        # get the page of the episode of the show
        r = self.session.get(episode_url, timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ["lxml", "html.parser"])

        # get episode title
        title_pattern = re.compile("Subt.+tulos de {}(.+){}x{:02d} - (.+)".format(series, season, episode).lower())
        title = title_pattern.search(soup.select("#cabecera-subtitulo")[0].get_text().strip().lower()).group(2)

        # loop over subtitle rows
        subtitles = []

        for sub in soup.find_all("div", attrs={"id": re.compile("version([0-9]+)")}):
            # read the release subtitle
            release = sanitize_release_group(release_pattern.search(sub.find("p", class_="title-sub").contents[2]).group(1))

            for html_language in sub.select("ul.sslist"):
                language = Language.fromtusubtitulo(html_language.find_next("b").get_text().strip())
                hearing_impaired = False

                # modify spanish latino subtitle language to only spanish and set hearing_impaired = True
                # because if exists spanish and spanish latino subtitle for the same episode, the score will be
                # higher with spanish subtitle. Spanish subtitle takes priority.
                if language == Language("spa", "MX"):
                    language = Language("spa")
                    hearing_impaired = True

                # ignore incomplete subtitles
                status = sanitize(html_language.find_next("li", class_=re.compile("li-estado")).get_text())
                if status != "completado":
                    logger.debug("Ignoring subtitle with status %s", status)
                    continue

                # get the most updated version of the subtitle and if it doesn't exist get the original version
                html_status = html_language.select('a[href^="updated/"]')
                if len(html_status) == 0:
                    html_status = html_language.select('a[href^="original/"]')

                subtitle_url = self.server_url + html_status[0]["href"]
                subtitle = TuSubtituloSubtitle(language, hearing_impaired, episode_url, series, season, episode, title, year, release, subtitle_url)
                logger.debug("Found subtitle %r", subtitle)
                subtitles.append(subtitle)

        return subtitles
Example #36
0
def test_video_fromname_episode_no_season(episodes):
    video = Video.fromname(episodes["the_jinx_e05"].name)
    assert type(video) is Episode
    assert video.name == episodes["the_jinx_e05"].name
    assert video.format == episodes["the_jinx_e05"].format
    assert video.release_group == episodes["the_jinx_e05"].release_group
    assert video.resolution == episodes["the_jinx_e05"].resolution
    assert video.video_codec == episodes["the_jinx_e05"].video_codec
    assert video.audio_codec is None
    assert video.imdb_id is None
    assert video.hashes == {}
    assert video.size is None
    assert video.subtitle_languages == set()
    assert sanitize(video.series) == sanitize(episodes["the_jinx_e05"].series)
    assert video.season == episodes["the_jinx_e05"].season
    assert video.episode == episodes["the_jinx_e05"].episode
    assert video.title is None
    assert video.year is None
    assert video.tvdb_id is None
Example #37
0
    def get_matches(self, video):
        """
        patch: set guessit to single_value
        :param video:
        :return:
        """
        matches = set()

        # episode
        if isinstance(video, Episode):
            # series
            if video.series and (fix_inconsistent_naming(self.title) in (
                    fix_inconsistent_naming(name) for name in [video.series] + video.alternative_series)):
                matches.add('series')
            # year
            if video.original_series and self.year is None or video.year and video.year == self.year:
                matches.add('year')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
            # guess
            for release in self.releases:
                matches |= guess_matches(video, guessit(release, {'type': 'episode', "single_value": True}))
        # movie
        elif isinstance(video, Movie):
            # title
            if video.title and (sanitize(self.title) in (
                    sanitize(name) for name in [video.title] + video.alternative_titles)):
                matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')
            # guess
            for release in self.releases:
                matches |= guess_matches(video, guessit(release, {'type': 'movie', "single_value": True}))

        self.matches = matches

        return matches
Example #38
0
    def search_ids(self, title, year=None, imdb_id=None, season=None, episode=None, titles=None):
        """Search movie or episode id from the `title`, `season` and `episode`.

        :param imdb_id: imdb id of the given movie
        :param titles: all titles of the given series or movie
        :param year: release year of the given movie
        :param str title: series of the episode or movie name
        :param int season: season of the episode.
        :param int episode: episode number.
        :return: list of ids
        :rtype: list

        """
        # make the search
        query = title
        titles = titles or []

        is_episode = False
        if season and episode:
            is_episode = True
            query = '%s S%#02dE%#02d' % (title, season, episode)

        logger.info(u'Searching %s ID for %r', "episode" if is_episode else "movie", query)
        r = self.session.get(self.API_URL + 'search', params={'q': query}, timeout=10)
        r.raise_for_status()
        results = r.json()
        match_ids = []
        if results['total'] >= 1:
            for result in results["results"]:
                if (result['type'] == "episode" and not is_episode) or (result['type'] == "movie" and is_episode):
                    continue

                # shortcut in case of matching imdb id
                if not is_episode and imdb_id and "imdb" in result and "tt%s" % result["imdb"] == str(imdb_id):
                    logger.debug("Movie matched by IMDB ID %s, taking shortcut", imdb_id)
                    match_ids = [result['id']]
                    break

                # advanced title check in case of multiple movie results
                if results['total'] > 1:
                    if not is_episode and year:
                        if result["title"] and not (sanitize(result["title"]) in (u"%s %s" % (sanitize(name), year)
                                                                                  for name in titles)):
                            continue

                match_ids.append(result['id'])
        else:
            logger.error(u'No episode ID found for %r', query)

        if match_ids:
            logger.debug(u"Found matching IDs: %s", ", ".join(str(id) for id in match_ids))

        return match_ids
Example #39
0
    def get_show_id(self, series, year=None, country_code=None):
        """Get the best matching show id for `series`, `year` and `country_code`.

        First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :param country_code: country code of the series, if any.
        :type country_code: str
        :return: the show id, if found.
        :rtype: int

        """
        series_sanitized = sanitize(series).lower()
        show_ids = self._get_show_ids()
        show_id = None

        # attempt with country
        if not show_id and country_code:
            logger.debug('Getting show id with country')
            show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))

        # attempt with year
        if not show_id and year:
            logger.debug('Getting show id with year')
            show_id = show_ids.get('%s %d' % (series_sanitized, year))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(series_sanitized)

        # search as last resort
        if not show_id:
            logger.warning('Series not found in show ids')
            show_id = self._search_show_id(series)

        return show_id
Example #40
0
    def get_matches(self, video):
        matches = set()
        # series
        if isinstance(video, Episode) and self.movie_kind == 'episode':
            if video.series and (sanitize(self.title) in (
                     sanitize(name) for name in [video.series] + video.alternative_series)):
                matches.add('series')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')

            # tvdb_id
            if video.tvdb_id and str(self.tvdb_id) == str(video.tvdb_id):
                matches.add('tvdb_id')

        elif isinstance(video, Movie) and self.movie_kind == 'movie':
            # title
            if video.title and (sanitize(self.title) in (
                     sanitize(name) for name in [video.title] + video.alternative_titles)):
                matches.add('title')

            # imdb_id
            if video.imdb_id and self.imdb_id and str(self.imdb_id) == str(video.imdb_id):
                matches.add('imdb_id')

            # year
            if video.year and self.year == video.year:
                matches.add('year')
        else:
            logger.info('%r is not a valid movie_kind', self.movie_kind)
            return matches

        # release_group
        if video.release_group and self.release:
            rg = sanitize_release_group(video.release_group)
            if any(r in sanitize_release_group(self.release) for r in get_equivalent_release_groups(rg)):
                matches.add('release_group')

                # blatantly assume we've got a matching format if the release group matches
                # fixme: smart?
                #matches.add('format')

        # resolution
        if video.resolution and self.version and str(video.resolution) in self.version.lower():
            matches.add('resolution')
        # format
        if video.format and self.format:
            formats = [video.format]
            if video.format == "WEB-DL":
                formats.append("WEB")

            for fmt in formats:
                if fmt.lower() in self.format.lower():
                    matches.add('format')
                    break

        matches |= guess_matches(video, guessit(self.release_info), partial=True)
        self.matches = matches
        return matches
Example #41
0
def test_sanitize():
    assert sanitize('Marvel\'s Agents of S.H.I.E.L.D.') == 'marvels agents of s h i e l d'