def get_matches(self, video):
    """Collect the property names on which this subtitle agrees with `video`.

    Only :class:`Episode` videos are scored; any other video yields an empty set.

    :param video: the video to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    # NOTE(review): the source layout was flattened; all checks are assumed to
    # sit under the Episode test, as the pinned guessit type suggests.
    if not isinstance(video, Episode):
        return found

    # series: our name must equal the main or any alternative series name
    if video.series:
        known_names = [video.series] + video.alternative_series
        if sanitize(self.series) in (sanitize(candidate) for candidate in known_names):
            found.add('series')

    # year: either both sides carry no year marker, or the years are equal
    no_year_on_both = video.original_series and self.year is None
    if no_year_on_both or (video.year and video.year == self.year):
        found.add('year')

    # release group: any equivalent group name appearing in our version text
    if video.release_group and self.version:
        equivalents = get_equivalent_release_groups(
            sanitize_release_group(video.release_group))
        if any(group in sanitize_release_group(self.version) for group in equivalents):
            found.add('release_group')

    # everything else is derived from guessing the version string
    guessed = guessit(self.version, {'type': 'episode'})
    found |= guess_matches(video, guessed, partial=True)

    return found
def get_matches(self, video: Episode):
    """Return the set of property names shared by this subtitle and `video`.

    :param video: the episode to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    # textual properties are compared after sanitizing both sides
    if video.series and sanitize(self.series) == sanitize(video.series):
        found.add('series')
    if video.title and sanitize(self.title) == sanitize(video.title):
        found.add('title')

    # numeric properties are compared as-is
    if video.season and self.season == video.season:
        found.add('season')
    if video.episode and self.episode == video.episode:
        found.add('episode')

    # year: either both sides carry no year marker, or the years are equal
    no_year_on_both = video.original_series and self.year is None
    if no_year_on_both or (video.year and video.year == self.year):
        found.add('year')

    # the remaining checks look for video properties inside the version text
    if self.version:
        version_lower = self.version.lower()

        if video.release_group:
            equivalents = get_equivalent_release_groups(
                sanitize_release_group(video.release_group))
            if any(group in sanitize_release_group(self.version) for group in equivalents):
                found.add('release_group')

        if video.resolution and video.resolution in version_lower:
            found.add('resolution')

        if video.source and video.source.lower() in version_lower:
            found.add('source')

    # other properties come from guessing the version string
    found |= guess_matches(video, guessit(self.version), partial=True)

    return found
def get_matches(self, video):
    """Return the set of property names shared by this subtitle and `video`.

    :param video: the video (episode) to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    matches = set()

    # series: accept the video's name as-is or after naming normalisation
    if video.series and (
            sanitize(self.series) == sanitize(fix_inconsistent_naming(video.series)) or
            sanitize(self.series) == sanitize(video.series)):
        matches.add('series')

    # season
    if video.season and self.season == video.season:
        matches.add('season')

    # episode
    if video.episode and self.episode == video.episode:
        matches.add('episode')

    # year: a missing year only counts when the series itself matched
    if ('series' in matches and video.original_series and self.year is None or
            video.year and video.year == self.year):
        matches.add('year')

    # release_group
    if (video.release_group and self.version and
            any(r in sanitize_release_group(self.version)
                for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
        matches.add('release_group')

    # resolution
    if video.resolution and self.version and video.resolution in self.version.lower():
        matches.add('resolution')

    # format
    if video.format and self.version and video.format.lower() in self.version.lower():
        matches.add('format')

    # other properties: hand guessit the text itself. Encoding it to UTF-8
    # bytes first makes guessit decode it as ASCII on Python 3, which fails
    # for any release info containing non-ASCII characters.
    matches |= guess_matches(video, guessit(self.release_info))

    return matches
def get_matches(self, video):
    """Return the set of property names shared by this subtitle and `video`.

    Starts from what guessit extracts out of ``self.release_info`` and then adds
    provider-specific checks. The result is also stored on ``self.matches``.

    :param video: the video (episode or movie) to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    type_ = "movie" if isinstance(video, Movie) else "episode"
    matches = guess_matches(video, guessit(self.release_info, {"type": type_}))

    # episode
    if isinstance(video, Episode):
        # series
        if video.series and sanitize(self.series) == sanitize(video.series):
            matches.add('series')
        # imdb_id: an id match implies the series and year match as well
        if video.series_imdb_id and self.imdb_id and str(self.imdb_id) == str(video.series_imdb_id):
            matches.add('series_imdb_id')
            matches.add('series')
            matches.add('year')
        # year: a missing year only counts when the series itself matched
        if ('year' not in matches and 'series' in matches and
                video.original_series and self.year is None):
            matches.add('year')
    # movie
    elif isinstance(video, Movie):
        # title (this subtitle keeps the movie title in ``self.series``)
        if video.title and (sanitize(self.series) in (
                sanitize(name) for name in [video.title] + video.alternative_titles)):
            matches.add('title')
        # imdb_id: an id match implies the title and year match as well
        if video.imdb_id and self.imdb_id == video.imdb_id:
            matches.add('imdb_id')
            matches.add('title')
            matches.add('year')
        # year
        if video.year and self.year == video.year:
            matches.add('year')

    # release_group
    if video.release_group and self.releases:
        video_release_groups = get_equivalent_release_groups(
            sanitize_release_group(video.release_group))

        for release in self.releases:
            release_lower = release.lower()

            # NOTE(review): the source layout was flattened; resolution and
            # source are assumed to be credited only for a release line that
            # also carries the release group.
            if any(r in sanitize_release_group(release) for r in video_release_groups):
                matches.add('release_group')

                if video.resolution and video.resolution in release_lower:
                    matches.add('resolution')

                # Lower-case the video source too: the release text is
                # lower-cased, so a capitalised source could never match.
                if video.source and video.source.lower() in release_lower:
                    matches.add('source')

            # We don't have to continue in case it is a perfect match
            if all(m in matches for m in ['release_group', 'resolution', 'source']):
                break

    self.matches = matches
    return matches
def get_matches(self, video):
    """Return the set of property names shared by this subtitle and `video`.

    :param video: the video (episode or movie) to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()
    guess_type = None

    if isinstance(video, Episode):
        guess_type = "episode"
        # series
        if video.series and sanitize(self.series) == sanitize(video.series):
            found.add("series")
        # season
        if video.season and self.season == video.season:
            found.add("season")
        # episode
        if video.episode and self.episode == video.episode:
            found.add("episode")
    elif isinstance(video, Movie):
        guess_type = "movie"

    # guess every release name with the type that fits the video
    if guess_type is not None:
        for release in self.releases:
            found |= guess_matches(video, guessit(release, {"type": guess_type}))

    # NOTE(review): the source layout was flattened; the title check is assumed
    # to apply to both kinds of video.
    if video.title and sanitize(self.title) == sanitize(video.title):
        found.add("title")

    return found
def get_matches(self, video):
    """Collect the property names on which this subtitle agrees with `video`.

    Only :class:`Movie` videos are scored; any other video yields an empty set.

    :param video: the video to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    # NOTE(review): the source layout was flattened; all checks are assumed to
    # sit under the Movie test, as the pinned guessit type suggests.
    if not isinstance(video, Movie):
        return found

    # title: our title must equal the main or any alternative title
    if video.title:
        known_titles = [video.title] + video.alternative_titles
        if sanitize(self.title) in (sanitize(candidate) for candidate in known_titles):
            found.add('title')

    # year
    if video.year and self.year == video.year:
        found.add('year')

    # release group: any equivalent group name appearing in our version text
    if video.release_group and self.version:
        equivalents = get_equivalent_release_groups(
            sanitize_release_group(video.release_group))
        if any(group in sanitize_release_group(self.version) for group in equivalents):
            found.add('release_group')

    # everything else is derived from guessing the version string
    guessed = guessit(self.version, {'type': 'movie'})
    found |= guess_matches(video, guessed, partial=True)

    return found
def get_matches(self, video):
    """Return the set of property names shared by this subtitle and `video`.

    :param video: the video (episode) to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    matches = set()

    # series: accept the video's name as-is or after naming normalisation
    if video.series and (
            sanitize(self.series) == sanitize(fix_inconsistent_naming(video.series)) or
            sanitize(self.series) == sanitize(video.series)):
        matches.add('series')

    # season
    if video.season and self.season == video.season:
        matches.add('season')

    # episode
    if video.episode and self.episode == video.episode:
        matches.add('episode')

    # year: a missing year only counts when the series itself matched
    if ('series' in matches and video.original_series and self.year is None or
            video.year and video.year == self.year):
        matches.add('year')

    logger.debug("Matches: %s", matches)

    # release_group
    if (video.release_group and self.version and
            any(r in sanitize_release_group(self.version)
                for r in get_equivalent_release_groups(
                    sanitize_release_group(video.release_group)))):
        matches.add('release_group')

    # other properties: the options dict belongs to guessit(). It used to be
    # passed to guess_matches() by mistake, where it was read as a truthy
    # `partial` flag while guessit ran without the episode type hint.
    matches |= guess_matches(video, guessit(self.release_info, {"type": "episode"}))

    return matches
def get_show_links(self, title, year=None):
    """Get the matching show links for `title` and `year`.

    Candidates come from :meth:`_get_suggestions`. A suggestion is kept when its
    sanitized title equals the sanitized `title`, or `title` followed by `year`.

    :param title: show title.
    :param year: year of the show, if any.
    :type year: int
    :return: the show links found (possibly empty).
    :rtype: list of str

    """
    wanted = sanitize(title)
    links = []

    for suggestion in self._get_suggestions(wanted):
        candidate = sanitize(suggestion['title'])

        is_match = candidate == wanted
        if not is_match and year:
            # second chance: the suggestion carries the year after the title
            is_match = candidate == '{title} {year:d}'.format(title=wanted, year=year)

        if is_match:
            logger.debug('Getting show id')
            # the link id is whatever follows the last '?p='
            links.append(suggestion['link'].split('?p=')[-1])

    return links
def get_matches(self, video):
    """Return the set of property names shared by this subtitle and `video`.

    :param video: the video (episode) to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    # series name: main or any alternative name of the video
    if video.series:
        known_names = [video.series] + video.alternative_series
        if sanitize(self.series) in (sanitize(candidate) for candidate in known_names):
            found.add('series')

    # season and episode numbers
    if video.season and self.season == video.season:
        found.add('season')
    if video.episode and self.episode == video.episode:
        found.add('episode')

    # title of the episode
    if video.title and sanitize(self.title) == sanitize(video.title):
        found.add('title')

    # year: either both sides carry no year marker, or the years are equal
    no_year_on_both = video.original_series and self.year is None
    if no_year_on_both or (video.year and video.year == self.year):
        found.add('year')

    # the remaining checks look for video properties inside the version text
    if self.version:
        version_lower = self.version.lower()

        if video.release_group:
            equivalents = get_equivalent_release_groups(
                sanitize_release_group(video.release_group))
            if any(group in sanitize_release_group(self.version) for group in equivalents):
                found.add('release_group')

        if video.resolution and video.resolution in version_lower:
            found.add('resolution')

        if video.source and video.source.lower() in version_lower:
            found.add('source')

    # other properties come from guessing the version string
    found |= guess_matches(video, guessit(self.version), partial=True)

    return found
def _search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    Only the first suggestion returned by the search page is considered, and it
    must match the searched text after sanitizing.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :return: the show id, if found.
    :rtype: int

    """
    # addic7ed doesn't support search with quotes
    series = series.replace('\'', ' ')

    # build the params
    if year is None:
        series_year = series
    else:
        series_year = '%s %d' % (series, year)
    params = {'search': series_year, 'Submit': 'Search'}

    # make the search
    logger.info('Searching show ids with %r', params)
    r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # get the suggestion
    links = soup.select('span.titulo > a[href^="/show/"]')
    if not links:
        logger.warning('Show id not found: no suggestion')
        return None

    first = links[0]
    suggested_name = sanitize(first.i.text.replace('\'', ' '))
    if suggested_name != sanitize(series_year):
        logger.warning('Show id not found: suggestion does not match')
        return None

    # the href looks like "/show/<id>": drop the 6-character prefix
    show_id = int(first['href'][6:])
    logger.debug('Found show id %d', show_id)

    return show_id
def get_matches(self, video):
    """Return the set of property names shared by this subtitle and `video`.

    Starts from what guessit extracts out of ``self.release_info`` and then adds
    provider-specific checks. The result is also stored on ``self.matches``.

    :param video: the video (episode or movie) to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    # Hand guessit the text itself. Encoding it to UTF-8 bytes first makes
    # guessit decode it as ASCII on Python 3, which fails for any release info
    # containing non-ASCII characters.
    matches = guess_matches(video, guessit(self.release_info))

    # episode
    if isinstance(video, Episode):
        # series
        if video.series and sanitize(self.series) == sanitize(video.series):
            matches.add('series')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        # imdb_id: an id match implies the series and year match as well
        if video.series_imdb_id and self.imdb_id and str(self.imdb_id) == str(video.series_imdb_id):
            matches.add('series_imdb_id')
            matches.add('series')
            matches.add('year')
        # year: a missing year only counts when the series itself matched
        if ('series' in matches and video.original_series and self.year is None or
                video.year and video.year == self.year):
            matches.add('year')
    # movie
    elif isinstance(video, Movie):
        # title (this subtitle keeps the movie title in ``self.series``)
        if video.title and (sanitize(self.series) in (
                sanitize(name) for name in [video.title] + video.alternative_titles)):
            matches.add('title')
        # imdb_id: an id match implies the title and year match as well
        if video.imdb_id and self.imdb_id == video.imdb_id:
            matches.add('imdb_id')
            matches.add('title')
            matches.add('year')
        # year
        if video.year and self.year == video.year:
            matches.add('year')

    # release_group
    if (video.release_group and self.version and
            any(r in sanitize_release_group(self.version)
                for r in get_equivalent_release_groups(
                    sanitize_release_group(video.release_group)))):
        matches.add('release_group')

    # resolution
    if video.resolution and self.version and video.resolution in self.version.lower():
        matches.add('resolution')

    # format
    if video.format and self.version and video.format.lower() in self.version.lower():
        matches.add('format')

    self.matches = matches
    return matches
def get_matches(self, video):
    """Return the set of property names shared by this subtitle and `video`.

    :param video: the video (episode or movie) to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    if isinstance(video, Episode):
        # series, season, episode and imdb id are compared directly
        if video.series and sanitize(self.series) == sanitize(video.series):
            found.add('series')
        if video.season and self.season == video.season:
            found.add('season')
        if video.episode and self.episode == video.episode:
            found.add('episode')
        if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
            found.add('series_imdb_id')

        # guess each release name as an episode
        for release in self.releases:
            guessed = guessit(release, {'type': 'episode'})
            found.update(guess_matches(video, guessed))

    elif isinstance(video, Movie):
        # guess each release name as a movie
        for release in self.releases:
            guessed = guessit(release, {'type': 'movie'})
            found.update(guess_matches(video, guessed))

    # NOTE(review): the source layout was flattened; the title check is assumed
    # to apply to both kinds of video.
    if video.title and sanitize(self.title) == sanitize(video.title):
        found.add('title')

    return found
def get_matches(self, video):
    """Return the set of property names shared by this subtitle and `video`.

    :param video: the video (episode) to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()

    # series: accept the video's name after naming normalisation, or as-is
    if video.series:
        own_series = sanitize(self.series)
        if (own_series == sanitize(fix_inconsistent_naming(video.series))
                or own_series == sanitize(video.series)):
            found.add('series')

    # season and episode numbers
    if video.season and self.season == video.season:
        found.add('season')
    if video.episode and self.episode == video.episode:
        found.add('episode')

    # year: a missing year only counts when the series itself matched
    year_not_needed = 'series' in found and video.original_series and self.year is None
    if year_not_needed or (video.year and video.year == self.year):
        found.add('year')

    # the remaining checks look for video properties inside the version text
    if self.version:
        version_lower = self.version.lower()

        if video.release_group:
            equivalents = get_equivalent_release_groups(
                sanitize_release_group(video.release_group))
            if any(group in sanitize_release_group(self.version) for group in equivalents):
                found.add('release_group')

        if video.resolution and video.resolution in version_lower:
            found.add('resolution')

        if video.format and video.format.lower() in version_lower:
            found.add('format')

    # other properties come from guessing the release info
    found |= guess_matches(video, guessit(self.release_info))

    return found
def get_matches(self, video):
    """Return the set of property names shared by this subtitle and `video`.

    :param video: the video (episode or movie) to compare against.
    :return: names of the matching properties.
    :rtype: set
    """
    found = set()
    is_episode = isinstance(video, Episode)

    if is_episode:
        # series
        if video.series and sanitize(self.series) == sanitize(video.series):
            found.add('series')
        # season
        if video.season and self.season == video.season:
            found.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            found.add('episode')

    # guess every release name, typed after the kind of video at hand
    if is_episode or isinstance(video, Movie):
        for release in self.releases:
            options = {'type': 'episode' if is_episode else 'movie'}
            found |= guess_matches(video, guessit(release, options))

    # NOTE(review): the source layout was flattened; the title check is assumed
    # to apply to both kinds of video.
    if video.title and sanitize(self.title) == sanitize(video.title):
        found.add('title')

    return found
def find_id(self, series, year, original_title):
    """Find the provider id of a series through the autocomplete endpoint.

    The endpoint is ``index.php?term=SERIESNAME&nyelv=0&action=autoname`` on
    ``self.server_url`` and answers with JSON such as::

        [{"name":"DC\u2019s Legends of Tomorrow (2016)","ID":"3725"},
         {"name":"Miles from Tomorrowland (2015)","ID":"3789"},
         {"name":"No Tomorrow (2016)","ID":"4179"}]

    Each result name is turned into a fake episode name and parsed with guessit
    so that its title and year can be compared with the wanted ones.

    :param series: the searchable string sent as ``term``.
    :param year: year of the series; when falsy, the year is not compared.
    :param original_title: title the guessed result title must equal (sanitized).
    :return: the ``ID`` of the first matching result, or None.
    """
    # Let the HTTP client build the query string so that spaces, '&', '#' and
    # non-ASCII characters in `series` are escaped instead of corrupting the URL.
    url = self.server_url + "index.php"
    params = {"term": series, "nyelv": "0", "action": "autoname"}
    logger.info('Get series id from URL %s with params %r', url, params)
    r = self.session.get(url, params=params, timeout=10)

    wanted_title = sanitize(original_title)

    # an empty/null JSON answer simply means "nothing found"
    for result in r.json() or []:
        try:
            # "name":"Miles from Tomorrowland (2015)","ID":"3789"
            name = result['name']
            result_id = result['ID']
        except KeyError:
            # malformed entry: skip it rather than abort the whole search
            continue

        # the title is everything before "(YYYY)"; entries without it are skipped
        title_match = re.search(r".*(?=\(\d\d\d\d\))", name)
        if not title_match:
            continue
        result_title = title_match.group().strip().replace("�", "").replace(" ", ".")
        if not result_title:
            continue

        year_match = re.search(r"(?<=\()\d\d\d\d(?=\))", name)
        result_year = year_match.group() if year_match else ''

        # build something guessit can parse as an episode file name
        guessable = result_title.strip() + ".s01e01." + result_year
        guess = guessit(guessable, {'type': "episode"})

        # guessit may leave out either key, so never index the guess directly
        guessed_title = guess.get('title')
        if not guessed_title or sanitize(guessed_title) != wanted_title:
            continue

        # without a wanted year the title alone decides
        if not year or guess.get('year') == year:
            return result_id

    return None
def _search_show_id(self, series):
    """Search the show id from the `series`

    Walks through the result pages of the ``shows/search`` endpoint until a show
    whose sanitized name equals the sanitized `series` is found.

    :param str series: series of the episode.
    :return: the show id, if found.
    :rtype: int or None

    """
    # build the param
    params = {'apikey': self.apikey, 'q': series}

    # make the search
    logger.info('Searching show ids with %r', params)
    r = self.session.get(self.server_url + 'shows/search', params=params, timeout=10)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    if int(root.find('data/count').text) == 0:
        logger.warning('Show id not found: no suggestion')
        return None

    wanted = sanitize(series.lower())

    # Scan the first page, then follow the "next" links (if any)
    while True:
        for show in root.findall('data/shows/show'):
            name = show.find('name').text
            if name is None:
                # nameless entries cannot match and would break sanitizing
                continue
            if sanitize(name).lower() == wanted:
                series_id = int(show.find('id').text)
                logger.debug('Found show id %d', series_id)
                return series_id

        # stop when there is no further page: the element may be absent or empty
        next_page = root.find('data/next')
        if next_page is None or next_page.text is None:
            break

        r = self.session.get(next_page.text, timeout=10)
        r.raise_for_status()
        root = etree.fromstring(r.content)
        logger.info('Loading suggestion page %r', root.find('data/page').text)

    # No matches found
    logger.warning('Show id not found: suggestions does not match')

    return None
def _search_url_titles(self, series, season, episode, year=None):
    """Search the URL titles by kind for the given `title`, `season` and `episode`.

    :param str series: series to search for.
    :param int season: season to search for.
    :param int episode: episode to search for.
    :param int year: year to search for.
    :return: the episode URL, or None when no result holds the episode.
    :rtype: str
    :raises ProviderError: when the search does not answer with status 200.

    """
    # make the search
    logger.info('Searching episode url for %s, season %d, episode %d', series, season, episode)
    search = '{} {}x{}'.format(series, season, episode)
    r = self.session.get(self.search_url, headers={'Referer': self.server_url},
                         params={'q': search}, timeout=10)
    r.raise_for_status()

    if r.status_code != 200:
        logger.warning('Error getting episode url')
        # Exceptions do not %-format their arguments the way logging does, so
        # the message has to be built before it is handed over.
        raise ProviderError('%s: Error getting episode url' % self.__class__.__name__.upper())

    series_with_year = sanitize('{} ({})'.format(series, year))
    series_only = sanitize(series)

    for result in json.loads(r.text):
        title = sanitize(result['name'])

        # attempt series with year first, then series without year
        if series_with_year in title:
            found_message = 'Episode url found with year %s'
        elif series_only in title:
            found_message = 'Episode url found without year %s'
        else:
            continue

        for episode_data in result['episodes']:
            if season == episode_data['season'] and episode == episode_data['number']:
                episode_url = self.server_url + 'episodes/{}'.format(episode_data['id'])
                logger.info(found_message, episode_url)
                return episode_url

    return None
def get_matches(self, video):
    """
    patch: set guessit to single_value

    Compare this subtitle with `video`, store the matching property names on
    ``self.matches`` and return them.

    :param video: the video (episode or movie) to compare against.
    :return: names of the matching properties.
    """
    found = set()
    guess_type = None
    own_title = None

    if isinstance(video, Episode):
        guess_type = 'episode'

        # series: main or any alternative series name
        if video.series:
            own_title = sanitize(self.title)
            names = [video.series] + video.alternative_series
            if own_title in (sanitize(name) for name in names):
                found.add('series')

        # year: either both sides carry no year marker, or the years are equal
        no_year_on_both = video.original_series and self.year is None
        if no_year_on_both or (video.year and video.year == self.year):
            found.add('year')

        # season
        if video.season and self.season == video.season:
            found.add('season')

        # episode
        if video.episode and self.episode == video.episode:
            found.add('episode')

    elif isinstance(video, Movie):
        guess_type = 'movie'

        # title: main or any alternative title
        if video.title:
            own_title = sanitize(self.title)
            names = [video.title] + video.alternative_titles
            if own_title in (sanitize(name) for name in names):
                found.add('title')

        # year
        if video.year and self.year == video.year:
            found.add('year')

    # guess each release name, forcing guessit to return single values
    if guess_type is not None:
        for release in self.releases:
            guessed = guessit(release, {'type': guess_type, "single_value": True})
            found |= guess_matches(video, guessed)

    self.matches = found

    return found
def get_show_ids(self, title, year=None, is_episode=True, country_code=None):
    """Get the matching show ids for `title`, `year` and `country_code`.

    Suggestions come from :meth:`_get_suggestions`. For each one the title is
    tried with the country code, then with the year, then on its own; the first
    attempt that matches yields the id (last path segment of the link).

    :param title: show title.
    :param year: year of the show, if any.
    :type year: int
    :param is_episode: if the search is for episode.
    :type is_episode: bool
    :param country_code: country code of the show, if any.
    :type country_code: str
    :return: the ids of every matching suggestion (possibly empty).
    :rtype: list of str

    """
    title_sanitized = sanitize(title).lower()
    matched_show_ids = []

    for show in self._get_suggestions(title, is_episode):
        show_title = sanitize(show['title'])
        show_id = None

        # attempt with country
        if country_code:
            logger.debug('Getting show id with country')
            expected = text_type('{title} {country}').format(
                title=title_sanitized, country=country_code.lower())
            if show_title == expected:
                show_id = show['link'].split('/')[-1]

        # attempt with year
        if not show_id and year:
            logger.debug('Getting show id with year')
            expected = text_type('{title} {year}').format(title=title_sanitized, year=year)
            if show_title == expected:
                show_id = show['link'].split('/')[-1]

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            if show_title == title_sanitized:
                show_id = show['link'].split('/')[-1]
            else:
                show_id = None

        if show_id:
            matched_show_ids.append(show_id)

    return matched_show_ids
def search_ids(self, title, **kwargs):
    """Search the provider for ids matching `title`.

    Recognised keyword arguments: ``season`` and ``episode`` (both needed to
    search for an episode), ``imdb_id`` and ``year`` (movies only) and
    ``titles`` (names used by the year-qualified movie title check).

    :param title: title to search for.
    :return: the matching ids (possibly empty).
    :rtype: list
    """
    titles = kwargs.get("titles") or []
    season = kwargs.get("season")
    episode = kwargs.get("episode")

    is_episode = bool(season and episode)
    query = f"{title} S{season:02}E{episode:02}" if is_episode else title

    logger.info(f"Searching ID (episode: {is_episode}) for {query}")

    r = self.session.get(API_URL + "search", params={"q": query}, timeout=10)
    r.raise_for_status()
    results = r.json()

    match_ids = []

    if not results["total"] >= 1:
        logger.error(f"No episode ID found for {query}")
        return match_ids

    # results of the opposite kind are ignored; any other type is kept
    unwanted_type = "movie" if is_episode else "episode"
    several_results = results["total"] > 1

    for result in results["results"]:
        if result["type"] == unwanted_type:
            continue

        # shortcut in case of matching imdb id (don't match NoneType)
        if not is_episode and f"tt{result.get('imdb', 'n/a')}" == kwargs.get("imdb_id"):
            logger.debug("Movie matched by IMDB ID, taking shortcut")
            match_ids = [result["id"]]
            break

        # advanced title check in case of multiple movie results
        if several_results and not is_episode and kwargs.get("year") and result["title"]:
            accepted = ["%s %s" % (sanitize(name), kwargs.get("year")) for name in titles]
            if sanitize(result["title"]) not in accepted:
                continue

        match_ids.append(result["id"])

    if match_ids:
        joined_ids = ', '.join(str(match_id) for match_id in match_ids)
        logger.debug(f"Found matching IDs: {joined_ids}")

    return match_ids
def _search_url_titles(self, series, season, episode, year=None):
    """Search the URL titles by kind for the given `title`, `season` and `episode`.

    :param str series: series to search for.
    :param int season: season to search for.
    :param int episode: episode to search for.
    :param int year: year to search for.
    :return: the episode URL, or None when no result holds the episode.
    :rtype: str
    :raises ProviderError: when the search does not answer with status 200.

    """
    # make the search
    logger.info("Searching episode url for %s, season %d, episode %d", series, season, episode)
    r = self.session.get(
        self.search_url,
        headers={"Referer": self.server_url},
        params={"q": "{} {}x{}".format(series, season, episode)},
        timeout=10,
    )
    r.raise_for_status()

    if r.status_code != 200:
        logger.error("Error getting episode url")
        raise ProviderError("Error getting episode url")

    name_with_year = sanitize("{} ({})".format(series, year))
    name_only = sanitize(series)

    for result in json.loads(r.text):
        result_name = sanitize(result["name"])

        # a result qualifies when it holds the series with year, or failing
        # that the bare series name
        if not (name_with_year in result_name or name_only in result_name):
            continue

        for entry in result["episodes"]:
            if season == entry["season"] and episode == entry["number"]:
                return self.server_url + "episodes/{}".format(entry["id"])

    return None
def get_episode_url(self, show_id, series, season, episode, year=None):
    """Get the url best matching show id for `series`, `season`, `episode` and `year`.

    The season page of the show is fetched and its episode links are scanned
    for the first one whose text holds the series (with or without the year)
    followed by ``<season>x<episode>``.

    :param int show_id: show id of the series
    :param str series: serie of the episode.
    :param int season: season of the episode.
    :param int episode: number of the episode.
    :param int year: year of the series.
    :return: the episode url, if found.
    :rtype: str

    """
    # get the page of the season of the show
    logger.info('Getting the page of show id %d, season %d', show_id, season)
    series_sanitized = sanitize(series)

    query = {'show': show_id, 'season': season}
    r = self.session.get(self.subtitles_url, params=query, timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # loop over episodes rows
    for anchor in soup.select('td > a[href*="/episodes/"]'):
        title = sanitize(anchor.get_text())

        # series with year first, then series alone
        with_year = sanitize('{} {} {}x{:02d}'.format(series_sanitized, year, season, episode))
        hit = with_year in title
        if not hit:
            without_year = sanitize('{} {}x{:02d}'.format(series_sanitized, season, episode))
            hit = without_year in title

        if hit:
            # the href is protocol-relative ("//host/..."): swap in https
            episode_url = 'https://' + anchor['href'][2:]
            logger.debug('Subtitle found for %s, season: %d, episode: %d. URL: %s',
                         series, season, episode, episode_url)
            return episode_url

    return None
def list_subtitles(self, video, languages):
    """List the subtitles available for `video` in the wanted `languages`.

    For movies the show links are looked up by title (main one first, then the
    alternatives). Without any link, a single query is made with no link and
    the sanitized title.

    :param video: the video to list subtitles for.
    :param languages: the accepted languages.
    :return: the subtitles found.
    :rtype: list
    """
    # lookup show_id: only movies have candidate titles
    candidate_titles = []
    if isinstance(video, Movie):
        candidate_titles = [video.title] + video.alternative_titles

    show_links = None
    for candidate in candidate_titles:
        show_links = self.get_show_links(candidate, video.year)
        if show_links:
            break

    if not show_links:
        return [
            subtitle
            for subtitle in self.query(None, sanitize(video.title), video.year)
            if subtitle.language in languages
        ]

    # query for subtitles with the show_id
    subtitles = []
    for show_link in show_links:
        for subtitle in self.query(show_link, video.title, video.year):
            if subtitle.language in languages:
                subtitles.append(subtitle)

    return subtitles
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `shows` page.

    Shows without a name are left out.

    :return: show id per series, keyed by the sanitized, lower-cased name.
    :rtype: dict

    """
    # get the show page
    logger.info('Getting show ids')
    r = self.session.get(self.server_url + 'shows', timeout=10,
                         params={'apikey': self.apikey})
    r.raise_for_status()
    root = etree.fromstring(r.content)

    # populate the show ids
    show_ids = {}
    for show in root.findall('data/shows/show'):
        name = show.find('name').text
        if name is None:  # pragma: no cover
            continue
        key = sanitize(name).lower()
        show_ids[key] = int(show.find('id').text)
    logger.debug('Found %d show ids', len(show_ids))

    return show_ids
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `shows.php` page.

    :return: show id per series, keyed by the sanitized show name.
    :rtype: dict

    """
    # get the show page
    logger.info('Getting show ids')
    r = self.session.get(self.server_url + 'shows.php', timeout=20, cookies=self.cookies)
    r.raise_for_status()

    # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
    # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
    # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
    show_cells = re.findall(show_cells_re, r.content)
    if not show_cells:
        # If RegEx fails, fall back to original r.content and use 'html.parser'
        markup, parsers = r.content, ['html.parser']
    else:
        markup, parsers = b''.join(show_cells), ['lxml', 'html.parser']
    soup = ParserBeautifulSoup(markup, parsers)

    # populate the show ids; the href looks like "/show/<id>"
    show_ids = {
        sanitize(link.text): int(link['href'][6:])
        for link in soup.select('td.vr > h3 > a[href^="/show/"]')
    }
    logger.debug('Found %d show ids', len(show_ids))

    return show_ids
def get_show_id(self, series_names, year=None):
    """Get the best matching show id for any of `series_names` and `year`.

    Each name (and its alternative form, if one exists) is tried in order: with
    the year appended, with the year in brackets, then on its own. The first
    hit in the table from :meth:`_get_show_ids` wins.

    :param series_names: the candidate series names.
    :param year: year of the series, if any.
    :type year: int
    :return: the show id, if found.
    :rtype: int or None
    """
    # build the ordered list of lookup names
    candidates = []
    for name in series_names:
        cleaned = sanitize(name)
        candidates.append(cleaned)
        alternative = _get_alternative_name(cleaned)
        if alternative:
            candidates.append(alternative)

    show_ids = self._get_show_ids()
    show_id = None

    for candidate in candidates:
        if year:
            # attempt with year
            logger.debug('Getting show id with year')
            show_id = show_ids.get('{series} {year:d}'.format(series=candidate, year=year))

            # attempt with year in brackets
            if not show_id:
                logger.debug('Getting show id with year in brackets')
                show_id = show_ids.get('{series} [{year:d}]'.format(series=candidate, year=year))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(candidate)

        if show_id:
            break

    if not show_id:
        return None
    return int(show_id)
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `shows.php` page.

    :return: show id per series, keyed by the sanitized show name.
    :rtype: dict

    """
    # get the show page
    logger.info('Getting show ids')
    r = self.session.get(self.server_url + 'shows.php', timeout=10)
    r.raise_for_status()

    # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
    # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
    # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
    show_cells = re.findall(show_cells_re, r.content)
    if not show_cells:
        # If RegEx fails, fall back to original r.content and use 'html.parser'
        markup, parsers = r.content, ['html.parser']
    else:
        markup, parsers = b''.join(show_cells), ['lxml', 'html.parser']
    soup = ParserBeautifulSoup(markup, parsers)

    # populate the show ids; the href looks like "/show/<id>"
    show_ids = {
        sanitize(link.text): int(link['href'][6:])
        for link in soup.select('td.version > h3 > a[href^="/show/"]')
    }
    logger.debug('Found %d show ids', len(show_ids))

    return show_ids
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `series.php` page.

    :return: show id per series, keyed by the sanitized show name.
    :rtype: dict
    :raises ProviderError: when the page does not answer with status 200.

    """
    # get the show page
    logger.info('Getting show ids')
    r = self.session.get(self.series_url, timeout=10)
    r.raise_for_status()

    if r.status_code != 200:
        logger.error('Error getting show ids')
        raise ProviderError('Error getting show ids')

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # populate the show ids; the href looks like "/show/<id>"
    show_ids = {
        sanitize(link.get_text()): int(link['href'][6:])
        for link in soup.select('td > a[href^="/show/"]')
    }
    logger.debug('Found %d show ids', len(show_ids))

    return show_ids
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series from the provider's series list.

    Only the first ``seriesl`` element whose ``category`` is ``Σειρές`` is read.

    :return: show id per series, keyed by the sanitized show name; empty when
        the provider returns no data.
    :rtype: dict

    """
    # get the shows page
    logger.info('Getting show ids')
    r = self.session.get(self.server_url + self.all_series_url, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        # Keep the return type consistent: an empty list here would break
        # callers that look names up with ``.get()``.
        return {}

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # populate the show ids
    show_ids = {}
    for show_category in soup.findAll('seriesl'):
        if show_category.attrs['category'] == u'Σειρές':
            for show in show_category.findAll('series'):
                series = show.text
                # keep only the captured part of the name when the pattern applies
                series_match = series_sanitize_re.match(series)
                if series_match:
                    series = series_match.group(1)
                show_ids[sanitize(series)] = int(show['srsid'])
            # the wanted category has been processed: ignore the others
            break
    logger.debug('Found %d show ids', len(show_ids))

    return show_ids
def search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    The suggestions of the search page are scanned in order; the first one
    whose sanitized name equals `series` (case-insensitively) and whose first
    year fits `year`, when given, provides the id.

    :param string series: series of the episode.
    :param year: year of the series, if any.
    :type year: int or None
    :return: the show id, if any.
    :rtype: int or None

    """
    # make the search
    logger.info('Searching show id for %r', series)
    r = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10)
    r.raise_for_status()

    # get the series out of the suggestions
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
    show_id = None
    for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'):
        match = link_re.match(suggestion.text)
        if not match:
            logger.error('Failed to match %s', suggestion.text)
            continue

        if sanitize(match.group('series')).lower() != series.lower():
            continue

        if year is not None and int(match.group('first_year')) != year:
            logger.debug('Year does not match')
            continue

        # the id sits between the 8-character prefix and 5-character suffix of the href
        show_id = int(suggestion['href'][8:-5])
        logger.debug('Found show id %d', show_id)
        break

    # release the parsed tree explicitly
    soup.decompose()
    soup = None

    return show_id
def get_show_id(self, series, year=None):
    """Pick the show id that best matches `series` and `year`.

    The ids from :meth:`_get_show_ids` are looked up with the year appended to the name first,
    then with the name alone.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :return: the show id, if found.
    :rtype: int

    """
    lookup_name = sanitize(series)
    known_ids = self._get_show_ids()

    found = None

    # name followed by the year
    if year:
        logger.debug('Getting show id with year')
        found = known_ids.get('%s %d' % (lookup_name, year))

    # name alone
    if not found:
        logger.debug('Getting show id')
        found = known_ids.get(lookup_name)

    return found
def get_show_id(self, series, country_code=None):
    """Pick the show id that best matches `series`.

    The ids from :meth:`_get_show_ids` are looked up with the country code appended to the name
    first, then with the name alone; :meth:`_search_show_id` is the fallback.

    :param str series: series of the episode.
    :param str country_code: the country in which the show is aired.
    :return: the show id, if found.
    :rtype: int or None

    """
    lookup_name = sanitize(series).lower()
    known_ids = self._get_show_ids()

    found = None

    # name followed by the country code
    if country_code:
        logger.debug('Getting show id with country')
        found = known_ids.get('{0} {1}'.format(lookup_name, country_code.lower()))

    # name alone
    if not found:
        logger.debug('Getting show id')
        found = known_ids.get(lookup_name)

    # provider search as the last resort
    if not found:
        logger.warning('Series not found in show ids')
        found = self._search_show_id(series)

    return found
def query(self, series, season, episode, year=None):
    """Search the provider for the completed subtitles of one episode.

    Subtitles whose status is not ``completado`` are ignored. Spanish (MX) subtitles are reported
    as plain Spanish and flagged as hearing impaired.

    :param str series: series of the episode.
    :param int season: season of the episode.
    :param int episode: episode number.
    :param year: year of the series, if any.
    :type year: int or None
    :return: the subtitles found, empty when the show or the episode is unknown.
    :rtype: list

    """
    # get the show id
    show_id = self.get_show_id(series, year)
    if show_id is None:
        logger.error('No show id found for %s (%r)', series, year)
        return []

    # get the episode url
    episode_url = self.get_episode_url(show_id, series, season, episode, year)
    if episode_url is None:
        logger.error('No episode url found for %s, season %d, episode %d', series, season, episode)
        return []

    # get the page of the episode of the show
    r = self.session.get(episode_url, timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # get episode title
    # the series is escaped so that a name holding regex metacharacters (dots, parentheses, ...)
    # is matched literally and cannot add capture groups that would shift group(2)
    title_pattern = re.compile(
        'Subt.+tulos de {}(.+){}x{:02d} - (.+)'.format(re.escape(series), season, episode).lower())
    title = title_pattern.search(soup.select('#cabecera-subtitulo')[0].get_text().strip().lower()).group(2)

    # loop over subtitle rows
    subtitles = []
    for sub in soup.find_all('div', attrs={'id': re.compile('version([0-9]+)')}):
        # read the release subtitle
        release = sanitize_release_group(
            release_pattern.search(sub.find('p', class_='title-sub').contents[2]).group(1))

        for html_language in sub.select('ul.sslist'):
            language = Language.fromtusubtitulo(html_language.find_next('b').get_text().strip())
            hearing_impaired = False

            # modify spanish latino subtitle language to only spanish and set hearing_impaired = True
            # because if exists spanish and spanish latino subtitle for the same episode, the score will be
            # higher with spanish subtitle. Spanish subtitle takes priority.
            if language == Language('spa', 'MX'):
                language = Language('spa')
                hearing_impaired = True

            # ignore incomplete subtitles
            status = sanitize(html_language.find_next('li', class_=re.compile('li-estado')).get_text())
            if status != 'completado':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # get the most updated version of the subtitle and if it doesn't exist get the original version
            html_status = html_language.select('a[href^="updated/"]')
            if len(html_status) == 0:
                html_status = html_language.select('a[href^="original/"]')

            # a row with neither link has nothing to download: skip it instead of failing the whole query
            if not html_status:
                logger.debug('Ignoring subtitle without download link')
                continue

            subtitle_url = self.server_url + html_status[0]['href']
            subtitle = TuSubtituloSubtitle(language, hearing_impaired, episode_url, series, season, episode, title,
                                           year, release, subtitle_url)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

    return subtitles
def query(self, series, season, episode, year=None):
    """Search the provider for the completed subtitles of one episode.

    Subtitles whose status is not ``completado`` are ignored. Spanish (MX) subtitles are reported
    as plain Spanish and flagged as hearing impaired.

    :param str series: series of the episode.
    :param int season: season of the episode.
    :param int episode: episode number.
    :param year: year of the series, if any.
    :type year: int or None
    :return: the subtitles found, empty when the show or the episode is unknown.
    :rtype: list

    """
    # get the show id
    show_id = self.get_show_id(series, year)
    if show_id is None:
        logger.error("No show id found for %s (%r)", series, year)
        return []

    # get the episode url
    episode_url = self.get_episode_url(show_id, series, season, episode, year)
    if episode_url is None:
        logger.info(f"[{self.provider_name}]: No episode url found for {series}, season {season}, episode {episode}")
        return []

    # get the page of the episode of the show
    r = self.session.get(episode_url, timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ["lxml", "html.parser"])

    # get episode title
    # the series is escaped so that a name holding regex metacharacters (dots, parentheses, ...)
    # is matched literally and cannot add capture groups that would shift group(2)
    title_pattern = re.compile(
        "Subt.+tulos de {}(.+){}x{:02d} - (.+)".format(re.escape(series), season, episode).lower())
    title = title_pattern.search(soup.select("#cabecera-subtitulo")[0].get_text().strip().lower()).group(2)

    # loop over subtitle rows
    subtitles = []
    for sub in soup.find_all("div", attrs={"id": re.compile("version([0-9]+)")}):
        # read the release subtitle
        release = sanitize_release_group(
            release_pattern.search(sub.find("p", class_="title-sub").contents[2]).group(1))

        for html_language in sub.select("ul.sslist"):
            language = Language.fromtusubtitulo(html_language.find_next("b").get_text().strip())
            hearing_impaired = False

            # modify spanish latino subtitle language to only spanish and set hearing_impaired = True
            # because if exists spanish and spanish latino subtitle for the same episode, the score will be
            # higher with spanish subtitle. Spanish subtitle takes priority.
            if language == Language("spa", "MX"):
                language = Language("spa")
                hearing_impaired = True

            # ignore incomplete subtitles
            status = sanitize(html_language.find_next("li", class_=re.compile("li-estado")).get_text())
            if status != "completado":
                logger.debug("Ignoring subtitle with status %s", status)
                continue

            # get the most updated version of the subtitle and if it doesn't exist get the original version
            html_status = html_language.select('a[href^="updated/"]')
            if len(html_status) == 0:
                html_status = html_language.select('a[href^="original/"]')

            # a row with neither link has nothing to download: skip it instead of failing the whole query
            if not html_status:
                logger.debug("Ignoring subtitle without download link")
                continue

            subtitle_url = self.server_url + html_status[0]["href"]
            subtitle = TuSubtituloSubtitle(language, hearing_impaired, episode_url, series, season, episode, title,
                                           year, release, subtitle_url)
            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

    return subtitles
def test_video_fromname_episode_no_season(episodes):
    """Check what ``Video.fromname`` yields for the name of the ``the_jinx_e05`` fixture."""
    expected = episodes["the_jinx_e05"]
    video = Video.fromname(expected.name)

    assert type(video) is Episode

    # attributes expected to equal the fixture
    assert video.name == expected.name
    assert video.format == expected.format
    assert video.release_group == expected.release_group
    assert video.resolution == expected.resolution
    assert video.video_codec == expected.video_codec

    # attributes expected to stay unset
    assert video.audio_codec is None
    assert video.imdb_id is None
    assert video.hashes == {}
    assert video.size is None
    assert video.subtitle_languages == set()

    # episode specific attributes
    assert sanitize(video.series) == sanitize(expected.series)
    assert video.season == expected.season
    assert video.episode == expected.episode
    assert video.title is None
    assert video.year is None
    assert video.tvdb_id is None
def get_matches(self, video):
    """
    patch: set guessit to single_value

    Collect the properties of `video` this subtitle agrees with and store them on ``self.matches``.

    :param video: the video to compare against; only ``Episode`` and ``Movie`` produce matches.
    :return: the names of the matching properties.
    :rtype: set
    """
    found = set()
    guess_type = None

    if isinstance(video, Episode):
        guess_type = 'episode'

        # series, compared against the main name and every alternative one
        if video.series:
            own_series = fix_inconsistent_naming(self.title)
            if any(own_series == fix_inconsistent_naming(name)
                   for name in [video.series] + video.alternative_series):
                found.add('series')

        # year: either an original series with no year on the subtitle, or the very same year
        if (video.original_series and self.year is None) or (video.year and video.year == self.year):
            found.add('year')

        # season
        if video.season and self.season == video.season:
            found.add('season')

        # episode
        if video.episode and self.episode == video.episode:
            found.add('episode')

    elif isinstance(video, Movie):
        guess_type = 'movie'

        # title, compared against the main title and every alternative one
        if video.title:
            own_title = sanitize(self.title)
            if any(own_title == sanitize(name) for name in [video.title] + video.alternative_titles):
                found.add('title')

        # year
        if video.year and self.year == video.year:
            found.add('year')

    # guess the remaining properties from each release name
    if guess_type is not None:
        for release in self.releases:
            found |= guess_matches(video, guessit(release, {'type': guess_type, "single_value": True}))

    self.matches = found
    return found
def search_ids(self, title, year=None, imdb_id=None, season=None, episode=None, titles=None):
    """Search movie or episode id from the `title`, `season` and `episode`.

    The search is an episode search only when both `season` and `episode` are given (and
    non-zero); otherwise it is a movie search.

    :param str title: series of the episode or movie name
    :param year: release year of the given movie
    :param imdb_id: imdb id of the given movie
    :param int season: season of the episode.
    :param int episode: episode number.
    :param titles: all titles of the given series or movie
    :return: list of ids
    :rtype: list

    """
    # make the search
    query = title
    titles = titles or []

    is_episode = False
    if season and episode:
        is_episode = True
        query = '%s S%#02dE%#02d' % (title, season, episode)

    kind = "episode" if is_episode else "movie"
    logger.info(u'Searching %s ID for %r', kind, query)
    r = self.session.get(self.API_URL + 'search', params={'q': query}, timeout=10)
    r.raise_for_status()

    results = r.json()
    match_ids = []
    if results['total'] >= 1:
        for result in results["results"]:
            # skip results of the other kind
            if (result['type'] == "episode" and not is_episode) or (result['type'] == "movie" and is_episode):
                continue

            # shortcut in case of matching imdb id
            if not is_episode and imdb_id and "imdb" in result and "tt%s" % result["imdb"] == str(imdb_id):
                logger.debug("Movie matched by IMDB ID %s, taking shortcut", imdb_id)
                match_ids = [result['id']]
                break

            # advanced title check in case of multiple movie results:
            # the result title has to be one of the known titles followed by the year
            if results['total'] > 1:
                if not is_episode and year:
                    if result["title"] and not (sanitize(result["title"]) in
                                                (u"%s %s" % (sanitize(name), year) for name in titles)):
                        continue

            match_ids.append(result['id'])
    else:
        # name the kind that was actually searched for instead of always saying "episode"
        logger.error(u'No %s ID found for %r', kind, query)

    if match_ids:
        logger.debug(u"Found matching IDs: %s", ", ".join(str(match_id) for match_id in match_ids))

    return match_ids
def get_show_id(self, series, year=None, country_code=None):
    """Pick the show id that best matches `series`, `year` and `country_code`.

    The ids from :meth:`_get_show_ids` are looked up with the country code appended to the name,
    then with the year appended, then with the name alone; :meth:`_search_show_id` is the fallback.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :param country_code: country code of the series, if any.
    :type country_code: str
    :return: the show id, if found.
    :rtype: int

    """
    lookup_name = sanitize(series).lower()
    known_ids = self._get_show_ids()

    found = None

    # name followed by the country code
    if country_code:
        logger.debug('Getting show id with country')
        found = known_ids.get('%s %s' % (lookup_name, country_code.lower()))

    # name followed by the year
    if not found and year:
        logger.debug('Getting show id with year')
        found = known_ids.get('%s %d' % (lookup_name, year))

    # name alone
    if not found:
        logger.debug('Getting show id')
        found = known_ids.get(lookup_name)

    # provider search as the last resort
    if not found:
        logger.warning('Series not found in show ids')
        found = self._search_show_id(series)

    return found
def get_matches(self, video):
    """Collect the properties of `video` this subtitle agrees with.

    The result is also stored on ``self.matches``, except when the kind of `video` and
    ``self.movie_kind`` do not go together: an empty set is then returned right away.

    :param video: the video to compare against (``Episode`` or ``Movie``).
    :return: the names of the matching properties.
    :rtype: set

    """
    found = set()

    if isinstance(video, Episode) and self.movie_kind == 'episode':
        # series, compared against the main name and every alternative one
        if video.series:
            own_series = sanitize(self.title)
            if any(own_series == sanitize(name) for name in [video.series] + video.alternative_series):
                found.add('series')

        # season
        if video.season and self.season == video.season:
            found.add('season')

        # episode
        if video.episode and self.episode == video.episode:
            found.add('episode')

        # tvdb_id, compared as text
        if video.tvdb_id and str(self.tvdb_id) == str(video.tvdb_id):
            found.add('tvdb_id')

    elif isinstance(video, Movie) and self.movie_kind == 'movie':
        # title, compared against the main title and every alternative one
        if video.title:
            own_title = sanitize(self.title)
            if any(own_title == sanitize(name) for name in [video.title] + video.alternative_titles):
                found.add('title')

        # imdb_id, compared as text
        if video.imdb_id and self.imdb_id and str(self.imdb_id) == str(video.imdb_id):
            found.add('imdb_id')

        # year
        if video.year and self.year == video.year:
            found.add('year')

    else:
        logger.info('%r is not a valid movie_kind', self.movie_kind)
        return found

    # release_group: any equivalent of the video's group found inside the subtitle's release
    # (a release group match deliberately does not imply a format match)
    if video.release_group and self.release:
        equivalents = get_equivalent_release_groups(sanitize_release_group(video.release_group))
        if any(group in sanitize_release_group(self.release) for group in equivalents):
            found.add('release_group')

    # resolution
    if video.resolution and self.version and str(video.resolution) in self.version.lower():
        found.add('resolution')

    # format: a WEB-DL video is also satisfied by a plain WEB subtitle
    if video.format and self.format:
        accepted = [video.format]
        if video.format == "WEB-DL":
            accepted.append("WEB")
        if any(fmt.lower() in self.format.lower() for fmt in accepted):
            found.add('format')

    # other properties
    found |= guess_matches(video, guessit(self.release_info), partial=True)

    self.matches = found
    return found
def test_sanitize():
    """Check the sanitized form of a name holding an apostrophe, dots and capitals."""
    raw_name = "Marvel's Agents of S.H.I.E.L.D."
    expected = 'marvels agents of s h i e l d'

    assert sanitize(raw_name) == expected