def test_list_subtitles_single_language(video_single_language):
    """Every subtitle listed for an English-only video must be English."""
    wanted = Language.fromalpha2("en")
    with EmbeddedSubtitlesProvider() as provider:
        found = provider.list_subtitles(video_single_language, {wanted})
        for item in found:
            assert item.language == wanted
class NapiProjektProvider(_NapiProjektProvider):
    """Polish-only NapiProjekt provider that looks subtitles up by file hash."""

    languages = {Language.fromalpha2('pl')}
    subtitle_class = NapiProjektSubtitle

    def query(self, language, hash):
        """Query the legacy dreambox endpoint for one language and file hash.

        Returns a subtitle with its content already attached, or ``None``
        when the service reports no match.
        """
        params = {
            'v': 'dreambox',
            'kolejka': 'false',
            'nick': '',
            'pass': '',
            'napios': 'Linux',
            'l': language.alpha2.upper(),
            'f': hash,
            't': get_subhash(hash),
        }
        logger.info('Searching subtitle %r', params)
        response = self.session.get(self.server_url, params=params, timeout=10)
        response.raise_for_status()

        # The service answers with an 'NPc0' prefix when nothing matches.
        if response.content.startswith(b'NPc0'):
            logger.debug('No subtitles found')
            return None

        found = self.subtitle_class(language, hash)
        found.content = response.content
        logger.debug('Found subtitle %r', found)
        return found

    def list_subtitles(self, video, languages):
        """Query once per requested language, dropping empty results."""
        results = []
        for lang in languages:
            sub = self.query(lang, video.hashes['napiprojekt'])
            if sub is not None:
                results.append(sub)
        return results
def test_list_subtitles_also_forced(video_single_language):
    """Requesting normal + forced English still surfaces the normal track."""
    plain = Language.fromalpha2("en")
    forced = Language.rebuild(plain, forced=True)
    with EmbeddedSubtitlesProvider() as provider:
        found = provider.list_subtitles(video_single_language, {plain, forced})
        assert any(sub.language == plain for sub in found)
        assert any(not sub.language.forced for sub in found)
def query(self, title):
    """Search the provider for *title* and return Latvian subtitle results."""
    payload = {
        'ajax': '1',
        'sSearch': title,
    }
    r = self.session.post(self.search_url, data=payload, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                               ['lxml', 'html.parser'])

    subtitles = []
    # one <tr> per subtitle result
    for row in soup.select('tbody > tr'):
        anchor = row.select_one('.title > a')

        # title: only the direct text nodes of the anchor (skip child tags)
        text_nodes = [node for node in anchor
                      if isinstance(node, NavigableString)]
        title = text_nodes[0].strip()

        # year, e.g. "(2020)" -> "2020"
        year = row.select_one('.year').text.strip('()')

        # download link built from the relative href on the title anchor
        download_link = self.server_url + anchor.get('href')

        # imdb id taken from the 4th column's link, e.g. .../tt0123456/
        imdb_link = row.select_one('td:nth-of-type(4)').select_one('a').get('href')
        imdb_id = imdb_link.split('/')[-2]

        fps = row.select_one('.fps').text.strip()
        notes = row.select_one('.notes').text.strip()

        # page link equals the download link (no separate subtitle page)
        page_link = download_link

        subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link,
                                       download_link, title, year, imdb_id,
                                       fps, notes)
        logger.debug('nekur: Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def test_download_subtitle_multiple(video_multiple_languages):
    """Downloading every listed subtitle must populate its content."""
    wanted = {Language("por", "BR")}
    wanted |= {Language.fromalpha2(code) for code in ("en", "it", "fr")}
    with EmbeddedSubtitlesProvider() as provider:
        for sub in provider.list_subtitles(video_multiple_languages, wanted):
            provider.download_subtitle(sub)
            assert sub.content is not None
def test_list_subtitles_multiple_languages(video_multiple_languages):
    """Each requested language must appear among the listed subtitles."""
    wanted = {Language("por", "BR")}
    wanted |= {Language.fromalpha2(code) for code in ("en", "it", "fr", "es")}
    with EmbeddedSubtitlesProvider() as provider:
        found = provider.list_subtitles(video_multiple_languages, wanted)
        for expected in wanted:
            assert any(sub.language == expected for sub in found)
def test_download_invalid_subtitle(video_single_language):
    """A cached path pointing at a missing file must raise InvalidFile."""
    with EmbeddedSubtitlesProvider() as provider:
        first = provider.list_subtitles(
            video_single_language, {Language.fromalpha2("en")})[0]
        # Poison the provider's path cache so extraction resolves to a
        # nonexistent file for this stream.
        provider._cached_paths[first.container.path] = {
            first.stream.index: "dummy.srt"
        }
        with pytest.raises(fese.InvalidFile):
            provider.download_subtitle(first)
def _parse_subtitles_page(self, video, response, language):
    """Parse one page of search results into subtitle objects.

    :param video: the Video being searched (Episode or Movie).
    :param response: requests response holding the search-results HTML.
    :param language: default language; NOTE it is shadowed per result below.
    :return: list of subtitles parsed from this page.
    """
    subtitles = []
    page_soup = ParserBeautifulSoup(
        response.content.decode("utf-8", "ignore"), ["lxml", "html.parser"])
    # Results come as two parallel div lists: one header and one detail
    # block per hit, matched by index.
    title_soups = page_soup.find_all("div", {"id": "menu_detalle_buscador"})
    body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})
    episode = isinstance(video, Episode)
    for subtitle in range(0, len(title_soups)):
        title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]
        # title
        title = _clean_title(title_soup.find("a").text)
        # Forced subtitles are not supported
        if title.lower().rstrip().endswith(("forzado", "forzados")):
            logger.debug("Skipping forced subtitles: %s", title)
            continue
        # Check movie title (if the video is a movie)
        if not episode and not _check_movie(video, title):
            continue
        # Data
        datos = body_soup.find("div", {
            "id": "buscador_detalle_sub_datos"
        }).text
        # Ignore multi-disc and non-srt subtitles
        if not any(item in datos for item in ("Cds:</b> 1", "SubRip")):
            continue
        # The Spanish flag icon marks Castilian; everything else is treated
        # as Latin American Spanish.
        spain = "/pais/7.gif" in datos
        language = Language.fromalpha2("es") if spain else Language(
            "spa", "MX")
        # description
        sub_details = body_soup.find("div", {
            "id": "buscador_detalle_sub"
        }).text
        description = sub_details.replace(",", " ")
        # uploader
        uploader = body_soup.find("a", {"class": "link1"}).text
        download_url = _get_download_url(body_soup)
        page_link = title_soup.find("a")["href"]
        subtitle = self.subtitle_class(language, video, page_link, title,
                                       description, uploader, download_url)
        logger.debug("Found subtitle %r", subtitle)
        subtitles.append(subtitle)
    return subtitles
def query(self, keyword, season=None, episode=None, year=None):
    """Search subtitles by keyword, optionally narrowed to an episode or year.

    Follows the provider's 'Next' pagination links until exhausted.

    :param str keyword: series or movie name to search for.
    :param int season: optional season number (used with *episode*).
    :param int episode: optional episode number (used with *season*).
    :param int year: optional release year (ignored when season/episode set).
    :return: list of subtitles across all result pages.
    """
    params = keyword
    if season and episode:
        params += ' S{season:02d}E{episode:02d}'.format(season=season,
                                                        episode=episode)
    elif year:
        params += ' {:4d}'.format(year)

    logger.debug('Searching subtitles %r', params)
    subtitles = []
    search_link = self.server_url + text_type(self.search_url).format(params)
    while True:
        r = self.session.get(search_link, timeout=30)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # loop over subtitles cells
        for cell in soup.select('td.latest_name > a:nth-of-type(1)'):
            # read the item
            subtitle_id = int(cell['href'].rsplit('/', 2)[1])
            page_link = cell['href']
            language = Language.fromalpha2(
                cell.parent.find('img')['src'].split('/')[-1].split('.')[0])
            # FIX: the old `strip() or None` followed by a None->"" reset
            # always collapsed to plain strip(); drop the dead round-trip.
            version = cell.text.strip()

            subtitle = self.subtitle_class(
                language, page_link, version,
                self.download_url.format(subtitle_id))
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        # follow pagination: a 'Next' anchor pointing back into search.php
        anchors = soup.select('td a')
        next_page_available = False
        for anchor in anchors:
            if 'Next' in anchor.text and 'search.php' in anchor['href']:
                search_link = self.server_url + anchor['href']
                next_page_available = True
                break

        if not next_page_available:
            break

    return subtitles
def query(self, title):
    """Search subtitri.id.lv for *title* and return Latvian subtitles.

    Performs one search request, then fetches each result's detail page
    for extra metadata (year, imdb id, download link).
    """
    subtitles = []
    r = self.session.get(self.search_url, params={'q': title}, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                               ['lxml', 'html.parser'])

    # loop over subtitle cells
    rows = soup.select('.eBlock')
    for row in rows:
        result_anchor_el = row.select_one('.eTitle > a')

        # page link
        page_link = result_anchor_el.get('href')

        # fetch/parse additional info
        r = self.session.get(page_link, timeout=10)
        # FIX: fail loudly on HTTP errors, consistent with the search
        # request above; otherwise we'd scrape an error page and crash
        # with an obscure AttributeError below.
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # title: the page lists alternate titles separated by ' / ';
        # the last entry is used.
        movie_titles_string = soup.select_one('.main-header').text.strip()
        movie_titles_list = movie_titles_string.split(' / ')
        title = movie_titles_list[-1]

        # year
        year = soup.select_one('#film-page-year').text.strip()

        # imdb id
        imdb_link = soup.select_one('#actors-page > a').get('href')
        imdb_id = imdb_link.split('/')[-2]

        # download link
        href = soup.select_one('.hvr').get('href')
        download_link = self.server_url + href

        # create/add the subtitle
        subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link,
                                       download_link, title, year, imdb_id)
        logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def _parse_subtitles_page(self, video, response, language):
    """Parse one page of search results into subtitle objects.

    Results come as two parallel div lists (header + detail) matched by
    index.

    :param video: the Video being searched.
    :param response: requests response holding the search-results HTML.
    :param language: default language; NOTE it is shadowed per result below.
    :return: list of subtitles parsed from this page.
    """
    subtitles = []
    page_soup = ParserBeautifulSoup(
        response.content.decode("utf-8", "ignore"), ["lxml", "html.parser"])
    title_soups = page_soup.find_all("div", {"id": "menu_detalle_buscador"})
    body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})
    for subtitle in range(0, len(title_soups)):
        title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]
        # title
        title = self._clean_title(title_soup.find("a").text)
        # discard subtitles if a year between parenthesis is present in title
        # and doesn't match the one provided in the video object.
        # FIX: use re.search instead of re.match -- the "(YYYY)" marker sits
        # after the name, never at position 0, so re.match left this filter
        # permanently dead.
        if re.search(r'(\(\d{4}\))', title):
            if video.year and str(video.year) not in title:
                continue
        # Data
        datos = body_soup.find("div", {
            "id": "buscador_detalle_sub_datos"
        }).text
        # Ignore multi-disc and non-srt subtitles
        if not any(item in datos for item in ("Cds:</b> 1", "SubRip")):
            continue
        # The Spanish flag icon marks Castilian; everything else is treated
        # as Latin American Spanish.
        spain = "/pais/7.gif" in datos
        language = Language.fromalpha2("es") if spain else Language(
            "spa", "MX")
        # description
        sub_details = body_soup.find("div", {
            "id": "buscador_detalle_sub"
        }).text
        description = sub_details.replace(",", " ").lower()
        # uploader
        uploader = body_soup.find("a", {"class": "link1"}).text
        page_link = title_soup.find("a")["href"]
        subtitle = self.subtitle_class(language, video, page_link, title,
                                       description, uploader)
        logger.debug("Found subtitle %r", subtitle)
        subtitles.append(subtitle)
    return subtitles
def query(self, movie_id, title, year):
    """List subtitles for a show, by id when known, else by title search.

    :param movie_id: provider id of the show; falsy to search by title/year.
    :param title: title used for the fallback search.
    :param year: year appended to the fallback search query.
    :return: list of subtitles found on the page.
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', movie_id)
    if movie_id:
        page_link = self.server_url + '/' + movie_id
    else:
        page_link = self.server_url + text_type(self.search_url).format(
            ' '.join([title, str(year)]))
    r = self.session.get(page_link, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['html.parser'])

    # Year is scraped from the header table; absent or unparsable text
    # leaves year_num as None.
    year_num = None
    year_element = soup.select_one('td#dates_header > table div')
    matches = False
    if year_element:
        matches = year_re.match(str(year_element.contents[2]).strip())
    if matches:
        year_num = int(matches.group(1))
    title_element = soup.select_one('td#dates_header > table u')
    show_title = str(
        title_element.contents[0]).strip() if title_element else None

    subtitles = []
    # loop over episode rows
    for subtitle in soup.select(
            'table.table_border div[align="center"] > div'):
        # read common info
        version = subtitle.find('b').text
        download_link = self.server_url + subtitle.find('a')['href']
        # language code is taken from the flag image filename
        language = Language.fromalpha2(
            subtitle.find('img')['src'].split('/')[-1].split('.')[0])

        # NOTE: the loop variable is deliberately rebound from the row
        # element to the constructed subtitle object here.
        subtitle = self.subtitle_class(language, page_link, show_title,
                                       year_num, version, download_link)

        logger.debug('Found subtitle {!r}'.format(subtitle))
        subtitles.append(subtitle)

    return subtitles
def query(self, show_id, series, season, episode, title):
    """List subtitles for one episode of a show.

    Requires show_id, season and episode; returns [] when any is missing.
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', show_id)
    if all((show_id, season, episode)):
        page_link = self.server_url + self.episode_link.format(
            show_id=show_id, season=season, episode=episode)
    else:
        return []

    r = self.session.get(page_link, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # year scraped from the header table; None when the regex fails
    year = None
    matches = year_re.match(
        str(soup.select_one(
            '#dates_header_br > table div').contents[2]).strip())
    if matches:
        year = int(matches.group(1))
    show_title = str(
        soup.select_one('#dates_header_br > table div u').string).strip()

    subtitles = []
    # loop over episode rows
    for subs_tag in soup.select('table .seeDark,.seeMedium'):
        # read common info
        version = subs_tag.find_all('b')[0].text
        download_link = self.server_url + subs_tag.find('a')['href']
        uploader = subs_tag.find_all('b')[1].text
        # language code is taken from the flag image filename
        language = Language.fromalpha2(
            subs_tag.find('img')['src'].split('/')[-1].split('.')[0])

        subtitle = self.subtitle_class(language, page_link, show_title, year,
                                       version, download_link, uploader)

        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def test_get_matches_movie(movies):
    """The Dune WEB release must add source/resolution/codec/edition matches."""
    subtitle = ArgenteamSubtitle(
        Language.fromalpha2("es"),
        None,
        "https://argenteam.net/subtitles/86024/Dune.Part.One.%282021%29.WEB.H264.1080p-NAISU",
        "WEB H264 1080p",
        {"title", "year", "imdb_id"},
    )
    expected = {
        "title",
        "year",
        "imdb_id",
        "source",
        "resolution",
        "edition",
        "video_codec",
    }
    assert subtitle.get_matches(movies["dune"]) == expected
def test_get_matches_episode(episodes):
    """The Breaking Bad pilot release must match series/episode/release data."""
    subtitle = ArgenteamSubtitle(
        Language.fromalpha2("es"),
        None,
        "https://argenteam.net/subtitles/24002/Breaking.Bad.%282008%29.S01E01-Pilot.BluRay.x264.720p-REWARD",
        "BluRay x264 720p",
        {"title", "season", "episode", "imdb_id"},
    )
    expected = {
        "title",
        "season",
        "episode",
        "imdb_id",
        "source",
        "video_codec",
        "resolution",
        "edition",
        "streaming_service",
        "release_group",
        "series",
        "year",
    }
    assert subtitle.get_matches(episodes["breaking_bad_s01e01"]) == expected
class PodnapisiProvider(_PodnapisiProvider, ProviderSubtitleArchiveMixin):
    """Podnapisi.net provider with forced/hearing-impaired variant support."""

    # Base language set, plus forced and hearing-impaired rebuilds of each.
    languages = ({Language('por', 'BR'), Language('srp', script='Latn'), Language('srp', script='Cyrl')} |
                 {Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    video_types = (Episode, Movie)
    server_url = 'https://podnapisi.net/subtitles/'
    only_foreign = False
    also_foreign = False
    verify_ssl = True
    subtitle_class = PodnapisiSubtitle
    hearing_impaired_verifiable = True

    def __init__(self, only_foreign=False, also_foreign=False, verify_ssl=True):
        """Configure foreign/forced filtering and TLS verification.

        :param only_foreign: return only foreign/forced subtitles.
        :param also_foreign: include foreign/forced in addition to normal.
        :param verify_ssl: passed through to the requests session.
        """
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign
        self.verify_ssl = verify_ssl

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

        super(PodnapisiProvider, self).__init__()

    def initialize(self):
        super().initialize()
        # custom adapter (presumably for TLS tweaks -- defined elsewhere)
        self.session.mount('https://', PodnapisiAdapter())
        self.session.verify = self.verify_ssl

    def list_subtitles(self, video, languages):
        """Try each known title in turn; return results of the first hit."""
        if video.is_special:
            logger.info("%s can't search for specials right now, skipping",
                        self)
            return []

        season = episode = None
        if isinstance(video, Episode):
            titles = [fix_inconsistent_naming(title) for title in
                      [video.series] + video.alternative_series]
            season = video.season
            episode = video.episode
        else:
            titles = [video.title] + video.alternative_titles

        for title in titles:
            subtitles = [s for l in languages for s in
                         self.query(l, title, video, season=season,
                                    episode=episode, year=video.year,
                                    only_foreign=self.only_foreign,
                                    also_foreign=self.also_foreign)]
            if subtitles:
                return subtitles

        return []

    def query(self, language, keyword, video, season=None, episode=None,
              year=None, only_foreign=False, also_foreign=False):
        """Query the paginated legacy XML search endpoint.

        :param language: the exact Language to keep (after forced/hi rebuild).
        :param keyword: search keyword (series or movie title).
        :return: list of subtitles across all pages.
        """
        search_language = str(language).lower()

        # sr-Cyrl specialcase
        if search_language == "sr-cyrl":
            search_language = "sr"

        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': search_language, 'sK': keyword}

        is_episode = False
        if season and episode:
            is_episode = True
            params['sTS'] = season
            params['sTE'] = episode
        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []
        pids = set()
        while True:
            # query the server
            content = None
            try:
                content = self.session.get(self.server_url + 'search/old',
                                           params=params, timeout=10).content
                xml = etree.fromstring(content)
            except etree.ParseError:
                logger.error("Wrong data returned: %r", content)
                break

            # exit if no results
            if not int(xml.find('pagination/results').text):
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for subtitle_xml in xml.findall('subtitle'):
                # read xml elements
                pid = subtitle_xml.find('pid').text
                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in pids:
                    continue

                _language = Language.fromietf(
                    subtitle_xml.find('language').text)
                # flags string encodes variants: 'n' = hearing impaired,
                # 'f' = foreign/forced
                hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
                foreign = 'f' in (subtitle_xml.find('flags').text or '')
                if only_foreign and not foreign:
                    continue

                elif not only_foreign and not also_foreign and foreign:
                    continue

                elif also_foreign and foreign:
                    _language = Language.rebuild(_language, forced=True)

                # set subtitle language to hi if it's hearing_impaired
                if hearing_impaired:
                    _language = Language.rebuild(_language, hi=True)

                # keep only exact language matches (incl. forced/hi state)
                if language != _language:
                    continue

                page_link = subtitle_xml.find('url').text
                releases = []
                if subtitle_xml.find('release').text:
                    for release in subtitle_xml.find('release').text.split():
                        releases.append(re.sub(r'\.+$', '', release))  # remove trailing dots
                title = subtitle_xml.find('title').text
                r_season = int(subtitle_xml.find('tvSeason').text)
                r_episode = int(subtitle_xml.find('tvEpisode').text)
                r_year = int(subtitle_xml.find('year').text)

                if is_episode:
                    subtitle = self.subtitle_class(
                        _language, hearing_impaired, page_link, pid, releases,
                        title, season=r_season, episode=r_episode,
                        year=r_year,
                        asked_for_release_group=video.release_group,
                        asked_for_episode=episode)
                else:
                    subtitle = self.subtitle_class(
                        _language, hearing_impaired, page_link, pid, releases,
                        title, year=r_year,
                        asked_for_release_group=video.release_group)

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
                pids.add(pid)

            # stop on last page
            if int(xml.find('pagination/current').text) >= int(
                    xml.find('pagination/count').text):
                break

            # increment current page
            params['page'] = int(xml.find('pagination/current').text) + 1
            logger.debug('Getting page %d', params['page'])
            xml = None

        return subtitles

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + subtitle.pid + '/download',
                             params={'container': 'zip'}, timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            subtitle.content = self.get_subtitle_from_archive(subtitle, zf)
class SubdivxSubtitlesProvider(Provider):
    """Provider for subdivx.com Spanish subtitles (Castilian + Latin American)."""

    provider_name = "subdivx"
    hash_verifiable = False
    languages = {Language("spa", "MX")} | {Language.fromalpha2("es")}
    video_types = (Episode, Movie)
    subtitle_class = SubdivxSubtitle
    multi_result_throttle = 2  # seconds between result-page requests
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"
        self.session.cookies.update({"iduser_cookie": _IDUSER_COOKIE})

    def terminate(self):
        self.session.close()

    def query(self, video, languages):
        """Search subdivx and collect subtitles from every result page."""
        if isinstance(video, Episode):
            query = f"{video.series} S{video.season:02}E{video.episode:02}"
        else:
            # Subdvix has problems searching foreign movies if the year is
            # appended. A proper solution would be filtering results with the
            # year in self._parse_subtitles_page.
            query = video.title

        params = {
            "buscar2": query,
            "accion": "5",
            "masdesc": "",
            "subtitulos": "1",
            "realiza_b": "1",
            # FIX: page number must be an int. It was the string "1", so the
            # `params["pg"] += 1` below raised TypeError as soon as a second
            # result page had to be fetched (>= 100 results).
            "pg": 1,
        }
        logger.debug(f"Searching subtitles: {query}")
        subtitles = []
        language = self.language_list[0]
        search_link = f"{_SERVER_URL}/index.php"
        while True:
            response = self.session.get(search_link, params=params,
                                        allow_redirects=True, timeout=20)

            try:
                page_subtitles = self._parse_subtitles_page(
                    video, response, language)
            except Exception as e:
                logger.error(f"Error parsing subtitles list: {e}")
                break

            subtitles += page_subtitles

            if len(page_subtitles) < 100:
                break  # this is the last page

            params["pg"] += 1  # search next page
            time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        # download the subtitle
        logger.info("Downloading subtitle %r", subtitle)

        # download zip / rar file with the subtitle
        response = self.session.get(
            subtitle.download_url,
            headers={"Referer": subtitle.page_link},
            timeout=30,
        )
        response.raise_for_status()

        # open the compressed archive
        archive = _get_archive(response.content)

        # extract the subtitle
        subtitle_content = _get_subtitle_from_archive(archive, subtitle)

        subtitle.content = fix_line_ending(subtitle_content)

    def _parse_subtitles_page(self, video, response, language):
        """Parse one results page: parallel header/detail div lists by index.

        :param language: default language; shadowed per result below.
        """
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode("utf-8", "ignore"),
            ["lxml", "html.parser"])
        title_soups = page_soup.find_all("div",
                                         {"id": "menu_detalle_buscador"})
        body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})
        episode = isinstance(video, Episode)

        for subtitle in range(0, len(title_soups)):
            title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]
            # title
            title = _clean_title(title_soup.find("a").text)

            # Forced subtitles are not supported
            if title.lower().rstrip().endswith(("forzado", "forzados")):
                logger.debug("Skipping forced subtitles: %s", title)
                continue

            # Check movie title (if the video is a movie)
            if not episode and not _check_movie(video, title):
                continue

            # Data
            datos = body_soup.find("div",
                                   {"id": "buscador_detalle_sub_datos"}).text
            # Ignore multi-disc and non-srt subtitles
            if not any(item in datos for item in ("Cds:</b> 1", "SubRip")):
                continue

            # Spanish flag icon marks Castilian; otherwise Latin American
            spain = "/pais/7.gif" in datos
            language = Language.fromalpha2("es") if spain else Language(
                "spa", "MX")

            # description
            sub_details = body_soup.find("div",
                                         {"id": "buscador_detalle_sub"}).text
            description = sub_details.replace(",", " ")

            # uploader
            uploader = body_soup.find("a", {"class": "link1"}).text
            download_url = _get_download_url(body_soup)
            page_link = title_soup.find("a")["href"]

            subtitle = self.subtitle_class(language, video, page_link, title,
                                           description, uploader,
                                           download_url)

            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

        return subtitles
def _translateLanguageCodeToLanguage(languageCode):
    """Map a provider tag to a Language: 'vo' -> English, 'vf' -> French.

    Any other tag falls through and returns None, as before.
    """
    normalized = languageCode.lower()
    if normalized == 'vo':
        return Language.fromalpha2('en')
    if normalized == 'vf':
        return Language.fromalpha2('fr')
class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
    """Provider for argenteam.net Spanish subtitles via its JSON API."""

    provider_name = 'argenteam'
    languages = {Language.fromalpha2(l) for l in ['es']}
    video_types = (Episode, Movie)
    BASE_URL = "http://www.argenteam.net/"
    API_URL = BASE_URL + "api/v1/"
    subtitle_class = ArgenteamSubtitle
    hearing_impaired_verifiable = False
    language_list = list(languages)

    multi_result_throttle = 2  # seconds

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        self.session.close()

    def search_ids(self, title, year=None, imdb_id=None, season=None,
                   episode=None, titles=None):
        """Search movie or episode id from the `title`, `season` and `episode`.

        :param imdb_id: imdb id of the given movie
        :param titles: all titles of the given series or movie
        :param year: release year of the given movie
        :param str title: series of the episode or movie name
        :param int season: season of the episode.
        :param int episode: episode number.
        :return: list of ids
        :rtype: list
        """
        # make the search
        query = title
        titles = titles or []

        is_episode = False
        if season and episode:
            is_episode = True
            query = '%s S%#02dE%#02d' % (title, season, episode)

        logger.info(u'Searching %s ID for %r',
                    "episode" if is_episode else "movie", query)
        r = self.session.get(self.API_URL + 'search', params={'q': query},
                             timeout=10)
        r.raise_for_status()
        results = r.json()
        match_ids = []
        if results['total'] >= 1:
            for result in results["results"]:
                # skip results of the wrong kind (movie vs episode)
                if (result['type'] == "episode" and not is_episode) or (
                        result['type'] == "movie" and is_episode):
                    continue

                # shortcut in case of matching imdb id
                if not is_episode and imdb_id and "imdb" in result and \
                        "tt%s" % result["imdb"] == str(imdb_id):
                    logger.debug(
                        "Movie matched by IMDB ID %s, taking shortcut",
                        imdb_id)
                    match_ids = [result['id']]
                    break

                # advanced title check in case of multiple movie results
                if results['total'] > 1:
                    if not is_episode and year:
                        # keep only results whose sanitized title matches
                        # "<known title> <year>" for one of the known titles
                        if result["title"] and not (
                                sanitize(result["title"]) in
                                (u"%s %s" % (sanitize(name), year)
                                 for name in titles)):
                            continue

                match_ids.append(result['id'])
        else:
            logger.error(u'No episode ID found for %r', query)

        if match_ids:
            logger.debug(u"Found matching IDs: %s",
                         ", ".join(str(id) for id in match_ids))

        return match_ids

    def query(self, title, video, titles=None):
        """Fetch subtitles for all argenteam ids matching *title*/*video*."""
        is_episode = isinstance(video, Episode)
        season = episode = None
        url = self.API_URL + 'movie'
        if is_episode:
            season = video.season
            episode = video.episode
            url = self.API_URL + 'episode'
            argenteam_ids = self.search_ids(title, season=season,
                                            episode=episode, titles=titles)

        else:
            argenteam_ids = self.search_ids(title, year=video.year,
                                            imdb_id=video.imdb_id,
                                            titles=titles)

        if not argenteam_ids:
            return []

        language = self.language_list[0]
        subtitles = []
        has_multiple_ids = len(argenteam_ids) > 1
        for aid in argenteam_ids:
            response = self.session.get(url, params={'id': aid}, timeout=10)

            response.raise_for_status()
            content = response.json()

            imdb_id = year = None
            returned_title = title
            # movie metadata lives under "info" for movie lookups
            if not is_episode and "info" in content:
                imdb_id = content["info"].get("imdb")
                year = content["info"].get("year")
                returned_title = content["info"].get("title", title)

            for r in content['releases']:
                for s in r['subtitles']:
                    movie_kind = "episode" if is_episode else "movie"
                    page_link = self.BASE_URL + movie_kind + "/" + str(aid)
                    sub = ArgenteamSubtitle(
                        language, page_link, s['uri'], movie_kind,
                        returned_title, season, episode, year, r.get('team'),
                        r.get('tags'), r.get('source'), r.get('codec'),
                        content.get("tvdb"), imdb_id,
                        asked_for_release_group=video.release_group,
                        asked_for_episode=episode)
                    subtitles.append(sub)

            # be polite to the API when iterating several ids
            if has_multiple_ids:
                time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Try each known title; return the first non-empty query result."""
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        else:
            titles = [video.title] + video.alternative_titles

        for title in titles:
            subs = self.query(title, video, titles=titles)
            if subs:
                return subs

            time.sleep(self.multi_result_throttle)

        return []

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            subtitle.content = self.get_subtitle_from_archive(subtitle, zf)
def test_list_subtitles_only_forced(video_single_language):
    """A forced-only English request yields nothing for this video."""
    forced_en = Language.rebuild(Language.fromalpha2("en"), forced=True)
    with EmbeddedSubtitlesProvider() as provider:
        found = provider.list_subtitles(video_single_language, {forced_en})
        assert len(found) == 0
def query(self, show_id, series, season, year=None, country=None):
    """List Greek subtitles for one season of a show.

    Resolves the provider-internal season id first, then walks the
    season's subtitle groups.
    """
    # get the season list of the show
    logger.info('Getting the season list of show id %d', show_id)
    r = self.session.get(self.server_url + self.series_url.format(show_id),
                         timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    series = soup.find('name').text

    # loop over season rows to map the season number to the provider's
    # internal season id
    seasons = soup.findAll('series_group')
    season_id = None

    for season_row in seasons:
        try:
            parsed_season = int(season_row['ssnnum'])
            if parsed_season == season:
                season_id = int(season_row['ssnid'])
                break
        except (ValueError, TypeError):
            continue

    if season_id is None:
        logger.debug('Season not found in provider')
        return []

    # get the subtitle list of the season
    logger.info('Getting the subtitle list of season %d', season)
    r = self.session.get(
        self.server_url + self.season_url.format(show_id=show_id,
                                                 season=season_id),
        timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    subtitles = []
    # loop over episode rows
    for subtitle_group in soup.findAll('subg'):
        # read the episode info
        episode_info = subtitle_group.find('etitle')
        if episode_info is None:
            continue

        # an entry may span a range of episodes, e.g. "1-2"
        episodes = []
        episode_match = episode_re.match(episode_info['number'])
        if episode_match:
            episodes = [
                int(e)
                for e in [episode_match.group(1), episode_match.group(3)]
                if e
            ]

        subtitle_info = subtitle_group.find('sgt')
        if subtitle_info is None:
            continue

        # NOTE: the `season` parameter is rebound here to the value parsed
        # from the subtitle group.
        season = int(subtitle_info['ssnnum'])
        episode_id = int(subtitle_info['epsid'])

        # filter out unreleased subtitles
        for subs_tag in subtitle_group.findAll('sr'):
            if subs_tag['published_on'] == '':
                continue

            page_link = self.server_url + self.page_link.format(
                show_id=show_id, season_id=season_id, season=season,
                episode=episode_id)
            title = episode_info['title']
            version = subs_tag.fmt.text + ' ' + subs_tag.team.text
            download_link = self.server_url + self.download_link.format(
                int(subs_tag['rlsid']))

            # one subtitle object per episode in the parsed range
            for episode in episodes:
                subtitle = self.subtitle_class(Language.fromalpha2('el'),
                                               page_link, series, season,
                                               episode, year, title, version,
                                               download_link)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

    return subtitles
def test_download_subtitle_single(video_single_language):
    """Downloading the first English subtitle must populate its content."""
    with EmbeddedSubtitlesProvider() as provider:
        first = provider.list_subtitles(
            video_single_language, {Language.fromalpha2("en")})[0]
        provider.download_subtitle(first)
        assert first.content is not None
def test_list_subtitles_wo_srt(video_multiple_languages):
    """With SRT streams excluded, the multi-language video yields nothing."""
    with EmbeddedSubtitlesProvider(include_srt=False) as provider:
        found = provider.list_subtitles(video_multiple_languages,
                                        {Language.fromalpha2("en")})
        assert not found
def test_list_subtitles_wo_ass(video_single_language):
    """With ASS streams excluded, the single-language video yields nothing."""
    with EmbeddedSubtitlesProvider(include_ass=False) as provider:
        found = provider.list_subtitles(video_single_language,
                                        {Language.fromalpha2("en")})
        assert not found
class SuchaProvider(Provider):
    """Sucha Provider: Spanish subtitles fetched from a JSON API."""

    languages = {Language.fromalpha2(l) for l in ["es"]}
    language_list = list(languages)
    video_types = (Episode, Movie)

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        self.session.close()

    def query(self, languages, video):
        """Query the API for *video* and score simple metadata matches."""
        movie_year = video.year if video.year else "0"
        is_episode = isinstance(video, Episode)
        language = self.language_list[0]
        if is_episode:
            q = {
                "query":
                "{} S{:02}E{:02}".format(video.series, video.season,
                                         video.episode)
            }
        else:
            q = {"query": video.title, "year": movie_year}
        logger.debug("Searching subtitles: {}".format(q["query"]))
        res = self.session.get(
            server_url + ("episode" if is_episode else "movie"), params=q,
            timeout=10)
        res.raise_for_status()
        result = res.json()

        subtitles = []
        for i in result:
            matches = set()
            try:
                if (video.title.lower() in i["title"].lower()
                        or video.title.lower() in i["alt_title"].lower()):
                    matches.add("title")
            except TypeError:
                logger.debug("No subtitles found")
                return []

            if is_episode:
                if (q["query"].lower() in i["title"].lower()
                        or q["query"].lower() in i["alt_title"].lower()):
                    matches.add("title")
                    matches.add("series")
                    matches.add("season")
                    matches.add("episode")
                    matches.add("year")

            # FIX: compare both sides as strings -- video.year is an int,
            # so the old `str(i["year"]) == video.year` was never true and
            # the year match could not be awarded here.
            if str(i["year"]) == str(video.year):
                matches.add("year")

            subtitles.append(
                SuchaSubtitle(
                    language,
                    i["release"],
                    i["filename"],
                    str(i["id"]),
                    "episode" if is_episode else "movie",
                    matches,
                ))
        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def _check_response(self, response):
        # the API signals failure with non-200 status codes
        if response.status_code != 200:
            raise ServiceUnavailable("Bad status code: " +
                                     str(response.status_code))

    def _get_archive(self, content):
        """Wrap downloaded bytes in a rar/zip archive reader."""
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled("Unsupported compressed format")
        return archive

    def get_file(self, archive):
        """Return the first plausible subtitle member of *archive*.

        Skips hidden files, non-subtitle extensions and English-tagged
        entries (this provider serves Spanish subtitles).
        """
        for name in archive.namelist():
            if os.path.split(name)[-1].startswith("."):
                continue

            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            if ("[eng]" in name.lower() or ".en." in name.lower()
                    or ".eng." in name.lower()):
                continue

            logger.debug("Returning from archive: {}".format(name))
            return archive.read(name)

        raise APIThrottled("Can not find the subtitle in the compressed file")

    def download_subtitle(self, subtitle):
        logger.info("Downloading subtitle %r", subtitle)
        response = self.session.get(
            server_url + "download",
            params={
                "id": subtitle.download_id,
                "type": subtitle.download_type
            },
            timeout=10,
        )
        response.raise_for_status()
        self._check_response(response)
        archive = self._get_archive(response.content)
        subtitle_file = self.get_file(archive)
        subtitle.content = fix_line_ending(subtitle_file)
class SubdivxSubtitlesProvider(Provider):
    """Subdivx.com provider: scrapes the search pages for Spanish subtitles
    and downloads them as zip/rar archives."""

    provider_name = "subdivx"
    hash_verifiable = False
    # Spanish only.
    languages = {Language.fromalpha2(lang) for lang in ["es"]}
    subtitle_class = SubdivxSubtitle
    server_url = "https://www.subdivx.com/"
    # Seconds to sleep between result pages to avoid hammering the site.
    multi_result_throttle = 2
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        """Create the HTTP session used for all requests."""
        self.session = Session()
        self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, video, languages):
        """Search subdivx and return all parsed subtitles across pages."""
        if isinstance(video, Episode):
            query = f"{video.series} S{video.season:02}E{video.episode:02}"
        else:
            # Subdvix has problems searching foreign movies if the year is
            # appended. A proper solution would be filtering results with the
            # year in self._parse_subtitles_page.
            query = video.title

        params = {
            "q": query,  # search string
            "accion": 5,  # action search
            "oxdown": 1,  # order by downloads descending
            "pg": 1,  # page 1
        }

        logger.debug(f"Searching subtitles: {query}")
        subtitles = []
        language = self.language_list[0]
        search_link = self.server_url + "index.php"
        while True:
            response = self.session.get(search_link, params=params, timeout=20)
            self._check_response(response)

            try:
                page_subtitles = self._parse_subtitles_page(
                    video, response, language)
            except Exception as e:
                # Best effort: a parse failure ends pagination but keeps
                # whatever was collected so far.
                logger.error(f"Error parsing subtitles list: {e}")
                break

            subtitles += page_subtitles

            # NOTE(review): assumes 100 results per page — a short page
            # means this was the last one; TODO confirm against the site.
            if len(page_subtitles) < 100:
                break  # this is the last page

            params["pg"] += 1  # search next page
            time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Provider entry point: delegate to :meth:`query`."""
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        """Resolve the download link, fetch the archive and fill
        ``subtitle.content`` with the extracted, line-ending-fixed text."""
        if isinstance(subtitle, SubdivxSubtitle):
            # download the subtitle
            logger.info("Downloading subtitle %r", subtitle)

            # get download link
            download_link = self._get_download_link(subtitle)

            # download zip / rar file with the subtitle
            response = self.session.get(
                self.server_url + download_link,
                headers={"Referer": subtitle.page_link},
                timeout=30,
            )
            self._check_response(response)

            # open the compressed archive
            archive = self._get_archive(response.content)

            # extract the subtitle
            subtitle_content = self._get_subtitle_from_archive(
                archive, subtitle)
            subtitle.content = fix_line_ending(subtitle_content)

    def _parse_subtitles_page(self, video, response, language):
        """Parse one search-results page into ``SubdivxSubtitle`` objects.

        Title and description divs appear in two parallel lists that are
        matched up by index.
        """
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode("utf-8", "ignore"),
            ["lxml", "html.parser"])
        title_soups = page_soup.find_all("div",
                                         {"id": "menu_detalle_buscador"})
        body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})

        for subtitle in range(0, len(title_soups)):
            title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]

            # title
            title = title_soup.find("a").text.replace("Subtitulos de ", "")
            # filter by year
            if video.year and str(video.year) not in title:
                continue
            page_link = title_soup.find("a")["href"]

            # description
            description = body_soup.find("div", {
                "id": "buscador_detalle_sub"
            }).text
            # Commas removed so later release-group matching on the lowered
            # text is not broken by punctuation.
            description = description.replace(",", " ").lower()

            # uploader
            uploader = body_soup.find("a", {"class": "link1"}).text
            subtitle = self.subtitle_class(language, video, page_link, title,
                                           description, uploader)

            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _get_download_link(self, subtitle):
        """Scrape the subtitle's page for its download URL.

        Tries 'detalle_link' anchors first (relative 'bajar...' links),
        then 'link1' anchors containing 'bajar.php' (absolute links).

        :raises APIThrottled: on parse failure or when no link is found.
        """
        response = self.session.get(subtitle.page_link, timeout=20)
        self._check_response(response)
        try:
            page_soup = ParserBeautifulSoup(
                response.content.decode("utf-8", "ignore"),
                ["lxml", "html.parser"])
            links_soup = page_soup.find_all("a", {"class": "detalle_link"})
            for link_soup in links_soup:
                if link_soup["href"].startswith("bajar"):
                    return self.server_url + link_soup["href"]
            links_soup = page_soup.find_all("a", {"class": "link1"})
            for link_soup in links_soup:
                if "bajar.php" in link_soup["href"]:
                    return link_soup["href"]
        except Exception as e:
            raise APIThrottled(f"Error parsing download link: {e}")

        raise APIThrottled("Download link not found")

    @staticmethod
    def _check_response(response):
        """Raise :class:`ServiceUnavailable` on any non-200 status."""
        if response.status_code != 200:
            raise ServiceUnavailable(
                f"Bad status code: {response.status_code}")

    @staticmethod
    def _get_archive(content):
        """Wrap raw download bytes in a rar/zip archive object.

        :raises APIThrottled: when the payload is neither rar nor zip.
        """
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled("Unsupported compressed format")

        return archive

    @staticmethod
    def _get_subtitle_from_archive(archive, subtitle):
        """Pick the best subtitle file inside *archive* for *subtitle.video*.

        Single-file archives are returned directly; season packs are scored
        with guessit matches against the video, highest score wins.

        :raises APIThrottled: when no candidate scores above zero.
        """
        _valid_names = []
        for name in archive.namelist():
            # discard hidden files
            # discard non-subtitle files
            if not os.path.split(name)[-1].startswith(".") and name.lower(
            ).endswith(SUBTITLE_EXTENSIONS):
                _valid_names.append(name)

        # archive with only 1 subtitle
        if len(_valid_names) == 1:
            logger.debug(
                f"returning from archive: {_valid_names[0]} (single subtitle file)"
            )
            return archive.read(_valid_names[0])

        # in archives with more than 1 subtitle (season pack) we try to guess the best subtitle file
        _scores = get_scores(subtitle.video)
        _max_score = 0
        _max_name = ""
        for name in _valid_names:
            _guess = guessit(name)
            # Default to -1 so members without S/E info never match a real
            # episode below.
            if "season" not in _guess:
                _guess["season"] = -1
            if "episode" not in _guess:
                _guess["episode"] = -1

            if isinstance(subtitle.video, Episode):
                logger.debug("guessing %s" % name)
                logger.debug(
                    f"subtitle S{_guess['season']}E{_guess['episode']} video "
                    f"S{subtitle.video.season}E{subtitle.video.episode}")

                if (subtitle.video.episode != _guess["episode"]
                        or subtitle.video.season != _guess["season"]):
                    logger.debug("subtitle does not match video, skipping")
                    continue

            matches = set()
            matches |= guess_matches(subtitle.video, _guess)
            _score = sum((_scores.get(match, 0) for match in matches))
            logger.debug("srt matches: %s, score %d" % (matches, _score))
            if _score > _max_score:
                _max_score = _score
                _max_name = name
                logger.debug(f"new max: {name} {_score}")

        if _max_score > 0:
            logger.debug(
                f"returning from archive: {_max_name} scored {_max_score}")
            return archive.read(_max_name)

        raise APIThrottled("Can not find the subtitle in the compressed file")
class SuchaProvider(Provider):
    """Sucha Provider.

    Queries the Sucha ``search`` endpoint (``server_url`` defined at module
    level) for Spanish subtitles, preferring IMDB-id queries for movies.
    """

    # Provider advertises Spanish only.
    languages = {Language.fromalpha2(l) for l in ["es"]}
    language_list = list(languages)
    # FIX: removed stray `logger.debug(languages)` that executed at class
    # definition (import) time — leftover debugging, not provider logic.
    video_types = (Episode, Movie)

    def initialize(self):
        """Create the HTTP session used for all requests."""
        self.session = Session()
        self.session.headers = {
            "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, languages, video):
        """Search the Sucha API and return a list of ``SuchaSubtitle``.

        :param languages: requested languages (unused; provider is es-only).
        :param video: the :class:`Episode` or :class:`Movie` being searched.
        """
        movie_year = video.year if video.year else None
        # FIX: was `True if isinstance(...) else False` — isinstance already
        # returns a bool.
        is_episode = isinstance(video, Episode)
        imdb_id = video.imdb_id if video.imdb_id else None
        language = self.language_list[0]
        if is_episode:
            q = {
                "query":
                    "{} S{:02}E{:02}".format(video.series, video.season,
                                             video.episode)
            }
        else:
            # Prefer the IMDB id for movies; fall back to title + year.
            if imdb_id:
                q = {"query": imdb_id}
            else:
                q = {"query": video.title, "year": movie_year}
        logger.debug("Searching subtitles: {}".format(q["query"]))
        res = self.session.get(server_url + "search", params=q, timeout=10)
        res.raise_for_status()
        result = res.json()
        try:
            subtitles = []
            for i in result["results"]:
                matches = set()
                # 'in' instead of '==' because remote titles are irregular.
                if video.title.lower() in i["title"].lower():
                    matches.add("title")
                if is_episode:
                    if q["query"].lower() in i["title"].lower():
                        matches.add("title")
                        matches.add("series")
                        matches.add("imdb_id")
                        matches.add("season")
                        matches.add("episode")
                        matches.add("year")
                if i["year"] == video.year:
                    matches.add("year")
                if imdb_id:
                    matches.add("imdb_id")

                # We'll add release group info (if found) to the pseudo filename
                # in order to show it in the manual search
                filename = i["pseudo_file"]
                if (video.release_group
                        and str(video.release_group).lower()
                        in i["original_description"]):
                    filename = i["pseudo_file"].replace(
                        ".es.srt", "-" + str(video.release_group) + ".es.srt")

                subtitles.append(
                    SuchaSubtitle(
                        language,
                        i["referer"],
                        filename,
                        i["guessit"],
                        i["download_url"],
                        i["hearing_impaired"],
                        matches,
                    ))
            return subtitles
        except KeyError:
            # Missing "results" (or a result field) means no subtitles.
            logger.debug("No subtitles found")
            return []

    def list_subtitles(self, video, languages):
        """Provider entry point: delegate to :meth:`query`."""
        return self.query(languages, video)

    def _check_response(self, response):
        """Raise :class:`ServiceUnavailable` on any non-200 status."""
        if response.status_code != 200:
            raise ServiceUnavailable("Bad status code: " +
                                     str(response.status_code))

    def _get_archive(self, content):
        """Wrap raw download bytes in a rar/zip archive object.

        :raises APIThrottled: when the payload is neither rar nor zip.
        """
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled("Unsupported compressed format")
        return archive

    def get_file(self, archive):
        """Return the bytes of the first plausible subtitle in *archive*.

        Skips hidden files, non-subtitle extensions and members whose name
        suggests an English track (this is a Spanish provider).

        :raises APIThrottled: when no suitable member is found.
        """
        for name in archive.namelist():
            if os.path.split(name)[-1].startswith("."):
                continue
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue
            if ("[eng]" in name.lower() or ".en." in name.lower()
                    or ".eng." in name.lower()):
                continue
            logger.debug("Returning from archive: {}".format(name))
            return archive.read(name)
        raise APIThrottled("Can not find the subtitle in the compressed file")

    def download_subtitle(self, subtitle):
        """Download the archive for *subtitle* and fill ``subtitle.content``."""
        logger.info("Downloading subtitle %r", subtitle)
        response = self.session.get(subtitle.download_link,
                                    headers={"Referer": subtitle.page_link},
                                    timeout=10)
        response.raise_for_status()
        # NOTE(review): redundant after raise_for_status(), kept for parity
        # with the original control flow.
        self._check_response(response)
        archive = self._get_archive(response.content)
        subtitle_file = self.get_file(archive)
        subtitle.content = fix_line_ending(subtitle_file)
class SubdivxSubtitlesProvider(Provider):
    """Subdivx.com provider: scrapes the search pages for Spanish subtitles
    and downloads them as zip/rar archives."""

    provider_name = 'subdivx'
    hash_verifiable = False
    # Spanish only.
    languages = {Language.fromalpha2(l) for l in ['es']}
    subtitle_class = SubdivxSubtitle
    server_url = 'https://www.subdivx.com/'
    # Seconds to sleep between result pages to avoid hammering the site.
    multi_result_throttle = 2
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        """Create the HTTP session used for all requests."""
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(
            __short_version__)

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, video, languages):
        """Search subdivx and return all parsed subtitles across pages."""
        if isinstance(video, Episode):
            query = "{} S{:02d}E{:02d}".format(video.series, video.season,
                                               video.episode)
        else:
            query = video.title
            if video.year:
                query += ' {:4d}'.format(video.year)

        params = {
            'q': query,  # search string
            'accion': 5,  # action search
            'oxdown': 1,  # order by downloads descending
            'pg': 1  # page 1
        }

        logger.debug('Searching subtitles %r', query)
        subtitles = []
        language = self.language_list[0]
        search_link = self.server_url + 'index.php'
        while True:
            response = self.session.get(search_link, params=params,
                                        timeout=20)
            self._check_response(response)

            try:
                page_subtitles = self._parse_subtitles_page(
                    video, response, language)
            except Exception as e:
                # Best effort: a parse failure ends pagination but keeps
                # whatever was collected so far.
                logger.error('Error parsing subtitles list: ' + str(e))
                break

            subtitles += page_subtitles

            # NOTE(review): assumes 20 results per page — a short page means
            # this was the last one.
            if len(page_subtitles) < 20:
                break  # this is the last page

            params['pg'] += 1  # search next page
            time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Provider entry point: delegate to :meth:`query`."""
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        """Resolve the download link, fetch the archive and fill
        ``subtitle.content`` with the extracted, line-ending-fixed text."""
        if isinstance(subtitle, SubdivxSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)

            # get download link
            download_link = self._get_download_link(subtitle)

            # download zip / rar file with the subtitle
            response = self.session.get(
                download_link,
                headers={'Referer': subtitle.page_link},
                timeout=30)
            self._check_response(response)

            # open the compressed archive
            archive = self._get_archive(response.content)

            # extract the subtitle
            subtitle_content = self._get_subtitle_from_archive(
                archive, subtitle)
            subtitle.content = fix_line_ending(subtitle_content)

    def _check_response(self, response):
        """Raise :class:`ServiceUnavailable` on any non-200 status."""
        if response.status_code != 200:
            raise ServiceUnavailable('Bad status code: ' +
                                     str(response.status_code))

    def _parse_subtitles_page(self, video, response, language):
        """Parse one search-results page into ``SubdivxSubtitle`` objects.

        Title and description divs appear in two parallel lists that are
        matched up by index.
        """
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode('iso-8859-1', 'ignore'),
            ['lxml', 'html.parser'])
        title_soups = page_soup.find_all("div",
                                         {'id': 'menu_detalle_buscador'})
        body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})

        for subtitle in range(0, len(title_soups)):
            title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]

            # title
            title = title_soup.find("a").text.replace("Subtitulos de ", "")
            page_link = title_soup.find("a")["href"]

            # description
            description = body_soup.find("div", {
                'id': 'buscador_detalle_sub'
            }).text
            description = description.replace(",", " ").lower()

            # uploader
            uploader = body_soup.find("a", {'class': 'link1'}).text
            subtitle = self.subtitle_class(language, video, page_link, title,
                                           description, uploader)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _get_download_link(self, subtitle):
        """Scrape the subtitle's page for its download URL.

        :raises APIThrottled: on parse failure or when no link is found.
        """
        response = self.session.get(subtitle.page_link, timeout=20)
        self._check_response(response)
        try:
            page_soup = ParserBeautifulSoup(
                response.content.decode('iso-8859-1', 'ignore'),
                ['lxml', 'html.parser'])
            links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
            for link_soup in links_soup:
                if link_soup['href'].startswith('bajar'):
                    return self.server_url + link_soup['href']
            links_soup = page_soup.find_all("a", {'class': 'link1'})
            for link_soup in links_soup:
                if "bajar.php" in link_soup['href']:
                    return link_soup['href']
        except Exception as e:
            raise APIThrottled('Error parsing download link: ' + str(e))
        raise APIThrottled('Download link not found')

    def _get_archive(self, content):
        """Wrap raw download bytes in a rar/zip archive object.

        :raises APIThrottled: when the payload is neither rar nor zip.
        """
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled('Unsupported compressed format')
        return archive

    def _get_subtitle_from_archive(self, archive, subtitle):
        """Pick the best subtitle file inside *archive* for *subtitle.video*
        by scoring guessit matches; highest score wins.

        :raises APIThrottled: when no candidate scores above zero.
        """
        _max_score = 0
        _scores = get_scores(subtitle.video)
        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            _guess = guessit(name)
            if isinstance(subtitle.video, Episode):
                # FIX: guessit may return no 'season'/'episode' key at all
                # (e.g. movie-style names inside a pack); indexing directly
                # raised KeyError. Default to -1 so such members simply never
                # match, consistent with the other provider variant.
                _guess_season = _guess.get('season', -1)
                _guess_episode = _guess.get('episode', -1)
                logger.debug("guessing %s" % name)
                logger.debug("subtitle S{}E{} video S{}E{}".format(
                    _guess_season, _guess_episode, subtitle.video.season,
                    subtitle.video.episode))

                if subtitle.video.episode != _guess_episode \
                        or subtitle.video.season != _guess_season:
                    logger.debug('subtitle does not match video, skipping')
                    continue

            matches = set()
            matches |= guess_matches(subtitle.video, _guess)
            _score = sum((_scores.get(match, 0) for match in matches))
            logger.debug('srt matches: %s, score %d' % (matches, _score))
            if _score > _max_score:
                _max_name = name
                _max_score = _score
                logger.debug("new max: {} {}".format(name, _score))

        if _max_score > 0:
            logger.debug("returning from archive: {} scored {}".format(
                _max_name, _max_score))
            return archive.read(_max_name)

        raise APIThrottled('Can not find the subtitle in the compressed file')
class NekurProvider(Provider, ProviderSubtitleArchiveMixin):
    """Nekur Provider.

    Scrapes subtitri.nekur.net (Latvian subtitles) via its AJAX search
    endpoint; movie-only (episodes produce an empty title list below).
    """

    subtitle_class = NekurSubtitle
    # Advertise both the 'lva' country-specific tag and plain 'lv'.
    languages = {Language('lva', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
    server_url = 'http://subtitri.nekur.net/'
    search_url = server_url + 'modules/Subtitles.php'

    def __init__(self):
        self.session = None

    def initialize(self):
        """Create the HTTP session with a random User-Agent and a Referer."""
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, title):
        """POST *title* to the AJAX search endpoint and parse the result
        table rows into ``NekurSubtitle`` objects."""
        subtitles = []

        data = {
            'ajax': '1',
            'sSearch': title,
        }

        r = self.session.post(self.search_url, data=data, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('tbody > tr')
        for row in rows:
            # title (direct text nodes of the anchor only, ignoring child tags)
            title_anchor_el = row.select_one('.title > a')
            title_inner_text = [element for element in title_anchor_el if isinstance(element, NavigableString)]
            title = title_inner_text[0].strip()

            # year, displayed as "(YYYY)" — parentheses stripped
            year = row.select_one('.year').text.strip('()')

            # download link
            href = title_anchor_el.get('href')
            download_link = self.server_url + href

            # imdb id (second-to-last path segment of the IMDB URL)
            imdb_td = row.select_one('td:nth-of-type(4)')
            imdb_link = imdb_td.select_one('a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # fps
            fps = row.select_one('.fps').text.strip()

            # additional notes
            notes = row.select_one('.notes').text.strip()

            # page link = download link (there is no separate subtitle page link)
            page_link = download_link

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link, download_link, title, year, imdb_id, fps, notes)

            logger.debug('nekur: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Provider entry point.

        Only movies are searched (episodes yield an empty title list and
        hence an empty result).
        """
        if isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles
        for title in titles:
            if isinstance(video, Movie):
                subtitles += [s for s in self.query(title) if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        """Download the subtitle payload; archives are unpacked via the
        mixin, while a bare (non-archive) payload is accepted only if it
        validates as a subtitle."""
        if isinstance(subtitle, NekurSubtitle):
            # download the subtitle
            r = self.session.get(subtitle.download_link, timeout=10)
            r.raise_for_status()

            # open the archive
            archive_stream = io.BytesIO(r.content)
            if is_rarfile(archive_stream):
                archive = RarFile(archive_stream)
            elif is_zipfile(archive_stream):
                archive = ZipFile(archive_stream)
            else:
                # not an archive: accept the raw payload if it parses as a
                # valid subtitle, otherwise reset and fail
                subtitle.content = r.content
                if subtitle.is_valid():
                    return
                subtitle.content = None

                raise ProviderError('Unidentified archive type')

            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
class NapiProjektProvider(_NapiProjektProvider):
    """NapiProjekt provider (catalog-scraping variant).

    Unlike the hash-based variant (see file HEAD), this one scrapes the
    napiprojekt.pl web catalog, filters candidates by media duration
    (probed with ffprobe), and downloads by subtitle id.
    """

    languages = {Language.fromalpha2(l) for l in ['pl']}
    subtitle_class = NapiProjektSubtitle
    required_hash = 'napiprojekt'
    server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'

    def query(self, language, subq):
        # Pass-through: *subq* is already a constructed subtitle; this only
        # logs it. Kept for symmetry with the hash-based provider's query().
        subtitle = subq
        logger.debug('Found subtitle %r', subtitle)
        return subtitle

    def get_length(self, filename):
        """Return the duration of *filename* in seconds via ffprobe.

        NOTE(review): requires ffprobe on PATH and raises ValueError if
        ffprobe prints anything non-numeric (stderr is merged into stdout).
        """
        result = subprocess.run(["ffprobe", "-v", "error", "-show_entries",
                                 "format=duration", "-of",
                                 "default=noprint_wrappers=1:nokey=1", filename],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        return float(result.stdout)

    def list_subtitles(self, video, languages):
        """Scrape the napiprojekt catalog for subtitles matching *video*.

        Candidates within +/- 60 s of the probed media duration are kept and
        sorted by closeness of duration.

        NOTE(review): returns None (not []) when the catalog search finds
        nothing — callers expecting a list should be checked. Also uses the
        module-level ``requests`` directly instead of ``self.session``.
        """
        season = episode = None
        year=video.year
        duration = self.get_length(video.original_path)
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
            v_type = "series"
        else:
            title = video.title
            v_type = "movie"
        subs = []
        # First: find the title's catalog page via the AJAX search.
        url = 'https://www.napiprojekt.pl/ajax/search_catalog.php'
        # NOTE(review): the '&'-prefixed keys look like a mistake carried
        # over from a query string, but the site apparently accepts them.
        req = {'queryString': title, '&queryKind': v_type, '&queryYear': year, '&associate': ''}
        searchsub = requests.post(url, data = req)
        soup2 = BeautifulSoup(searchsub.text, 'html.parser')
        result = soup2.find('a', {'class': 'movieTitleCat'})
        if result:
            # Rewrite the catalog URL into the subtitles-listing URL
            # ("napisy1,1,1-dla-..."), appending sXXeYY for episodes.
            sub_link = "https://www.napiprojekt.pl/" + result['href']
            sub_link = sub_link.replace("napisy-","napisy1,1,1-dla-",1)
            if v_type == "series":
                sub_link = sub_link + "-s" + str(season).zfill(2) + "e" + str(episode).zfill(2)
            logger.debug ("Checking subs on: " + sub_link)
            page = requests.get(sub_link)
            soup = BeautifulSoup(page.text, 'html.parser')
            # The slider holds one anchor per listing page; its anchor count
            # is used as the page count (1 if absent).
            slider = soup.find('div', {'class': 'sliderContent _oF'})
            if slider:
                alinks = slider.findAll('a')
                howmany = len(alinks)
            else:
                howmany = 1
            # Take an arbitrary requested language (sets are unordered);
            # this provider only declares Polish anyway.
            lang = ""
            for e in languages:
                lang = e
                break
            # Walk every listing page and collect duration-matching entries.
            for x in range(1,howmany+1):
                sub_link_loop = sub_link.replace("napisy1,1,1-dla-","napisy" + str(x) + ",1,1-dla-",1)
                #print(sub_link_loop)
                page = requests.get(sub_link_loop)
                soup = BeautifulSoup(page.text, 'html.parser')
                table = soup.find('tbody')
                #print(slider.prettify())
                if table:
                    for row in table.findAll(lambda tag: tag.name=='tr'):
                        # Columns: 0=id link, 1=size, 2=fps, 3=duration,
                        # 6=download count (assumed from the indexing below).
                        napid = row.findAll('td')[0].find('a', href=True)['href'].replace("napiprojekt:","")
                        size = row.findAll('td')[1].text
                        fps = row.findAll('td')[2].text
                        length = row.findAll('td')[3].text
                        downloads = row.findAll('td')[6].text
                        # Convert "H:MM:SS(.f)" to seconds; empty means unknown.
                        if length == "":
                            floatlength = 0
                        else:
                            lengtharray = length.split(":")
                            floatlength = int(lengtharray[0]) * 3600 + int(lengtharray[1]) * 60 + float(lengtharray[2])
                        # Keep candidates within one minute of the video.
                        if duration-60 <= floatlength <= duration+60:
                            subtitle = self.subtitle_class(lang, napid, floatlength, downloads)
                            subs.append(subtitle)
            # Closest-duration candidates first.
            sortedsubs = sorted(subs, key=lambda subs: abs(subs.duration - duration))
            return [s for s in [self.query(lang, subsrt) for subsrt in sortedsubs] if s is not None]
        else:
            return None

    def download_subtitle(self, subtitle):
        """Download subtitle content by its napiprojekt hash/id.

        NOTE(review): on the 'NPc0' not-found marker this only logs — the
        marker bytes are still assigned to subtitle.content; confirm whether
        content should instead be left unset in that case.
        """
        hash = subtitle.hash
        params = {
            'v': 'dreambox',
            'kolejka': 'false',
            'nick': '',
            'pass': '',
            'napios': 'Linux',
            'l': "PL",
            'f': hash,
            't': get_subhash(hash)}
        logger.info('Searching subtitle %r', params)
        r = self.session.get(self.server_url, params=params, timeout=10)
        r.raise_for_status()

        # handle subtitles not found and errors
        if r.content[:4] == b'NPc0':
            logger.debug('No subtitles downloaded')
        subtitle2 = subtitle
        subtitle2.content = r.content
        logger.debug('Downloaded subtitle %r', subtitle2)