class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
    api_url = 'https://kodi.titlovi.com/api/subtitles'
    api_gettoken_url = api_url + '/gettoken'
    api_search_url = api_url + '/search'

    def __init__(self, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')
        self.username = username
        self.password = password
        self.session = None
        self.user_id = None
        self.login_token = None
        self.token_exp = None

    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)
        token = region.get("titlovi_token")
        if token is not NO_VALUE:
            self.user_id, self.login_token, self.token_exp = token
            if datetime.now() > self.token_exp:
                logger.debug('Token expired')
                self.log_in()
            else:
                logger.debug('Using cached token')
        else:
            logger.debug('Token not found in cache')
            self.log_in()

    def log_in(self):
        login_params = dict(username=self.username, password=self.password, json=True)
        try:
            response = self.session.post(self.api_gettoken_url, params=login_params)
            if response.status_code == request_codes.ok:
                resp_json = response.json()
                self.login_token = resp_json.get('Token')
                self.user_id = resp_json.get('UserId')
                self.token_exp = dateutil.parser.parse(resp_json.get('ExpirationDate'))
                region.set("titlovi_token", [self.user_id, self.login_token, self.token_exp])
                logger.debug('New token obtained')
            elif response.status_code == request_codes.unauthorized:
                raise AuthenticationError('Login failed')
        except RequestException as e:
            logger.error(e)

    def terminate(self):
        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def get_result(self, search_url, search_params):
        return self.session.get(search_url, params=search_params)

    def query(self, languages, title, season=None, episode=None, year=None, imdb_id=None, video=None):
        search_params = dict()

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found; filtering languages')
            used_languages = [l for l in used_languages if l != Language.fromietf('sr-Latn')]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        search_params['query'] = title
        search_params['lang'] = langs
        is_episode = False
        if season and episode:
            is_episode = True
            #search_params['season'] = season
            #search_params['episode'] = episode
        #if year:
        #    search_params['year'] = year
        if imdb_id:
            search_params['imdbID'] = imdb_id

        # loop through paginated results
        logger.info('Searching subtitles %r', search_params)
        subtitles = []
        query_results = []

        try:
            search_params['token'] = self.login_token
            search_params['userid'] = self.user_id
            search_params['json'] = True

            #response = self.get_result(search_url=self.api_search_url, search_params=search_params)
            response = self.get_result(self.api_search_url, search_params)
            resp_json = response.json()
            if resp_json['SubtitleResults']:
                query_results.extend(resp_json['SubtitleResults'])
        except Exception as e:
            logger.error(e)

        for sub in query_results:
            # title and alternate title
            match = title_re.search(sub.get('Title'))
            if match:
                _title = match.group('title')
                alt_title = match.group('altitle')
            else:
                continue

            # handle movies and series separately
            if is_episode:
                # skip if season and episode numbers do not match
                if season and season != sub.get('Season'):
                    continue
                elif episode and episode != sub.get('Episode'):
                    continue

                subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'),
                                               sub.get('Release'), _title, alt_title=alt_title,
                                               season=sub.get('Season'), episode=sub.get('Episode'),
                                               year=sub.get('Year'), rating=sub.get('Rating'),
                                               download_count=sub.get('DownloadCount'),
                                               asked_for_release_group=video.release_group,
                                               asked_for_episode=episode)
            else:
                subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'),
                                               sub.get('Release'), _title, alt_title=alt_title,
                                               year=sub.get('Year'), rating=sub.get('Rating'),
                                               download_count=sub.get('DownloadCount'),
                                               asked_for_release_group=video.release_group)
            logger.debug('Found subtitle %r', subtitle)

            # prime our matches so we can use the values later
            subtitle.get_matches(video)

            # add found subtitles
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode,
                                      year=video.year, imdb_id=video.imdb_id, video=video)]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None
            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian Latin and Cyrillic versions are packed together, try to find the right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use the default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            _sub_name = sub_name.lower()

            if not ('.cyr' in _sub_name or '.cir' in _sub_name or 'cyr)' in _sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in _sub_name or '.cir' in _sub_name) and '.lat' not in _sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
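# Illustrative sketch (not part of the provider): the token-caching pattern that
# initialize() above builds on dogpile.cache's NO_VALUE sentinel, reduced to a
# self-contained example. The in-memory region configuration and the
# fetch_new_token callback are assumptions for the demo; the real project wires
# up its own shared region.
from datetime import datetime, timedelta

from dogpile.cache import make_region
from dogpile.cache.api import NO_VALUE

demo_region = make_region().configure('dogpile.cache.memory')

def get_token(fetch_new_token):
    """Return a cached token, refreshing it via fetch_new_token() on miss or expiry."""
    cached = demo_region.get("demo_token")
    if cached is not NO_VALUE:
        token, expires_at = cached
        if datetime.now() < expires_at:
            return token  # cache hit, token still valid
    # cache miss or expired token: fetch a fresh one and store it
    token, expires_at = fetch_new_token()
    demo_region.set("demo_token", (token, expires_at))
    return token

# usage sketch:
# get_token(lambda: ("abc123", datetime.now() + timedelta(hours=1)))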
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
    server_url = 'https://titlovi.com'
    search_url = server_url + '/titlovi/?'
    download_url = server_url + '/download/?type=1&mediaid='

    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)

    def terminate(self):
        self.session.close()

    def query(self, languages, title, season=None, episode=None, year=None, video=None):
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found; filtering languages')
            used_languages = [l for l in used_languages if l != Language.fromietf('sr-Latn')]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()
            except RequestException as e:
                logger.exception('RequestException %s', e)
                break
            else:
                try:
                    soup = BeautifulSoup(r.content, 'lxml')

                    # number of results
                    result_count = int(soup.select_one('.results_count b').string)
                except Exception:
                    result_count = None

                # exit if no results
                if not result_count:
                    if not subtitles:
                        logger.debug('No subtitles found')
                    else:
                        logger.debug('No more subtitles found')
                    break

                # number of pages with results
                pages = int(math.ceil(result_count / float(items_per_page)))

                # get current page
                if 'pg' in params:
                    current_page = int(params['pg'])

                try:
                    sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                    for sub in sublist:
                        # subtitle id
                        sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                        # get download link
                        download_link = self.download_url + sid
                        # title and alternate title
                        match = title_re.search(sub.a.string)
                        if match:
                            _title = match.group('title')
                            alt_title = match.group('altitle')
                        else:
                            continue

                        # page link
                        page_link = self.server_url + sub.a.attrs['href']
                        # subtitle language
                        _lang = sub.select_one('.lang')
                        match = lang_re.search(_lang.attrs.get('src', _lang.attrs.get('cfsrc', '')))
                        if match:
                            try:
                                # decode language
                                lang = Language.fromtitlovi(match.group('lang') + match.group('script'))
                            except ValueError:
                                continue

                        # release year or series start year
                        match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                        if match:
                            r_year = int(match.group('year'))
                        # fps
                        match = fps_re.search(sub.select_one('.fps').string)
                        if match:
                            fps = match.group('fps')

                        # releases
                        releases = str(sub.select_one('.fps').parent.contents[0].string)

                        # handle movies and series separately
                        if is_episode:
                            # season and episode info
                            sxe = sub.select_one('.s0xe0y').string
                            r_season = None
                            r_episode = None
                            if sxe:
                                match = season_re.search(sxe)
                                if match:
                                    r_season = int(match.group('season'))
                                match = episode_re.search(sxe)
                                if match:
                                    r_episode = int(match.group('episode'))

                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, season=r_season, episode=r_episode,
                                                           year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group,
                                                           asked_for_episode=episode)
                        else:
                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group)
                        logger.debug('Found subtitle %r', subtitle)

                        # prime our matches so we can use the values later
                        subtitle.get_matches(video)

                        # add found subtitles
                        subtitles.append(subtitle)
                finally:
                    soup.decompose()

                # stop on last page
                if current_page >= pages:
                    break

                # increment current page
                params['pg'] = current_page + 1
                logger.debug('Getting page %d', params['pg'])

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode,
                                      year=video.year, video=video)]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None
            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian Latin and Cyrillic versions are packed together, try to find the right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use the default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            if not ('.cyr' in sub_name or '.cir' in sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in sub_name or '.cir' in sub_name) and '.lat' not in sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
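# Illustrative sketch (not part of the provider): the archive-sniffing pattern
# that download_subtitle() above applies to the downloaded bytes, as a small
# standalone helper. rarfile is the third-party library of the same name;
# zipfile is from the standard library.
import io
import zipfile

import rarfile

def open_archive(content):
    """Return a RarFile/ZipFile for the raw bytes, or None if neither signature matches."""
    stream = io.BytesIO(content)
    if rarfile.is_rarfile(stream):
        return rarfile.RarFile(stream)   # rar signature found
    if zipfile.is_zipfile(stream):
        return zipfile.ZipFile(stream)   # zip signature found
    return None                          # plain file or unsupported format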
class LegendasdivxProvider(Provider):
    """Legendasdivx Provider."""
    languages = {Language('por', 'BR')} | {Language('por')}
    SEARCH_THROTTLE = 8
    site = 'https://www.legendasdivx.pt'
    headers = {
        'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Origin': 'https://www.legendasdivx.pt',
        'Referer': 'https://www.legendasdivx.pt'
    }
    loginpage = site + '/forum/ucp.php?mode=login'
    searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
    download_link = site + '/modules.php{link}'

    def __init__(self, username, password, skip_wrong_fps=True):
        # make sure login credentials are configured
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Legendasdivx.pt :: Username and password must be specified')
        self.username = username
        self.password = password
        self.skip_wrong_fps = skip_wrong_fps

    def initialize(self):
        logger.debug("Legendasdivx.pt :: Creating session for requests")
        self.session = RetryingCFSession()
        # re-use PHP session if present
        prev_cookies = region.get("legendasdivx_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug("Legendasdivx.pt :: Re-using previous legendasdivx cookies: %s", prev_cookies)
            self.session.cookies.update(prev_cookies)
        # login if session has expired
        else:
            logger.debug("Legendasdivx.pt :: Session cookies not found!")
            self.session.headers.update(self.headers)
            self.login()

    def terminate(self):
        # session close
        self.session.close()

    def login(self):
        logger.debug('Legendasdivx.pt :: Logging in')
        try:
            # sleep for 1 second before another request
            sleep(1)
            res = self.session.get(self.loginpage)
            res.raise_for_status()
            bsoup = ParserBeautifulSoup(res.content, ['lxml'])

            _allinputs = bsoup.findAll('input')
            data = {}
            # necessary to set 'sid' for the POST request
            for field in _allinputs:
                data[field.get('name')] = field.get('value')

            # sleep for 1 second before another request
            sleep(1)
            data['username'] = self.username
            data['password'] = self.password
            res = self.session.post(self.loginpage, data)
            res.raise_for_status()

            # make sure we're logged in
            logger.debug('Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s',
                         self.session.cookies.get_dict()['PHPSESSID'])
            cj = self.session.cookies.copy()
            store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k", "phpbb3_2z8zs_u", "lang")
            for cn in iter(self.session.cookies.keys()):
                if cn not in store_cks:
                    del cj[cn]

            # store session cookies in the cache
            logger.debug("Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj)
            region.set("legendasdivx_cookies2", cj)

        except KeyError:
            logger.error("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
            raise AuthenticationError("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
        except HTTPError as e:
            if "bloqueado" in res.text.lower():
                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
                raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
        except Exception as e:
            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

    def _process_page(self, video, bsoup):
        subtitles = []

        _allsubs = bsoup.findAll("div", {"class": "sub_box"})
        for _subbox in _allsubs:
            hits = 0
            frame_rate = None  # not every sub box lists a frame rate
            for th in _subbox.findAll("th"):
                if th.text == 'Hits:':
                    hits = int(th.find_next("td").text)
                if th.text == 'Idioma:':
                    lang = th.find_next("td").find("img").get('src')
                    if 'brazil' in lang.lower():
                        lang = Language.fromopensubtitles('pob')
                    elif 'portugal' in lang.lower():
                        lang = Language.fromopensubtitles('por')
                    else:
                        continue
                if th.text == "Frame Rate:":
                    frame_rate = th.find_next("td").text.strip()

            # get description for matches
            description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text()

            # get subtitle link from footer
            sub_footer = _subbox.find("div", {"class": "sub_footer"})
            download = sub_footer.find("a", {"class": "sub_download"}) if sub_footer else None

            # sometimes the 'a' tag is not found and returns None; most likely an HTML format error!
            try:
                download_link = self.download_link.format(link=download.get('href'))
                logger.debug("Legendasdivx.pt :: Found subtitle link on: %s ", download_link)
            except Exception:
                logger.debug("Legendasdivx.pt :: Couldn't find download link. Trying next...")
                continue

            # get subtitle uploader
            sub_header = _subbox.find("div", {"class": "sub_header"})
            uploader = sub_header.find("a").text if sub_header else 'anonymous'

            exact_match = False
            if video.name.lower() in description.lower():
                exact_match = True
            data = {'link': download_link,
                    'exact_match': exact_match,
                    'hits': hits,
                    'uploader': uploader,
                    'frame_rate': frame_rate,
                    'description': description}
            subtitles.append(LegendasdivxSubtitle(lang, video, data, skip_wrong_fps=self.skip_wrong_fps))
        return subtitles

    def query(self, video, languages):
        _searchurl = self.searchurl
        subtitles = []

        if isinstance(video, Movie):
            querytext = video.imdb_id if video.imdb_id else video.title
        if isinstance(video, Episode):
            querytext = '{} S{:02d}E{:02d}'.format(video.series, video.season, video.episode)
            querytext = quote(querytext.lower())

        # language query filter
        if not isinstance(languages, (tuple, list, set)):
            languages = [languages]

        for language in languages:
            logger.debug("Legendasdivx.pt :: searching for %s subtitles.", language)
            language_id = language.opensubtitles
            if 'por' in language_id:
                lang_filter = '&form_cat=28'
            elif 'pob' in language_id:
                lang_filter = '&form_cat=29'
            else:
                lang_filter = ''

            querytext = querytext + lang_filter if lang_filter else querytext

            try:
                # sleep for 1 second before another request
                sleep(1)
                self.headers['Referer'] = self.site + '/index.php'
                self.session.headers.update(self.headers)
                res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False)
                res.raise_for_status()
                if res.status_code == 200 and "A legenda não foi encontrada" in res.text:
                    logger.warning('Legendasdivx.pt :: query %s returned no results!', querytext)
                    # for series, if no results are found, try again with just series and season (subtitle packs)
                    if isinstance(video, Episode):
                        logger.debug("Legendasdivx.pt :: trying again with just series and season on query.")
                        querytext = re.sub(r"(e|E)(\d{2})", "", querytext)
                        # sleep for 1 second before another request
                        sleep(1)
                        res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False)
                        res.raise_for_status()
                        if res.status_code == 200 and "A legenda não foi encontrada" in res.text:
                            logger.warning('Legendasdivx.pt :: query {0} returned no results for language {1} '
                                           '(for series and season only).'.format(querytext, language_id))
                            continue
                if res.status_code == 302:
                    # got redirected to the login page:
                    # our session cookies are no longer valid, so clean them from the cache
                    region.delete("legendasdivx_cookies2")
                    logger.debug("Legendasdivx.pt :: Logging in again. Cookies have expired!")
                    # login and try again
                    self.login()
                    # sleep for 1 second before another request
                    sleep(1)
                    res = self.session.get(_searchurl.format(query=querytext))
                    res.raise_for_status()
            except HTTPError as e:
                if "bloqueado" in res.text.lower():
                    logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
                    raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
                logger.error("Legendasdivx.pt :: HTTP Error %s", e)
                raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
            except Exception as e:
                logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
                raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

            bsoup = ParserBeautifulSoup(res.content, ['html.parser'])

            # search for more than 10 results (legendasdivx uses pagination)
            # don't throttle: maximum results = 6 * 10
            MAX_PAGES = 6

            # get number of pages based on results found
            page_header = bsoup.find("div", {"class": "pager_bar"})
            results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) if page_header else 0
            logger.debug("Legendasdivx.pt :: Found %s subtitles", str(results_found))
            num_pages = (int(results_found) // 10) + 1
            num_pages = min(MAX_PAGES, num_pages)

            # process first page
            subtitles += self._process_page(video, bsoup)

            # more pages?
            if num_pages > 1:
                for num_page in range(2, num_pages + 1):
                    sleep(1)  # another 1 sec before requesting...
                    _search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page))
                    logger.debug("Legendasdivx.pt :: Moving on to next page: %s", _search_next)
                    # sleep for 1 second before another request
                    sleep(1)
                    res = self.session.get(_search_next)
                    next_page = ParserBeautifulSoup(res.content, ['html.parser'])
                    subs = self._process_page(video, next_page)
                    subtitles.extend(subs)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        try:
            # sleep for 1 second before another request
            sleep(1)
            res = self.session.get(subtitle.page_link)
            res.raise_for_status()
        except HTTPError as e:
            if "bloqueado" in res.text.lower():
                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
                raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
        except Exception as e:
            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

        # make sure we haven't maxed out our daily limit
        if res.status_code == 200 and 'limite de downloads diário atingido' in res.text.lower():
            logger.error("LegendasDivx.pt :: Daily download limit reached!")
            raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!")

        archive = self._get_archive(res.content)
        # extract the subtitle
        if archive:
            subtitle_content = self._get_subtitle_from_archive(archive, subtitle)
            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
                subtitle.normalize()
                return subtitle
        return

    def _get_archive(self, content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Legendasdivx.pt :: Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Legendasdivx.pt :: Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            logger.error('Legendasdivx.pt :: Unsupported compressed format')
            return None
        return archive

    def _get_subtitle_from_archive(self, archive, subtitle):
        # some files have a non-subtitle file with a .txt extension
        _tmp = list(SUBTITLE_EXTENSIONS)
        _tmp.remove('.txt')
        _subtitle_extensions = tuple(_tmp)

        _max_score = 0
        _scores = get_scores(subtitle.video)

        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(_subtitle_extensions):
                continue

            _guess = guessit(name)
            if isinstance(subtitle.video, Episode):
                logger.debug("Legendasdivx.pt :: guessing %s", name)
                logger.debug("Legendasdivx.pt :: subtitle S%sE%s video S%sE%s",
                             _guess['season'], _guess['episode'], subtitle.video.season, subtitle.video.episode)
                if subtitle.video.episode != _guess['episode'] or subtitle.video.season != _guess['season']:
                    logger.debug('Legendasdivx.pt :: subtitle does not match video, skipping')
                    continue

            matches = set()
            matches |= guess_matches(subtitle.video, _guess)
            logger.debug('Legendasdivx.pt :: sub matches: %s', matches)
            _score = sum((_scores.get(match, 0) for match in matches))
            if _score > _max_score:
                _max_name = name
                _max_score = _score
                logger.debug("Legendasdivx.pt :: new max: %s %s", name, _score)

        if _max_score > 0:
            logger.debug("Legendasdivx.pt :: returning from archive: %s scored %s", _max_name, _max_score)
            return archive.read(_max_name)

        logger.error("Legendasdivx.pt :: No subtitle found on compressed file. Max score was 0")
        return None
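# Illustrative sketch (not part of the provider): the scoring idea behind
# _get_subtitle_from_archive() above, with a hand-rolled weight table instead
# of subliminal's get_scores()/guess_matches(), so the example stays
# self-contained. The property keys and weights are invented for the demo.
from guessit import guessit

DEMO_WEIGHTS = {'release_group': 4, 'source': 2, 'resolution': 1}  # assumed weights

def best_member(names, wanted):
    """Pick the archive member whose guessed properties best match `wanted`."""
    best_name, best_score = None, 0
    for name in names:
        guess = guessit(name)
        score = sum(w for key, w in DEMO_WEIGHTS.items()
                    if guess.get(key) and guess.get(key) == wanted.get(key))
        if score > best_score:
            best_name, best_score = name, score
    return best_name

# usage sketch:
# best_member(['Movie.2019.1080p.WEB.X-GRP.srt', 'Movie.2019.720p.srt'],
#             {'release_group': 'X-GRP', 'resolution': '1080p'})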
class SoustitreseuProvider(Provider, ProviderSubtitleArchiveMixin):
    """Sous-Titres.eu Provider."""
    subtitle_class = SoustitreseuSubtitle
    languages = {Language(l) for l in ['fra', 'eng']}
    video_types = (Episode, Movie)
    server_url = 'https://www.sous-titres.eu/'
    search_url = server_url + 'search.html'

    def __init__(self):
        self.session = None
        self.is_perfect_match = False

    def initialize(self):
        self.session = RetryingCFSession()
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

    def query_series(self, video, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=30)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])

        # loop over series names
        self.is_perfect_match = False
        series_url = []
        series = soup.select('.serie > h3 > a')
        for item in series:
            # title
            if title in item.text:
                series_url.append(item.attrs['href'])
                self.is_perfect_match = True

        for series_page in series_url:
            page_link = self.server_url + series_page
            r = self.session.get(page_link, timeout=30)
            r.raise_for_status()

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
            series_subs_archives = soup.select('a.subList')
            for item in series_subs_archives:
                matching_archive = False
                subtitles_archive_name = unquote(item.attrs['href'].split('/')[-1:][0][:-4])
                guessed_subs = guessit(subtitles_archive_name, {'type': 'episode'})
                try:
                    season, episode = item.select_one('.episodenum').text.split('×')
                    guessed_subs.update({'season': int(season), 'episode': int(episode)})
                except ValueError:
                    season = item.select_one('.episodenum').text[1:]
                    episode = None
                    guessed_subs.update({'season': int(season)})

                if guessed_subs['season'] == video.season:
                    if 'episode' in guessed_subs:
                        if guessed_subs['episode'] == video.episode:
                            matching_archive = True
                    else:
                        matching_archive = True

                if matching_archive:
                    download_link = self.server_url + 'series/' + item.attrs['href']
                    res = self.session.get(download_link, timeout=30)
                    res.raise_for_status()

                    archive = self._get_archive(res.content)
                    # extract the subtitle
                    if archive:
                        subtitles_from_archive = self._get_subtitle_from_archive(archive, video)
                        for subtitle in subtitles_from_archive:
                            subtitle.page_link = page_link
                            subtitle.download_link = download_link
                            subtitles.append(subtitle)

        return subtitles

    def query_movies(self, video, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=30)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])

        # loop over movie names
        movies_url = []
        self.is_perfect_match = False
        movies = soup.select('.film > h3 > a')
        for item in movies:
            # title
            if title.lower() in item.text.lower():
                movies_url.append(item.attrs['href'])
                self.is_perfect_match = True

        for movies_page in movies_url:
            page_link = self.server_url + movies_page
            r = self.session.get(page_link, timeout=30)
            r.raise_for_status()

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
            movies_subs_archives = soup.select('a.subList')
            for item in movies_subs_archives:
                download_link = self.server_url + 'films/' + item.attrs['href']
                res = self.session.get(download_link, timeout=30)
                res.raise_for_status()

                archive = self._get_archive(res.content)
                # extract the subtitle
                if archive:
                    subtitles_from_archive = self._get_subtitle_from_archive(archive, video)
                    for subtitle in subtitles_from_archive:
                        subtitle.page_link = page_link
                        subtitle.download_link = download_link
                        subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        subtitles = []

        # query for subtitles
        if isinstance(video, Episode):
            subtitles += [s for s in self.query_series(video, video.series) if s.language in languages]
        else:
            subtitles += [s for s in self.query_movies(video, video.title) if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        return subtitle

    def _get_archive(self, content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if is_rarfile(archive_stream):
            logger.debug('Sous-Titres.eu: Identified rar archive')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Sous-Titres.eu: Identified zip archive')
            archive = ZipFile(archive_stream)
        else:
            logger.error('Sous-Titres.eu: Unsupported compressed format')
            return None
        return archive

    def _get_subtitle_from_archive(self, archive, video):
        subtitles = []

        # some files have a non-subtitle file with a .txt extension
        _tmp = list(SUBTITLE_EXTENSIONS)
        _tmp.remove('.txt')
        _subtitle_extensions = tuple(_tmp)
        _scores = get_scores(video)

        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(_subtitle_extensions):
                continue

            # get subtitle language
            if '.en.' in name.lower():
                language = Language.fromopensubtitles('eng')
            else:
                language = Language.fromopensubtitles('fre')

            release = name[:-4].lower().rstrip('tag').rstrip('en').rstrip('fr')
            _guess = guessit(release)
            if isinstance(video, Episode):
                if video.episode != _guess['episode'] or video.season != _guess['season']:
                    continue

            matches = set()
            matches |= guess_matches(video, _guess)
            _score = sum((_scores.get(match, 0) for match in matches))
            content = archive.read(name)
            subtitles.append(SoustitreseuSubtitle(language, video, name, _guess, content, self.is_perfect_match))

        return subtitles
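# Illustrative sketch (not part of the provider): the filename-based language
# detection used by _get_subtitle_from_archive() above. Sous-Titres.eu only
# serves French and English, so anything without an '.en.' marker falls back
# to French. babelfish is the language library the providers already use.
from babelfish import Language

def language_from_name(name):
    """Guess the subtitle language from an archive member name."""
    if '.en.' in name.lower():
        return Language.fromopensubtitles('eng')
    return Language.fromopensubtitles('fre')  # provider default

# usage sketch: language_from_name('Show.S01E01.en.srt') -> <Language [en]>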
class GreekSubsProvider(Provider):
    """GreekSubs Provider."""
    languages = {Language('ell')}
    server_url = 'https://greeksubs.net/'
    subtitle_class = GreekSubsSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = RetryingCFSession()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]

    def terminate(self):
        self.session.close()

    def query(self, video, languages, imdb_id, season=None, episode=None):
        logger.debug('Searching subtitles for %r', imdb_id)
        subtitles = []
        search_link = self.server_url + 'en/view/' + imdb_id

        r = self.session.get(search_link, timeout=30)
        r.raise_for_status()

        soup_page = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])

        if isinstance(video, Episode):
            try:
                episodes = soup_page.select('div.col-lg-offset-2.col-md-8.text-center.top30.bottom10 > a')
                for item in episodes:
                    season_episode = re.search(r'Season (\d+) Episode (\d+)', item.text)
                    season_number = int(season_episode.group(1))
                    episode_number = int(season_episode.group(2))
                    if season_number == season and episode_number == episode:
                        episode_page = item.attrs['href']
                        r = self.session.get(episode_page, timeout=30)
                        soup_subs = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                        try:
                            secCode = soup_subs.find('input', {'id': 'secCode'}).get('value')
                        except Exception as e:
                            logger.debug(e)
                        else:
                            for subtitles_item in soup_subs.select('#elSub > tbody > tr'):
                                try:
                                    subtitle_id = re.search(
                                        r'downloadMe\(\'(.*)\'\)',
                                        subtitles_item.contents[2].contents[2].contents[0].attrs['onclick']).group(1)
                                    page_link = self.server_url + 'dll/' + subtitle_id + '/0/' + secCode
                                    language = Language.fromalpha2(subtitles_item.parent.find('img')['alt'])
                                    version = subtitles_item.contents[2].contents[4].text.strip()
                                    uploader = subtitles_item.contents[2].contents[5].contents[0].contents[1].text.strip()
                                    referer = episode_page.encode('utf-8')

                                    r = self.session.get(page_link, headers={'Referer': referer},
                                                         timeout=30, allow_redirects=False)
                                    r.raise_for_status()
                                    soup_dll = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                                    try:
                                        langcode = soup_dll.find(attrs={"name": 'langcode'}).get('value')
                                        uid = soup_dll.find(attrs={"name": 'uid'}).get('value')
                                        output = soup_dll.find(attrs={"name": 'output'}).get('value')
                                        dll = soup_dll.find(attrs={"name": 'dll'}).get('value')
                                    except Exception as e:
                                        logger.debug(e)
                                    else:
                                        download_req = self.session.post(
                                            page_link,
                                            data={'langcode': langcode, 'uid': uid, 'output': output, 'dll': dll},
                                            headers={'Referer': page_link}, timeout=10)
                                except Exception as e:
                                    logger.debug(e)
                                else:
                                    if language in languages:
                                        subtitle = self.subtitle_class(language, page_link, version, uploader, referer)
                                        if not download_req.content:
                                            logger.error('Unable to download subtitle. No data returned from provider')
                                            continue
                                        subtitle.content = download_req.content
                                        logger.debug('Found subtitle %r', subtitle)
                                        subtitles.append(subtitle)
            except Exception as e:
                logger.debug(e)
        elif isinstance(video, Movie):
            try:
                soup_subs = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                try:
                    secCode = soup_subs.find('input', {'id': 'secCode'}).get('value')
                except Exception as e:
                    logger.debug(e)
                else:
                    for subtitles_item in soup_subs.select('#elSub > tbody > tr'):
                        try:
                            subtitle_id = re.search(
                                r'downloadMe\(\'(.*)\'\)',
                                subtitles_item.contents[2].contents[2].contents[0].attrs['onclick']).group(1)
                            page_link = self.server_url + 'dll/' + subtitle_id + '/0/' + secCode
                            language = Language.fromalpha2(subtitles_item.parent.find('img')['alt'])
                            version = subtitles_item.contents[2].contents[4].text.strip()
                            uploader = subtitles_item.contents[2].contents[5].contents[0].contents[1].text.strip()
                            referer = page_link.encode('utf-8')

                            r = self.session.get(page_link, headers={'Referer': referer},
                                                 timeout=30, allow_redirects=False)
                            r.raise_for_status()
                            soup_dll = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                            try:
                                langcode = soup_dll.find(attrs={"name": 'langcode'}).get('value')
                                uid = soup_dll.find(attrs={"name": 'uid'}).get('value')
                                output = soup_dll.find(attrs={"name": 'output'}).get('value')
                                dll = soup_dll.find(attrs={"name": 'dll'}).get('value')
                            except Exception as e:
                                logger.debug(e)
                            else:
                                download_req = self.session.post(
                                    page_link,
                                    data={'langcode': langcode, 'uid': uid, 'output': output, 'dll': dll},
                                    headers={'Referer': page_link}, timeout=10)
                        except Exception as e:
                            logger.debug(e)
                        else:
                            if language in languages:
                                subtitle = self.subtitle_class(language, page_link, version, uploader, referer)
                                if not download_req.content:
                                    logger.error('Unable to download subtitle. No data returned from provider')
                                    continue
                                subtitle.content = download_req.content
                                logger.debug('Found subtitle %r', subtitle)
                                subtitles.append(subtitle)
            except Exception as e:
                logger.debug(e)

        return subtitles

    def list_subtitles(self, video, languages):
        imdbId = None
        subtitles = []

        if isinstance(video, Episode):
            imdbId = video.series_imdb_id
        elif isinstance(video, Movie):
            imdbId = video.imdb_id

        if not imdbId:
            logger.debug('No imdb number available to search with provider')
            return subtitles

        # query for subtitles with the imdbId
        if isinstance(video, Episode):
            subtitles = self.query(video, languages, imdbId, season=video.season, episode=video.episode)
        elif isinstance(video, Movie):
            subtitles = self.query(video, languages, imdbId)

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, GreekSubsSubtitle):
            subtitle.content = fix_line_ending(subtitle.content)
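# Illustrative sketch (not part of the provider): the hidden-form-field
# harvesting that the download step above performs (the langcode/uid/output/dll
# inputs) as a small helper. The HTML in the usage comment is a made-up
# stand-in for a greeksubs.net download page.
from bs4 import BeautifulSoup

def harvest_fields(page_html, names):
    """Collect the values of named <input> elements for a follow-up POST."""
    soup = BeautifulSoup(page_html, 'html.parser')
    return {n: soup.find(attrs={'name': n}).get('value') for n in names}

# usage sketch:
# harvest_fields('<input name="uid" value="42"><input name="dll" value="1">',
#                ['uid', 'dll'])  -> {'uid': '42', 'dll': '1'}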
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page
    to avoid massive hammering, thus it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False

    search_throttle = 2  # seconds

    def __init__(self, only_foreign=False):
        self.only_foreign = only_foreign

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        self.filters = dict(HearingImpaired="2")
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            logger.info("Only searching for foreign/forced subtitles")

        self.filters["LanguageFilter"] = ",".join((str(language_ids[l.alpha3]) for l in languages
                                                   if l.alpha3 in language_ids))

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = ZipFile(io.BytesIO(subtitle.pack_data))
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
                return
            except ProviderError:
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)
        archive = ZipFile(archive_stream)

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            subtitle = SubsceneSubtitle.from_api(s)
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def query(self, video):
        #vfn = get_video_filename(video)
        subtitles = []
        #logger.debug(u"Searching for: %s", vfn)
        # film = search(vfn, session=self.session)
        #
        # if film and film.subtitles:
        #     logger.debug('Release results found: %s', len(film.subtitles))
        #     subtitles = self.parse_results(video, film)
        # else:
        #     logger.debug('No release results found')

        #time.sleep(self.search_throttle)

        # re-search for episodes without explicit release name
        if isinstance(video, Episode):
            #term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
            more_than_one = len([video.series] + video.alternative_series) > 1
            for series in [video.series] + video.alternative_series:
                term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
                logger.debug('Searching for alternative results: %s', term)
                film = search(term, session=self.session, release=False, throttle=self.search_throttle)
                if film and film.subtitles:
                    logger.debug('Alternative results found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No alternative results found')

                # packs
                # if video.season_fully_aired:
                #     term = u"%s S%02i" % (series, video.season)
                #     logger.debug('Searching for packs: %s', term)
                #     time.sleep(self.search_throttle)
                #     film = search(term, session=self.session, throttle=self.search_throttle)
                #     if film and film.subtitles:
                #         logger.debug('Pack results found: %s', len(film.subtitles))
                #         subtitles += self.parse_results(video, film)
                #     else:
                #         logger.debug('No pack results found')
                # else:
                #     logger.debug("Not searching for packs, because the season hasn't fully aired")

                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            more_than_one = len([video.title] + video.alternative_titles) > 1
            for title in [video.title] + video.alternative_titles:
                logger.debug('Searching for movie results: %s', title)
                film = search(title, year=video.year, session=self.session, limit_to=None, release=False,
                              throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)
                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
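# Illustrative sketch (not part of the provider): how the "<series> - <Nth>
# Season" search term in query() above is built. In the provider, `p` is
# assumed to be an inflect engine created at module level; this standalone
# version creates its own, mirroring the provider's own number_to_words call.
import inflect

_p = inflect.engine()

def season_search_term(series, season):
    """Build a SubScene search term like 'Dark - First Season'."""
    return u"%s - %s Season" % (series, _p.number_to_words("%sth" % season).capitalize())

# usage sketch: season_search_term('Dark', 1) should yield 'Dark - First Season'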
def initialize(self): logger.info("Creating session") self.session = RetryingCFSession()
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page
    to avoid massive hammering, thus it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 5  # seconds

    def __init__(self, only_foreign=False, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

    def login(self):
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.content:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.content)

        if match:
            h = HTMLParser.HTMLParser()
            data = json.loads(h.unescape(match.group(1)))
            login_url = urlparse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(login_url,
                                  {
                                      "username": self.username,
                                      "password": self.password,
                                      data["antiForgery"]["name"]: data["antiForgery"]["value"]
                                  })
            pep_content = re.search(r"<form method=\"post\" action=\"https://subscene\.com/\">"
                                    r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                                    r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                                    r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                                    r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                                    r"scope.+?value=\"(?P<scope>.+?)\".+?"
                                    r"state.+?value=\"(?P<state>.+?)\".+?"
                                    r"session_state.+?value=\"(?P<session_state>.+?)\"",
                                    r.content, re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    raise ProviderError("Something went wrong when trying to log in: %s" % traceback.format_exc())
                else:
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username")
                    for cn in self.session.cookies.iterkeys():
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return

        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        self.filters = dict(HearingImpaired="2")
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            logger.info("Only searching for foreign/forced subtitles")

        self.filters["LanguageFilter"] = ",".join((str(language_ids[l.alpha3]) for l in languages
                                                   if l.alpha3 in language_ids))

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = ZipFile(io.BytesIO(subtitle.pack_data))
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
                return
            except ProviderError:
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)
        archive = ZipFile(archive_stream)

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin): """ This currently only searches for the filename on SubScene. It doesn't open every found subtitle page to avoid massive hammering, thus it can't determine whether a subtitle is only-foreign or not. """ subtitle_class = SubsceneSubtitle languages = supported_languages languages.update(set(Language.rebuild(l, forced=True) for l in languages)) languages.update(set(Language.rebuild(l, hi=True) for l in languages)) video_types = (Episode, Movie) session = None skip_wrong_fps = False hearing_impaired_verifiable = True only_foreign = False username = None password = None search_throttle = 8 # seconds def __init__(self, only_foreign=False, username=None, password=None): if not all((username, password)): raise ConfigurationError('Username and password must be specified') self.only_foreign = only_foreign self.username = username self.password = password def initialize(self): logger.info("Creating session") self.session = RetryingCFSession() prev_cookies = region.get("subscene_cookies2") if prev_cookies != NO_VALUE: logger.debug("Re-using old subscene cookies: %r", prev_cookies) self.session.cookies.update(prev_cookies) else: logger.debug("Logging in") self.login() def login(self): r = self.session.get("https://subscene.com/account/login") if "Server Error" in r.text: logger.error("Login unavailable; Maintenance?") raise ServiceUnavailable("Login unavailable; Maintenance?") match = re.search( r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.text) if match: h = html data = json.loads(h.unescape(match.group(1))) login_url = parse.urljoin(data["siteUrl"], data["loginUrl"]) time.sleep(1.0) r = self.session.post( login_url, { "username": self.username, "password": self.password, data["antiForgery"]["name"]: data["antiForgery"]["value"] }) pep_content = re.search( r"<form method=\"post\" action=\"https://subscene\.com/\">" r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?" r"access_token\".+?value=\"(?P<access_token>.+?)\".+?" r"token_type.+?value=\"(?P<token_type>.+?)\".+?" r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?" r"scope.+?value=\"(?P<scope>.+?)\".+?" r"state.+?value=\"(?P<state>.+?)\".+?" 
r"session_state.+?value=\"(?P<session_state>.+?)\"", r.text, re.MULTILINE | re.DOTALL) if pep_content: r = self.session.post(SITE_DOMAIN, pep_content.groupdict()) try: r.raise_for_status() except Exception: raise ProviderError( "Something went wrong when trying to log in: %s", traceback.format_exc()) else: cj = self.session.cookies.copy() store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username") for cn in self.session.cookies.keys(): if cn not in store_cks: del cj[cn] logger.debug("Storing cookies: %r", cj) region.set("subscene_cookies2", cj) return raise ProviderError("Something went wrong when trying to log in #1") def terminate(self): logger.info("Closing session") self.session.close() def _create_filters(self, languages): self.filters = dict(HearingImpaired="2") acc_filters = self.filters.copy() if self.only_foreign: self.filters["ForeignOnly"] = "True" acc_filters["ForeignOnly"] = self.filters["ForeignOnly"].lower() logger.info("Only searching for foreign/forced subtitles") selected_ids = [] for l in languages: lid = language_ids.get(l.basename, language_ids.get(l.alpha3, None)) if lid: selected_ids.append(str(lid)) acc_filters["SelectedIds"] = selected_ids self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"]) last_filters = region.get("subscene_filters") if last_filters != acc_filters: region.set("subscene_filters", acc_filters) logger.debug("Setting account filters to %r", acc_filters) self.session.post("https://u.subscene.com/filter", acc_filters, allow_redirects=False) logger.debug("Filter created: '%s'" % self.filters) def _enable_filters(self): self.session.cookies.update(self.filters) logger.debug("Filters applied") def list_subtitles(self, video, languages): if not video.original_name: logger.info( "Skipping search because we don't know the original release name" ) return [] self._create_filters(languages) self._enable_filters() if isinstance(video, Episode): international_titles = list( set([video.series] + video.alternative_series[:1])) subtitles = [ s for s in self.query(video, international_titles) if s.language in languages ] if not len(subtitles): us_titles = [x + ' (US)' for x in international_titles] subtitles = [ s for s in self.query(video, us_titles) if s.language in languages ] return subtitles else: titles = list(set([video.title] + video.alternative_titles[:1])) return [ s for s in self.query(video, titles) if s.language in languages ] def download_subtitle(self, subtitle): if subtitle.pack_data: logger.info("Using previously downloaded pack data") if rarfile.is_rarfile(io.BytesIO(subtitle.pack_data)): logger.debug('Identified rar archive') archive = rarfile.RarFile(io.BytesIO(subtitle.pack_data)) elif zipfile.is_zipfile(io.BytesIO(subtitle.pack_data)): logger.debug('Identified zip archive') archive = zipfile.ZipFile(io.BytesIO(subtitle.pack_data)) else: logger.error('Unsupported compressed format') return subtitle.pack_data = None try: subtitle.content = self.get_subtitle_from_archive( subtitle, archive) return except ProviderError: pass # open the archive r = self.session.get(subtitle.get_download_link(self.session), timeout=10) r.raise_for_status() archive_stream = io.BytesIO(r.content) if rarfile.is_rarfile(archive_stream): logger.debug('Identified rar archive') archive = rarfile.RarFile(archive_stream) elif zipfile.is_zipfile(archive_stream): logger.debug('Identified zip archive') archive = zipfile.ZipFile(archive_stream) else: logger.error('Unsupported compressed format') return subtitle.content = 
        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if subtitle.hearing_impaired:
                subtitle.language = Language.rebuild(subtitle.language, hi=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def do_search(self, *args, **kwargs):
        try:
            return search(*args, **kwargs)
        except requests.HTTPError:
            region.delete("subscene_cookies2")
            raise

    @reinitialize_on_error((RequestException,), attempts=1)
    def query(self, video, titles):
        subtitles = []
        if isinstance(video, Episode):
            more_than_one = len(titles) > 1
            for series in titles:
                term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
                logger.debug('Searching with series and season: %s', term)
                film = self.do_search(term, session=self.session, release=False,
                                      throttle=self.search_throttle, limit_to=SearchTypes.TvSerie)
                if not film and video.season == 1:
                    logger.debug('Searching with series name: %s', series)
                    film = self.do_search(series, session=self.session, release=False,
                                          throttle=self.search_throttle, limit_to=SearchTypes.TvSerie)

                if film and film.subtitles:
                    logger.debug('Searching found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No results found')

                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            more_than_one = len(titles) > 1
            for title in titles:
                logger.debug('Searching for movie results: %r', title)
                film = self.do_search(title, year=video.year, session=self.session, limit_to=None,
                                      release=False, throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)

                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
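# The block below is a minimal, self-contained sketch of the archive-sniffing
# pattern download_subtitle() uses above (try rar first, then zip, from raw
# response bytes). It is illustrative only: `open_archive` is a hypothetical
# helper, not part of the provider API.
import io
import zipfile

import rarfile


def open_archive(payload):
    """Return an opened RarFile/ZipFile for the given bytes, or None if the
    payload is in neither format."""
    stream = io.BytesIO(payload)
    if rarfile.is_rarfile(stream):
        return rarfile.RarFile(stream)
    if zipfile.is_zipfile(stream):
        # is_zipfile() may move the file position; rewind before opening
        stream.seek(0)
        return zipfile.ZipFile(stream)
    return None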
def initialize(self):
    self.session = RetryingCFSession()
    self.session.headers['Referer'] = self.server_url
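# A minimal sketch of what the initialize() snippet above relies on: headers
# set on a session are sent with every subsequent request, so the Referer only
# needs to be assigned once. A plain requests.Session stands in for
# RetryingCFSession here; the URL and params are illustrative only.
import requests

session = requests.Session()
session.headers['Referer'] = 'https://titlovi.com'
# this request (and all later ones on this session) carries the Referer header
r = session.get('https://titlovi.com/titlovi/', params={'prijevod': 'example', 'jezik': 'sr'})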
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
    server_url = 'https://titlovi.com'
    search_url = server_url + '/titlovi/?'
    download_url = server_url + '/download/?type=1&mediaid='

    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)

    def terminate(self):
        self.session.close()

    def query(self, languages, title, season=None, episode=None, year=None, video=None):
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
            used_languages = [l for l in used_languages if l != Language.fromietf('sr-Latn')]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()
            except RequestException as e:
                logger.exception('RequestException %s', e)
                break
            else:
                try:
                    soup = BeautifulSoup(r.content, 'lxml')

                    # number of results
                    result_count = int(soup.select_one('.results_count b').string)
                except Exception:
                    result_count = None

                # exit if no results
                if not result_count:
                    if not subtitles:
                        logger.debug('No subtitles found')
                    else:
                        logger.debug('No more subtitles found')
                    break

                # number of pages with results
                pages = int(math.ceil(result_count / float(items_per_page)))

                # get current page
                if 'pg' in params:
                    current_page = int(params['pg'])

                try:
                    sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                    for sub in sublist:
                        # subtitle id
                        sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                        # get download link
                        download_link = self.download_url + sid
                        # title and alternate title
                        match = title_re.search(sub.a.string)
                        if match:
                            _title = match.group('title')
                            alt_title = match.group('altitle')
                        else:
                            continue

                        # page link
                        page_link = self.server_url + sub.a.attrs['href']

                        # subtitle language
                        match = lang_re.search(sub.select_one('.lang').attrs['src'])
                        if match:
                            try:
                                # decode language
                                lang = Language.fromtitlovi(match.group('lang') + match.group('script'))
                            except ValueError:
                                continue
                        else:
                            # skip entries without a language flag instead of
                            # re-using the previous iteration's value
                            continue

                        # release year or series start year; may be missing
                        r_year = None
                        match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                        if match:
                            r_year = int(match.group('year'))

                        # fps; may be missing
                        fps = None
                        match = fps_re.search(sub.select_one('.fps').string)
                        if match:
                            fps = match.group('fps')

                        # releases
                        releases = str(sub.select_one('.fps').parent.contents[0].string)

                        # handle movies and series separately
                        if is_episode:
                            # season and episode info
                            sxe = sub.select_one('.s0xe0y').string
                            r_season = None
                            r_episode = None
                            if sxe:
                                match = season_re.search(sxe)
                                if match:
                                    r_season = int(match.group('season'))
                                match = episode_re.search(sxe)
                                if match:
                                    r_episode = int(match.group('episode'))

                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, season=r_season,
                                                           episode=r_episode, year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group,
                                                           asked_for_episode=episode)
                        else:
                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group)
                        logger.debug('Found subtitle %r', subtitle)

                        # prime our matches so we can use the values later
                        subtitle.get_matches(video)

                        # add found subtitles
                        subtitles.append(subtitle)
                finally:
                    soup.decompose()

                # stop on last page
                if current_page >= pages:
                    break

                # increment current page
                params['pg'] = current_page + 1
                logger.debug('Getting page %d', params['pg'])

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode,
                                      year=video.year, video=video)]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find the right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use the default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            if not ('.cyr' in sub_name or '.cir' in sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in sub_name or '.cir' in sub_name) and '.lat' not in sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        if sub_to_extract is None:
            # nothing matched the expected naming; fall back to the generic method
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
            return

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
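# A standalone sketch of the selection rule get_subtitle_from_bundled_archive()
# implements above: '.cyr'/'.cir' in a file name marks a Cyrillic track,
# everything else counts as Latin. `pick_serbian_sub` is a hypothetical helper
# for illustration, not part of the provider.
def pick_serbian_sub(names, want_cyrillic):
    cyr = [n for n in names if '.cyr' in n or '.cir' in n]
    lat = [n for n in names if n not in cyr]
    candidates = cyr if want_cyrillic else lat
    return candidates[0] if candidates else None


# e.g. pick_serbian_sub(['film.lat.srt', 'film.cir.srt'], want_cyrillic=True)
# returns 'film.cir.srt'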