class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open
    every found subtitle page to avoid massive hammering, thus it can't
    determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    # also advertise the forced/foreign variant of every supported language
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 5  # seconds between searches

    def __init__(self, only_foreign=False, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

    def login(self):
        """Log in to subscene via its OpenID flow and cache the session cookies.

        :raises ServiceUnavailable: when the login page is down for maintenance.
        :raises ProviderError: when any step of the flow fails.
        """
        r = self.session.get("https://subscene.com/account/login")
        # fix: compare against r.text (decoded str); the py2-era code used
        # r.content (bytes on py3), so the substring test never matched
        if "Server Error" in r.text:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.text)

        if match:
            # fix: html.unescape replaces the removed-in-py3 HTMLParser.unescape
            data = json.loads(html.unescape(match.group(1)))
            login_url = parse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(login_url,
                                  {
                                      "username": self.username,
                                      "password": self.password,
                                      data["antiForgery"]["name"]: data["antiForgery"]["value"]
                                  })
            pep_content = re.search(r"<form method=\"post\" action=\"https://subscene\.com/\">"
                                    r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                                    r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                                    r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                                    r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                                    r"scope.+?value=\"(?P<scope>.+?)\".+?"
                                    r"state.+?value=\"(?P<state>.+?)\".+?"
                                    r"session_state.+?value=\"(?P<session_state>.+?)\"",
                                    r.text, re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    # fix: interpolate the traceback; the old comma form passed
                    # it as a second exception arg and never formatted it
                    raise ProviderError("Something went wrong when trying to log in: %s"
                                        % traceback.format_exc())
                else:
                    # only persist the cookies subscene needs for an
                    # authenticated session
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username")
                    # fix: .keys() instead of py2-only .iterkeys()
                    for cn in list(self.session.cookies.keys()):
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return
        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        """Build the cookie-based search filters for the requested languages."""
        self.filters = dict(HearingImpaired="2")
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            logger.info("Only searching for foreign/forced subtitles")

        self.filters["LanguageFilter"] = ",".join((str(language_ids[l.alpha3]) for l in languages
                                                   if l.alpha3 in language_ids))

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        # subscene stores search filters in cookies
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        # re-use a previously downloaded archive when possible
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = ZipFile(io.BytesIO(subtitle.pack_data))
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
                return
            except ProviderError:
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)
        archive = ZipFile(archive_stream)

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        """Convert raw subscene API results into SubsceneSubtitle objects."""
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            # fix: py3 "as e" syntax; "except NotImplementedError, e" is a
            # SyntaxError on Python 3
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    """Titlovi.com provider using the kodi.titlovi.com JSON API."""

    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
    api_url = 'https://kodi.titlovi.com/api/subtitles'
    api_gettoken_url = api_url + '/gettoken'
    api_search_url = api_url + '/search'

    def __init__(self, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password

        self.session = None
        self.user_id = None
        self.login_token = None
        self.token_exp = None

    def initialize(self):
        self.session = RetryingCFSession()
        # load_verification("titlovi", self.session)

        # re-use the cached API token when it has not expired yet
        token = region.get("titlovi_token")
        if token is not NO_VALUE:
            self.user_id, self.login_token, self.token_exp = token
            # NOTE(review): assumes token_exp and datetime.now() are both
            # naive datetimes -- confirm against the API's ExpirationDate
            if datetime.now() > self.token_exp:
                logger.debug('Token expired')
                self.log_in()
            else:
                logger.debug('Use cached token')
        else:
            logger.debug('Token not found in cache')
            self.log_in()

    def log_in(self):
        """Fetch a fresh API token and cache it.

        :raises AuthenticationError: on a 401 response (bad credentials).
        """
        login_params = dict(username=self.username, password=self.password, json=True)
        try:
            response = self.session.post(self.api_gettoken_url, params=login_params)
            if response.status_code == request_codes.ok:
                resp_json = response.json()
                self.login_token = resp_json.get('Token')
                self.user_id = resp_json.get('UserId')
                self.token_exp = dateutil.parser.parse(resp_json.get('ExpirationDate'))

                region.set("titlovi_token", [self.user_id, self.login_token, self.token_exp])
                logger.debug('New token obtained')

            elif response.status_code == request_codes.unauthorized:
                raise AuthenticationError('Login failed')

        except RequestException as e:
            # best-effort: network errors are logged, not raised
            logger.error(e)

    def terminate(self):
        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def get_result(self, search_url, search_params):
        return self.session.get(search_url, params=search_params)

    def query(self, languages, title, season=None, episode=None, year=None, imdb_id=None, video=None):
        """Search the API and return matching TitloviSubtitle objects."""
        search_params = dict()

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
            used_languages = [l for l in used_languages if l != Language.fromietf('sr-Latn')]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        search_params['query'] = title
        search_params['lang'] = langs
        is_episode = False
        if season and episode:
            is_episode = True
        if imdb_id:
            search_params['imdbID'] = imdb_id

        # loop through paginated results
        logger.info('Searching subtitles %r', search_params)
        subtitles = []
        query_results = []

        try:
            search_params['token'] = self.login_token
            search_params['userid'] = self.user_id
            search_params['json'] = True

            response = self.get_result(self.api_search_url, search_params)
            resp_json = response.json()
            if resp_json['SubtitleResults']:
                query_results.extend(resp_json['SubtitleResults'])
        except Exception as e:
            logger.error(e)

        for sub in query_results:
            # title and alternate title
            match = title_re.search(sub.get('Title'))
            if match:
                _title = match.group('title')
                alt_title = match.group('altitle')
            else:
                continue

            # handle movies and series separately
            if is_episode:
                # skip if season and episode number does not match
                if season and season != sub.get('Season'):
                    continue
                elif episode and episode != sub.get('Episode'):
                    continue

                subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')),
                                               sub.get('Link'),
                                               sub.get('Id'),
                                               sub.get('Release'),
                                               _title,
                                               alt_title=alt_title,
                                               season=sub.get('Season'),
                                               episode=sub.get('Episode'),
                                               year=sub.get('Year'),
                                               rating=sub.get('Rating'),
                                               download_count=sub.get('DownloadCount'),
                                               asked_for_release_group=video.release_group,
                                               asked_for_episode=episode)
            else:
                subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')),
                                               sub.get('Link'),
                                               sub.get('Id'),
                                               sub.get('Release'),
                                               _title,
                                               alt_title=alt_title,
                                               year=sub.get('Year'),
                                               rating=sub.get('Rating'),
                                               download_count=sub.get('DownloadCount'),
                                               asked_for_release_group=video.release_group)
            logger.debug('Found subtitle %r', subtitle)

            # prime our matches so we can use the values later
            subtitle.get_matches(video)

            # add found subtitles
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in self.query(languages, fix_inconsistent_naming(title),
                                      season=season, episode=episode,
                                      year=video.year, imdb_id=video.imdb_id,
                                      video=video)]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            # not an archive: maybe it's a plain subtitle file
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        """Pick the Latin or Cyrillic entry from a mixed Serbian archive."""
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            _sub_name = sub_name.lower()

            if not ('.cyr' in _sub_name or '.cir' in _sub_name or 'cyr)' in _sub_name):
                sr_lat_subs.append(sub_name)

            # fix: compare against the lowercased name consistently -- the old
            # code checked '.cyr' in the original-case name (missing '.CYR')
            # and redundantly re-lowered an already lowered string
            if ('.cyr' in _sub_name or '.cir' in _sub_name) and '.lat' not in _sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        if sub_to_extract is None:
            # fix: guard against archive.read(None) when no candidate matched;
            # fall back to the first archive entry
            sub_to_extract = subs_in_archive[0]

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open
    every found subtitle page to avoid massive hammering, thus it can't
    determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    # advertise forced (foreign) and hearing-impaired variants as well
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    video_types = (Episode, Movie)
    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 8  # seconds between searches

    def __init__(self, only_foreign=False, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

        # re-use cached cookies to avoid logging in on every run
        prev_cookies = region.get("subscene_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug("Re-using old subscene cookies: %r", prev_cookies)
            self.session.cookies.update(prev_cookies)
        else:
            logger.debug("Logging in")
            self.login()

    def login(self):
        """Log in to subscene via its OpenID flow and cache the session cookies.

        :raises ServiceUnavailable: when the login page is down for maintenance.
        :raises ProviderError: when any step of the flow fails.
        """
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.text:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.text)

        if match:
            data = json.loads(html.unescape(match.group(1)))
            login_url = parse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(login_url,
                                  {
                                      "username": self.username,
                                      "password": self.password,
                                      data["antiForgery"]["name"]: data["antiForgery"]["value"]
                                  })
            pep_content = re.search(r"<form method=\"post\" action=\"https://subscene\.com/\">"
                                    r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                                    r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                                    r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                                    r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                                    r"scope.+?value=\"(?P<scope>.+?)\".+?"
                                    r"state.+?value=\"(?P<state>.+?)\".+?"
                                    r"session_state.+?value=\"(?P<session_state>.+?)\"",
                                    r.text, re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    # fix: interpolate the traceback into the message; the old
                    # form passed it as a second, unformatted exception arg
                    raise ProviderError("Something went wrong when trying to log in: %s"
                                        % traceback.format_exc())
                else:
                    # only persist the cookies subscene needs for an
                    # authenticated session
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username")
                    for cn in list(self.session.cookies.keys()):
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return
        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        """Build search filter cookies and sync the account-side filters."""
        self.filters = dict(HearingImpaired="2")
        acc_filters = self.filters.copy()
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            acc_filters["ForeignOnly"] = self.filters["ForeignOnly"].lower()
            logger.info("Only searching for foreign/forced subtitles")

        selected_ids = []
        for l in languages:
            lid = language_ids.get(l.basename, language_ids.get(l.alpha3, None))
            if lid:
                selected_ids.append(str(lid))

        acc_filters["SelectedIds"] = selected_ids
        self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"])

        # only push the account filters when they changed since the last run
        last_filters = region.get("subscene_filters")
        if last_filters != acc_filters:
            region.set("subscene_filters", acc_filters)
            logger.debug("Setting account filters to %r", acc_filters)
            self.session.post("https://u.subscene.com/filter", acc_filters, allow_redirects=False)

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        # subscene stores search filters in cookies
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()

        if isinstance(video, Episode):
            international_titles = list(set([video.series] + video.alternative_series[:1]))
            subtitles = [s for s in self.query(video, international_titles) if s.language in languages]
            # idiom fix: was "if not len(subtitles):"
            if not subtitles:
                us_titles = [x + ' (US)' for x in international_titles]
                subtitles = [s for s in self.query(video, us_titles) if s.language in languages]
            return subtitles
        else:
            titles = list(set([video.title] + video.alternative_titles[:1]))
            return [s for s in self.query(video, titles) if s.language in languages]

    @staticmethod
    def _open_archive(data):
        """Return a RarFile/ZipFile wrapping *data*, or None if the format is
        unsupported. Factored out of download_subtitle, which duplicated this
        detection logic for the cached-pack and fresh-download paths."""
        stream = io.BytesIO(data)
        if rarfile.is_rarfile(stream):
            logger.debug('Identified rar archive')
            return rarfile.RarFile(stream)
        if zipfile.is_zipfile(stream):
            logger.debug('Identified zip archive')
            return zipfile.ZipFile(stream)
        logger.error('Unsupported compressed format')
        return None

    def download_subtitle(self, subtitle):
        # re-use a previously downloaded archive when possible
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = self._open_archive(subtitle.pack_data)
            if archive is None:
                return
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
                return
            except ProviderError:
                pass

        # download fresh and open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()

        archive = self._open_archive(r.content)
        if archive is None:
            return

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        """Convert raw subscene API results into SubsceneSubtitle objects."""
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if subtitle.hearing_impaired:
                subtitle.language = Language.rebuild(subtitle.language, hi=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def do_search(self, *args, **kwargs):
        """Wrap the subscene search; drop cached cookies on HTTP errors so the
        next attempt performs a fresh login."""
        try:
            return search(*args, **kwargs)
        except requests.HTTPError:
            region.delete("subscene_cookies2")
            raise

    @reinitialize_on_error((RequestException,), attempts=1)
    def query(self, video, titles):
        subtitles = []
        if isinstance(video, Episode):
            more_than_one = len(titles) > 1
            for series in titles:
                # subscene lists seasons as e.g. "Show - Second Season"
                term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
                logger.debug('Searching with series and season: %s', term)
                film = self.do_search(term, session=self.session, release=False,
                                      throttle=self.search_throttle, limit_to=SearchTypes.TvSerie)
                if not film and video.season == 1:
                    # first seasons are often listed under the bare series name
                    logger.debug('Searching with series name: %s', series)
                    film = self.do_search(series, session=self.session, release=False,
                                          throttle=self.search_throttle, limit_to=SearchTypes.TvSerie)

                if film and film.subtitles:
                    logger.debug('Searching found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No results found')

                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            more_than_one = len(titles) > 1
            for title in titles:
                logger.debug('Searching for movie results: %r', title)
                film = self.do_search(title, year=video.year, session=self.session,
                                      limit_to=None, release=False, throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)

                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
class LegendasdivxProvider(Provider):
    """Legendasdivx Provider."""
    languages = {Language('por', 'BR')} | {Language('por')}
    SEARCH_THROTTLE = 8
    site = 'https://www.legendasdivx.pt'
    headers = {
        'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Origin': 'https://www.legendasdivx.pt',
        'Referer': 'https://www.legendasdivx.pt'
    }
    loginpage = site + '/forum/ucp.php?mode=login'
    searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
    download_link = site + '/modules.php{link}'

    def __init__(self, username, password, skip_wrong_fps=True):
        # make sure login credentials are configured.
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Legendasdivx.pt :: Username and password must be specified')
        self.username = username
        self.password = password
        self.skip_wrong_fps = skip_wrong_fps

    def initialize(self):
        logger.debug("Legendasdivx.pt :: Creating session for requests")
        self.session = RetryingCFSession()
        # re-use PHP Session if present
        prev_cookies = region.get("legendasdivx_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug("Legendasdivx.pt :: Re-using previous legendasdivx cookies: %s", prev_cookies)
            self.session.cookies.update(prev_cookies)
        # login if session has expired
        else:
            logger.debug("Legendasdivx.pt :: Session cookies not found!")
            self.session.headers.update(self.headers)
            self.login()

    def terminate(self):
        # session close
        self.session.close()

    def login(self):
        """Log in to the phpBB forum and cache the session cookies.

        :raises AuthenticationError: when no PHPSESSID was issued.
        :raises IPAddressBlocked: when the site reports the IP as blocked.
        :raises TooManyRequests: on other HTTP errors.
        :raises ServiceUnavailable: on any unexpected failure.
        """
        logger.debug('Legendasdivx.pt :: Logging in')
        try:
            # sleep for a 1 second before another request
            sleep(1)
            res = self.session.get(self.loginpage)
            res.raise_for_status()
            bsoup = ParserBeautifulSoup(res.content, ['lxml'])

            _allinputs = bsoup.findAll('input')
            data = {}
            # necessary to set 'sid' for POST request
            for field in _allinputs:
                data[field.get('name')] = field.get('value')

            # sleep for a 1 second before another request
            sleep(1)
            data['username'] = self.username
            data['password'] = self.password
            res = self.session.post(self.loginpage, data)
            res.raise_for_status()

            # make sure we're logged in
            logger.debug('Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s',
                         self.session.cookies.get_dict()['PHPSESSID'])
            cj = self.session.cookies.copy()
            store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k", "phpbb3_2z8zs_u", "lang")
            for cn in iter(self.session.cookies.keys()):
                if cn not in store_cks:
                    del cj[cn]

            # store session cookies on cache
            logger.debug("Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj)
            region.set("legendasdivx_cookies2", cj)

        except KeyError:
            logger.error("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
            raise AuthenticationError("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
        except HTTPError as e:
            if "bloqueado" in res.text.lower():
                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
                raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
            # fix: interpolate the error; the old two-arg form never formatted it
            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
        except Exception as e:
            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

    def _process_page(self, video, bsoup):
        """Parse one search-results page into LegendasdivxSubtitle objects."""
        subtitles = []

        _allsubs = bsoup.findAll("div", {"class": "sub_box"})
        for _subbox in _allsubs:
            hits = 0
            # fix: reset per sub box -- previously lang/frame_rate leaked from
            # the previous iteration (or raised NameError on the first box)
            # when a header row was missing
            lang = None
            frame_rate = None
            for th in _subbox.findAll("th"):
                if th.text == 'Hits:':
                    hits = int(th.find_next("td").text)
                if th.text == 'Idioma:':
                    lang = th.find_next("td").find("img").get('src')
                    if 'brazil' in lang.lower():
                        lang = Language.fromopensubtitles('pob')
                    elif 'portugal' in lang.lower():
                        lang = Language.fromopensubtitles('por')
                    else:
                        # fix: unknown flag image -- don't keep the raw src
                        # string as the language
                        lang = None
                if th.text == "Frame Rate:":
                    frame_rate = th.find_next("td").text.strip()

            if lang is None:
                # no supported language identified for this sub box
                continue

            # get description for matches
            description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text()
            # get subtitle link from footer
            sub_footer = _subbox.find("div", {"class": "sub_footer"})
            download = sub_footer.find("a", {"class": "sub_download"}) if sub_footer else None

            # sometimes 'a' tag is not found and returns None. Most likely HTML format error!
            try:
                download_link = self.download_link.format(link=download.get('href'))
                logger.debug("Legendasdivx.pt :: Found subtitle link on: %s ", download_link)
            except AttributeError:
                # fix: narrowed from a bare except -- only `download` being
                # None is expected here
                logger.debug("Legendasdivx.pt :: Couldn't find download link. Trying next...")
                continue

            # get subtitle uploader
            sub_header = _subbox.find("div", {"class": "sub_header"})
            uploader = sub_header.find("a").text if sub_header else 'anonymous'

            exact_match = False
            if video.name.lower() in description.lower():
                exact_match = True
            data = {
                'link': download_link,
                'exact_match': exact_match,
                'hits': hits,
                'uploader': uploader,
                'frame_rate': frame_rate,
                'description': description
            }
            subtitles.append(LegendasdivxSubtitle(lang, video, data, skip_wrong_fps=self.skip_wrong_fps))
        return subtitles

    def query(self, video, languages):
        """Search legendasdivx for *video* in each requested language."""
        _searchurl = self.searchurl
        subtitles = []

        if isinstance(video, Movie):
            querytext = video.imdb_id if video.imdb_id else video.title
        if isinstance(video, Episode):
            querytext = '{} S{:02d}E{:02d}'.format(video.series, video.season, video.episode)
        querytext = quote(querytext.lower())

        # language query filter
        if not isinstance(languages, (tuple, list, set)):
            languages = [languages]

        for language in languages:
            logger.debug("Legendasdivx.pt :: searching for %s subtitles.", language)
            language_id = language.opensubtitles
            if 'por' in language_id:
                lang_filter = '&form_cat=28'
            elif 'pob' in language_id:
                lang_filter = '&form_cat=29'
            else:
                lang_filter = ''

            # fix: build a per-language query string; the old code appended the
            # filter (and the episode-pack rewrite below) to `querytext`
            # itself, so those changes leaked into subsequent loop iterations
            lang_query = querytext + lang_filter if lang_filter else querytext

            try:
                # sleep for a 1 second before another request
                sleep(1)
                self.headers['Referer'] = self.site + '/index.php'
                self.session.headers.update(self.headers)
                res = self.session.get(_searchurl.format(query=lang_query), allow_redirects=False)
                res.raise_for_status()

                if (res.status_code == 200 and "A legenda não foi encontrada" in res.text):
                    logger.warning('Legendasdivx.pt :: query %s return no results!', lang_query)
                    # for series, if no results found, try again just with series and season (subtitle packs)
                    if isinstance(video, Episode):
                        logger.debug("Legendasdivx.pt :: trying again with just series and season on query.")
                        lang_query = re.sub(r"(e|E)(\d{2})", "", lang_query)
                        # sleep for a 1 second before another request
                        sleep(1)
                        res = self.session.get(_searchurl.format(query=lang_query), allow_redirects=False)
                        res.raise_for_status()
                        if (res.status_code == 200 and "A legenda não foi encontrada" in res.text):
                            logger.warning('Legendasdivx.pt :: query {0} return no results for language {1}(for series and season only).'.format(lang_query, language_id))
                            continue

                if res.status_code == 302:
                    # got redirected to login page.
                    # seems that our session cookies are no longer valid... clean them from cache
                    region.delete("legendasdivx_cookies2")
                    logger.debug("Legendasdivx.pt :: Logging in again. Cookies have expired!")
                    # login and try again
                    self.login()
                    # sleep for a 1 second before another request
                    sleep(1)
                    res = self.session.get(_searchurl.format(query=lang_query))
                    res.raise_for_status()

            except HTTPError as e:
                if "bloqueado" in res.text.lower():
                    logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
                    raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
                logger.error("Legendasdivx.pt :: HTTP Error %s", e)
                raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
            except Exception as e:
                logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
                raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

            bsoup = ParserBeautifulSoup(res.content, ['html.parser'])

            # search for more than 10 results (legendasdivx uses pagination)
            # don't throttle - maximum results = 6 * 10
            MAX_PAGES = 6

            # get number of pages bases on results found
            page_header = bsoup.find("div", {"class": "pager_bar"})
            results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) if page_header else 0
            logger.debug("Legendasdivx.pt :: Found %s subtitles", str(results_found))
            num_pages = (int(results_found) // 10) + 1
            num_pages = min(MAX_PAGES, num_pages)

            # process first page
            subtitles += self._process_page(video, bsoup)

            # more pages?
            if num_pages > 1:
                for num_page in range(2, num_pages + 1):
                    sleep(1)  # another 1 sec before requesting...
                    _search_next = self.searchurl.format(query=lang_query) + "&page={0}".format(str(num_page))
                    logger.debug("Legendasdivx.pt :: Moving on to next page: %s", _search_next)
                    # sleep for a 1 second before another request
                    sleep(1)
                    res = self.session.get(_search_next)
                    next_page = ParserBeautifulSoup(res.content, ['html.parser'])
                    subs = self._process_page(video, next_page)
                    subtitles.extend(subs)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        """Download, unpack and normalize the subtitle's content.

        :raises DownloadLimitExceeded: when the daily limit was hit.
        """
        try:
            # sleep for a 1 second before another request
            sleep(1)
            res = self.session.get(subtitle.page_link)
            res.raise_for_status()
        except HTTPError as e:
            if "bloqueado" in res.text.lower():
                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
                raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
        except Exception as e:
            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

        # make sure we haven't maxed out our daily limit
        if (res.status_code == 200 and 'limite de downloads diário atingido' in res.text.lower()):
            logger.error("LegendasDivx.pt :: Daily download limit reached!")
            raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!")

        archive = self._get_archive(res.content)
        # extract the subtitle
        if archive:
            subtitle_content = self._get_subtitle_from_archive(archive, subtitle)
            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
                subtitle.normalize()
                return subtitle
        return

    def _get_archive(self, content):
        """Return a RarFile/ZipFile for *content*, or None if unsupported."""
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Legendasdivx.pt :: Identified rar archive')
            return rarfile.RarFile(archive_stream)
        if zipfile.is_zipfile(archive_stream):
            logger.debug('Legendasdivx.pt :: Identified zip archive')
            return zipfile.ZipFile(archive_stream)
        logger.error('Legendasdivx.pt :: Unsupported compressed format')
        return None

    def _get_subtitle_from_archive(self, archive, subtitle):
        """Pick the archive member that best matches the video and return its
        bytes, or None if nothing scored above zero."""
        # some files have a non subtitle with .txt extension
        _tmp = list(SUBTITLE_EXTENSIONS)
        _tmp.remove('.txt')
        _subtitle_extensions = tuple(_tmp)
        _max_score = 0
        _max_name = None
        _scores = get_scores(subtitle.video)

        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue
            # discard non-subtitle files
            if not name.lower().endswith(_subtitle_extensions):
                continue

            _guess = guessit(name)
            if isinstance(subtitle.video, Episode):
                logger.debug("Legendasdivx.pt :: guessing %s", name)
                # fix: use .get -- guessit may not find season/episode for a
                # badly named entry; a KeyError here aborted the whole scan
                logger.debug("Legendasdivx.pt :: subtitle S%sE%s video S%sE%s",
                             _guess.get('season'), _guess.get('episode'),
                             subtitle.video.season, subtitle.video.episode)
                if subtitle.video.episode != _guess.get('episode') or subtitle.video.season != _guess.get('season'):
                    logger.debug('Legendasdivx.pt :: subtitle does not match video, skipping')
                    continue

            matches = set()
            matches |= guess_matches(subtitle.video, _guess)
            logger.debug('Legendasdivx.pt :: sub matches: %s', matches)
            _score = sum((_scores.get(match, 0) for match in matches))
            if _score > _max_score:
                _max_name = name
                _max_score = _score
                logger.debug("Legendasdivx.pt :: new max: %s %s", name, _score)

        if _max_score > 0:
            logger.debug("Legendasdivx.pt :: returning from archive: %s scored %s", _max_name, _max_score)
            return archive.read(_max_name)

        logger.error("Legendasdivx.pt :: No subtitle found on compressed file. Max score was 0")
        return None
class GreekSubsProvider(Provider):
    """GreekSubs Provider.

    Scrapes greeksubs.net by IMDB id. Episodes are located via the season/
    episode listing on the title page; movies are parsed from the title page
    directly. Subtitle payloads are fetched eagerly during ``query`` (the
    site requires a per-page ``secCode`` token and a POST handshake), so
    ``download_subtitle`` only has to normalize line endings.
    """

    # Only Greek subtitles are served by this site.
    languages = {Language('ell')}
    server_url = 'https://greeksubs.net/'
    subtitle_class = GreekSubsSubtitle

    def __init__(self):
        # Session is created lazily in initialize(), not here.
        self.session = None

    def initialize(self):
        # Cloudflare-aware retrying session with a randomized User-Agent.
        self.session = RetryingCFSession()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]

    def terminate(self):
        self.session.close()

    def query(self, video, languages, imdb_id, season=None, episode=None):
        """Search greeksubs.net for *imdb_id* and return matching subtitles.

        :param video: Episode or Movie being searched for.
        :param languages: set of desired Language objects (used to filter).
        :param imdb_id: IMDB id string used to build the search URL.
        :param season: target season number (episodes only).
        :param episode: target episode number (episodes only).
        :return: list of subtitle_class instances with ``.content`` filled in.
        """
        logger.debug('Searching subtitles for %r', imdb_id)
        subtitles = []
        search_link = self.server_url + 'en/view/' + imdb_id

        r = self.session.get(search_link, timeout=30)
        r.raise_for_status()

        soup_page = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])

        if isinstance(video, Episode):
            # NOTE(review): this branch mixes module-level `logging.debug`
            # and the module `logger` — presumably unintentional; confirm
            # which logger the rest of the file standardizes on.
            try:
                # Episode links live in the centered listing block; each link
                # text carries "Season X Episode Y".
                episodes = soup_page.select('div.col-lg-offset-2.col-md-8.text-center.top30.bottom10 > a')
                for item in episodes:
                    season_episode = re.search(r'Season (\d+) Episode (\d+)', item.text)
                    season_number = int(season_episode.group(1))
                    episode_number = int(season_episode.group(2))
                    if season_number == season and episode_number == episode:
                        episode_page = item.attrs['href']
                        r = self.session.get(episode_page, timeout=30)
                        soup_subs = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                        try:
                            # per-page anti-scrape token required by the
                            # download endpoint below
                            secCode = soup_subs.find('input', {'id': 'secCode'}).get('value')
                        except Exception as e:
                            logging.debug(e)
                        else:
                            for subtitles_item in soup_subs.select('#elSub > tbody > tr'):
                                try:
                                    # id is embedded in an onclick="downloadMe('...')" handler
                                    subtitle_id = re.search(r'downloadMe\(\'(.*)\'\)', subtitles_item.contents[2].contents[2].contents[0].attrs['onclick']).group(1)
                                    page_link = self.server_url + 'dll/' + subtitle_id + '/0/' + secCode
                                    language = Language.fromalpha2(subtitles_item.parent.find('img')['alt'])
                                    version = subtitles_item.contents[2].contents[4].text.strip()
                                    uploader = subtitles_item.contents[2].contents[5].contents[0].contents[1].text.strip()
                                    referer = episode_page.encode('utf-8')

                                    # First GET renders a form whose hidden
                                    # fields must be POSTed back to obtain
                                    # the actual subtitle bytes.
                                    r = self.session.get(page_link, headers={'Referer': referer}, timeout=30,
                                                         allow_redirects=False)
                                    r.raise_for_status()
                                    soup_dll = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                                    try:
                                        langcode = soup_dll.find(attrs={"name": 'langcode'}).get('value')
                                        uid = soup_dll.find(attrs={"name": 'uid'}).get('value')
                                        output = soup_dll.find(attrs={"name": 'output'}).get('value')
                                        dll = soup_dll.find(attrs={"name": 'dll'}).get('value')
                                    except Exception as e:
                                        logging.debug(e)
                                    else:
                                        download_req = self.session.post(page_link, data={'langcode': langcode,
                                                                                          'uid': uid,
                                                                                          'output': output,
                                                                                          'dll': dll},
                                                                         headers={'Referer': page_link}, timeout=10)
                                except Exception as e:
                                    logging.debug(e)
                                else:
                                    # NOTE(review): if the hidden-field parse
                                    # above hit its except, `download_req` is
                                    # unbound here and raises NameError, which
                                    # this else is NOT guarded against —
                                    # confirm intended behavior.
                                    if language in languages:
                                        subtitle = self.subtitle_class(language, page_link, version, uploader, referer)
                                        if not download_req.content:
                                            logger.error('Unable to download subtitle. No data returned from provider')
                                            continue
                                        subtitle.content = download_req.content
                                        logger.debug('Found subtitle %r', subtitle)
                                        subtitles.append(subtitle)
                    else:
                        # not the requested season/episode; keep scanning
                        pass
            except Exception as e:
                logging.debug(e)
        elif isinstance(video, Movie):
            # Movie flow mirrors the episode flow but parses the search page
            # itself instead of a per-episode page.
            try:
                soup_subs = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                try:
                    secCode = soup_subs.find('input', {'id': 'secCode'}).get('value')
                except Exception as e:
                    logging.debug(e)
                else:
                    for subtitles_item in soup_subs.select('#elSub > tbody > tr'):
                        try:
                            subtitle_id = re.search(r'downloadMe\(\'(.*)\'\)',
                                                    subtitles_item.contents[2].contents[2].contents[0].attrs[
                                                        'onclick']).group(1)
                            page_link = self.server_url + 'dll/' + subtitle_id + '/0/' + secCode
                            language = Language.fromalpha2(subtitles_item.parent.find('img')['alt'])
                            version = subtitles_item.contents[2].contents[4].text.strip()
                            uploader = subtitles_item.contents[2].contents[5].contents[0].contents[
                                1].text.strip()
                            # movies use the download page itself as referer
                            referer = page_link.encode('utf-8')

                            r = self.session.get(page_link, headers={'Referer': referer}, timeout=30,
                                                 allow_redirects=False)
                            r.raise_for_status()
                            soup_dll = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                            try:
                                langcode = soup_dll.find(attrs={"name": 'langcode'}).get('value')
                                uid = soup_dll.find(attrs={"name": 'uid'}).get('value')
                                output = soup_dll.find(attrs={"name": 'output'}).get('value')
                                dll = soup_dll.find(attrs={"name": 'dll'}).get('value')
                            except Exception as e:
                                logging.debug(e)
                            else:
                                download_req = self.session.post(page_link, data={'langcode': langcode,
                                                                                  'uid': uid,
                                                                                  'output': output,
                                                                                  'dll': dll},
                                                                 headers={'Referer': page_link}, timeout=10)
                        except Exception as e:
                            logging.debug(e)
                        else:
                            # NOTE(review): same potential unbound
                            # `download_req` as in the episode branch.
                            if language in languages:
                                subtitle = self.subtitle_class(language, page_link, version, uploader, referer)
                                if not download_req.content:
                                    logger.error('Unable to download subtitle. No data returned from provider')
                                    continue
                                subtitle.content = download_req.content
                                logger.debug('Found subtitle %r', subtitle)
                                subtitles.append(subtitle)
            except Exception as e:
                logging.debug(e)

        return subtitles

    def list_subtitles(self, video, languages):
        """Resolve the IMDB id for *video* and delegate to :meth:`query`."""
        imdbId = None
        subtitles = []

        # Episodes search by the SERIES imdb id, movies by their own.
        if isinstance(video, Episode):
            imdbId = video.series_imdb_id
        elif isinstance(video, Movie):
            imdbId = video.imdb_id

        if not imdbId:
            logger.debug('No imdb number available to search with provider')
            return subtitles

        # query for subtitles with the imdbId
        # NOTE(review): redundant re-initialization — `subtitles` is already
        # an empty list at this point.
        subtitles = []

        if isinstance(video, Episode):
            subtitles = self.query(video, languages, imdbId, season=video.season, episode=video.episode)
        elif isinstance(video, Movie):
            subtitles = self.query(video, languages, imdbId)

        return subtitles

    def download_subtitle(self, subtitle):
        # Content was already fetched during query(); only normalize EOLs.
        if isinstance(subtitle, GreekSubsSubtitle):
            subtitle.content = fix_line_ending(subtitle.content)