def _process_page(self, video, bsoup): subtitles = [] _allsubs = bsoup.findAll("div", {"class": "sub_box"}) for _subbox in _allsubs: hits = 0 for th in _subbox.findAll("th"): if th.text == 'Hits:': hits = int(th.find_next("td").text) if th.text == 'Idioma:': lang = th.find_next("td").find("img").get('src') if 'brazil' in lang.lower(): lang = Language.fromopensubtitles('pob') elif 'portugal' in lang.lower(): lang = Language.fromopensubtitles('por') else: continue if th.text == "Frame Rate:": frame_rate = th.find_next("td").text.strip() # get description for matches description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text() # get subtitle link from footer sub_footer = _subbox.find("div", {"class": "sub_footer"}) download = sub_footer.find("a", {"class": "sub_download"}) if sub_footer else None # sometimes 'a' tag is not found and returns None. Most likely HTML format error! try: download_link = self.download_link.format(link=download.get('href')) logger.debug("Legendasdivx.pt :: Found subtitle link on: %s ", download_link) except: logger.debug("Legendasdivx.pt :: Couldn't find download link. Trying next...") continue # get subtitle uploader sub_header = _subbox.find("div", {"class" :"sub_header"}) uploader = sub_header.find("a").text if sub_header else 'anonymous' exact_match = False if video.name.lower() in description.lower(): exact_match = True data = {'link': download_link, 'exact_match': exact_match, 'hits': hits, 'uploader': uploader, 'frame_rate': frame_rate, 'description': description } subtitles.append( LegendasdivxSubtitle(lang, video, data, skip_wrong_fps=self.skip_wrong_fps) ) return subtitles
def query(self, video, video_hash, language):
    """Search subtitles for *video* through the ``searchSubtitles`` API call.

    :param video: video being searched; ``imdb_id`` and ``size`` are read.
    :param video_hash: movie hash sent as ``<movieHash>``.
    :param language: a single language or a tuple/list/set of languages;
        each must expose an ``opensubtitles`` code.
    :return: list of ``BSPlayerSubtitle`` objects; empty when login fails,
        the response carries no result/status node, or the status is not
        ``"OK"``.
    """
    if not self.login():
        logger.debug("Token not found. Can't perform query")
        return []

    if isinstance(language, (tuple, list, set)):
        language_ids = ",".join(sorted(l.opensubtitles for l in language))
    else:
        # a single language used to leave ``language_ids`` unbound
        language_ids = language.opensubtitles

    imdbId = "*" if video.imdb_id is None else video.imdb_id

    sleep(self.SEARCH_THROTTLE)
    root = self.api_request(
        func_name="searchSubtitles",
        params=(
            "<handle>{token}</handle>"
            "<movieHash>{movie_hash}</movieHash>"
            "<movieSize>{movie_size}</movieSize>"
            "<languageId>{language_ids}</languageId>"
            "<imdbId>{imdbId}</imdbId>"
        ).format(
            token=self.token,
            movie_hash=video_hash,
            movie_size=video.size,
            language_ids=language_ids,
            imdbId=imdbId,
        ),
    )

    # Compare against None explicitly: the truth value of an XML element
    # depends on its child count, not on whether it was found.
    res = root.find(".//return/result")
    status_node = res.find("status") if res is not None else None
    if status_node is None:
        logger.debug("No subtitles found")
        return []

    status = status_node.text
    if status != "OK":
        logger.debug("No subtitles found (bad status: %s)", status)
        return []

    items = root.findall(".//return/data/item")
    subtitles = []
    if items:
        logger.debug("Subtitles Found.")
        for item in items:
            subID = item.find("subID").text
            subDownloadLink = item.find("subDownloadLink").text
            subLang = Language.fromopensubtitles(item.find("subLang").text)
            subName = item.find("subName").text
            subFormat = item.find("subFormat").text
            subtitles.append(
                BSPlayerSubtitle(
                    subLang, subName, subFormat, video, subDownloadLink, subID
                )
            )

    return subtitles
def _get_subtitle_from_archive(self, archive, video):
    """Collect the subtitle files contained in a downloaded archive.

    Hidden files and files without a subtitle extension are ignored
    (``.txt`` is deliberately not treated as a subtitle here). A file whose
    lower-cased name contains ``.en.`` is tagged English, everything else
    French. For episodes, files whose guessed season/episode does not match
    *video* are dropped.

    :param archive: opened archive exposing ``namelist()`` and ``read(name)``.
    :param video: the video the subtitles are wanted for.
    :return: list of ``SoustitreseuSubtitle`` objects (possibly empty).
    """
    subtitles = []

    # some archives ship a non-subtitle file with a .txt extension
    _subtitle_extensions = tuple(ext for ext in SUBTITLE_EXTENSIONS if ext != '.txt')

    for name in archive.namelist():
        # discard hidden files
        if os.path.split(name)[-1].startswith('.'):
            continue

        lowered = name.lower()

        # discard non-subtitle files
        if not lowered.endswith(_subtitle_extensions):
            continue

        # get subtitles language
        if '.en.' in lowered:
            language = Language.fromopensubtitles('eng')
        else:
            language = Language.fromopensubtitles('fre')

        # Drop the extension, then the trailing 'tag' / 'en' / 'fr' markers.
        # str.rstrip() removes a *set of characters*, which used to eat the
        # end of unrelated release names; remove whole suffixes instead.
        release = os.path.splitext(lowered)[0]
        for suffix in ('tag', 'en', 'fr'):
            if release.endswith(suffix):
                release = release[:-len(suffix)]

        _guess = guessit(release)

        if isinstance(video, Episode):
            # .get(): a name guessit cannot parse must not abort the whole archive
            if video.episode != _guess.get('episode') or video.season != _guess.get('season'):
                continue

        content = archive.read(name)
        subtitles.append(
            SoustitreseuSubtitle(language, video, name, _guess, content, self.is_perfect_match))

    return subtitles
def _process_page(self, video, bsoup, querytext, videoname): subtitles = [] _allsubs = bsoup.findAll("div", {"class": "sub_box"}) lang = Language.fromopensubtitles("pob") for _subbox in _allsubs: hits = 0 for th in _subbox.findAll("th", {"class": "color2"}): if th.string == 'Hits:': hits = int(th.parent.find("td").string) if th.string == 'Idioma:': lang = th.parent.find("td").find("img").get('src') if 'brazil' in lang: lang = Language.fromopensubtitles('pob') else: lang = Language.fromopensubtitles('por') description = _subbox.find("td", {"class": "td_desc brd_up"}) download = _subbox.find("a", {"class": "sub_download"}) try: # sometimes BSoup just doesn't get the link logger.debug(download.get('href')) except Exception as e: logger.warning('skipping subbox on %s' % self.searchurl.format(query=querytext)) continue exact_match = False if video.name.lower() in description.get_text().lower(): exact_match = True data = { 'link': self.site + '/modules.php' + download.get('href'), 'exact_match': exact_match, 'hits': hits, 'videoname': videoname, 'description': description.get_text() } subtitles.append(LegendasdivxSubtitle(lang, video, data)) return subtitles
def query(self, video, video_hash, language):
    """Search subtitles for *video* through the ``searchSubtitles`` API call.

    :param video: video being searched; ``imdb_id`` and ``size`` are read.
    :param video_hash: movie hash sent as ``<movieHash>``.
    :param language: a single language or a tuple/list/set of languages;
        each must expose an ``opensubtitles`` code.
    :return: list of ``BSPlayerSubtitle`` objects; empty when login fails,
        the response carries no result/status node, or the status is not
        ``'OK'``.
    """
    if not self.login():
        return []

    if isinstance(language, (tuple, list, set)):
        language_ids = ','.join(sorted(l.opensubtitles for l in language))
    else:
        # a single language used to leave ``language_ids`` unbound
        language_ids = language.opensubtitles

    imdbId = '*' if video.imdb_id is None else video.imdb_id

    sleep(self.SEARCH_THROTTLE)
    root = self.api_request(
        func_name='searchSubtitles',
        params=('<handle>{token}</handle>'
                '<movieHash>{movie_hash}</movieHash>'
                '<movieSize>{movie_size}</movieSize>'
                '<languageId>{language_ids}</languageId>'
                '<imdbId>{imdbId}</imdbId>').format(
            token=self.token,
            movie_hash=video_hash,
            movie_size=video.size,
            language_ids=language_ids,
            imdbId=imdbId))

    # A response without a result or status node used to raise
    # AttributeError; treat it like a non-OK status.
    res = root.find('.//return/result')
    status_node = res.find('status') if res is not None else None
    if status_node is None or status_node.text != 'OK':
        return []

    items = root.findall('.//return/data/item')
    subtitles = []
    if items:
        logger.info("Subtitles Found.")
        for item in items:
            subID = item.find('subID').text  # parsed but not passed on in this version
            subDownloadLink = item.find('subDownloadLink').text
            subLang = Language.fromopensubtitles(item.find('subLang').text)
            subName = item.find('subName').text
            subFormat = item.find('subFormat').text
            subtitles.append(
                BSPlayerSubtitle(subLang, subName, subFormat, video, subDownloadLink))

    return subtitles
class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
    """OpenSubtitlesCom Provider

    Talks to the OpenSubtitles.com REST API through a ``Session``: logs in
    to obtain a token, resolves a title to a feature id, lists the
    subtitles of that feature and downloads individual subtitle files.
    """
    server_url = 'https://www.opensubtitles.com/api/v1/'

    languages = {Language.fromopensubtitles(l) for l in language_converters['szopensubtitles'].codes}
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    def __init__(self, username=None, password=None, use_hash=True, api_key=None):
        """Store credentials and prepare the HTTP session.

        :raises ConfigurationError: if *api_key*, *username* or *password*
            is missing.
        """
        if not api_key:
            raise ConfigurationError('Api_key must be specified')

        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.session = Session()
        self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
                                'Api-Key': api_key,
                                'Content-Type': 'application/json'}
        self.token = None
        self.username = username
        self.password = password
        self.video = None
        self.use_hash = use_hash

    def initialize(self):
        """Reuse the cached token when there is one, otherwise log in.

        :return: ``True`` once an ``Authorization`` header is set.
        """
        cached_token = region.get("oscom_token")
        if cached_token:
            self.token = cached_token
            self.session.headers.update({'Authorization': 'Bearer ' + self.token})
            return True
        self.token = None
        return self.login()

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def login(self):
        """Request a token and install it on the session and in the cache.

        :return: ``True`` on success.
        :raises ServiceUnavailable: on connection errors or timeouts.
        :raises AuthenticationError: on HTTP 401.
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: on any other status or an unusable JSON body.
        """
        try:
            r = self.session.post(self.server_url + 'login',
                                  json={"username": self.username, "password": self.password},
                                  allow_redirects=False, timeout=10)
        except (ConnectionError, Timeout, ReadTimeout) as error:
            # there is no response object in this branch; report the error itself
            raise ServiceUnavailable('Unknown Error, empty response: %r' % (error,)) from error

        # No ``finally: return False`` here: it used to discard every
        # exception raised below and to override the successful ``return True``.
        if r.status_code == 200:
            try:
                self.token = r.json()['token']
            except (ValueError, KeyError):
                raise ProviderError('Invalid JSON returned by provider')
            self.session.headers.update({'Authorization': 'Bearer ' + self.token})
            region.set("oscom_token", self.token)
            return True
        elif r.status_code == 401:
            raise AuthenticationError('Login failed: {}'.format(r.reason))
        elif r.status_code == 429:
            raise TooManyRequests()
        else:
            raise ProviderError('Bad status code: {}'.format(r.status_code))

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_titles(self, title):
        """Resolve *title* to a feature id.

        The IMDB id of ``self.video`` is used for the lookup when available,
        the title text otherwise. A result matches when its title equals
        *title* (case-insensitively) and, if the video has a year, the years
        are equal.

        NOTE(review): the decorator caches on the arguments while the lookup
        also depends on ``self.video`` - confirm this is intended.

        :return: the matching feature id, or ``None`` when nothing matches.
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: when the response body is not valid JSON.
        """
        title_id = None

        imdb_id = None
        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.video.series_imdb_id
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.video.imdb_id

        if imdb_id:
            parameters = {'imdb_id': imdb_id}
            logger.debug('Searching using this IMDB id: %s', imdb_id)
        else:
            parameters = {'query': title}
            logger.debug('Searching using this title: %s', title)

        results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

        # Status codes are inspected *before* raise_for_status(); the other
        # way round the 401/429 handling could never run.
        if results.status_code == 401:
            logger.debug('Authentification failed: clearing cache and attempting to login.')
            region.delete("oscom_token")
            self.login()
            results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

        if results.status_code == 429:
            raise TooManyRequests()
        results.raise_for_status()

        # deserialize results
        try:
            results_dict = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        # loop over results
        wanted_title = title.lower()
        for result in results_dict:
            attributes = result['attributes']
            if wanted_title == attributes['title'].lower() and \
                    (not self.video.year or self.video.year == int(attributes['year'])):
                title_id = result['id']
                break

        if title_id:
            logger.debug('Found this title ID: %s', title_id)
            return title_id

        logger.debug('No match found for %s', title)
        return None

    def query(self, languages, video):
        """List the subtitles available for *video* in *languages*.

        :return: list of ``OpenSubtitlesComSubtitle`` objects; empty when
            the title cannot be resolved.
        :raises TooManyRequests: on HTTP 429.
        :raises ProviderError: when the response body is not valid JSON.
        """
        self.video = video
        if self.use_hash:
            moviehash = self.video.hashes.get('opensubtitlescom')
            logger.debug('Searching using this hash: %s', moviehash)
        else:
            moviehash = None

        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        title_id = self.search_titles(title)
        if not title_id:
            return []

        lang_strings = [str(lang) for lang in languages]
        langs = ','.join(lang_strings)
        logger.debug('Searching for this languages: %s', lang_strings)

        # query the server
        if isinstance(self.video, Episode):
            params = {'parent_feature_id': title_id,
                      'languages': langs,
                      'episode_number': self.video.episode,
                      'season_number': self.video.season,
                      'moviehash': moviehash}
        else:
            params = {'id': title_id,
                      'languages': langs,
                      'moviehash': moviehash}
        res = self.session.get(self.server_url + 'subtitles', params=params, timeout=10)

        # rate limiting has to be detected before raise_for_status() raises
        if res.status_code == 429:
            raise TooManyRequests()
        res.raise_for_status()

        try:
            result = res.json()
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        subtitles = []
        logger.debug('Query returned %s subtitles', len(result['data']))

        for item in result['data']:
            attributes = item['attributes']
            details = attributes['feature_details']

            # entries without any file cannot be downloaded
            if not attributes['files']:
                continue

            subtitle = OpenSubtitlesComSubtitle(
                language=Language.fromietf(attributes['language']),
                hearing_impaired=attributes['hearing_impaired'],
                page_link=attributes['url'],
                file_id=attributes['files'][0]['file_id'],
                releases=attributes['release'],
                uploader=attributes['uploader']['name'],
                title=details['movie_name'],
                year=details['year'],
                season=details.get('season_number'),
                episode=details.get('episode_number'),
                hash_matched=attributes.get('moviehash_match', False)
            )
            subtitle.get_matches(self.video)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Return the subtitles found by :meth:`query` for *video*."""
        return self.query(languages, video)

    def download_subtitle(self, subtitle):
        """Fetch the download link of *subtitle* and store its content.

        :raises TooManyRequests: on HTTP 429 from either request.
        :raises DownloadLimitExceeded: on HTTP 406 from either request.
        :raises ProviderError: when the link response is not valid JSON.
        """
        logger.info('Downloading subtitle %r', subtitle)

        headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
        res = self.session.post(self.server_url + 'download',
                                json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
                                headers=headers,
                                timeout=10)
        self._raise_for_limits(res)
        res.raise_for_status()

        try:
            subtitle.download_link = res.json()['link']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        r = self.session.get(subtitle.download_link, timeout=10)
        # check the response of *this* request (the first one was re-checked before)
        self._raise_for_limits(r)
        r.raise_for_status()

        subtitle_content = r.content
        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not download subtitle from %s', subtitle.download_link)

    @staticmethod
    def _raise_for_limits(response):
        """Translate the rate/download limit status codes into provider errors."""
        if response.status_code == 429:
            raise TooManyRequests()
        if response.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
def query(self, video, language):
    """Search Legendasdivx for subtitles matching the file name of *video*.

    The query text is the base name of ``video.name`` without extension,
    lower-cased, with dots turned into ``+`` and square brackets removed.
    When exactly one language is requested a ``form_cat`` filter is
    appended to the query.

    :param video: video being searched; ``video.name`` is read.
    :param language: requested language(s); a tuple/list/set with a single
        element enables the language filter.
    :return: list of ``LegendasdivxSubtitle`` objects; empty when there is
        no session cookie yet or the site reports no result.
    """
    try:
        logger.debug('Got session id %s', self.session.cookies.get_dict()['PHPSESSID'])
    except Exception:
        # no usable session yet: log in and return no results for this call
        self.login()
        return []

    language_ids = '0'
    if isinstance(language, (tuple, list, set)):
        if len(language) == 1:
            language_ids = ','.join(sorted(l.opensubtitles for l in language))
            # as coded: 'por' selects category 28, anything else category 29
            if language_ids == 'por':
                language_ids = '&form_cat=28'
            else:
                language_ids = '&form_cat=29'

    querytext = video.name
    querytext = os.path.basename(querytext)
    querytext, _ = os.path.splitext(querytext)
    videoname = querytext
    querytext = querytext.lower()
    querytext = querytext.replace(".", "+").replace("[", "").replace("]", "")
    if language_ids != '0':
        querytext = querytext + language_ids

    self.headers['Referer'] = self.site + '/index.php'
    self.session.headers.update(self.headers.items())
    search_url = self.searchurl.format(query=querytext)
    res = self.session.get(search_url)

    if "A legenda não foi encontrada" in res.text:
        logger.warning('%s not found', querytext)
        return []

    bsoup = ParserBeautifulSoup(res.content, ['html.parser'])
    subtitles = []

    for _subbox in bsoup.findAll("div", {"class": "sub_box"}):
        hits = 0
        # default language, reset for every box so the previous entry's
        # language is never reused for an entry without an 'Idioma:' row
        lang = Language.fromopensubtitles("pob")

        for th in _subbox.findAll("th", {"class": "color2"}):
            if th.string == 'Hits:':
                hits = int(th.parent.find("td").string)
            if th.string == 'Idioma:':
                # the language is only exposed through the flag image file name
                flag_src = th.parent.find("td").find("img").get('src') or ''
                if 'brazil' in flag_src:
                    lang = Language.fromopensubtitles('pob')
                else:
                    lang = Language.fromopensubtitles('por')

        description = _subbox.find("td", {"class": "td_desc brd_up"})
        download = _subbox.find("a", {"class": "sub_download"})

        # sometimes the parser does not find the link (or its href); an
        # anchor without href used to crash on the concatenation below
        href = download.get('href') if download is not None else None
        if not href:
            logger.warning('skipping subbox on %s', search_url)
            continue
        logger.debug(href)

        description_text = description.get_text() if description is not None else ''

        data = {
            'link': self.site + '/modules.php' + href,
            'exact_match': video.name.lower() in description_text.lower(),
            'hits': hits,
            'videoname': videoname,
            'description': description_text,
        }
        subtitles.append(LegendasdivxSubtitle(lang, video, data))

    return subtitles
def query(self, video, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None,
          tag=None, use_tag_search=False, only_foreign=False, also_foreign=False):
    """Run a ``SearchSubtitles`` request and turn the answer into subtitles.

    One search criterion is built per available identifier (hash + size,
    tag, IMDB id); all of them are sent in a single request. Items are
    dropped when they do not fit the foreign-parts settings, when their
    (possibly rebuilt) language was not requested, or when their IMDB id
    differs from ``video.imdb_id``.

    :param video: video being searched; only ``video.imdb_id`` is read.
    :param languages: requested languages, each exposing ``opensubtitles``.
    :param query: accepted but unused; free-text criteria are disabled.
    :return: list of ``self.subtitle_class`` instances (possibly empty).
    :raises ValueError: when no criterion could be built.
    """
    def _int_or_none(raw):
        # empty values become None
        return int(raw) if raw else None

    # fill the search criteria
    criteria = []
    if hash and size:
        criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
    if use_tag_search and tag:
        criteria.append({'tag': tag})
    if imdb_id:
        by_imdb = {'imdbid': imdb_id[2:]}
        if season and episode:
            by_imdb['season'] = season
            by_imdb['episode'] = episode
        criteria.append(by_imdb)

    # Free-text ``query`` criteria are intentionally not sent (disabled after
    # the issue with episodes released after October 17th 2020).

    if not criteria:
        raise ValueError('Not enough information')

    # add the language
    for criterion in criteria:
        criterion['sublanguageid'] = ','.join(sorted(lang.opensubtitles for lang in languages))

    # query the server
    logger.info('Searching subtitles %r', criteria)

    def _search():
        return checked(lambda: self.server.SearchSubtitles(self.token, criteria))

    response = self.use_token_or_login(lambda: self.retry(_search))

    subtitles = []

    # exit if no data
    if not response['data']:
        logger.info('No subtitles found')
        return subtitles

    # loop over subtitle items
    for entry in response['data']:
        # the API has been seen to answer with a mapping keyed by strings
        # instead of a list of dicts; resolve the key in that case
        item = response["data"][entry] if hasattr(entry, "startswith") else entry

        # read the item
        language = Language.fromopensubtitles(item['SubLanguageID'])
        hearing_impaired = bool(int(item['SubHearingImpaired']))
        page_link = item['SubtitlesLink']
        subtitle_id = int(item['IDSubtitleFile'])
        matched_by = item['MatchedBy']
        movie_kind = item['MovieKind']
        movie_hash = item['MovieHash']
        movie_name = item['MovieName']
        movie_release_name = item['MovieReleaseName']
        movie_year = _int_or_none(item['MovieYear'])
        imdb_key = 'SeriesIMDBParent' if (season or episode) else 'IDMovieImdb'
        movie_imdb_id = 'tt' + item[imdb_key]
        movie_fps = item.get('MovieFPS')
        series_season = _int_or_none(item['SeriesSeason'])
        series_episode = _int_or_none(item['SeriesEpisode'])
        filename = item['SubFileName']
        encoding = item.get('SubEncoding') or None
        foreign_parts_only = bool(int(item.get('SubForeignPartsOnly', 0)))

        if foreign_parts_only:
            # foreign/forced not wanted
            if not (only_foreign or also_foreign):
                continue
            # set subtitle language to forced if it's foreign_parts_only
            language = Language.rebuild(language, forced=True)
        elif only_foreign:
            # foreign/forced subtitles only wanted
            continue

        # set subtitle language to hi if it's hearing_impaired
        if hearing_impaired:
            language = Language.rebuild(language, hi=True)

        if language not in languages:
            continue

        if video.imdb_id:
            normalized_video_id = re.sub("(?<![^a-zA-Z])0+", "", video.imdb_id)
            if movie_imdb_id != normalized_video_id:
                continue

        query_parameters = item.get("QueryParameters")

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                       movie_kind, movie_hash, movie_name, movie_release_name, movie_year,
                                       movie_imdb_id, series_season, series_episode, query_parameters,
                                       filename, encoding, movie_fps, skip_wrong_fps=self.skip_wrong_fps)
        subtitle.uploader = item['UserNickName'] or 'anonymous'
        logger.debug('Found subtitle %r by %s', subtitle, matched_by)
        subtitles.append(subtitle)

    return subtitles
class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
    """OpenSubtitles XML-RPC provider with token caching and VIP fallback.

    The login token and the server URL it belongs to are stored in the
    cache ``region`` and reused by :meth:`initialize`.
    """
    only_foreign = False
    also_foreign = False
    subtitle_class = OpenSubtitlesSubtitle
    hash_verifiable = True
    hearing_impaired_verifiable = True
    skip_wrong_fps = True
    is_vip = False
    use_ssl = True
    timeout = 15

    # scheme-less URLs; the scheme is prepended per instance in __init__
    default_url = "//api.opensubtitles.org/xml-rpc"
    vip_url = "//vip-api.opensubtitles.org/xml-rpc"

    languages = {Language.fromopensubtitles(l) for l in language_converters['szopensubtitles'].codes}
    languages.update({Language.rebuild(l, forced=True) for l in languages})
    languages.update({Language.rebuild(l, hi=True) for l in languages})

    def __init__(self, username=None, password=None, use_tag_search=False, only_foreign=False, also_foreign=False,
                 skip_wrong_fps=True, is_vip=False, use_ssl=True, timeout=15):
        """Store the settings and build the scheme-qualified server URLs.

        :raises ConfigurationError: when only one of *username* and
            *password* is given.
        """
        # exactly one of the two credentials given
        if bool(username) != bool(password):
            raise ConfigurationError('Username and password must be specified')

        self.username = username or ''
        self.password = password or ''
        self.use_tag_search = use_tag_search
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign
        self.skip_wrong_fps = skip_wrong_fps
        self.token = None
        self.is_vip = is_vip
        self.use_ssl = use_ssl
        self.timeout = timeout

        logger.debug("Using timeout: %d", timeout)

        if use_ssl:
            logger.debug("Using HTTPS connection")

        scheme = "https:" if use_ssl else "http:"
        self.default_url = scheme + self.default_url
        self.vip_url = scheme + self.vip_url

        if use_tag_search:
            logger.info("Using tag/exact filename search")

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

    def get_server_proxy(self, url, timeout=None):
        """Return a ``ServerProxy`` for *url* using the provider transport."""
        transport = SubZeroRequestsTransport(
            use_https=self.use_ssl,
            timeout=timeout or self.timeout,
            user_agent=os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))
        return ServerProxy(url, transport)

    def log_in_url(self, server_url):
        """Log in on *server_url*; store and cache the resulting token."""
        self.token = None
        self.server = self.get_server_proxy(server_url)

        def _log_in():
            return checked(lambda: self.server.LogIn(
                self.username, self.password, 'eng',
                os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")))

        response = self.retry(_log_in)

        self.token = response['token']
        # only the first ten characters of the token are written to the log
        masked_token = self.token[:10] + "X" * (len(self.token) - 10)
        logger.debug('Logged in with token %r', masked_token)

        region.set("os_token", bytearray(self.token, encoding='utf-8'))
        region.set("os_server_url", bytearray(server_url, encoding='utf-8'))

    def log_in(self):
        """Log in, retrying on the default server when the VIP login fails.

        :raises Unauthorized: when no token could be obtained.
        """
        logger.info('Logging in')

        first_url = self.vip_url if self.is_vip else self.default_url
        try:
            self.log_in_url(first_url)
        except Unauthorized:
            if self.is_vip:
                logger.info("VIP server login failed, falling back")
                try:
                    self.log_in_url(self.default_url)
                except Unauthorized:
                    pass

        if self.token:
            return

        logger.error("Login failed, please check your credentials")
        raise Unauthorized

    def use_token_or_login(self, func):
        """Call *func*, logging in first or once again when needed."""
        if not self.token:
            self.log_in()
            return func()

        try:
            return func()
        except Unauthorized:
            # token rejected: log in again and repeat the call once
            self.log_in()
            return func()

    def initialize(self):
        """Restore token and server proxy from the cache when both exist."""
        token_cache = region.get("os_token")
        url_cache = region.get("os_server_url")

        if token_cache is NO_VALUE or url_cache is NO_VALUE:
            self.server = None
            self.token = None
            return

        self.token = token_cache.decode("utf-8")
        self.server = self.get_server_proxy(url_cache.decode("utf-8"))
        logger.debug("Using previous login token: %r", self.token[:10] + "X" * (len(self.token) - 10))

    def terminate(self):
        """Forget the server proxy and the token."""
        self.server = None
        self.token = None

    def list_subtitles(self, video, languages):
        """Collect the search arguments from *video* and run :meth:`query`.

        For episodes the series IMDB id is used and a list-valued episode
        number is reduced to its minimum; specials are searched without
        season and episode. The hash, size and original file name (as tag)
        are always passed on.

        :param video: the video to search subtitles for.
        :param languages: requested languages.
        :return: the result of :meth:`query`.
        """
        season = None
        episode = None

        if isinstance(video, Episode):
            series_names = [video.series] + video.alternative_series
            query = series_names
            season = video.season
            if isinstance(video.episode, list):
                episode = min(video.episode)
            else:
                episode = video.episode

            if video.is_special:
                season = None
                episode = None
                query = [u"%s %s" % (series, video.title) for series in [video.series] + video.alternative_series]
                logger.info("%s: Searching for special: %r", self.__class__, query)
        else:
            query = [video.title] + video.alternative_titles

        imdb_id = video.series_imdb_id if isinstance(video, Episode) else video.imdb_id

        return self.query(video, languages, hash=video.hashes.get('opensubtitles'), size=video.size,
                          imdb_id=imdb_id, query=query, season=season, episode=episode,
                          tag=video.original_name, use_tag_search=self.use_tag_search,
                          only_foreign=self.only_foreign, also_foreign=self.also_foreign)

    def query(self, video, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None,
              tag=None, use_tag_search=False, only_foreign=False, also_foreign=False):
        """Run a ``SearchSubtitles`` request and turn the answer into subtitles.

        One search criterion is built per available identifier (hash + size,
        tag, IMDB id); all of them are sent in a single request. Items are
        dropped when they do not fit the foreign-parts settings, when their
        (possibly rebuilt) language was not requested, or when their IMDB id
        differs from ``video.imdb_id``.

        :param query: accepted but unused; free-text criteria are disabled.
        :return: list of ``self.subtitle_class`` instances (possibly empty).
        :raises ValueError: when no criterion could be built.
        """
        def _int_or_none(raw):
            # empty values become None
            return int(raw) if raw else None

        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            by_imdb = {'imdbid': imdb_id[2:]}
            if season and episode:
                by_imdb['season'] = season
                by_imdb['episode'] = episode
            criteria.append(by_imdb)

        # Free-text ``query`` criteria are intentionally not sent (disabled
        # after the issue with episodes released after October 17th 2020).

        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(sorted(lang.opensubtitles for lang in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)

        def _search():
            return checked(lambda: self.server.SearchSubtitles(self.token, criteria))

        response = self.use_token_or_login(lambda: self.retry(_search))

        subtitles = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return subtitles

        # loop over subtitle items
        for entry in response['data']:
            # the API has been seen to answer with a mapping keyed by strings
            # instead of a list of dicts; resolve the key in that case
            item = response["data"][entry] if hasattr(entry, "startswith") else entry

            # read the item
            language = Language.fromopensubtitles(item['SubLanguageID'])
            hearing_impaired = bool(int(item['SubHearingImpaired']))
            page_link = item['SubtitlesLink']
            subtitle_id = int(item['IDSubtitleFile'])
            matched_by = item['MatchedBy']
            movie_kind = item['MovieKind']
            movie_hash = item['MovieHash']
            movie_name = item['MovieName']
            movie_release_name = item['MovieReleaseName']
            movie_year = _int_or_none(item['MovieYear'])
            imdb_key = 'SeriesIMDBParent' if (season or episode) else 'IDMovieImdb'
            movie_imdb_id = 'tt' + item[imdb_key]
            movie_fps = item.get('MovieFPS')
            series_season = _int_or_none(item['SeriesSeason'])
            series_episode = _int_or_none(item['SeriesEpisode'])
            filename = item['SubFileName']
            encoding = item.get('SubEncoding') or None
            foreign_parts_only = bool(int(item.get('SubForeignPartsOnly', 0)))

            if foreign_parts_only:
                # foreign/forced not wanted
                if not (only_foreign or also_foreign):
                    continue
                # set subtitle language to forced if it's foreign_parts_only
                language = Language.rebuild(language, forced=True)
            elif only_foreign:
                # foreign/forced subtitles only wanted
                continue

            # set subtitle language to hi if it's hearing_impaired
            if hearing_impaired:
                language = Language.rebuild(language, hi=True)

            if language not in languages:
                continue

            if video.imdb_id:
                normalized_video_id = re.sub("(?<![^a-zA-Z])0+", "", video.imdb_id)
                if movie_imdb_id != normalized_video_id:
                    continue

            query_parameters = item.get("QueryParameters")

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                           movie_kind, movie_hash, movie_name, movie_release_name, movie_year,
                                           movie_imdb_id, series_season, series_episode, query_parameters,
                                           filename, encoding, movie_fps, skip_wrong_fps=self.skip_wrong_fps)
            subtitle.uploader = item['UserNickName'] or 'anonymous'
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            subtitles.append(subtitle)

        return subtitles

    def download_subtitle(self, subtitle):
        """Download *subtitle* and store its decoded content on it."""
        logger.info('Downloading subtitle %r', subtitle)

        def _download():
            return checked(lambda: self.server.DownloadSubtitles(self.token, [str(subtitle.subtitle_id)]))

        response = self.use_token_or_login(_download)

        # the payload is base64 text wrapping zlib-compressed data
        packed = base64.b64decode(response['data'][0]['data'])
        subtitle.content = fix_line_ending(zlib.decompress(packed, 47))
def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None, tag=None,
          use_tag_search=False, only_foreign=False, also_foreign=False):
    """Run a ``SearchSubtitles`` request and turn the answer into subtitles.

    One search criterion is built per available identifier (hash + size,
    tag, IMDB id, and every free-text entry of *query*); all of them are
    sent in a single request. Items are dropped when they do not fit the
    foreign-parts settings or when their language was not requested.

    :param languages: requested languages, each exposing ``opensubtitles``.
    :param query: iterable of free-text titles; apostrophes are removed.
    :return: list of ``self.subtitle_class`` instances (possibly empty).
    :raises ValueError: when no criterion could be built.
    """
    def _int_or_none(raw):
        # empty values become None
        return int(raw) if raw else None

    # fill the search criteria
    criteria = []
    if hash and size:
        criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
    if use_tag_search and tag:
        criteria.append({'tag': tag})
    if imdb_id:
        by_imdb = {'imdbid': imdb_id[2:]}
        if season and episode:
            by_imdb['season'] = season
            by_imdb['episode'] = episode
        criteria.append(by_imdb)
    if query:
        with_episode = bool(season and episode)
        for text in query:
            by_text = {'query': text.replace("'", '')}
            if with_episode:
                by_text['season'] = season
                by_text['episode'] = episode
            criteria.append(by_text)

    if not criteria:
        raise ValueError('Not enough information')

    # add the language
    for criterion in criteria:
        criterion['sublanguageid'] = ','.join(sorted(lang.opensubtitles for lang in languages))

    # query the server
    logger.info('Searching subtitles %r', criteria)

    def _search():
        return checked(lambda: self.server.SearchSubtitles(self.token, criteria))

    response = self.use_token_or_login(lambda: self.retry(_search))

    subtitles = []

    # exit if no data
    if not response['data']:
        logger.info('No subtitles found')
        return subtitles

    # loop over subtitle items
    for entry in response['data']:
        # the API has been seen to answer with a mapping keyed by strings
        # instead of a list of dicts; resolve the key in that case
        item = response["data"][entry] if hasattr(entry, "startswith") else entry

        # read the item
        language = Language.fromopensubtitles(item['SubLanguageID'])
        hearing_impaired = bool(int(item['SubHearingImpaired']))
        page_link = item['SubtitlesLink']
        subtitle_id = int(item['IDSubtitleFile'])
        matched_by = item['MatchedBy']
        movie_kind = item['MovieKind']
        movie_hash = item['MovieHash']
        movie_name = item['MovieName']
        movie_release_name = item['MovieReleaseName']
        movie_year = _int_or_none(item['MovieYear'])
        movie_imdb_id = 'tt' + item['IDMovieImdb']
        movie_fps = item.get('MovieFPS')
        series_season = _int_or_none(item['SeriesSeason'])
        series_episode = _int_or_none(item['SeriesEpisode'])
        filename = item['SubFileName']
        encoding = item.get('SubEncoding') or None
        foreign_parts_only = bool(int(item.get('SubForeignPartsOnly', 0)))

        if foreign_parts_only:
            # foreign/forced not wanted
            if not (only_foreign or also_foreign):
                continue
            # set subtitle language to forced if it's foreign_parts_only
            language = Language.rebuild(language, forced=True)
        elif only_foreign:
            # foreign/forced subtitles only wanted
            continue

        if language not in languages:
            continue

        query_parameters = item.get("QueryParameters")

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                       movie_kind, movie_hash, movie_name, movie_release_name, movie_year,
                                       movie_imdb_id, series_season, series_episode, query_parameters,
                                       filename, encoding, movie_fps, skip_wrong_fps=self.skip_wrong_fps)
        logger.debug('Found subtitle %r by %s', subtitle, matched_by)
        subtitles.append(subtitle)

    return subtitles
class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
    """Subtitle provider backed by the OpenSubtitles.com REST API.

    Searches go through the ``features`` and ``subtitles`` endpoints below
    ``server_url``; downloads first request a link from the ``download``
    endpoint and then fetch the file behind that link.
    """
    server_url = 'https://api.opensubtitles.com/api/v1/'

    # every supported language, plus its forced variant
    languages = {
        Language.fromopensubtitles(lang)
        for lang in language_converters['szopensubtitles'].codes
    }
    languages.update({Language.rebuild(lang, forced=True) for lang in languages})

    video_types = (Episode, Movie)

    def __init__(self, username=None, password=None, use_hash=True, api_key=None):
        """Store the credentials and prepare the HTTP session.

        :raise ConfigurationError: when username, password or api_key is missing.
        """
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        if not api_key:
            raise ConfigurationError('Api_key must be specified')

        self.session = Session()
        self.session.headers = {
            'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
            'Api-Key': api_key,
            'Content-Type': 'application/json'
        }
        self.token = None
        self.username = username
        self.password = password
        self.video = None
        self.use_hash = use_hash
        self._started = None

    def initialize(self):
        """Record the start time (used by :meth:`ping`) and log in."""
        self._started = time.time()
        self.login()

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def ping(self):
        """Return a truthy value while the provider is younger than TOKEN_EXPIRATION_TIME."""
        return self._started and (time.time() - self._started) < TOKEN_EXPIRATION_TIME

    def login(self):
        """Request a token with the stored credentials and cache it as ``oscom_token``."""
        r = self.retry(lambda: checked(lambda: self.session.post(
            self.server_url + 'login',
            json={"username": self.username, "password": self.password},
            allow_redirects=False,
            timeout=30),
            validate_json=True, json_key_name='token'), amount=retry_amount)

        self.token = r.json()['token']
        region.set("oscom_token", self.token)

    def _token_missing(self):
        """Tell whether no usable token is stored (``None`` or the cache's NO_VALUE)."""
        return self.token is None or self.token is NO_VALUE

    @staticmethod
    def sanitize_external_ids(external_id):
        """Convert an external id such as ``'tt0123456'`` to an integer.

        A leading ``tt`` and leading zeros are dropped from strings; other
        values are passed straight to ``int``. An id made only of zeros
        yields ``0``.
        """
        if isinstance(external_id, str):
            # fall back to '0' so an all-zero id does not end up as an empty string
            external_id = external_id.lower().lstrip('tt').lstrip('0') or '0'
        return int(external_id)

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_titles(self, title):
        """Look up the feature id of ``title`` through the ``features`` endpoint.

        A result matches when its title equals the normalised ``title``
        (case-insensitively) and, if ``self.video.year`` is set, its year is
        the same. Reads ``self.video``, so :meth:`query` must have set it.

        :return: the sanitized id of the first match, or ``None``.
        """
        title_id = None

        parameters = {'query': title.lower()}
        logger.debug('Searching using this title: %s', title)

        results = self.retry(lambda: checked(lambda: self.session.get(
            self.server_url + 'features',
            params=parameters,
            timeout=30),
            validate_token=True, validate_json=True, json_key_name='data'), amount=retry_amount)

        if results == 401:
            # the request helper reported an authentication failure:
            # drop the cached token, log in again and repeat the request once
            logger.debug('Authentification failed: clearing cache and attempting to login.')
            region.delete("oscom_token")
            self.login()

            results = self.retry(lambda: checked(lambda: self.session.get(
                self.server_url + 'features',
                params=parameters,
                timeout=30),
                validate_json=True, json_key_name='data'), amount=retry_amount)

        # deserialize results
        results_dict = results.json()['data']

        # episodes and movies only differ in how the wanted title is normalised
        fix_naming = fix_tv_naming if isinstance(self.video, Episode) else fix_movie_naming
        wanted_title = fix_naming(title).lower()

        for result in results_dict:
            attributes = result['attributes']
            if 'title' not in attributes:
                continue
            if wanted_title == attributes['title'].lower() and \
                    (not self.video.year or self.video.year == int(attributes['year'])):
                title_id = result['id']
                break

        if title_id:
            logger.debug('Found this title ID: %s', title_id)
            return self.sanitize_external_ids(title_id)

        logger.debug('No match found for %s', title)
        return None

    def query(self, languages, video):
        """Search subtitles for ``video`` in ``languages``.

        The search uses the IMDB id when the video has one and otherwise the
        feature id found by :meth:`search_titles`; without either, an empty
        list is returned.

        :return: list of :class:`OpenSubtitlesComSubtitle`.
        """
        self.video = video
        if self.use_hash:
            file_hash = self.video.hashes.get('opensubtitlescom')
            logger.debug('Searching using this hash: %s', file_hash)
        else:
            file_hash = None

        is_episode = isinstance(self.video, Episode)
        title = self.video.series if is_episode else self.video.title

        imdb_id = None
        if is_episode and self.video.series_imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.series_imdb_id)
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.imdb_id)

        title_id = None
        if not imdb_id:
            title_id = self.search_titles(title)
            if not title_id:
                return []

        lang_strings = [str(lang.basename) for lang in languages]
        only_foreign = all(lang.forced for lang in languages)
        also_foreign = any(lang.forced for lang in languages)
        if only_foreign:
            forced = 'only'
        elif also_foreign:
            forced = 'include'
        else:
            forced = 'exclude'

        langs = ','.join(lang_strings)
        logger.debug('Searching for this languages: %s', lang_strings)

        # query the server; the order of the parameters is kept as it was
        if is_episode:
            res = self.retry(lambda: checked(lambda: self.session.get(
                self.server_url + 'subtitles',
                params=(('episode_number', self.video.episode),
                        ('foreign_parts_only', forced),
                        ('imdb_id', imdb_id if not title_id else None),
                        ('languages', langs.lower()),
                        ('moviehash', file_hash),
                        ('parent_feature_id', title_id if title_id else None),
                        ('query', os.path.basename(self.video.name).lower()),
                        ('season_number', self.video.season)),
                timeout=30),
                validate_json=True, json_key_name='data'), amount=retry_amount)
        else:
            res = self.retry(lambda: checked(lambda: self.session.get(
                self.server_url + 'subtitles',
                params=(('foreign_parts_only', forced),
                        ('id', title_id if title_id else None),
                        ('imdb_id', imdb_id if not title_id else None),
                        ('languages', langs.lower()),
                        ('moviehash', file_hash),
                        ('query', os.path.basename(self.video.name).lower())),
                timeout=30),
                validate_json=True, json_key_name='data'), amount=retry_amount)

        subtitles = []

        result = res.json()
        logger.debug('Query returned %s subtitles', len(result['data']))

        for item in result['data']:
            attributes = item['attributes']
            details = attributes['feature_details']

            # entries without any file cannot be downloaded
            if not attributes['files']:
                continue

            subtitle = OpenSubtitlesComSubtitle(
                language=Language.fromietf(attributes['language']),
                forced=attributes['foreign_parts_only'],
                hearing_impaired=attributes['hearing_impaired'],
                page_link=attributes['url'],
                file_id=attributes['files'][0]['file_id'],
                releases=attributes['release'],
                uploader=attributes['uploader']['name'],
                title=details['movie_name'],
                year=details['year'],
                season=details.get('season_number'),
                episode=details.get('episode_number'),
                hash_matched=attributes.get('moviehash_match', False))
            subtitle.get_matches(self.video)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Return the subtitles found for ``video`` in ``languages``."""
        return self.query(languages, video)

    def download_subtitle(self, subtitle):
        """Download the content of ``subtitle`` and store it in ``subtitle.content``.

        ``subtitle.content`` is set to ``None`` when the file request yields
        a falsy response.

        :raise ProviderError: when no authentication token can be obtained.
        """
        if self._token_missing():
            logger.debug("No cached token, we'll try to login again.")
            self.login()
            if self._token_missing():
                logger.debug("Unable to obtain an authentication token right now, we'll try again later.")
                raise ProviderError("Unable to obtain an authentication token")

        logger.info('Downloading subtitle %r', subtitle)

        headers = {'Accept': 'application/json', 'Content-Type': 'application/json',
                   'Authorization': 'Bearer ' + self.token}
        res = self.retry(lambda: checked(lambda: self.session.post(
            self.server_url + 'download',
            json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
            headers=headers,
            timeout=30),
            validate_json=True, json_key_name='link'), amount=retry_amount)

        download_data = res.json()
        subtitle.download_link = download_data['link']

        r = self.retry(lambda: checked(lambda: self.session.get(
            subtitle.download_link, timeout=30),
            validate_content=True), amount=retry_amount)

        if not r:
            logger.debug('Could not download subtitle from %s', subtitle.download_link)
            subtitle.content = None
            return

        subtitle.content = fix_line_ending(r.content)
class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
    """OpenSubtitles XML-RPC provider with token reuse and an optional VIP server.

    The login token is cached under ``os_token`` and reused when the server
    still accepts it. When the VIP server refuses the login, the default
    server is tried instead.
    """
    only_foreign = False
    subtitle_class = OpenSubtitlesSubtitle
    hash_verifiable = True
    hearing_impaired_verifiable = True
    skip_wrong_fps = True
    is_vip = False

    default_url = "https://api.opensubtitles.org/xml-rpc"
    vip_url = "https://vip-api.opensubtitles.org/xml-rpc"

    languages = {
        Language.fromopensubtitles(code)
        for code in language_converters['szopensubtitles'].codes
    }  # | { #Language.fromietf("sr-latn"), Language.fromietf("sr-cyrl")}

    def __init__(self, username=None, password=None, use_tag_search=False, only_foreign=False,
                 skip_wrong_fps=True, is_vip=False):
        """Store the settings; credentials are optional but must come as a pair.

        :raise ConfigurationError: when only one of username/password is given.
        """
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username or ''
        self.password = password or ''
        self.use_tag_search = use_tag_search
        self.only_foreign = only_foreign
        self.skip_wrong_fps = skip_wrong_fps
        self.token = None
        self.is_vip = is_vip

        if use_tag_search:
            logger.info("Using tag/exact filename search")

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

    def get_server_proxy(self, url, timeout=10):
        """Create the XML-RPC proxy for ``url``."""
        return ServerProxy(url, SubZeroTransport(timeout, url))

    def log_in(self, server_url=None):
        """Log in and cache the received token under ``os_token``.

        :param server_url: when given, the current connection is terminated
            and a new proxy for this URL is used for the login.
        """
        if server_url:
            self.terminate()
            self.server = self.get_server_proxy(server_url)

        response = self.retry(lambda: checked(
            self.server.LogIn(self.username, self.password, 'eng',
                              os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))))

        self.token = response['token']
        logger.debug('Logged in with token %r', self.token)

        region.set("os_token", self.token)

    def use_token_or_login(self, func):
        """Call ``func``, logging in first or again when needed.

        Without a token a login is done before the call; when the call
        raises ``Unauthorized`` a new login is done and ``func`` is called
        once more.
        """
        if not self.token:
            self.log_in()
            return func()
        try:
            return func()
        except Unauthorized:
            self.log_in()
            return func()

    def initialize(self):
        """Connect to the configured server and obtain a usable token."""
        if self.is_vip:
            self.server = self.get_server_proxy(self.vip_url)
            logger.info("Using VIP server")
        else:
            self.server = self.get_server_proxy(self.default_url)

        logger.info('Logging in')

        token = region.get("os_token", expiration_time=3600)
        if token is not NO_VALUE:
            try:
                checked(self.server.NoOperation(token))
                self.token = token
                logger.info("Using previous login token: %s", self.token)
                return
            except Exception:
                # best effort only: any failure here simply leads to a fresh login
                logger.debug("Previous login token could not be reused, logging in again")

        try:
            self.log_in()

        except Unauthorized:
            if self.is_vip:
                logger.info("VIP server login failed, falling back")
                self.log_in(self.default_url)
                if self.token:
                    return

            logger.error("Login failed, please check your credentials")

    def terminate(self):
        """Close the server connection (best effort) and forget the token."""
        try:
            # 'server' only exists once initialize() or log_in(url) has run
            server = getattr(self, 'server', None)
            if server:
                server.close()
        except Exception:
            # closing is best effort; the token is dropped either way
            logger.debug("Ignoring error while closing the server connection")

        self.token = None

    def list_subtitles(self, video, languages):
        """Search subtitles for ``video``.

        patch: query movies even if hash is known; add tag parameter

        :param video: the video to search subtitles for.
        :param languages: wanted languages.
        :return: list of subtitles as returned by :meth:`query`.
        """
        season = episode = None
        if isinstance(video, Episode):
            query = video.series
            season = video.season
            # multi-episode files are searched by their lowest episode number
            episode = min(video.episode) if isinstance(video.episode, list) else video.episode

            if video.is_special:
                season = None
                episode = None
                query = u"%s %s" % (video.series, video.title)
                logger.info("%s: Searching for special: %r", self.__class__, query)
        # elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
        #    query = video.name.split(os.sep)[-1]
        else:
            query = video.title

        return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size,
                          imdb_id=video.imdb_id, query=query, season=season, episode=episode,
                          tag=video.original_name, use_tag_search=self.use_tag_search,
                          only_foreign=self.only_foreign)

    def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None,
              tag=None, use_tag_search=False, only_foreign=False):
        """Run a ``SearchSubtitles`` call and build subtitle objects from the hits.

        One criterion is sent per usable piece of information (hash + size,
        tag, IMDB id, title query).

        :param only_foreign: when true only foreign-parts-only subtitles are
            kept, otherwise only regular ones.
        :return: list of ``self.subtitle_class`` instances.
        :raise ValueError: when no search criterion could be built.
        """
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            if season and episode:
                criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
            else:
                criteria.append({'imdbid': imdb_id[2:]})
        if query and season and episode:
            criteria.append({'query': query.replace('\'', ''), 'season': season, 'episode': episode})
        elif query:
            criteria.append({'query': query.replace('\'', '')})
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)
        response = self.use_token_or_login(lambda: self.retry(lambda: checked(
            self.server.SearchSubtitles(self.token, criteria))))

        subtitles = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return subtitles

        # loop over subtitle items
        for subtitle_item in response['data']:
            # read the item
            language = Language.fromopensubtitles(subtitle_item['SubLanguageID'])
            hearing_impaired = bool(int(subtitle_item['SubHearingImpaired']))
            page_link = subtitle_item['SubtitlesLink']
            subtitle_id = int(subtitle_item['IDSubtitleFile'])
            matched_by = subtitle_item['MatchedBy']
            movie_kind = subtitle_item['MovieKind']
            hash = subtitle_item['MovieHash']
            movie_name = subtitle_item['MovieName']
            movie_release_name = subtitle_item['MovieReleaseName']
            movie_year = int(subtitle_item['MovieYear']) if subtitle_item['MovieYear'] else None
            movie_imdb_id = 'tt' + subtitle_item['IDMovieImdb']
            movie_fps = subtitle_item.get('MovieFPS')
            series_season = int(subtitle_item['SeriesSeason']) if subtitle_item['SeriesSeason'] else None
            series_episode = int(subtitle_item['SeriesEpisode']) if subtitle_item['SeriesEpisode'] else None
            filename = subtitle_item['SubFileName']
            encoding = subtitle_item.get('SubEncoding') or None
            foreign_parts_only = bool(int(subtitle_item.get('SubForeignPartsOnly', 0)))

            # foreign/forced subtitles only wanted
            if only_foreign and not foreign_parts_only:
                continue

            # foreign/forced not wanted
            if not only_foreign and foreign_parts_only:
                continue

            query_parameters = subtitle_item.get("QueryParameters")

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                           movie_kind, hash, movie_name, movie_release_name, movie_year,
                                           movie_imdb_id, series_season, series_episode, query_parameters,
                                           filename, encoding, movie_fps, skip_wrong_fps=self.skip_wrong_fps)
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            subtitles.append(subtitle)

        return subtitles

    def download_subtitle(self, subtitle):
        """Download ``subtitle`` through the parent class, logging in (again) when needed."""
        # explicit super() arguments are required: the zero-argument form
        # does not work inside a lambda
        return self.use_token_or_login(
            lambda: super(OpenSubtitlesProvider, self).download_subtitle(subtitle))
def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None, tag=None,
          use_tag_search=False, only_foreign=False):
    """Search the OpenSubtitles server and build subtitle objects from the hits.

    A criterion is created for each usable piece of information (hash + size,
    tag, IMDB id, title query) and all of them are sent in one
    ``SearchSubtitles`` call.

    :param languages: languages to ask the server for.
    :param hash: movie hash, only used together with ``size``.
    :param size: movie size in bytes.
    :param imdb_id: IMDB id; its first two characters are cut off before sending.
    :param query: title string to search for.
    :param season: season number; only sent when ``episode`` is set as well.
    :param episode: episode number; only sent when ``season`` is set as well.
    :param tag: tag to search for when ``use_tag_search`` is enabled.
    :param use_tag_search: whether ``tag`` is added as a criterion.
    :param only_foreign: when true only foreign-parts-only subtitles are kept,
        otherwise only regular ones.
    :return: list of ``self.subtitle_class`` instances.
    :raise ValueError: when no search criterion could be built.
    """
    with_episode = bool(season and episode)

    def _scoped(criterion):
        # season/episode are attached only when both are known
        if with_episode:
            criterion['season'] = season
            criterion['episode'] = episode
        return criterion

    def _number(raw):
        # empty values in the response mean "unknown"
        return int(raw) if raw else None

    search_terms = []
    if hash and size:
        search_terms.append({'moviehash': hash, 'moviebytesize': str(size)})
    if use_tag_search and tag:
        search_terms.append({'tag': tag})
    if imdb_id:
        search_terms.append(_scoped({'imdbid': imdb_id[2:]}))
    if query:
        search_terms.append(_scoped({'query': query.replace('\'', '')}))
    if not search_terms:
        raise ValueError('Not enough information')

    # every criterion carries the same comma separated language list
    for term in search_terms:
        term['sublanguageid'] = ','.join(sorted(lang.opensubtitles for lang in languages))

    logger.info('Searching subtitles %r', search_terms)

    def _search():
        return self.retry(lambda: checked(self.server.SearchSubtitles(self.token, search_terms)))

    response = self.use_token_or_login(_search)

    collected = []
    if not response['data']:
        logger.info('No subtitles found')
        return collected

    for item in response['data']:
        language = Language.fromopensubtitles(item['SubLanguageID'])
        hearing_impaired = bool(int(item['SubHearingImpaired']))
        page_link = item['SubtitlesLink']
        subtitle_id = int(item['IDSubtitleFile'])
        matched_by = item['MatchedBy']
        movie_kind = item['MovieKind']
        movie_hash = item['MovieHash']
        movie_name = item['MovieName']
        movie_release_name = item['MovieReleaseName']
        movie_year = _number(item['MovieYear'])
        movie_imdb_id = 'tt' + item['IDMovieImdb']
        movie_fps = item.get('MovieFPS')
        series_season = _number(item['SeriesSeason'])
        series_episode = _number(item['SeriesEpisode'])
        filename = item['SubFileName']
        encoding = item.get('SubEncoding') or None
        foreign_parts_only = bool(int(item.get('SubForeignPartsOnly', 0)))

        # keep the item only when its forced flag matches what was asked for
        if bool(only_foreign) != foreign_parts_only:
            continue

        query_parameters = item.get("QueryParameters")

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                       movie_kind, movie_hash, movie_name, movie_release_name, movie_year,
                                       movie_imdb_id, series_season, series_episode, query_parameters,
                                       filename, encoding, movie_fps, skip_wrong_fps=self.skip_wrong_fps)
        logger.debug('Found subtitle %r by %s', subtitle, matched_by)
        collected.append(subtitle)

    return collected