def download_subtitle(self, subtitle):
    logger.info('Downloading subtitle %r', subtitle)
    headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
    res = self.session.post(self.server_url + 'download',
                            json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
                            headers=headers,
                            timeout=10)
    # check the specific status codes before raise_for_status, otherwise they never fire
    if res.status_code == 429:
        raise TooManyRequests()
    elif res.status_code == 406:
        raise DownloadLimitExceeded("Daily download limit reached")
    res.raise_for_status()

    try:
        subtitle.download_link = res.json()['link']
    except ValueError:
        raise ProviderError('Invalid JSON returned by provider')
    else:
        r = self.session.get(subtitle.download_link, timeout=10)
        if r.status_code == 429:
            raise TooManyRequests()
        elif r.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        r.raise_for_status()

        subtitle_content = r.content
        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
def search_titles(self, title):
    title_id = None

    parameters = {'query': title.lower()}
    logger.debug('Searching using this title: {}'.format(title))

    results = self.session.get(self.server_url + 'features', params=parameters, timeout=30)

    if results.status_code == 401:
        logger.debug('Authentication failed: clearing cache and attempting to login.')
        region.delete("oscom_token")
        self.login()

        results = self.session.get(self.server_url + 'features', params=parameters, timeout=30)

    if results.status_code == 429:
        raise TooManyRequests()
    elif results.status_code == 503:
        raise ProviderError(results.reason)

    # deserialize results
    try:
        results_dict = results.json()['data']
    except ValueError:
        raise ProviderError('Invalid JSON returned by provider')
    else:
        # loop over results
        for result in results_dict:
            if 'title' not in result['attributes']:
                continue
            if isinstance(self.video, Episode):
                if fix_tv_naming(title).lower() == result['attributes']['title'].lower() and \
                        (not self.video.year or self.video.year == int(result['attributes']['year'])):
                    title_id = result['id']
                    break
            else:
                if fix_movie_naming(title).lower() == result['attributes']['title'].lower() and \
                        (not self.video.year or self.video.year == int(result['attributes']['year'])):
                    title_id = result['id']
                    break

        if title_id:
            logger.debug('Found this title ID: {}'.format(title_id))
            return self.sanitize_external_ids(title_id)
    finally:
        if not title_id:
            logger.debug('No match found for {}'.format(title))
def download_subtitle(self, subtitle):
    if self.token is NO_VALUE:
        logger.debug("No cached token, we'll try to login again.")
        self.login()
        if self.token is NO_VALUE:
            logger.debug("Unable to obtain an authentication token right now, we'll try again later.")
            raise ProviderError("Unable to obtain an authentication token")

    logger.info('Downloading subtitle %r', subtitle)

    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + self.token
    }
    res = self.session.post(self.server_url + 'download',
                            json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
                            headers=headers,
                            timeout=30)
    if res.status_code == 429:
        raise TooManyRequests()
    elif res.status_code == 406:
        raise DownloadLimitExceeded("Daily download limit reached")
    elif res.status_code == 503:
        raise ProviderError(res.reason)
    else:
        try:
            subtitle.download_link = res.json()['link']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            r = self.session.get(subtitle.download_link, timeout=30)
            if r.status_code == 429:
                raise TooManyRequests()
            elif r.status_code == 406:
                raise DownloadLimitExceeded("Daily download limit reached")
            elif r.status_code == 503:
                raise ProviderError(r.reason)

            subtitle_content = r.content
            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
            else:
                logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
def search_titles(self, title):
    title_id = None
    imdb_id = None

    if isinstance(self.video, Episode) and self.video.series_imdb_id:
        imdb_id = self.video.series_imdb_id
    elif isinstance(self.video, Movie) and self.video.imdb_id:
        imdb_id = self.video.imdb_id

    if imdb_id:
        parameters = {'imdb_id': imdb_id}
        logger.debug('Searching using this IMDB id: {}'.format(imdb_id))
    else:
        parameters = {'query': title}
        logger.debug('Searching using this title: {}'.format(title))

    results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

    # handle 401 before raise_for_status, otherwise the re-login path is unreachable
    if results.status_code == 401:
        logger.debug('Authentication failed: clearing cache and attempting to login.')
        region.delete("oscom_token")
        self.login()

        results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

    if results.status_code == 429:
        raise TooManyRequests()
    results.raise_for_status()

    # deserialize results
    try:
        results_dict = results.json()['data']
    except ValueError:
        raise ProviderError('Invalid JSON returned by provider')
    else:
        # loop over results
        for result in results_dict:
            if title.lower() == result['attributes']['title'].lower() and \
                    (not self.video.year or self.video.year == int(result['attributes']['year'])):
                title_id = result['id']
                break

        if title_id:
            logger.debug('Found this title ID: {}'.format(title_id))
            return title_id
    finally:
        if not title_id:
            logger.debug('No match found for {}'.format(title))
def download_subtitle(self, subtitle):
    try:
        # sleep for 1 second before another request
        sleep(1)
        res = self.session.get(subtitle.page_link)
        res.raise_for_status()
    except HTTPError as e:
        if "bloqueado" in res.text.lower():
            logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
            raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
        logger.error("Legendasdivx.pt :: HTTP Error %s", e)
        raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
    except Exception as e:
        logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
        raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

    # make sure we haven't maxed out our daily limit
    if res.status_code == 200 and 'limite de downloads diário atingido' in res.text.lower():
        logger.error("LegendasDivx.pt :: Daily download limit reached!")
        raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!")

    archive = self._get_archive(res.content)
    # extract the subtitle
    if archive:
        subtitle_content = self._get_subtitle_from_archive(archive, subtitle)
        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
            subtitle.normalize()
            return subtitle
    return
def api_request(self, func_name="logIn", params="", tries=5):
    headers = {
        "User-Agent": "BSPlayer/2.x (1022.12360)",
        "Content-Type": "text/xml; charset=utf-8",
        "Connection": "close",
        "SOAPAction": '"http://api.bsplayer-subtitles.com/v1.php#{func_name}"'.format(
            func_name=func_name
        ),
    }
    data = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" '
        'xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ns1="{search_url}">'
        '<SOAP-ENV:Body SOAP-ENV:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">'
        "<ns1:{func_name}>{params}</ns1:{func_name}></SOAP-ENV:Body></SOAP-ENV:Envelope>"
    ).format(search_url=self.search_url, func_name=func_name, params=params)

    logger.debug("Sending request: %s.", func_name)
    for _ in range(tries):
        try:
            self.session.headers.update(headers)
            res = self.session.post(self.search_url, data)
            return ElementTree.fromstring(res.text.strip())
        except Exception as ex:
            logger.error(f"Exception parsing response: {ex}")
            if func_name == "logIn":
                self.search_url = self.get_sub_domain()
            sleep(1)
    raise TooManyRequests(f"Too many retries: {tries}")
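# A minimal, self-contained sketch (not part of the provider) of how the
# ElementTree root returned by api_request() can be queried. The envelope
# below is handcrafted for illustration; the real BSPlayer response carries
# the same SOAP-ENV/return structure.
from xml.etree import ElementTree

sample = (
    '<?xml version="1.0" encoding="UTF-8"?>'
    '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/">'
    '<SOAP-ENV:Body><ns1:logInResponse xmlns:ns1="http://api.bsplayer-subtitles.com/v1.php">'
    '<return><result>200</result><data>session-token</data></return>'
    '</ns1:logInResponse></SOAP-ENV:Body></SOAP-ENV:Envelope>'
)
root = ElementTree.fromstring(sample)
# the <return> element carries no namespace, so a relative path search finds it
result = root.find(".//return/result").text
token = root.find(".//return/data").text
print(result, token)  # -> 200 session-token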
def login(self):
    try:
        r = self.session.post(self.server_url + 'login',
                              json={"username": self.username, "password": self.password},
                              allow_redirects=False,
                              timeout=10)
    except (ConnectionError, Timeout, ReadTimeout) as e:
        # don't reference r here: it is unbound when the request itself failed
        raise ServiceUnavailable('Unknown Error, empty response: %r' % e)
    else:
        if r.status_code == 200:
            try:
                self.token = r.json()['token']
            except ValueError:
                raise ProviderError('Invalid JSON returned by provider')
            else:
                self.session.headers.update({'Authorization': 'Bearer ' + self.token})
                region.set("oscom_token", self.token)
                return True
        elif r.status_code == 401:
            raise AuthenticationError('Login failed: {}'.format(r.reason))
        elif r.status_code == 429:
            raise TooManyRequests()
        else:
            raise ProviderError('Bad status code: {}'.format(r.status_code))
    return False
def query(self, show_id, series, season, year=None, country=None):
    # get the page of the season of the show
    logger.info('Getting the page of show id %d, season %d', show_id, season)
    r = self.session.get(self.server_url + 'ajax_loadShow.php',
                         params={'show': show_id, 'season': season},
                         timeout=10,
                         headers={
                             "referer": "%sshow/%s" % (self.server_url, show_id),
                             "X-Requested-With": "XMLHttpRequest"
                         })
    r.raise_for_status()

    if r.status_code == 304:
        raise TooManyRequests()

    if not r.content:
        # The provider wrongfully returns a status of 304 Not Modified with empty content;
        # raise_for_status won't raise an exception for that status code
        logger.error('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # loop over subtitle rows
    subtitles = []
    for row in soup.select('tr.epeven'):
        cells = row('td')

        # ignore incomplete subtitles
        status = cells[5].text
        if status != 'Completed':
            logger.debug('Ignoring subtitle with status %s', status)
            continue

        # read the item
        language = Language.fromaddic7ed(cells[3].text)
        hearing_impaired = bool(cells[6].text)
        page_link = self.server_url + cells[2].a['href'][1:]
        season = int(cells[0].text)
        episode = int(cells[1].text)
        title = cells[2].text
        version = cells[4].text
        download_link = cells[9].a['href'][1:]

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season,
                                       episode, title, year, version, download_link)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    soup.decompose()
    soup = None

    return subtitles
def login(self):
    logger.debug('Legendasdivx.pt :: Logging in')
    try:
        # sleep for 1 second before another request
        sleep(1)
        res = self.session.get(self.loginpage)
        res.raise_for_status()
        bsoup = ParserBeautifulSoup(res.content, ['lxml'])

        _allinputs = bsoup.findAll('input')
        data = {}
        # necessary to set 'sid' for POST request
        for field in _allinputs:
            data[field.get('name')] = field.get('value')

        # sleep for 1 second before another request
        sleep(1)
        data['username'] = self.username
        data['password'] = self.password
        res = self.session.post(self.loginpage, data)
        res.raise_for_status()

        # make sure we're logged in
        logger.debug('Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s',
                     self.session.cookies.get_dict()['PHPSESSID'])
        cj = self.session.cookies.copy()
        store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k", "phpbb3_2z8zs_u", "lang")
        for cn in iter(self.session.cookies.keys()):
            if cn not in store_cks:
                del cj[cn]

        # store session cookies on cache
        logger.debug("Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj)
        region.set("legendasdivx_cookies2", cj)

    except KeyError:
        logger.error("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
        raise AuthenticationError("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
    except HTTPError as e:
        if "bloqueado" in res.text.lower():
            logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
            raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
        logger.error("Legendasdivx.pt :: HTTP Error %s", e)
        raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
    except Exception as e:
        logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
        raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)
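# A minimal sketch of the other half of the cookie caching above, assuming the
# usual dogpile.cache pattern these providers follow: on the next run the cached
# jar is restored into a fresh Session so login() only happens on a cold cache.
# restore_cached_session is a hypothetical helper name; "legendasdivx_cookies2",
# region and NO_VALUE are the same names used by the surrounding code.
def restore_cached_session(self):
    from requests import Session

    self.session = Session()
    cached = region.get("legendasdivx_cookies2")
    if cached is NO_VALUE:
        self.login()  # cold cache: do a real login and repopulate it
    else:
        self.session.cookies.update(cached)  # reuse the stored PHPSESSID et al.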
def initialize(self):
    self.session = Session()
    self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

    if self.USE_ADDICTED_RANDOM_AGENTS:
        from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
        logger.debug("Addic7ed: using random user agents")
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

    # login
    if self.username and self.password:
        ccks = region.get("addic7ed_cookies", expiration_time=86400)
        if ccks != NO_VALUE:
            try:
                self.session.cookies._cookies.update(ccks)
                r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
                if r.status_code == 302:
                    logger.info('Addic7ed: Login expired')
                    region.delete("addic7ed_cookies")
                else:
                    logger.info('Addic7ed: Reusing old login')
                    self.logged_in = True
                    return
            except Exception:
                # fall through to a fresh login on any problem with the cached cookies
                pass

        logger.info('Addic7ed: Logging in')
        data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
        r = self.session.post(self.server_url + 'dologin.php', data,
                              allow_redirects=False, timeout=10,
                              headers={"Referer": self.server_url + "login.php"})

        # r.content is bytes; compare against the decoded text instead
        if "relax, slow down" in r.text:
            raise TooManyRequests(self.username)

        if r.status_code != 302:
            raise AuthenticationError(self.username)

        region.set("addic7ed_cookies", self.session.cookies._cookies)

        logger.debug('Addic7ed: Logged in')
        self.logged_in = True
def download_subtitle(self, subtitle):
    last_dls = region.get("addic7ed_dls")
    now = datetime.datetime.now()
    one_day = datetime.timedelta(hours=24)

    def raise_limit():
        logger.info("Addic7ed: Downloads per day exceeded (%s)", cap)
        raise DownloadLimitPerDayExceeded

    if not isinstance(last_dls, list):
        last_dls = []
    else:
        # keep only the downloads from the last 24 hours (filter returns an iterator in py3)
        last_dls = list(filter(lambda t: t + one_day > now, last_dls))
        region.set("addic7ed_dls", last_dls)

    cap = 80 if self.vip else 40
    amount = len(last_dls)

    if amount >= cap:
        raise_limit()

    # download the subtitle
    r = self.session.get(self.server_url + subtitle.download_link,
                         headers={'Referer': subtitle.page_link}, timeout=10)
    r.raise_for_status()

    if r.status_code == 304:
        raise TooManyRequests()

    if not r.content:
        # The provider wrongfully returns a status of 304 Not Modified with empty content;
        # raise_for_status won't raise an exception for that status code
        logger.error('Addic7ed: Unable to download subtitle. No data returned from provider')
        return

    # detect download limit exceeded
    if r.headers['Content-Type'] == 'text/html':
        raise DownloadLimitExceeded

    subtitle.content = fix_line_ending(r.content)

    last_dls.append(datetime.datetime.now())
    region.set("addic7ed_dls", last_dls)
    logger.info("Addic7ed: Used %s/%s downloads", amount + 1, cap)

    if amount + 1 >= cap:
        raise_limit()
def _search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :return: the show id, if found.
    :rtype: int
    """
    # addic7ed doesn't support search with quotes
    series = series.replace('\'', ' ')

    # build the params
    series_year = '%s %d' % (series, year) if year is not None else series
    params = {'search': series_year, 'Submit': 'Search'}

    # make the search
    logger.info('Searching show ids with %r', params)
    r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
    r.raise_for_status()

    if r.status_code == 304:
        raise TooManyRequests()

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    suggestion = None

    # get the suggestion
    try:
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None
        if not sanitize(suggestion[0].i.text.replace('\'', ' '),
                        default_characters=self.sanitize_characters) == \
                sanitize(series_year, default_characters=self.sanitize_characters):
            logger.warning('Show id not found: suggestion does not match')
            return None
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id
    finally:
        soup.decompose()
        soup = None
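# A minimal, self-contained sketch of the suggestion parsing above, run against
# a handcrafted HTML fragment. The selector and the href slicing are the same;
# plain bs4 stands in for ParserBeautifulSoup, which wraps it.
from bs4 import BeautifulSoup

html = '<span class="titulo"><a href="/show/1234"><i>Game of Thrones (2011)</i></a></span>'
demo_soup = BeautifulSoup(html, 'html.parser')
suggestion = demo_soup.select('span.titulo > a[href^="/show/"]')
show_id = int(suggestion[0]['href'][6:])  # strip the leading "/show/"
print(show_id)  # -> 1234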
def download_subtitle(self, subtitle):
    # download the subtitle
    r = self.session.get(self.server_url + subtitle.download_link,
                         headers={'Referer': subtitle.page_link}, timeout=10)
    r.raise_for_status()

    if r.status_code == 304:
        raise TooManyRequests()

    if not r.content:
        # The provider wrongfully returns a status of 304 Not Modified with empty content;
        # raise_for_status won't raise an exception for that status code
        logger.error('Unable to download subtitle. No data returned from provider')
        return

    # detect download limit exceeded
    if r.headers['Content-Type'] == 'text/html':
        raise DownloadLimitExceeded

    subtitle.content = fix_line_ending(r.content)
def query(self, languages, video):
    self.video = video
    if self.use_hash:
        # don't shadow the hash() builtin
        file_hash = self.video.hashes.get('opensubtitlescom')
        logging.debug('Searching using this hash: {}'.format(file_hash))
    else:
        file_hash = None

    if isinstance(self.video, Episode):
        title = self.video.series
    else:
        title = self.video.title

    title_id = self.search_titles(title)
    if not title_id:
        return []
    lang_strings = [str(lang) for lang in languages]
    langs = ','.join(lang_strings)
    logging.debug('Searching for these languages: {}'.format(lang_strings))

    # query the server
    if isinstance(self.video, Episode):
        res = self.session.get(self.server_url + 'subtitles',
                               params={'parent_feature_id': title_id,
                                       'languages': langs,
                                       'episode_number': self.video.episode,
                                       'season_number': self.video.season,
                                       'moviehash': file_hash},
                               timeout=10)
    else:
        res = self.session.get(self.server_url + 'subtitles',
                               params={'id': title_id,
                                       'languages': langs,
                                       'moviehash': file_hash},
                               timeout=10)

    if res.status_code == 429:
        raise TooManyRequests()
    res.raise_for_status()

    subtitles = []

    try:
        result = res.json()
    except ValueError:
        raise ProviderError('Invalid JSON returned by provider')
    else:
        logging.debug('Query returned {} subtitles'.format(len(result['data'])))

        if len(result['data']):
            for item in result['data']:
                if 'season_number' in item['attributes']['feature_details']:
                    season_number = item['attributes']['feature_details']['season_number']
                else:
                    season_number = None

                if 'episode_number' in item['attributes']['feature_details']:
                    episode_number = item['attributes']['feature_details']['episode_number']
                else:
                    episode_number = None

                if 'moviehash_match' in item['attributes']:
                    moviehash_match = item['attributes']['moviehash_match']
                else:
                    moviehash_match = False

                if len(item['attributes']['files']):
                    subtitle = OpenSubtitlesComSubtitle(
                        language=Language.fromietf(item['attributes']['language']),
                        hearing_impaired=item['attributes']['hearing_impaired'],
                        page_link=item['attributes']['url'],
                        file_id=item['attributes']['files'][0]['file_id'],
                        releases=item['attributes']['release'],
                        uploader=item['attributes']['uploader']['name'],
                        title=item['attributes']['feature_details']['movie_name'],
                        year=item['attributes']['feature_details']['year'],
                        season=season_number,
                        episode=episode_number,
                        hash_matched=moviehash_match
                    )
                    subtitle.get_matches(self.video)
                    subtitles.append(subtitle)

    return subtitles
def query_movie(self, movie_id, title, year=None):
    # get the page of the movie
    logger.info('Getting the page of movie id %s', movie_id)
    r = self.session.get(self.server_url + 'movie/' + movie_id,
                         timeout=10,
                         headers={
                             "referer": self.server_url,
                             "X-Requested-With": "XMLHttpRequest"
                         })
    r.raise_for_status()

    if r.status_code == 304:
        raise TooManyRequests()

    if not r.text:
        # The provider wrongfully returns a status of 304 Not Modified with empty content;
        # raise_for_status won't raise an exception for that status code
        logger.error('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # loop over subtitle rows
    tables = []
    subtitles = []
    for table in soup.find_all('table', {'align': 'center', 'border': '0',
                                         'class': 'tabel95', 'width': '100%'}):
        if table.find_all('td', {'class': 'NewsTitle'}):
            tables.append(table)

    for table in tables:
        row1 = table.contents[1]
        row2 = table.contents[4]
        row3 = table.contents[6]
        # other rows are useless

        # ignore incomplete subtitles
        status = row2.contents[6].text
        if "%" in status:
            logger.debug('Ignoring subtitle with status %s', status)
            continue

        # read the item
        try:
            language = Language.fromaddic7ed(row2.contents[4].text.strip('\n'))
        except babelfish.exceptions.LanguageReverseError as error:
            logger.debug("Language error: %s, Ignoring subtitle", error)
            continue

        hearing_impaired = bool(row3.contents[1].contents[1].attrs['src'].endswith('hi.jpg'))
        page_link = self.server_url + 'movie/' + movie_id

        # Seems like Addic7ed returns the first word in the language of the user (Version, Versión, etc.)
        # As we can't match a regex, we will just strip the first word
        try:
            version = " ".join(str(row1.contents[1].contents[1]).split()[1:])
            version_matches = re.search(r"(.+),.+", version)
            version = version_matches.group(1) if version_matches else None
        except IndexError:
            version = None

        try:
            download_link = row2.contents[8].contents[3].attrs['href'][1:]
        except IndexError:
            download_link = row2.contents[8].contents[2].attrs['href'][1:]
        uploader = row1.contents[2].contents[8].text.strip()

        # set subtitle language to hi if it's hearing_impaired
        if hearing_impaired:
            language = Language.rebuild(language, hi=True)

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, None, None, None,
                                       title, year, version, download_link, uploader)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    soup.decompose()
    soup = None

    return subtitles
def initialize(self):
    self.session = Session()
    self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

    from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
    logger.debug("Addic7ed: using random user agents")
    self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
    self.session.headers['Referer'] = self.server_url

    # login
    if self.username and self.password:
        def check_verification(cache_region):
            try:
                rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False,
                                      timeout=10, headers={"Referer": self.server_url})
                if rr.status_code == 302:
                    logger.info('Addic7ed: Login expired')
                    cache_region.delete("addic7ed_data")
                else:
                    logger.info('Addic7ed: Re-using old login')
                    self.logged_in = True
                    return True
            except ConnectionError as e:
                logger.debug("Addic7ed: There was a problem reaching the server: %s." % e)
                raise IPAddressBlocked("Addic7ed: Your IP is temporarily blocked.")

        if load_verification("addic7ed", self.session, callback=check_verification):
            return

        logger.info('Addic7ed: Logging in')
        data = {'username': self.username, 'password': self.password,
                'Submit': 'Log in', 'url': '', 'remember': 'true'}

        tries = 0
        while tries <= 3:
            tries += 1
            r = self.session.get(self.server_url + 'login.php', timeout=10,
                                 headers={"Referer": self.server_url})
            if "g-recaptcha" in r.text or "grecaptcha" in r.text:
                logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, '
                            'but should only happen once every so often')

                # guard against a non-matching pattern instead of crashing on .group()
                site_key = None
                for g, s in (("g-recaptcha-response", r'g-recaptcha.+?data-sitekey=\"(.+?)\"'),
                             ("recaptcha_response", r'grecaptcha.execute\(\'(.+?)\',')):
                    match = re.search(s, r.text)
                    if match:
                        site_key = match.group(1)
                        break
                if not site_key:
                    logger.error("Addic7ed: Captcha site-key not found!")
                    return

                pitcher = pitchers.get_pitcher()("Addic7ed", self.server_url + 'login.php',
                                                 site_key,
                                                 user_agent=self.session.headers["User-Agent"],
                                                 cookies=self.session.cookies.get_dict(),
                                                 is_invisible=True)

                result = pitcher.throw()
                if not result:
                    if tries >= 3:
                        raise Exception("Addic7ed: Couldn't solve captcha!")
                    logger.info("Addic7ed: Couldn't solve captcha! Retrying")
                    continue

                data[g] = result

            r = self.session.post(self.server_url + 'dologin.php', data,
                                  allow_redirects=False, timeout=10,
                                  headers={"Referer": self.server_url + "login.php"})

            if "relax, slow down" in r.text:
                raise TooManyRequests(self.username)

            if "Wrong password" in r.text or "doesn't exist" in r.text:
                raise AuthenticationError(self.username)

            if r.status_code != 302:
                if tries >= 3:
                    logger.error("Addic7ed: Something went wrong when logging in")
                    raise AuthenticationError(self.username)
                logger.info("Addic7ed: Something went wrong when logging in; retrying")
                continue
            break

        store_verification("addic7ed", self.session)

        logger.debug('Addic7ed: Logged in')
        self.logged_in = True
def _search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :return: the show id, if found.
    :rtype: int
    """
    # addic7ed doesn't support search with quotes
    series = series.replace('\'', ' ')

    # build the params
    series_year = '%s %d' % (series, year) if year is not None else series
    params = {'search': series_year, 'Submit': 'Search'}

    # make the search
    logger.info('Searching show ids with %r', params)

    # currently addic7ed searches via srch.php from the front page, then a re-search
    # is needed which calls search.php
    for endpoint in ("srch.php", "search.php",):
        headers = None
        if endpoint == "search.php":
            headers = {"referer": self.server_url + "srch.php"}
        r = self.session.get(self.server_url + endpoint, params=params, timeout=10, headers=headers)
        r.raise_for_status()

        if r.text and "Sorry, your search" not in r.text:
            break

        time.sleep(4)

    if r.status_code == 304:
        raise TooManyRequests()

    soup = ParserBeautifulSoup(r.text, ['lxml', 'html.parser'])

    suggestion = None

    # get the suggestion
    try:
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None
        if not sanitize(suggestion[0].i.text.replace('\'', ' '),
                        default_characters=self.sanitize_characters) == \
                sanitize(series_year, default_characters=self.sanitize_characters):
            logger.warning('Show id not found: suggestion does not match')
            return None
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id
    finally:
        soup.decompose()
        soup = None
def initialize(self):
    self.session = Session()
    self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

    from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
    logger.debug("Addic7ed: using random user agents")
    self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
    self.session.headers['Referer'] = self.server_url

    # login
    if self.username and self.password:
        def check_verification(cache_region):
            rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False,
                                  timeout=10, headers={"Referer": self.server_url})
            if rr.status_code == 302:
                logger.info('Addic7ed: Login expired')
                cache_region.delete("addic7ed_data")
            else:
                logger.info('Addic7ed: Re-using old login')
                self.logged_in = True
                return True

        if load_verification("addic7ed", self.session, callback=check_verification):
            return

        logger.info('Addic7ed: Logging in')
        data = {'username': self.username, 'password': self.password,
                'Submit': 'Log in', 'url': '', 'remember': 'true'}

        tries = 0
        while tries < 3:
            r = self.session.get(self.server_url + 'login.php', timeout=10,
                                 headers={"Referer": self.server_url})
            # r.content is bytes in py3; search the decoded text instead
            if "grecaptcha" in r.text:
                logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, '
                            'but should only happen once every so often')

                match = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.text)
                site_key = match.group(1) if match else None
                if not site_key:
                    logger.error("Addic7ed: Captcha site-key not found!")
                    return

                pitcher = pitchers.get_pitcher()("Addic7ed", self.server_url + 'login.php',
                                                 site_key,
                                                 user_agent=self.session.headers["User-Agent"],
                                                 cookies=self.session.cookies.get_dict(),
                                                 is_invisible=True)

                result = pitcher.throw()
                if not result:
                    raise Exception("Addic7ed: Couldn't solve captcha!")

                data["recaptcha_response"] = result

            r = self.session.post(self.server_url + 'dologin.php', data,
                                  allow_redirects=False, timeout=10,
                                  headers={"Referer": self.server_url + "login.php"})

            if "relax, slow down" in r.text:
                raise TooManyRequests(self.username)

            if r.status_code != 302:
                if "User <b></b> doesn't exist" in r.text and tries <= 2:
                    logger.info("Addic7ed: Error, trying again. (%s/%s)", tries + 1, 3)
                    tries += 1
                    continue

                raise AuthenticationError(self.username)
            break

        store_verification("addic7ed", self.session)

        logger.debug('Addic7ed: Logged in')
        self.logged_in = True
def query(self, languages, video):
    self.video = video
    if self.use_hash:
        file_hash = self.video.hashes.get('opensubtitlescom')
        # log the computed hash, not the hash() builtin
        logging.debug('Searching using this hash: {}'.format(file_hash))
    else:
        file_hash = None

    if isinstance(self.video, Episode):
        title = self.video.series
    else:
        title = self.video.title

    imdb_id = None
    if isinstance(self.video, Episode) and self.video.series_imdb_id:
        imdb_id = self.sanitize_external_ids(self.video.series_imdb_id)
    elif isinstance(self.video, Movie) and self.video.imdb_id:
        imdb_id = self.sanitize_external_ids(self.video.imdb_id)

    title_id = None
    if not imdb_id:
        title_id = self.search_titles(title)
        if not title_id:
            return []

    lang_strings = [str(lang.basename) for lang in languages]
    only_foreign = all([lang.forced for lang in languages])
    also_foreign = any([lang.forced for lang in languages])
    if only_foreign:
        forced = 'only'
    elif also_foreign:
        forced = 'include'
    else:
        forced = 'exclude'

    langs = ','.join(lang_strings)
    logging.debug('Searching for these languages: {}'.format(lang_strings))

    # query the server
    if isinstance(self.video, Episode):
        res = self.session.get(self.server_url + 'subtitles',
                               params=(('episode_number', self.video.episode),
                                       ('foreign_parts_only', forced),
                                       ('languages', langs.lower()),
                                       ('moviehash', file_hash),
                                       ('parent_feature_id', title_id) if title_id else ('imdb_id', imdb_id),
                                       ('season_number', self.video.season),
                                       ('query', os.path.basename(self.video.name))),
                               timeout=30)
    else:
        res = self.session.get(self.server_url + 'subtitles',
                               params=(('foreign_parts_only', forced),
                                       ('id', title_id) if title_id else ('imdb_id', imdb_id),
                                       ('languages', langs.lower()),
                                       ('moviehash', file_hash),
                                       ('query', os.path.basename(self.video.name))),
                               timeout=30)

    if res.status_code == 429:
        raise TooManyRequests()
    elif res.status_code == 503:
        raise ProviderError(res.reason)

    subtitles = []

    try:
        result = res.json()
        if 'data' not in result:
            raise ValueError
    except ValueError:
        raise ProviderError('Invalid JSON returned by provider')
    else:
        logging.debug('Query returned {} subtitles'.format(len(result['data'])))

        if len(result['data']):
            for item in result['data']:
                if 'season_number' in item['attributes']['feature_details']:
                    season_number = item['attributes']['feature_details']['season_number']
                else:
                    season_number = None

                if 'episode_number' in item['attributes']['feature_details']:
                    episode_number = item['attributes']['feature_details']['episode_number']
                else:
                    episode_number = None

                if 'moviehash_match' in item['attributes']:
                    moviehash_match = item['attributes']['moviehash_match']
                else:
                    moviehash_match = False

                if len(item['attributes']['files']):
                    subtitle = OpenSubtitlesComSubtitle(
                        language=Language.fromietf(item['attributes']['language']),
                        forced=item['attributes']['foreign_parts_only'],
                        hearing_impaired=item['attributes']['hearing_impaired'],
                        page_link=item['attributes']['url'],
                        file_id=item['attributes']['files'][0]['file_id'],
                        releases=item['attributes']['release'],
                        uploader=item['attributes']['uploader']['name'],
                        title=item['attributes']['feature_details']['movie_name'],
                        year=item['attributes']['feature_details']['year'],
                        season=season_number,
                        episode=episode_number,
                        hash_matched=moviehash_match)
                    subtitle.get_matches(self.video)
                    subtitles.append(subtitle)

    return subtitles
def checked(fn, raise_api_limit=False, validate_token=False, validate_json=False,
            json_key_name=None, validate_content=False):
    """Run :fn: and check the response status before returning it.

    :param fn: the function to make an API call to OpenSubtitles.com.
    :param raise_api_limit: if False, wait a bit and retry once when the API limit is hit;
        if True, raise immediately.
    :param validate_token: test if the token is valid and return 401 if not.
    :param validate_json: test if the response is valid JSON.
    :param json_key_name: test if the returned JSON contains a specific key.
    :param validate_content: test if the response has content (used with download).
    :return: the response.
    """
    response = None
    try:
        response = fn()
    except APIThrottled:
        if not raise_api_limit:
            logger.info("API request limit hit, waiting and trying again once.")
            time.sleep(2)
            return checked(fn, raise_api_limit=True)
        raise
    except (ConnectionError, Timeout, ReadTimeout):
        # response is None when the request itself failed, so don't dereference it blindly
        status = response.status_code if response is not None else None
        raise ServiceUnavailable(f'Unknown Error, empty response: {status}: {response}')
    except Exception:
        logging.exception('Unhandled exception raised.')
        raise ProviderError('Unhandled exception raised. Check log.')

    status_code = response.status_code

    if status_code == 401:
        if validate_token:
            return 401
        else:
            raise AuthenticationError(f'Login failed: {response.reason}')
    elif status_code == 403:
        raise ProviderError("Bazarr API key seems to be invalid")
    elif status_code == 406:
        raise DownloadLimitExceeded("Daily download limit reached")
    elif status_code == 410:
        raise ProviderError("Download link has expired")
    elif status_code == 429:
        raise TooManyRequests()
    elif status_code == 502:
        # this one should deal with Bad Gateway issues on their side
        raise APIThrottled()
    elif 500 <= status_code <= 599:
        raise ProviderError(response.reason)

    if status_code != 200:
        raise ProviderError(f'Bad status code: {status_code}')

    if validate_json:
        try:
            json_test = response.json()
        except JSONDecodeError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            if json_key_name not in json_test:
                raise ProviderError(f'Invalid JSON returned by provider: '
                                    f'no {json_key_name} key in returned json.')

    if validate_content:
        if not hasattr(response, 'content'):
            logging.error('Download link returned no content attribute.')
            return False
        elif not response.content:
            logging.error(f'This download link returned empty content: {response.url}')
            return False

    return response
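# A minimal, self-contained sketch of the checked() contract, using a tiny
# stand-in response object instead of a live OpenSubtitles.com call. The
# FakeResponse class is hypothetical and exists only for illustration; the
# keyword arguments mirror the real signature above.
class FakeResponse:
    status_code = 200
    reason = 'OK'
    content = b'{"data": []}'
    url = 'https://example.invalid/subtitles'

    @staticmethod
    def json():
        return {"data": []}

# checked() retries the lambda once on throttling, maps bad status codes to
# typed provider exceptions, and optionally validates the JSON body.
response = checked(lambda: FakeResponse(), validate_json=True, json_key_name='data')
assert response.json()['data'] == []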
def query(self, video, languages):
    _searchurl = self.searchurl
    subtitles = []

    if isinstance(video, Movie):
        querytext = video.imdb_id if video.imdb_id else video.title

    if isinstance(video, Episode):
        querytext = '{} S{:02d}E{:02d}'.format(video.series, video.season, video.episode)
        querytext = quote(querytext.lower())

    # language query filter
    if not isinstance(languages, (tuple, list, set)):
        languages = [languages]

    for language in languages:
        logger.debug("Legendasdivx.pt :: searching for %s subtitles.", language)
        language_id = language.opensubtitles
        if 'por' in language_id:
            lang_filter = '&form_cat=28'
        elif 'pob' in language_id:
            lang_filter = '&form_cat=29'
        else:
            lang_filter = ''

        # use a per-language query so filters don't accumulate across iterations
        search_text = querytext + lang_filter if lang_filter else querytext

        try:
            # sleep for 1 second before another request
            sleep(1)
            self.headers['Referer'] = self.site + '/index.php'
            self.session.headers.update(self.headers)
            res = self.session.get(_searchurl.format(query=search_text), allow_redirects=False)
            res.raise_for_status()
            if res.status_code == 200 and "A legenda não foi encontrada" in res.text:
                logger.warning('Legendasdivx.pt :: query %s returned no results!', search_text)
                # for series, if no results are found, try again with just series and season (subtitle packs)
                if isinstance(video, Episode):
                    logger.debug("Legendasdivx.pt :: trying again with just series and season on query.")
                    search_text = re.sub(r"(e|E)(\d{2})", "", search_text)
                    # sleep for 1 second before another request
                    sleep(1)
                    res = self.session.get(_searchurl.format(query=search_text), allow_redirects=False)
                    res.raise_for_status()
                    if res.status_code == 200 and "A legenda não foi encontrada" in res.text:
                        logger.warning('Legendasdivx.pt :: query {0} returned no results for language {1} '
                                       '(for series and season only).'.format(search_text, language_id))
                        continue

            if res.status_code == 302:
                # got redirected to the login page:
                # seems that our session cookies are no longer valid... clean them from cache
                region.delete("legendasdivx_cookies2")
                logger.debug("Legendasdivx.pt :: Logging in again. Cookies have expired!")
                # login and try again
                self.login()
                # sleep for 1 second before another request
                sleep(1)
                res = self.session.get(_searchurl.format(query=search_text))
                res.raise_for_status()
        except HTTPError as e:
            if "bloqueado" in res.text.lower():
                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
                raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
        except Exception as e:
            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

        bsoup = ParserBeautifulSoup(res.content, ['html.parser'])

        # search for more than 10 results (legendasdivx uses pagination)
        # don't throttle - maximum results = 6 * 10
        MAX_PAGES = 6

        # get number of pages based on results found
        page_header = bsoup.find("div", {"class": "pager_bar"})
        results_found = 0
        if page_header:
            match = re.search(r'\((.*?) encontradas\)', page_header.text)
            if match:
                results_found = match.group(1)
        logger.debug("Legendasdivx.pt :: Found %s subtitles", str(results_found))
        num_pages = (int(results_found) // 10) + 1
        num_pages = min(MAX_PAGES, num_pages)

        # process first page
        subtitles += self._process_page(video, bsoup)

        # more pages?
        if num_pages > 1:
            for num_page in range(2, num_pages + 1):
                # sleep for 1 second before another request
                sleep(1)
                _search_next = _searchurl.format(query=search_text) + "&page={0}".format(str(num_page))
                logger.debug("Legendasdivx.pt :: Moving on to next page: %s", _search_next)
                res = self.session.get(_search_next)
                next_page = ParserBeautifulSoup(res.content, ['html.parser'])
                subs = self._process_page(video, next_page)
                subtitles.extend(subs)

    return subtitles