def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds):
    """Extract subtitle files from a zip/7z archive stream.

    Builds one SubsUnacsSubtitle per subtitle file found, skipping readme-like
    .txt files, and returns only the subtitles that pass is_valid().
    """
    subtitles = []
    # renamed from `type` to avoid shadowing the builtin
    media_type = 'episode' if isinstance(video, Episode) else 'movie'
    is_7zip = isinstance(archiveStream, SevenZipFile)
    if is_7zip:
        # SevenZipFile has no namelist(); readall() maps entry name -> file object
        file_content = archiveStream.readall()
        file_list = sorted(file_content)
    else:
        file_list = sorted(archiveStream.namelist())
    for file_name in file_list:
        # NOTE: original matched bare 'srt' (no dot), which also matched any
        # name merely ending in "srt"; require the extension dot instead
        if file_name.lower().endswith(('.srt', '.sub', '.txt')):
            file_is_txt = file_name.lower().endswith('.txt')
            if file_is_txt and re.search(
                    r'subsunacs\.net|танете част|прочети|^read ?me|procheti',
                    file_name, re.I):
                logger.info('Ignore readme txt file %r', file_name)
                continue
            logger.info('Found subtitle file %r', file_name)
            subtitle = SubsUnacsSubtitle(language, file_name, media_type, video,
                                         link, fps, num_cds)
            if is_7zip:
                subtitle.content = fix_line_ending(file_content[file_name].read())
            else:
                subtitle.content = fix_line_ending(archiveStream.read(file_name))
            if subtitle.is_valid():
                subtitles.append(subtitle)
    return subtitles
def download_subtitle(self, subtitle):
    """Download a Titulky subtitle, transparently unpacking rar/zip archives.

    Raises HTTPError on a failed download request and DownloadLimitExceeded
    when the provider returns an empty payload (its limit-exceeded behavior).
    """
    res = self.session.get(subtitle.download_link,
                           headers={'Referer': subtitle.page_link},
                           timeout=self.timeout)
    try:
        res.raise_for_status()
    except Exception as e:
        # chain the original failure instead of swallowing it with a bare except
        raise HTTPError(
            f"An error occured during the download request to {subtitle.download_link}"
        ) from e

    archive_stream = io.BytesIO(res.content)
    archive = None
    if rarfile.is_rarfile(archive_stream):
        logger.debug("Titulky.com: Identified rar archive")
        archive = rarfile.RarFile(archive_stream)
        subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
    elif zipfile.is_zipfile(archive_stream):
        logger.debug("Titulky.com: Identified zip archive")
        archive = zipfile.ZipFile(archive_stream)
        subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
    else:
        # plain (non-archived) subtitle payload
        subtitle_content = fix_line_ending(res.content)

    if not subtitle_content:
        logger.debug(
            "Titulky.com: No subtitle content found. The downloading limit has been most likely exceeded."
        )
        raise DownloadLimitExceeded("Subtitles download limit has been exceeded")

    subtitle.content = subtitle_content
def download_subtitle(self, subtitle):
    """Fetch the subtitle via the XMLRPC API and store its normalized content."""
    logger.info('Downloading subtitle %r', subtitle)
    response = self.use_token_or_login(
        lambda: checked(
            lambda: self.server.DownloadSubtitles(self.token,
                                                  [str(subtitle.subtitle_id)])))
    raw = base64.b64decode(response['data'][0]['data'])
    # wbits=47 lets zlib auto-detect gzip or zlib framing
    subtitle.content = fix_line_ending(zlib.decompress(raw, 47))
def _process_archive(self, archive_stream, subtitle):
    """Fill subtitle.content with the first valid .srt/.sub entry of the archive."""
    for entry in archive_stream.namelist():
        if not entry.lower().endswith(('.srt', '.sub')):
            continue
        logger.info('Found subtitle file %r', entry)
        subtitle.content = fix_line_ending(archive_stream.read(entry))
        # stop at the first entry that validates; otherwise keep trying
        if subtitle.is_valid():
            return
def download_subtitle(self, subtitle):
    """Download a subtitle from LegendasDivx and extract its content.

    Raises IPAddressBlocked, TooManyRequests, ServiceUnavailable or
    DownloadLimitExceeded on the corresponding provider error conditions.
    """
    try:
        # sleep for a 1 second before another request
        sleep(1)
        res = self.session.get(subtitle.page_link)
        res.raise_for_status()
    except HTTPError as e:
        if "bloqueado" in res.text.lower():
            logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
            raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
        logger.error("Legendasdivx.pt :: HTTP Error %s", e)
        # interpolate here: exception constructors do not apply %-style args,
        # so the original message was never formatted
        raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
    except Exception as e:
        logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
        raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)
    # make sure we haven't maxed out our daily limit
    if (res.status_code == 200 and 'limite de downloads diário atingido' in res.text.lower()):
        logger.error("LegendasDivx.pt :: Daily download limit reached!")
        raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!")
    archive = self._get_archive(res.content)
    # extract the subtitle
    if archive:
        subtitle_content = self._get_subtitle_from_archive(archive, subtitle)
        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
            subtitle.normalize()
            return subtitle
    return
def download_subtitle(self, subtitle):
    """Request a download link from the provider API, then fetch the subtitle.

    Raises TooManyRequests (429), DownloadLimitExceeded (406) or
    ProviderError (bad JSON) as appropriate.
    """
    logger.info('Downloading subtitle %r', subtitle)
    headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
    res = self.session.post(self.server_url + 'download',
                            json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
                            headers=headers,
                            timeout=10)
    # provider-specific codes first: the original called raise_for_status()
    # before these checks, which made the 429/406 branches unreachable
    if res.status_code == 429:
        raise TooManyRequests()
    elif res.status_code == 406:
        raise DownloadLimitExceeded("Daily download limit reached")
    res.raise_for_status()

    try:
        subtitle.download_link = res.json()['link']
    except ValueError:
        raise ProviderError('Invalid JSON returned by provider')

    r = self.session.get(subtitle.download_link, timeout=10)
    # check the *download* response; the original mistakenly re-tested `res`
    if r.status_code == 429:
        raise TooManyRequests()
    elif r.status_code == 406:
        raise DownloadLimitExceeded("Daily download limit reached")
    r.raise_for_status()

    subtitle_content = r.content
    if subtitle_content:
        subtitle.content = fix_line_ending(subtitle_content)
    else:
        logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
def _get_season_subtitles(self, show_id, season, sub_format):
    """Search ItaSA for season-pack subtitles and split the zip per episode.

    Returns a list of ItaSASubtitle objects, one per episode file found.
    """
    params = {
        'apikey': self.apikey,
        'show_id': show_id,
        'q': 'Stagione %%%d' % season,
        'version': sub_format
    }
    r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    if int(root.find('data/count').text) == 0:
        logger.warning('Subtitles for season not found, try with rip suffix')
        params['version'] = sub_format + 'rip'
        r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)
        if int(root.find('data/count').text) == 0:
            logger.warning('Subtitles for season not found')
            return []

    subs = []
    # Looking for subtitles in first page
    season_re = re.compile(r'.*?stagione 0*?%d.*' % season)
    for subtitle in root.findall('data/subtitles/subtitle'):
        if season_re.match(subtitle.find('name').text.lower()):
            logger.debug('Found season zip id %d - %r - %r',
                         int(subtitle.find('id').text),
                         subtitle.find('name').text,
                         subtitle.find('version').text)
            content = self._download_zip(int(subtitle.find('id').text))
            if not is_zipfile(io.BytesIO(content)):  # pragma: no cover
                # content is bytes: compare against bytes (str-in-bytes raises
                # TypeError on Python 3)
                if b'limite di download' in content:
                    raise DownloadLimitExceeded('You reached the download limit')
                else:
                    raise ConfigurationError('Not a zip file: %r' % content)
            with ZipFile(io.BytesIO(content)) as zf:
                # raw string so \d is a digit class, not a string escape
                episode_re = re.compile(r's(\d{1,2})e(\d{1,2})')
                for name in zf.namelist():
                    match = episode_re.search(name)
                    if not match:  # pragma: no cover
                        logger.debug('Cannot decode subtitle %r', name)
                    else:
                        sub = ItaSASubtitle(
                            int(subtitle.find('id').text),
                            subtitle.find('show_name').text,
                            int(match.group(1)),
                            int(match.group(2)),
                            None, None, None, name,
                        )
                        sub.content = fix_line_ending(zf.read(name))
                        subs.append(sub)
    return subs
def download_subtitle(self, subtitle):
    """Download a subtitle zip from regielive.ro and extract its content.

    Uses a fresh session with browser-like headers (the site rejects bare
    clients) and raises ValueError on server errors.
    """
    session = Session()
    _addheaders = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Origin': 'https://subtitrari.regielive.ro',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': 'https://subtitrari.regielive.ro',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache'
    }
    session.headers.update(_addheaders)
    # first request only to obtain session cookies
    res = session.get('https://subtitrari.regielive.ro')
    cookies = res.cookies
    _zipped = session.get(subtitle.page_link, cookies=cookies)
    if _zipped:
        if _zipped.text == '500':
            raise ValueError('Error 500 on server')
        archive = zipfile.ZipFile(io.BytesIO(_zipped.content))
        subtitle_content = self._get_subtitle_from_archive(archive)
        subtitle.content = fix_line_ending(subtitle_content)
        return subtitle
    # fixed typo in the error message ("conecting")
    raise ValueError('Problems connecting to the server')
def download_subtitle(self, subtitle: TuSubtituloSubtitle):
    """Fetch the subtitle file and store its content with normalized line endings."""
    # download the subtitle
    logger.info("Downloading subtitle %s", subtitle.download_link)
    referer = {"Referer": subtitle.page_link}
    response = self.session.get(subtitle.download_link, headers=referer, timeout=10)
    response.raise_for_status()
    subtitle.content = fix_line_ending(response.content)
def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
    """Pick the Latin or Cyrillic Serbian subtitle from a bundled archive.

    Entry names are classified case-insensitively by their '.cyr'/'.cir'
    markers; the first matching entry for the requested language wins.
    """
    sr_lat_subs = []
    sr_cyr_subs = []
    sub_to_extract = None

    for sub_name in subs_in_archive:
        _sub_name = sub_name.lower()
        if not ('.cyr' in _sub_name or '.cir' in _sub_name or 'cyr)' in _sub_name):
            sr_lat_subs.append(sub_name)
        # use the lowercased name consistently — the original mixed
        # sub_name/_sub_name here, so uppercase '.CYR' entries were missed
        if ('.cyr' in _sub_name or '.cir' in _sub_name) and '.lat' not in _sub_name:
            sr_cyr_subs.append(sub_name)

    if subtitle.language == 'sr':
        if len(sr_lat_subs) > 0:
            sub_to_extract = sr_lat_subs[0]
    if subtitle.language == 'sr-Cyrl':
        if len(sr_cyr_subs) > 0:
            sub_to_extract = sr_cyr_subs[0]

    logger.info(u'Using %s from the archive', sub_to_extract)
    subtitle.content = fix_line_ending(archive.read(sub_to_extract))
def _get_season_subtitles(self, show_id, season, sub_format):
    """Search ItaSA for season-pack subtitles and split the zip per episode.

    Returns a list of ItaSASubtitle objects, one per episode file found.
    Raises TooManyRequests when the download limit payload is returned.
    """
    params = {
        'apikey': self.apikey,
        'show_id': show_id,
        'q': 'Stagione %{}'.format(season),
        'version': sub_format
    }
    r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    if int(root.find('data/count').text) == 0:
        logger.warning('Subtitles for season not found, try with rip suffix')
        params['version'] = sub_format + 'rip'
        r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)
        if int(root.find('data/count').text) == 0:
            logger.warning('Subtitles for season not found')
            return []

    subs = []
    # Looking for subtitles in first page
    season_re = re.compile(r'.*?stagione 0*?{}.*'.format(season))
    for subtitle in root.findall('data/subtitles/subtitle'):
        if season_re.match(subtitle.find('name').text.lower()):
            logger.debug('Found season zip id %d - %r - %r',
                         int(subtitle.find('id').text),
                         subtitle.find('name').text,
                         subtitle.find('version').text)
            content = self._download_zip(int(subtitle.find('id').text))
            if not is_zipfile(io.BytesIO(content)):  # pragma: no cover
                # content is bytes: compare against bytes (str-in-bytes raises
                # TypeError on Python 3)
                if b'limite di download' in content:
                    raise TooManyRequests()
                else:
                    raise ConfigurationError('Not a zip file: {!r}'.format(content))
            with ZipFile(io.BytesIO(content)) as zf:
                # raw string so \d is a digit class; drop the unused enumerate index
                episode_re = re.compile(r's(\d{1,2})e(\d{1,2})')
                for name in zf.namelist():
                    match = episode_re.search(name)
                    if not match:  # pragma: no cover
                        logger.debug('Cannot decode subtitle %r', name)
                    else:
                        sub = ItaSASubtitle(
                            int(subtitle.find('id').text),
                            subtitle.find('show_name').text,
                            int(match.group(1)),
                            int(match.group(2)),
                            None, None, None, name)
                        sub.content = fix_line_ending(zf.read(name))
                        subs.append(sub)
    return subs
def download_subtitle(self, subtitle):
    """Download the subtitle, run the provider response check, store content."""
    logger.info("Downloading subtitle %r", subtitle)
    resp = self.session.get(subtitle.download_link,
                            headers={"Referer": subtitle.page_link},
                            timeout=10)
    resp.raise_for_status()
    # provider-specific validation (limits, error pages)
    self._check_response(resp)
    subtitle.content = fix_line_ending(resp.content)
def download_subtitle(self, subtitle):
    """Download, base64-decode and zlib-inflate the subtitle payload."""
    logger.info('Downloading subtitle %r', subtitle)

    def _request():
        # wrapped so use_token_or_login can retry after re-authentication
        return checked(lambda: self.server.DownloadSubtitles(
            self.token, [str(subtitle.subtitle_id)]))

    response = self.use_token_or_login(_request)
    payload = base64.b64decode(response['data'][0]['data'])
    # wbits=47: auto-detect zlib/gzip header
    subtitle.content = fix_line_ending(zlib.decompress(payload, 47))
def download_subtitle(self, subtitle):
    # Two-step download for Subs4Free: fetch the subtitle page, read the hidden
    # form id and the clickable image's size, then POST a simulated image click
    # to receive the actual subtitle payload (possibly an archive).
    if isinstance(subtitle, Subs4FreeSubtitle):
        # download the subtitle
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(subtitle.download_link,
                             headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug(
                'Unable to download subtitle. No data returned from provider'
            )
            return

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        # hidden form field carrying the subtitle id
        download_element = soup.select_one('input[name="id"]')
        image_element = soup.select_one('input[type="image"]')
        subtitle_id = download_element[
            'value'] if download_element else None
        # image dimensions bound the random click coordinates below
        width = int(str(
            image_element['width']).strip('px')) if image_element else 0
        height = int(str(
            image_element['height']).strip('px')) if image_element else 0

        if not subtitle_id:
            logger.debug(
                'Unable to download subtitle. No download link found')
            return

        self.apply_anti_block(subtitle)

        download_url = self.server_url + self.download_url
        # simulate a user click at a random point inside the image
        r = self.session.post(download_url,
                              data={
                                  'id': subtitle_id,
                                  'x': random.randint(0, width),
                                  'y': random.randint(0, height)
                              },
                              headers={'Referer': subtitle.download_link},
                              timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug(
                'Unable to download subtitle. No data returned from provider'
            )
            return

        # payload may be a rar/zip archive or a bare subtitle file
        archive = _get_archive(r.content)
        subtitle_content = _get_subtitle_from_archive(
            archive) if archive else r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not extract subtitle from %r', archive)
def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds):
    """Build a SubsSabBzSubtitle for every .srt/.sub entry in the archive."""
    subtitles = []
    # renamed from `type` to avoid shadowing the builtin (matches the
    # sibling YavkaNet helper's `media_type` naming)
    media_type = 'episode' if isinstance(video, Episode) else 'movie'
    for file_name in sorted(archiveStream.namelist()):
        if file_name.lower().endswith(('.srt', '.sub')):
            logger.info('Found subtitle file %r', file_name)
            subtitle = SubsSabBzSubtitle(language, file_name, media_type, video,
                                         link, fps, num_cds)
            subtitle.content = fix_line_ending(archiveStream.read(file_name))
            subtitles.append(subtitle)
    return subtitles
def download_subtitle(self, subtitle):
    """Make sure the parent archive is available, then read this subtitle from it."""
    # download archive in case we previously hit the releases cache and didn't download it
    if subtitle.archive.content is None:
        self.download_archive(subtitle.archive)

    # extract subtitle's content
    try:
        raw = subtitle.archive.content.read(subtitle.name)
    except (BadRarFile, BadZipfile):
        logger.error('Bad archive for %s', subtitle.name)
    else:
        subtitle.content = fix_line_ending(raw)
def process_archive_subtitle_files(archive_stream, language, video, link, fps, subs_id):
    """Collect a YavkaNetSubtitle for each .srt/.sub file inside the archive."""
    media_type = 'episode' if isinstance(video, Episode) else 'movie'
    subtitles = []
    for entry in archive_stream.namelist():
        if not entry.lower().endswith(('.srt', '.sub')):
            continue
        logger.info('Found subtitle file %r', entry)
        sub = YavkaNetSubtitle(language, entry, media_type, video, link, fps, subs_id)
        sub.content = fix_line_ending(archive_stream.read(entry))
        subtitles.append(sub)
    return subtitles
def download_subtitle(self, subtitle):
    """Download a compressed subtitle from BSPlayer and inflate it."""
    logger.info("Downloading subtitle %r", subtitle)
    bsplayer_headers = {
        "User-Agent": "BSPlayer/2.x (1022.12360)",
        "Content-Length": "0",
    }
    response = self.session.get(subtitle.page_link, headers=bsplayer_headers)
    # wbits=47 auto-detects zlib or gzip framing
    inflated = zlib.decompress(response.content, 47)
    subtitle.content = fix_line_ending(inflated)
def download_subtitle(self, subtitle):
    """Fetch and decompress the subtitle referenced by the page link."""
    logger.info('Downloading subtitle %r', subtitle)
    resp = self.session.get(
        subtitle.page_link,
        headers={'User-Agent': 'BSPlayer/2.x (1022.12360)', 'Content-Length': '0'},
    )
    # 47 = 32 + 15: let zlib detect the gzip/zlib header automatically
    subtitle.content = fix_line_ending(zlib.decompress(resp.content, 47))
def download_subtitle(self, subtitle):
    # Download the archive, then pick the subtitle file inside it that best
    # matches the wanted episode/season, source format and release group.
    r = self.session.get(subtitle.download_link, timeout=10)
    r.raise_for_status()

    # open the archive
    archive_stream = io.BytesIO(r.content)
    if is_rarfile(archive_stream):
        logger.debug('Archive identified as rar')
        archive = RarFile(archive_stream)
    elif is_zipfile(archive_stream):
        logger.debug('Archive identified as zip')
        archive = ZipFile(archive_stream)
    else:
        raise ProviderError('Unidentified archive type')

    # extract subtitle's content
    subs_in_archive = []
    for name in archive.namelist():
        for ext in (".srt", ".sub", ".ssa", ".ass"):
            if name.endswith(ext):
                subs_in_archive.append(name)

    # select the correct subtitle file
    matching_sub = None
    if len(subs_in_archive) == 1:
        matching_sub = subs_in_archive[0]
    else:
        for sub_name in subs_in_archive:
            guess = guessit(sub_name)

            # consider subtitle valid if:
            # - episode and season match
            # - format matches (if it was matched before)
            # - release group matches (and we asked for one and it was matched, or it was not matched)
            # NOTE(review): guess["episode"] / guess["season"] raise KeyError
            # when guessit cannot detect them from the filename — confirm this
            # is acceptable for the archives this provider serves.
            if guess["episode"] == subtitle.episode and guess[
                    "season"] == subtitle.season:
                format_matches = "format" not in subtitle.matches or \
                    ("format" in subtitle.matches and guess["format"].lower()
                     in subtitle.releases.lower())
                release_group_matches = True
                if subtitle.asked_for_release_group:
                    release_group_matches = "release_group" not in subtitle.matches or \
                        ("release_group" in subtitle.matches and
                         guess["release_group"].lower() == subtitle.asked_for_release_group.lower())
                if release_group_matches and format_matches:
                    matching_sub = sub_name
                    break

    if not matching_sub:
        raise ProviderError("None of expected subtitle found in archive")
    subtitle.content = fix_line_ending(archive.read(matching_sub))
def download_subtitle(self, subtitle):
    """Download the subtitle archive and extract the subtitle file from it."""
    logger.info("Downloading subtitle %r", subtitle)
    params = {"id": subtitle.download_id, "type": subtitle.download_type}
    response = self.session.get(f"{SERVER_URL}/download", params=params, timeout=10)
    response.raise_for_status()

    archive = self._get_archive(response.content)
    subtitle.content = fix_line_ending(self.get_file(archive))
def download_subtitle(self, subtitle):
    """Scrape the real download url, fetch it, and store the subtitle content."""
    logger.info("Downloading subtitle %r", subtitle)
    target_url = self.scrape_download_url(subtitle.sub_dict)
    if not target_url:
        raise APIThrottled("Can't scrape download url")

    response = self.session.get(target_url, timeout=10, allow_redirects=True)
    self._check_response(response)
    subtitle.content = fix_line_ending(response.content)
def download_subtitle(self, subtitle):
    """Download the subtitle as ISO-8859-1 text and store it when non-empty."""
    logger.info('Downloading subtitle %r', subtitle)
    resp = self.session.get(subtitle.download_link, timeout=10)
    resp.raise_for_status()
    # provider serves Latin-1; force decoding before reading .text
    resp.encoding = "ISO-8859-1"
    text = resp.text
    if not text:
        logger.debug('Could not download subtitle from %s', subtitle.download_link)
        return
    subtitle.content = fix_line_ending(text)
def download_subtitle(self, subtitle):
    """Download from opensubtitles.com: obtain a link via the API, then fetch it.

    Re-authenticates when no token is cached. Raises ProviderError,
    TooManyRequests or DownloadLimitExceeded on the matching API errors.
    """
    if self.token is NO_VALUE:
        logger.debug("No cached token, we'll try to login again.")
        self.login()
        if self.token is NO_VALUE:
            logger.debug(
                "Unable to obtain an authentication token right now, we'll try again later."
            )
            raise ProviderError("Unable to obtain an authentication token")

    logger.info('Downloading subtitle %r', subtitle)

    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
        'Authorization': 'Beaker ' + self.token
    }
    res = self.session.post(self.server_url + 'download',
                            json={
                                'file_id': subtitle.file_id,
                                'sub_format': 'srt'
                            },
                            headers=headers,
                            timeout=30)
    if res.status_code == 429:
        raise TooManyRequests()
    elif res.status_code == 406:
        raise DownloadLimitExceeded("Daily download limit reached")
    elif res.status_code == 503:
        raise ProviderError(res.reason)

    try:
        subtitle.download_link = res.json()['link']
    except ValueError:
        raise ProviderError('Invalid JSON returned by provider')

    r = self.session.get(subtitle.download_link, timeout=30)
    # check the *download* response; the original re-tested `res` (the POST),
    # leaving failures on this second request completely undetected
    if r.status_code == 429:
        raise TooManyRequests()
    elif r.status_code == 406:
        raise DownloadLimitExceeded("Daily download limit reached")
    elif r.status_code == 503:
        raise ProviderError(r.reason)

    subtitle_content = r.content
    if subtitle_content:
        subtitle.content = fix_line_ending(subtitle_content)
    else:
        logger.debug('Could not download subtitle from {}'.format(
            subtitle.download_link))
def download_subtitle(self, subtitle):
    """Download content for an XSubsSubtitle; no-op for other subtitle types."""
    if not isinstance(subtitle, XSubsSubtitle):
        return
    # download the subtitle
    logger.info('Downloading subtitle %r', subtitle)
    resp = self.session.get(subtitle.download_link,
                            headers={'Referer': subtitle.page_link},
                            timeout=10)
    resp.raise_for_status()
    if not resp.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return
    subtitle.content = fix_line_ending(resp.content)
def download_subtitle(self, subtitle):
    """Download the subtitle, unpacking an archive when one is returned."""
    logger.info('Downloading subtitle %r', subtitle)
    resp = self.session.get(subtitle.download_link, timeout=10)
    resp.raise_for_status()

    archive = _get_archive(resp.content)
    if archive:
        payload = _get_subtitle_from_archive(archive)
    else:
        # response was already a bare subtitle file
        payload = resp.content

    if not payload:
        logger.debug('Could not extract subtitle from %r', archive)
        return
    subtitle.content = fix_line_ending(payload)
def download_subtitle(self, subtitle):
    # Two-step download for Subs4Series: fetch the subtitle page, locate the
    # real download target (direct anchor, or a POST form as fallback), then
    # request the archive and extract its content.
    if isinstance(subtitle, Subs4SeriesSubtitle):
        # download the subtitle
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(subtitle.download_link,
                             headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug(
                'Unable to download subtitle. No data returned from provider'
            )
            return

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        download_element = soup.select_one('a.style55ws')
        if not download_element:
            # no direct link on the page — fall back to the POST form's action
            download_element = soup.select_one('form[method="post"]')
            target = download_element[
                'action'] if download_element else None
        else:
            target = download_element['href']

        if not target:
            logger.debug(
                'Unable to download subtitle. No download link found')
            return

        self.apply_anti_block(subtitle)

        download_url = self.server_url + target
        r = self.session.get(download_url,
                             headers={'Referer': subtitle.download_link},
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug(
                'Unable to download subtitle. No data returned from provider'
            )
            return

        # payload may be a rar/zip archive or a bare subtitle file
        archive = _get_archive(r.content)
        subtitle_content = _get_subtitle_from_archive(
            archive) if archive else r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not extract subtitle from %r', archive)
def query(self, language, size, name, hash):
    # Query Napisy24 by file name/size/hash. The response body has the form
    # "<status block>||<zipped subtitle>", where the status block starts with
    # an OK-N code followed by '|'-separated "key:value" metadata pairs.
    params = {
        'postAction': 'CheckSub',
        'ua': self.username,
        'ap': self.password,
        'fs': size,
        'fh': hash,
        'fn': os.path.basename(name),
        'n24pref': 1
    }
    response = self.session.post(self.api_url, data=params, timeout=10)
    response.raise_for_status()

    # split status block from the binary zip payload
    response_content = response.content.split(b'||', 1)
    n24_data = response_content[0].decode()

    if n24_data[:2] != 'OK':
        if n24_data[:11] == 'login error':
            raise AuthenticationError('Login failed')
        logger.error('Unknown response: %s', response.content)
        return None

    # OK-0: nothing found; OK-1: video info only; OK-2: subtitles found;
    # OK-3: subtitles found but not from the Napisy24 database
    n24_status = n24_data[:4]
    if n24_status == 'OK-0':
        logger.info('No subtitles found')
        return None

    # remaining fields are "key:value" pairs separated by '|'
    subtitle_info = dict(p.split(':', 1) for p in n24_data.split('|')[1:])
    logger.debug('Subtitle info: %s', subtitle_info)

    if n24_status == 'OK-1':
        logger.info('No subtitles found but got video info')
        return None
    elif n24_status == 'OK-2':
        logger.info('Found subtitles')
    elif n24_status == 'OK-3':
        logger.info('Found subtitles but not from Napisy24 database')
        return None

    subtitle_content = response_content[1]
    subtitle = Napisy24Subtitle(language, hash,
                                'tt%s' % subtitle_info['imdb'].zfill(7),
                                subtitle_info['napisId'])
    # the payload is a zip archive holding a single subtitle file
    with ZipFile(BytesIO(subtitle_content)) as zf:
        subtitle.content = fix_line_ending(
            zf.open(zf.namelist()[0]).read())

    return subtitle
def download_subtitle(self, subtitle):
    # Download with a client-side rolling 24h counter kept in the `region`
    # cache; raises DownloadLimitPerDayExceeded once the cap (40, or 80 for
    # VIP accounts) is reached.
    # NOTE(review): `types.ListType`, `len(filter(...))` and storing the
    # filter result back into the cache are Python 2 idioms — on Python 3
    # `filter` returns an iterator and len() would fail. Confirm the target
    # interpreter before touching this block.
    last_dls = region.get("addic7ed_dls")
    now = datetime.datetime.now()
    one_day = datetime.timedelta(hours=24)

    def raise_limit():
        logger.info("Addic7ed: Downloads per day exceeded (%s)", cap)
        raise DownloadLimitPerDayExceeded

    if not isinstance(last_dls, types.ListType):
        last_dls = []
    else:
        # filter all non-expired DLs
        last_dls = filter(lambda t: t + one_day > now, last_dls)
        region.set("addic7ed_dls", last_dls)

    cap = self.vip and 80 or 40
    amount = len(last_dls)
    if amount >= cap:
        raise_limit()

    # download the subtitle
    r = self.session.get(self.server_url + subtitle.download_link,
                         headers={'Referer': subtitle.page_link},
                         timeout=10)
    r.raise_for_status()

    if r.status_code == 304:
        raise TooManyRequests()

    if not r.content:
        # Provider wrongful return a status of 304 Not Modified with an empty content
        # raise_for_status won't raise exception for that status code
        logger.error(
            'Addic7ed: Unable to download subtitle. No data returned from provider'
        )
        return

    # detect download limit exceeded
    if r.headers['Content-Type'] == 'text/html':
        raise DownloadLimitExceeded

    subtitle.content = fix_line_ending(r.content)
    # record this download in the rolling window
    last_dls.append(datetime.datetime.now())
    region.set("addic7ed_dls", last_dls)
    logger.info("Addic7ed: Used %s/%s downloads", amount + 1, cap)

    if amount + 1 >= cap:
        raise_limit()
def download_subtitle(self, subtitle):
    """Download the subtitle zip and extract its single .srt/.sub file."""
    # download
    url = 'http://zip.{}/{}.zip'.format(self.server_url, subtitle.subtitle_id)
    resp = self.session.get(url, headers={'Referer': subtitle.page_link}, timeout=10)
    resp.raise_for_status()

    # open the zip
    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        # keep only subtitle entries; drop info/readme files from the namelist
        candidates = [
            n for n in zf.namelist()
            if os.path.splitext(n)[1] in ['.srt', '.sub']
        ]
        if len(candidates) > 1:
            raise ProviderError('More than one file to unzip')
        subtitle.content = fix_line_ending(zf.read(candidates[0]))
def download_subtitle(self, subtitle):
    """Download the subtitle page, extract and normalize its content.

    Raises ValueError on server errors or connection problems.
    """
    res = self.session.get(subtitle.page_link)
    if res:
        if res.text == '500':
            raise ValueError('Error 500 on server')
        archive = self._get_archive(res.content)
        # extract the subtitle
        subtitle_content = self._get_subtitle_from_archive(archive)
        subtitle.content = fix_line_ending(subtitle_content)
        subtitle.normalize()
        return subtitle
    # fixed typo in the error message ("conecting")
    raise ValueError('Problems connecting to the server')
def download_subtitle(self, subtitle):
    """Download a Zimuku subtitle: try each mirror link on the download page
    until one returns a plausible payload, then unpack rar/zip archives.
    """
    if isinstance(subtitle, ZimukuSubtitle):
        # download the subtitle
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(subtitle.download_link,
                             headers={'Referer': subtitle.page_link},
                             timeout=30)
        r.raise_for_status()

        if not r.content:
            logger.debug(
                'Unable to download subtitle. No data returned from provider'
            )
            return

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])
        links = soup.find("div", {"class": "clearfix"}).find_all('a')
        # TODO: add settings for choice
        for down_link in links:
            # keep the href as str — the original encoded it to bytes, making
            # the str + bytes concatenation below fail on Python 3
            url = self.server_url + down_link.get('href')
            r = self.session.get(
                url, headers={'Referer': subtitle.download_link}, timeout=30)
            r.raise_for_status()
            # heuristic: a real subtitle/archive payload exceeds 1 KiB
            if len(r.content) > 1024:
                break

        archive_stream = io.BytesIO(r.content)
        archive = None
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
            subtitle_content = _get_subtitle_from_archive(archive)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
            subtitle_content = _get_subtitle_from_archive(archive)
        else:
            subtitle_content = r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not extract subtitle from %r', archive)
def download_subtitle(self, subtitle):
    """Download the subtitle, treating an HTML reply as a hit of the
    provider's download limit."""
    logger.info('Downloading subtitle %r', subtitle)
    resp = self.session.get(self.server_url + subtitle.download_link,
                            headers={'Referer': subtitle.page_link},
                            timeout=10)
    resp.raise_for_status()

    if not resp.content:
        # Provider returns a status of 304 Not Modified with an empty content
        # raise_for_status won't raise exception for that status code
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    # detect download limit exceeded
    if resp.headers['Content-Type'] == 'text/html':
        raise DownloadLimitExceeded

    subtitle.content = fix_line_ending(resp.content)
def download_subtitle(self, subtitle):
    """Download a zipped subtitle; a non-zip reply means the daily limit hit."""
    # download
    url = self.server_url + 'subtitle/download/{}/{}/'.format(
        subtitle.language.alpha2, subtitle.subtitle_id)
    params = {'v': subtitle.subtitle_version, 'key': subtitle.subtitle_key}
    resp = self.session.get(url, params=params,
                            headers={'Referer': subtitle.page_link}, timeout=10)
    resp.raise_for_status()

    # open the zip
    try:
        with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
            # drop informational .txt entries from the candidates
            entries = [n for n in zf.namelist() if not n.endswith('.txt')]
            if len(entries) > 1:
                raise ProviderError('More than one file to unzip')
            subtitle.content = fix_line_ending(zf.read(entries[0]))
    except zipfile.BadZipfile:
        # if no zip file was retrieved, daily downloads limit has exceeded
        raise ProviderError('Daily limit exceeded')
def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
    """Pick the Latin or Cyrillic Serbian subtitle from a bundled archive.

    Entry names are compared case-insensitively so markers like '.CYR' or
    '.Cir' are recognized as well (matching the lowercasing variant of this
    helper elsewhere in the codebase).
    """
    sr_lat_subs = []
    sr_cyr_subs = []
    sub_to_extract = None

    for sub_name in subs_in_archive:
        lowered = sub_name.lower()
        if not ('.cyr' in lowered or '.cir' in lowered):
            sr_lat_subs.append(sub_name)
        if ('.cyr' in lowered or '.cir' in lowered) and '.lat' not in lowered:
            sr_cyr_subs.append(sub_name)

    if subtitle.language == 'sr':
        if len(sr_lat_subs) > 0:
            sub_to_extract = sr_lat_subs[0]
    if subtitle.language == 'sr-Cyrl':
        if len(sr_cyr_subs) > 0:
            sub_to_extract = sr_cyr_subs[0]

    logger.info(u'Using %s from the archive', sub_to_extract)
    subtitle.content = fix_line_ending(archive.read(sub_to_extract))
def get_subtitle_from_archive(self, subtitle, archive):
    # Pick the archive entry that best matches the wanted episode, source
    # format and release group; falls back to "unsure" (format unknown) and
    # then to any leftover candidate. Returns the subtitle bytes or None.
    # NOTE(review): `types.ListType` is Python 2 only, and the
    # matching_sub.decode(...) calls below assume `matching_sub` is bytes —
    # on Python 3 namelist() entries are str. Confirm the target interpreter.
    # extract subtitle's content
    subs_in_archive = []
    for name in archive.namelist():
        for ext in (".srt", ".sub", ".ssa", ".ass"):
            if name.endswith(ext):
                subs_in_archive.append(name)

    # select the correct subtitle file
    matching_sub = None
    subs_unsure = []
    subs_fallback = []
    if len(subs_in_archive) == 1:
        matching_sub = subs_in_archive[0]
    else:
        for sub_name in subs_in_archive:
            guess = guessit(sub_name)
            sub_name_lower = sub_name.lower()

            # consider subtitle valid if:
            # - episode and season match
            # - format matches (if it was matched before)
            # - release group matches (and we asked for one and it was matched, or it was not matched)
            # - not asked for forced and "forced" not in filename
            is_episode = subtitle.asked_for_episode

            if not subtitle.language.forced:
                # skip forced subtitles when a non-forced language was asked for
                base, ext = os.path.splitext(sub_name_lower)
                if base.endswith("forced") or "forced" in guess.get("release_group", ""):
                    continue

            episodes = guess.get("episode")
            if is_episode and episodes and not isinstance(episodes, list):
                episodes = [episodes]

            if not is_episode or (
                    (
                        subtitle.episode in episodes
                        or (subtitle.is_pack and subtitle.asked_for_episode in episodes)
                    ) and guess.get("season") == subtitle.season):
                format_matches = True
                wanted_format_but_not_found = False

                if "format" in subtitle.matches:
                    format_matches = False
                    if isinstance(subtitle.releases, types.ListType):
                        releases = ",".join(subtitle.releases).lower()
                    else:
                        releases = subtitle.releases.lower()

                    if "format" not in guess:
                        # candidate has no detectable format — remember it as "unsure"
                        wanted_format_but_not_found = True
                    else:
                        formats = guess["format"]
                        if not isinstance(formats, types.ListType):
                            formats = [formats]

                        for f in formats:
                            format_matches = f.lower() in releases
                            if format_matches:
                                break

                release_group_matches = True
                if subtitle.is_pack or (subtitle.asked_for_release_group and
                                        ("release_group" in subtitle.matches or
                                         "hash" in subtitle.matches)):
                    if subtitle.asked_for_release_group:
                        asked_for_rlsgrp = subtitle.asked_for_release_group.lower()
                        if asked_for_rlsgrp:
                            release_group_matches = False
                            if asked_for_rlsgrp in sub_name_lower:
                                release_group_matches = True

                if release_group_matches and format_matches:
                    matching_sub = sub_name
                    break
                elif release_group_matches and wanted_format_but_not_found:
                    subs_unsure.append(sub_name)
                else:
                    subs_fallback.append(sub_name)

    # NOTE(review): a non-empty subs_unsure list overrides an exact
    # matching_sub found via break above — looks unintended; confirm.
    if not matching_sub and not subs_unsure and not subs_fallback:
        logger.error("None of expected subtitle found in archive")
        return
    elif subs_unsure:
        matching_sub = subs_unsure[0]
    elif subs_fallback:
        matching_sub = subs_fallback[0]

    # decode the entry name only for logging; try utf-8, then DOS cp437
    try:
        matching_sub_unicode = matching_sub.decode("utf-8")
    except UnicodeDecodeError:
        try:
            matching_sub_unicode = matching_sub.decode("cp437")
        except UnicodeDecodeError:
            matching_sub_unicode = matching_sub.decode("utf-8", errors='replace')

    logger.info(u"Using %s from the archive", matching_sub_unicode)
    return fix_line_ending(archive.read(matching_sub))
def download_subtitle(self, subtitle):
    """Download the subtitle file and store its normalized content."""
    logger.info('Downloading subtitle %r', subtitle)
    resp = self.session.get(subtitle.download_link, timeout=10)
    resp.raise_for_status()
    subtitle.content = fix_line_ending(resp.content)
def test_fix_line_ending():
    """fix_line_ending normalizes CRLF and bare CR to LF."""
    content = b'Text\r\nwith\rweird\nline ending\r\ncharacters'
    assert fix_line_ending(content) == b'Text\nwith\nweird\nline ending\ncharacters'
    # edge cases: empty input and already-normalized input pass through unchanged
    assert fix_line_ending(b'') == b''
    assert fix_line_ending(b'already\nnormalized\n') == b'already\nnormalized\n'
def query(self, series, season, episode, video_format, resolution, country=None):
    # Search ItaSA for a single-episode subtitle, falling back to season
    # packs when no per-episode result exists; downloads the result zips
    # (which may contain several subtitles each).
    # To make queries you need to be logged in
    if not self.logged_in:  # pragma: no cover
        raise ConfigurationError('Cannot query if not logged in')

    # get the show id
    show_id = self.get_show_id(series, country)
    if show_id is None:
        logger.error('No show id found for %r ', series)
        return []

    # get the page of the season of the show
    logger.info('Getting the subtitle of show id %d, season %d episode %d, format %r',
                show_id, season, episode, video_format)
    subtitles = []

    # Default format is SDTV
    if not video_format or video_format.lower() == 'hdtv':
        if resolution in ('1080i', '1080p', '720p'):
            sub_format = resolution
        else:
            sub_format = 'normale'
    else:
        sub_format = video_format.lower()

    # Look for year
    params = {
        'apikey': self.apikey
    }
    r = self.session.get(self.server_url + 'shows/' + str(show_id), params=params, timeout=30)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    # show metadata used to enrich the subtitles found below
    year = root.find('data/show/started').text
    if year:
        year = int(year.split('-', 1)[0])
    tvdb_id = root.find('data/show/id_tvdb').text
    if tvdb_id:
        tvdb_id = int(tvdb_id)

    params = {
        'apikey': self.apikey,
        'show_id': show_id,
        'q': '{0}x{1:02}'.format(season, episode),
        'version': sub_format
    }
    r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
    r.raise_for_status()
    root = etree.fromstring(r.content)

    if int(root.find('data/count').text) == 0:
        logger.warning('Subtitles not found, try with rip suffix')

        params['version'] = sub_format + 'rip'
        r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)
        if int(root.find('data/count').text) == 0:
            logger.warning('Subtitles not found, go season mode')

            # If no subtitle are found for single episode try to download all season zip
            subs = self._get_season_subtitles(show_id, season, sub_format)
            if subs:
                for subtitle in subs:
                    subtitle.format = video_format
                    subtitle.year = year
                    subtitle.tvdb_id = tvdb_id
                return subs
            else:
                return []

    # Looking for subtitles in first page
    for subtitle in root.findall('data/subtitles/subtitle'):
        if '{0}x{1:02}'.format(season, episode) in subtitle.find('name').text.lower():
            logger.debug('Found subtitle id %d - %r - %r',
                         int(subtitle.find('id').text),
                         subtitle.find('name').text,
                         subtitle.find('version').text)
            sub = ItaSASubtitle(
                int(subtitle.find('id').text),
                subtitle.find('show_name').text,
                season,
                episode,
                video_format,
                year,
                tvdb_id,
                subtitle.find('name').text)
            subtitles.append(sub)

    # Not in the first page of result try next (if any)
    next_page = root.find('data/next')
    while next_page.text is not None:  # pragma: no cover
        r = self.session.get(next_page.text, timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)
        # NOTE(review): attribute-style access (root.data.page.text) is not
        # how ElementTree elements work — this line looks broken; confirm
        # which XML library `etree` is here.
        logger.info('Loading subtitles page %r', root.data.page.text)

        # Looking for show in following pages
        for subtitle in root.findall('data/subtitles/subtitle'):
            if '{0}x{1:02}'.format(season, episode) in subtitle.find('name').text.lower():
                logger.debug('Found subtitle id %d - %r - %r',
                             int(subtitle.find('id').text),
                             subtitle.find('name').text,
                             subtitle.find('version').text)
                sub = ItaSASubtitle(
                    int(subtitle.find('id').text),
                    subtitle.find('show_name').text,
                    season,
                    episode,
                    video_format,
                    year,
                    tvdb_id,
                    subtitle.find('name').text)
                subtitles.append(sub)

        next_page = root.find('data/next')

    # Download the subs found, can be more than one in zip
    additional_subs = []
    for sub in subtitles:
        # open the zip
        content = self._download_zip(sub.sub_id)
        if not is_zipfile(io.BytesIO(content)):  # pragma: no cover
            # NOTE(review): `content` is presumably bytes — a str-in-bytes
            # containment test raises TypeError on Python 3; confirm.
            if 'limite di download' in content:
                raise TooManyRequests()
            else:
                raise ConfigurationError('Not a zip file: {!r}'.format(content))

        with ZipFile(io.BytesIO(content)) as zf:
            if len(zf.namelist()) > 1:  # pragma: no cover
                # multiple files: the first fills `sub`, the rest become
                # deep-copied additional subtitle objects
                for index, name in enumerate(zf.namelist()):
                    if index == 0:
                        # First element
                        sub.content = fix_line_ending(zf.read(name))
                        sub.full_data = name
                    else:
                        add_sub = copy.deepcopy(sub)
                        add_sub.content = fix_line_ending(zf.read(name))
                        add_sub.full_data = name
                        additional_subs.append(add_sub)
            else:
                sub.content = fix_line_ending(zf.read(zf.namelist()[0]))
                sub.full_data = zf.namelist()[0]

    return subtitles + additional_subs