Example #1
    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

        prev_cookies = region.get("subscene_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug("Re-using old subscene cookies: %r", prev_cookies)
            self.session.cookies.update(prev_cookies)

        else:
            logger.debug("Logging in")
            self.login()
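
The cookie re-use in Example #1 rests on a dogpile.cache region: region.get() returns the NO_VALUE sentinel on a miss, and only then does the provider log in. A minimal sketch of that pattern, assuming a dogpile.cache memory backend (the key name and helper are illustrative, not from the source):

# Minimal sketch of the cache-or-login pattern (backend, key and helper are illustrative).
from dogpile.cache import make_region
from dogpile.cache.api import NO_VALUE

region = make_region().configure("dogpile.cache.memory")

def reuse_or_login(session, login):
    cached = region.get("provider_cookies")  # NO_VALUE signals a cache miss
    if cached is not NO_VALUE:
        session.cookies.update(cached)       # re-use the stored cookie jar
    else:
        login()                              # authenticate from scratch...
        region.set("provider_cookies", session.cookies.copy())  # ...and cache it
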
Example #2
def initialize(self):
    logger.debug("Legendasdivx.pt :: Creating session for requests")
    self.session = RetryingCFSession()
    # re-use PHP Session if present
    prev_cookies = region.get("legendasdivx_cookies2")
    if prev_cookies != NO_VALUE:
        logger.debug("Legendasdivx.pt :: Re-using previous legendasdivx cookies: %s", prev_cookies)
        self.session.cookies.update(prev_cookies)
    # login if session has expired
    else:
        logger.debug("Legendasdivx.pt :: Session cookies not found!")
        self.session.headers.update(self.headers)
        self.login()
Example #3
    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)

        token = region.get("titlovi_token")
        if token is not NO_VALUE:
            self.user_id, self.login_token, self.token_exp = token
            if datetime.now() > self.token_exp:
                logger.debug('Token expired')
                self.log_in()
            else:
                logger.debug('Use cached token')
        else:
            logger.debug('Token not found in cache')
            self.log_in()
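
Example #3 applies the same cache to an expiring token: a (user_id, token, expiry) tuple is stored, and a new login is forced only once datetime.now() passes the stored expiry. A condensed sketch of that check, reusing region and NO_VALUE from the sketch above (get_valid_token and log_in are illustrative names):

# Sketch of the expiring-token lookup from Example #3 (helper names illustrative).
from datetime import datetime

def get_valid_token(log_in):
    cached = region.get("provider_token")
    if cached is not NO_VALUE:
        user_id, token, token_exp = cached
        if datetime.now() <= token_exp:  # cached token is still valid
            return user_id, token
    return log_in()                      # cache miss or expired token
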
Example #4
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {
        Language.fromtitlovi(l)
        for l in language_converters['titlovi'].codes
    } | {Language.fromietf('sr-Latn')}
    api_url = 'https://kodi.titlovi.com/api/subtitles'
    api_gettoken_url = api_url + '/gettoken'
    api_search_url = api_url + '/search'

    def __init__(self, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password

        self.session = None

        self.user_id = None
        self.login_token = None
        self.token_exp = None

    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)

        token = region.get("titlovi_token")
        if token is not NO_VALUE:
            self.user_id, self.login_token, self.token_exp = token
            if datetime.now() > self.token_exp:
                logger.debug('Token expired')
                self.log_in()
            else:
                logger.debug('Use cached token')
        else:
            logger.debug('Token not found in cache')
            self.log_in()

    def log_in(self):
        login_params = dict(username=self.username,
                            password=self.password,
                            json=True)
        try:
            response = self.session.post(self.api_gettoken_url,
                                         params=login_params)
            if response.status_code == request_codes.ok:
                resp_json = response.json()
                self.login_token = resp_json.get('Token')
                self.user_id = resp_json.get('UserId')
                self.token_exp = dateutil.parser.parse(
                    resp_json.get('ExpirationDate'))

                region.set("titlovi_token",
                           [self.user_id, self.login_token, self.token_exp])
                logger.debug('New token obtained')

            elif response.status_code == request_codes.unauthorized:
                raise AuthenticationError('Login failed')

        except RequestException as e:
            logger.error(e)

    def terminate(self):
        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def get_result(self, search_url, search_params):
        return self.session.get(search_url, params=search_params)

    def query(self,
              languages,
              title,
              season=None,
              episode=None,
              year=None,
              imdb_id=None,
              video=None):
        search_params = dict()

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info(
                'Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages'
            )
            used_languages = [
                l for l in used_languages if l != Language.fromietf('sr-Latn')
            ]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        search_params['query'] = title
        search_params['lang'] = langs
        is_episode = False
        if season and episode:
            is_episode = True
            #search_params['season'] = season
            #search_params['episode'] = episode
        #if year:
        #    search_params['year'] = year
        if imdb_id:
            search_params['imdbID'] = imdb_id

        # loop through paginated results
        logger.info('Searching subtitles %r', search_params)
        subtitles = []
        query_results = []

        try:
            search_params['token'] = self.login_token
            search_params['userid'] = self.user_id
            search_params['json'] = True

            #response = self.get_result(search_url=self.api_search_url, search_params=search_params)
            response = self.get_result(self.api_search_url, search_params)
            resp_json = response.json()
            if resp_json['SubtitleResults']:
                query_results.extend(resp_json['SubtitleResults'])

        except Exception as e:
            logger.error(e)

        for sub in query_results:

            # title and alternate title
            match = title_re.search(sub.get('Title'))
            if match:
                _title = match.group('title')
                alt_title = match.group('altitle')
            else:
                continue

            # handle movies and series separately
            if is_episode:
                # skip if the season or episode number does not match
                if season and season != sub.get('Season'):
                    continue
                elif episode and episode != sub.get('Episode'):
                    continue

                subtitle = self.subtitle_class(
                    Language.fromtitlovi(sub.get('Lang')),
                    sub.get('Link'),
                    sub.get('Id'),
                    sub.get('Release'),
                    _title,
                    alt_title=alt_title,
                    season=sub.get('Season'),
                    episode=sub.get('Episode'),
                    year=sub.get('Year'),
                    rating=sub.get('Rating'),
                    download_count=sub.get('DownloadCount'),
                    asked_for_release_group=video.release_group,
                    asked_for_episode=episode)
            else:
                subtitle = self.subtitle_class(
                    Language.fromtitlovi(sub.get('Lang')),
                    sub.get('Link'),
                    sub.get('Id'),
                    sub.get('Release'),
                    _title,
                    alt_title=alt_title,
                    year=sub.get('Year'),
                    rating=sub.get('Rating'),
                    download_count=sub.get('DownloadCount'),
                    asked_for_release_group=video.release_group)
            logger.debug('Found subtitle %r', subtitle)

            # prime our matches so we can use the values later
            subtitle.get_matches(video)

            # add found subtitles
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None

        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [
            s for s in self.query(languages,
                                  fix_inconsistent_naming(title),
                                  season=season,
                                  episode=episode,
                                  year=video.year,
                                  imdb_id=video.imdb_id,
                                  video=video)
        ]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr'
                                         or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive,
                                                   archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(
                subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive,
                                          archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            _sub_name = sub_name.lower()

            if not ('.cyr' in _sub_name or '.cir' in _sub_name
                    or 'cyr)' in _sub_name):
                sr_lat_subs.append(sub_name)

            if (('.cyr' in _sub_name or '.cir' in _sub_name)
                    and '.lat' not in _sub_name):
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
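
Taken as a whole, Example #4 follows the usual provider lifecycle: construct with credentials, initialize() to build the session and obtain a token, list_subtitles() to search, download_subtitle() to fill in content, and terminate() to close the session. A hedged usage sketch (the credentials and the video object are placeholders, not from the source):

# Illustrative driver for the provider above; credentials and video are placeholders.
provider = TitloviProvider(username="user", password="secret")
provider.initialize()                      # creates the session and token
try:
    subs = provider.list_subtitles(video, {Language.fromietf('sr-Latn')})
    for sub in subs:
        provider.download_subtitle(sub)    # fills sub.content from the archive
finally:
    provider.terminate()                   # always close the session
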
Example #5
def initialize(self):
    self.session = RetryingCFSession()
Example #6
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
    server_url = 'https://titlovi.com'
    search_url = server_url + '/titlovi/?'
    download_url = server_url + '/download/?type=1&mediaid='

    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)

    def terminate(self):
        self.session.close()

    def query(self, languages, title, season=None, episode=None, year=None, video=None):
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
            used_languages = [l for l in used_languages if l != Language.fromietf('sr-Latn')]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()
            except RequestException as e:
                logger.exception('RequestException %s', e)
                break
            else:
                try:
                    soup = BeautifulSoup(r.content, 'lxml')

                    # number of results
                    result_count = int(soup.select_one('.results_count b').string)
                except (AttributeError, TypeError, ValueError):
                    result_count = None

                # exit if no results
                if not result_count:
                    if not subtitles:
                        logger.debug('No subtitles found')
                    else:
                        logger.debug("No more subtitles found")
                    break

                # number of pages with results
                pages = int(math.ceil(result_count / float(items_per_page)))

                # get current page
                if 'pg' in params:
                    current_page = int(params['pg'])

                try:
                    sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                    for sub in sublist:
                        # subtitle id
                        sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                        # get download link
                        download_link = self.download_url + sid
                        # title and alternate title
                        match = title_re.search(sub.a.string)
                        if match:
                            _title = match.group('title')
                            alt_title = match.group('altitle')
                        else:
                            continue

                        # page link
                        page_link = self.server_url + sub.a.attrs['href']
                        # subtitle language
                        _lang = sub.select_one('.lang')
                        match = lang_re.search(_lang.attrs.get('src', _lang.attrs.get('cfsrc', '')))
                        if match:
                            try:
                                # decode language
                                lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
                            except ValueError:
                                continue
                        else:
                            continue

                        # release year or series start year
                        r_year = None
                        match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                        if match:
                            r_year = int(match.group('year'))
                        # fps
                        fps = None
                        match = fps_re.search(sub.select_one('.fps').string)
                        if match:
                            fps = match.group('fps')
                        # releases
                        releases = str(sub.select_one('.fps').parent.contents[0].string)

                        # handle movies and series separately
                        if is_episode:
                            # season and episode info
                            sxe = sub.select_one('.s0xe0y').string
                            r_season = None
                            r_episode = None
                            if sxe:
                                match = season_re.search(sxe)
                                if match:
                                    r_season = int(match.group('season'))
                                match = episode_re.search(sxe)
                                if match:
                                    r_episode = int(match.group('episode'))

                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, season=r_season, episode=r_episode,
                                                           year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group,
                                                           asked_for_episode=episode)
                        else:
                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group)
                        logger.debug('Found subtitle %r', subtitle)

                        # prime our matches so we can use the values later
                        subtitle.get_matches(video)

                        # add found subtitles
                        subtitles.append(subtitle)

                finally:
                    soup.decompose()

                # stop on last page
                if current_page >= pages:
                    break

                # increment current page
                params['pg'] = current_page + 1
                logger.debug('Getting page %d', params['pg'])

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in
                self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode, year=video.year,
                           video=video)]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            if not ('.cyr' in sub_name or '.cir' in sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in sub_name or '.cir' in sub_name) and '.lat' not in sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
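
Example #6 drives its scraping with a manual pagination loop: it derives the page count from the reported result total and bumps params['pg'] until the last page. The skeleton of that loop, reduced to a sketch (fetch_page is a hypothetical callable returning a page's items and the total result count):

# Skeleton of the pagination loop above (fetch_page is hypothetical).
import math

def paginate(fetch_page, items_per_page=10):
    params = {}
    current_page = 1
    results = []
    while True:
        items, result_count = fetch_page(params)
        results.extend(items)
        if not result_count:               # no (more) results: stop
            break
        pages = int(math.ceil(result_count / float(items_per_page)))
        if current_page >= pages:          # stop on the last page
            break
        current_page += 1
        params['pg'] = current_page        # ask for the next page
    return results
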
Example #7
class LegendasdivxProvider(Provider):
    """Legendasdivx Provider."""
    languages = {Language('por', 'BR')} | {Language('por')}
    SEARCH_THROTTLE = 8
    site = 'https://www.legendasdivx.pt'
    headers = {
        'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Origin': 'https://www.legendasdivx.pt',
        'Referer': 'https://www.legendasdivx.pt'
    }
    loginpage = site + '/forum/ucp.php?mode=login'
    searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
    download_link = site + '/modules.php{link}'

    def __init__(self, username, password, skip_wrong_fps=True):
        # make sure login credentials are configured.
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError(
                'Legendasdivx.pt :: Username and password must be specified')
        self.username = username
        self.password = password
        self.skip_wrong_fps = skip_wrong_fps

    def initialize(self):
        logger.debug("Legendasdivx.pt :: Creating session for requests")
        self.session = RetryingCFSession()
        # re-use PHP Session if present
        prev_cookies = region.get("legendasdivx_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug(
                "Legendasdivx.pt :: Re-using previous legendasdivx cookies: %s",
                prev_cookies)
            self.session.cookies.update(prev_cookies)
        # login if session has expired
        else:
            logger.debug("Legendasdivx.pt :: Session cookies not found!")
            self.session.headers.update(self.headers)
            self.login()

    def terminate(self):
        # session close
        self.session.close()

    def login(self):
        logger.debug('Legendasdivx.pt :: Logging in')
        try:
            # sleep for 1 second before the next request
            sleep(1)
            res = self.session.get(self.loginpage)
            res.raise_for_status()
            bsoup = ParserBeautifulSoup(res.content, ['lxml'])

            _allinputs = bsoup.findAll('input')
            data = {}
            # necessary to set 'sid' for POST request
            for field in _allinputs:
                data[field.get('name')] = field.get('value')

            # sleep for 1 second before the next request
            sleep(1)
            data['username'] = self.username
            data['password'] = self.password
            res = self.session.post(self.loginpage, data)
            res.raise_for_status()
            # make sure we're logged in
            logger.debug(
                'Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s',
                self.session.cookies.get_dict()['PHPSESSID'])
            cj = self.session.cookies.copy()
            store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k",
                         "phpbb3_2z8zs_u", "lang")
            for cn in iter(self.session.cookies.keys()):
                if cn not in store_cks:
                    del cj[cn]
            # store session cookies on cache
            logger.debug(
                "Legendasdivx.pt :: Storing legendasdivx session cookies: %r",
                cj)
            region.set("legendasdivx_cookies2", cj)

        except KeyError:
            logger.error(
                "Legendasdivx.pt :: Couldn't get session ID, check your credentials"
            )
            raise AuthenticationError(
                "Legendasdivx.pt :: Couldn't get session ID, check your credentials"
            )
        except HTTPError as e:
            if "bloqueado" in res.text.lower():
                logger.error(
                    "LegendasDivx.pt :: Your IP is blocked on this server.")
                raise IPAddressBlocked(
                    "LegendasDivx.pt :: Your IP is blocked on this server.")
            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
        except Exception as e:
            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

    def _process_page(self, video, bsoup):

        subtitles = []

        _allsubs = bsoup.findAll("div", {"class": "sub_box"})

        for _subbox in _allsubs:

            hits = 0
            lang = None
            frame_rate = None
            for th in _subbox.findAll("th"):
                if th.text == 'Hits:':
                    hits = int(th.find_next("td").text)
                if th.text == 'Idioma:':
                    lang = th.find_next("td").find("img").get('src')
                    if 'brazil' in lang.lower():
                        lang = Language.fromopensubtitles('pob')
                    elif 'portugal' in lang.lower():
                        lang = Language.fromopensubtitles('por')
                    else:
                        continue
                if th.text == "Frame Rate:":
                    frame_rate = th.find_next("td").text.strip()

            # get description for matches
            description = _subbox.find("td", {
                "class": "td_desc brd_up"
            }).get_text()

            # get subtitle link from footer
            sub_footer = _subbox.find("div", {"class": "sub_footer"})
            download = sub_footer.find(
                "a", {"class": "sub_download"}) if sub_footer else None

            # sometimes the 'a' tag is not found and 'download' is None, most likely an HTML format error
            try:
                download_link = self.download_link.format(
                    link=download.get('href'))
                logger.debug("Legendasdivx.pt :: Found subtitle link on: %s ",
                             download_link)
            except AttributeError:
                logger.debug(
                    "Legendasdivx.pt :: Couldn't find download link. Trying next..."
                )
                continue

            # get subtitle uploader
            sub_header = _subbox.find("div", {"class": "sub_header"})
            uploader = sub_header.find("a").text if sub_header else 'anonymous'

            exact_match = False
            if video.name.lower() in description.lower():
                exact_match = True

            data = {
                'link': download_link,
                'exact_match': exact_match,
                'hits': hits,
                'uploader': uploader,
                'frame_rate': frame_rate,
                'description': description
            }
            subtitles.append(
                LegendasdivxSubtitle(lang,
                                     video,
                                     data,
                                     skip_wrong_fps=self.skip_wrong_fps))
        return subtitles

    def query(self, video, languages):

        _searchurl = self.searchurl

        subtitles = []

        if isinstance(video, Movie):
            querytext = video.imdb_id if video.imdb_id else video.title

        if isinstance(video, Episode):
            querytext = '{} S{:02d}E{:02d}'.format(video.series, video.season,
                                                   video.episode)
            querytext = quote(querytext.lower())

        # language query filter
        if not isinstance(languages, (tuple, list, set)):
            languages = [languages]

        for language in languages:
            logger.debug("Legendasdivx.pt :: searching for %s subtitles.",
                         language)
            language_id = language.opensubtitles
            if 'por' in language_id:
                lang_filter = '&form_cat=28'
            elif 'pob' in language_id:
                lang_filter = '&form_cat=29'
            else:
                lang_filter = ''

            querytext = querytext + lang_filter if lang_filter else querytext

            try:
                # sleep for 1 second before the next request
                sleep(1)
                self.headers['Referer'] = self.site + '/index.php'
                self.session.headers.update(self.headers)
                res = self.session.get(_searchurl.format(query=querytext),
                                       allow_redirects=False)
                res.raise_for_status()
                if (res.status_code == 200
                        and "A legenda não foi encontrada" in res.text):
                    logger.warning(
                        'Legendasdivx.pt :: query %s returned no results!',
                        querytext)
                    # for series, if no results found, try again just with series and season (subtitle packs)
                    if isinstance(video, Episode):
                        logger.debug(
                            "Legendasdivx.pt :: trying again with just series and season on query."
                        )
                        querytext = re.sub(r"(e|E)(\d{2})", "", querytext)
                        # sleep for 1 second before the next request
                        sleep(1)
                        res = self.session.get(
                            _searchurl.format(query=querytext),
                            allow_redirects=False)
                        res.raise_for_status()
                        if (res.status_code == 200 and
                                "A legenda não foi encontrada" in res.text):
                            logger.warning(
                                'Legendasdivx.pt :: query %s returned no results for language %s (series and season only).',
                                querytext, language_id)
                            continue
                if res.status_code == 302:  # got redirected to login page.
                    # seems that our session cookies are no longer valid... clean them from cache
                    region.delete("legendasdivx_cookies2")
                    logger.debug(
                        "Legendasdivx.pt :: Logging in again. Cookies have expired!"
                    )
                    # login and try again
                    self.login()
                    # sleep for 1 second before the next request
                    sleep(1)
                    res = self.session.get(_searchurl.format(query=querytext))
                    res.raise_for_status()
            except HTTPError as e:
                if "bloqueado" in res.text.lower():
                    logger.error(
                        "LegendasDivx.pt :: Your IP is blocked on this server."
                    )
                    raise IPAddressBlocked(
                        "LegendasDivx.pt :: Your IP is blocked on this server."
                    )
                logger.error("Legendasdivx.pt :: HTTP Error %s", e)
                raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
            except Exception as e:
                logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
                raise ServiceUnavailable(
                    "LegendasDivx.pt :: Uncaught error: %r" % e)

            bsoup = ParserBeautifulSoup(res.content, ['html.parser'])

            # search for more than 10 results (legendasdivx uses pagination)
            # don't throttle - maximum results = 6 * 10
            MAX_PAGES = 6

            # get number of pages based on the results found
            page_header = bsoup.find("div", {"class": "pager_bar"})
            results_found = re.search(
                r'\((.*?) encontradas\)',
                page_header.text).group(1) if page_header else 0
            logger.debug("Legendasdivx.pt :: Found %s subtitles",
                         str(results_found))
            num_pages = (int(results_found) // 10) + 1
            num_pages = min(MAX_PAGES, num_pages)

            # process first page
            subtitles += self._process_page(video, bsoup)

            # more pages?
            if num_pages > 1:
                for num_page in range(2, num_pages + 1):
                    sleep(1)  # another 1 sec before requesting...
                    _search_next = self.searchurl.format(
                        query=querytext) + "&page={0}".format(str(num_page))
                    logger.debug(
                        "Legendasdivx.pt :: Moving on to next page: %s",
                        _search_next)
                    # sleep for 1 second before the next request
                    sleep(1)
                    res = self.session.get(_search_next)
                    next_page = ParserBeautifulSoup(res.content,
                                                    ['html.parser'])
                    subs = self._process_page(video, next_page)
                    subtitles.extend(subs)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(video, languages)

    def download_subtitle(self, subtitle):

        try:
            # sleep for 1 second before the next request
            sleep(1)
            res = self.session.get(subtitle.page_link)
            res.raise_for_status()
        except HTTPError as e:
            if "bloqueado" in res.text.lower():
                logger.error(
                    "LegendasDivx.pt :: Your IP is blocked on this server.")
                raise IPAddressBlocked(
                    "LegendasDivx.pt :: Your IP is blocked on this server.")
            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
        except Exception as e:
            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)

        # make sure we haven't maxed out our daily limit
        if (res.status_code == 200
                and 'limite de downloads diário atingido' in res.text.lower()):
            logger.error("LegendasDivx.pt :: Daily download limit reached!")
            raise DownloadLimitExceeded(
                "Legendasdivx.pt :: Daily download limit reached!")

        archive = self._get_archive(res.content)
        # extract the subtitle
        if archive:
            subtitle_content = self._get_subtitle_from_archive(
                archive, subtitle)
            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
                subtitle.normalize()
                return subtitle
        return

    def _get_archive(self, content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Legendasdivx.pt :: Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Legendasdivx.pt :: Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            logger.error('Legendasdivx.pt :: Unsupported compressed format')
            return None
        return archive

    def _get_subtitle_from_archive(self, archive, subtitle):
        # some archives contain non-subtitle files with a .txt extension
        _tmp = list(SUBTITLE_EXTENSIONS)
        _tmp.remove('.txt')
        _subtitle_extensions = tuple(_tmp)
        _max_score = 0
        _scores = get_scores(subtitle.video)

        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(_subtitle_extensions):
                continue

            _guess = guessit(name)
            if isinstance(subtitle.video, Episode):
                logger.debug("Legendasdivx.pt :: guessing %s", name)
                logger.debug("Legendasdivx.pt :: subtitle S%sE%s video S%sE%s",
                             _guess['season'], _guess['episode'],
                             subtitle.video.season, subtitle.video.episode)

                if subtitle.video.episode != _guess[
                        'episode'] or subtitle.video.season != _guess['season']:
                    logger.debug(
                        'Legendasdivx.pt :: subtitle does not match video, skipping'
                    )
                    continue

            matches = set()
            matches |= guess_matches(subtitle.video, _guess)
            logger.debug('Legendasdivx.pt :: sub matches: %s', matches)
            _score = sum((_scores.get(match, 0) for match in matches))
            if _score > _max_score:
                _max_name = name
                _max_score = _score
                logger.debug("Legendasdivx.pt :: new max: %s %s", name, _score)

        if _max_score > 0:
            logger.debug(
                "Legendasdivx.pt :: returning from archive: %s scored %s",
                _max_name, _max_score)
            return archive.read(_max_name)

        logger.error(
            "Legendasdivx.pt :: No subtitle found on compressed file. Max score was 0"
        )
        return None
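
The login() in Example #7 is a standard phpBB flow: GET the login form, carry over every form input (which includes the hidden sid token), add the credentials, and POST the lot back. Condensed into a sketch (phpbb_login is an illustrative helper; ParserBeautifulSoup is the parser the source already uses):

# Condensed sketch of the phpBB login flow from Example #7 (helper name illustrative).
def phpbb_login(session, login_url, username, password):
    res = session.get(login_url)
    res.raise_for_status()
    bsoup = ParserBeautifulSoup(res.content, ['lxml'])
    # carry over every form field, notably the hidden 'sid' token
    data = {f.get('name'): f.get('value') for f in bsoup.findAll('input')}
    data['username'] = username
    data['password'] = password
    res = session.post(login_url, data)
    res.raise_for_status()
    return session.cookies.copy()          # jar to cache for later re-use
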
Example #8
def initialize(self):
    self.session = RetryingCFSession()
Example #9
class SoustitreseuProvider(Provider, ProviderSubtitleArchiveMixin):
    """Sous-Titres.eu Provider."""
    subtitle_class = SoustitreseuSubtitle
    languages = {Language(l) for l in ['fra', 'eng']}
    video_types = (Episode, Movie)
    server_url = 'https://www.sous-titres.eu/'
    search_url = server_url + 'search.html'

    def __init__(self):
        self.session = None
        self.is_perfect_match = False

    def initialize(self):
        self.session = RetryingCFSession()
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

    def query_series(self, video, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=30)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['html.parser'])

        # loop over series names
        self.is_perfect_match = False
        series_url = []
        series = soup.select('.serie > h3 > a')
        for item in series:
            # title
            if title in item.text:
                series_url.append(item.attrs['href'])
                self.is_perfect_match = True

        series_subs_archives_url = []
        for series_page in series_url:
            page_link = self.server_url + series_page
            r = self.session.get(page_link, timeout=30)
            r.raise_for_status()

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['html.parser'])

            series_subs_archives = soup.select('a.subList')
            for item in series_subs_archives:
                matching_archive = False
                subtitles_archive_name = unquote(
                    item.attrs['href'].split('/')[-1][:-4])
                guessed_subs = guessit(subtitles_archive_name,
                                       {'type': 'episode'})
                try:
                    season, episode = item.select_one(
                        '.episodenum').text.split('×')
                    guessed_subs.update({
                        'season': int(season),
                        'episode': int(episode)
                    })
                except ValueError:
                    season = item.select_one('.episodenum').text[1:]
                    episode = None
                    guessed_subs.update({'season': int(season)})

                if guessed_subs['season'] == video.season:
                    if 'episode' in guessed_subs:
                        if guessed_subs['episode'] == video.episode:
                            matching_archive = True
                    else:
                        matching_archive = True

                if matching_archive:
                    download_link = self.server_url + 'series/' + item.attrs[
                        'href']
                    res = self.session.get(download_link, timeout=30)
                    res.raise_for_status()

                    archive = self._get_archive(res.content)
                    # extract the subtitle
                    if archive:
                        subtitles_from_archive = self._get_subtitle_from_archive(
                            archive, video)
                        for subtitle in subtitles_from_archive:
                            subtitle.page_link = page_link
                            subtitle.download_link = download_link
                            subtitles.append(subtitle)

        return subtitles

    def query_movies(self, video, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=30)
        r.raise_for_status()

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['html.parser'])

        # loop over movie names
        movies_url = []
        self.is_perfect_match = False
        movies = soup.select('.film > h3 > a')
        for item in movies:
            # title
            if title.lower() in item.text.lower():
                movies_url.append(item.attrs['href'])
                self.is_perfect_match = True

        series_subs_archives_url = []
        for movies_page in movies_url:
            page_link = self.server_url + movies_page
            r = self.session.get(page_link, timeout=30)
            r.raise_for_status()

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['html.parser'])

            movies_subs_archives = soup.select('a.subList')
            for item in movies_subs_archives:
                download_link = self.server_url + 'films/' + item.attrs['href']
                res = self.session.get(download_link, timeout=30)
                res.raise_for_status()

                archive = self._get_archive(res.content)
                # extract the subtitle
                if archive:
                    subtitles_from_archive = self._get_subtitle_from_archive(
                        archive, video)
                    for subtitle in subtitles_from_archive:
                        subtitle.page_link = page_link
                        subtitle.download_link = download_link
                        subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        subtitles = []

        # query for subtitles
        if isinstance(video, Episode):
            subtitles += [
                s for s in self.query_series(video, video.series)
                if s.language in languages
            ]
        else:
            subtitles += [
                s for s in self.query_movies(video, video.title)
                if s.language in languages
            ]

        return subtitles

    def download_subtitle(self, subtitle):
        return subtitle

    def _get_archive(self, content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if is_rarfile(archive_stream):
            logger.debug('Sous-Titres.eu: Identified rar archive')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Sous-Titres.eu: Identified zip archive')
            archive = ZipFile(archive_stream)
        else:
            logger.error('Sous-Titres.eu: Unsupported compressed format')
            return None
        return archive

    def _get_subtitle_from_archive(self, archive, video):
        subtitles = []

        # some archives contain non-subtitle files with a .txt extension
        _tmp = list(SUBTITLE_EXTENSIONS)
        _tmp.remove('.txt')
        _subtitle_extensions = tuple(_tmp)
        _scores = get_scores(video)

        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(_subtitle_extensions):
                continue

            # get subtitles language
            if '.en.' in name.lower():
                language = Language.fromopensubtitles('eng')
            else:
                language = Language.fromopensubtitles('fre')

            release = name[:-4].lower().rstrip('tag').rstrip('en').rstrip('fr')
            _guess = guessit(release)
            if isinstance(video, Episode):
                if (video.episode != _guess.get('episode')
                        or video.season != _guess.get('season')):
                    continue

            matches = set()
            matches |= guess_matches(video, _guess)
            _score = sum((_scores.get(match, 0) for match in matches))
            content = archive.read(name)
            subtitles.append(
                SoustitreseuSubtitle(language, video, name, _guess, content,
                                     self.is_perfect_match))

        return subtitles
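
Both Example #7 and Example #9 pick files out of an archive by guessing each name with guessit and summing per-match scores. That selection logic, stripped to a sketch (best_archive_member is an illustrative helper; guess_matches and the scores mapping come from the surrounding provider code):

# Sketch of the guessit-based selection used in Examples #7 and #9
# (guess_matches and the scores mapping are assumed from the provider's imports).
import os
from guessit import guessit

def best_archive_member(archive, video, scores):
    best_name, best_score = None, 0
    for name in archive.namelist():
        if os.path.split(name)[-1].startswith('.'):    # skip hidden files
            continue
        matches = guess_matches(video, guessit(name))  # assumed helper
        score = sum(scores.get(m, 0) for m in matches)
        if score > best_score:
            best_name, best_score = name, score
    return best_name                                   # None if nothing scored
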
Example #10
def initialize(self):
    self.session = RetryingCFSession()
    self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
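
Indexing AGENT_LIST with randint(0, len(AGENT_LIST) - 1) is equivalent to random.choice(AGENT_LIST); a slightly more idiomatic spelling of the same setup:

# Equivalent user-agent rotation via random.choice (same behavior).
import random

def initialize(self):
    self.session = RetryingCFSession()
    self.session.headers['User-Agent'] = random.choice(AGENT_LIST)
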
Example #11
class GreekSubsProvider(Provider):
    """GreekSubs Provider."""
    languages = {Language('ell')}
    server_url = 'https://greeksubs.net/'
    subtitle_class = GreekSubsSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = RetryingCFSession()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]

    def terminate(self):
        self.session.close()

    def query(self, video, languages, imdb_id, season=None, episode=None):
        logger.debug('Searching subtitles for %r', imdb_id)
        subtitles = []
        search_link = self.server_url + 'en/view/' + imdb_id

        r = self.session.get(search_link, timeout=30)
        r.raise_for_status()

        soup_page = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])

        if isinstance(video, Episode):
            try:
                episodes = soup_page.select('div.col-lg-offset-2.col-md-8.text-center.top30.bottom10 > a')
                for item in episodes:
                    season_episode = re.search(r'Season (\d+) Episode (\d+)', item.text)
                    season_number = int(season_episode.group(1))
                    episode_number = int(season_episode.group(2))
                    if season_number == season and episode_number == episode:
                        episode_page = item.attrs['href']
                        r = self.session.get(episode_page, timeout=30)
                        soup_subs = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                        try:
                            secCode = soup_subs.find('input', {'id': 'secCode'}).get('value')
                        except Exception as e:
                            logging.debug(e)
                        else:
                            for subtitles_item in soup_subs.select('#elSub > tbody > tr'):
                                try:
                                    subtitle_id = re.search(r'downloadMe\(\'(.*)\'\)', subtitles_item.contents[2].contents[2].contents[0].attrs['onclick']).group(1)
                                    page_link = self.server_url + 'dll/' + subtitle_id + '/0/' + secCode
                                    language = Language.fromalpha2(subtitles_item.parent.find('img')['alt'])
                                    version = subtitles_item.contents[2].contents[4].text.strip()
                                    uploader = subtitles_item.contents[2].contents[5].contents[0].contents[1].text.strip()
                                    referer = episode_page.encode('utf-8')

                                    r = self.session.get(page_link,
                                                         headers={'Referer': referer},
                                                         timeout=30, allow_redirects=False)
                                    r.raise_for_status()
                                    soup_dll = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                                    try:
                                        langcode = soup_dll.find(attrs={"name": 'langcode'}).get('value')
                                        uid = soup_dll.find(attrs={"name": 'uid'}).get('value')
                                        output = soup_dll.find(attrs={"name": 'output'}).get('value')
                                        dll = soup_dll.find(attrs={"name": 'dll'}).get('value')
                                    except Exception as e:
                                        logging.debug(e)
                                    else:
                                        download_req = self.session.post(page_link, data={'langcode': langcode,
                                                                                          'uid': uid,
                                                                                          'output': output,
                                                                                          'dll': dll},
                                                                         headers={'Referer': page_link}, timeout=10)
                                except Exception as e:
                                    logging.debug(e)
                                else:
                                    if language in languages:
                                        subtitle = self.subtitle_class(language, page_link, version, uploader, referer)
                                        if not download_req.content:
                                            logger.error('Unable to download subtitle. No data returned from provider')
                                            continue

                                        subtitle.content = download_req.content

                                        logger.debug('Found subtitle %r', subtitle)
                                        subtitles.append(subtitle)
                    else:
                        pass
            except Exception as e:
                logging.debug(e)
        elif isinstance(video, Movie):
            try:
                soup_subs = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                try:
                    secCode = soup_subs.find('input', {'id': 'secCode'}).get('value')
                except Exception as e:
                    logging.debug(e)
                else:
                    for subtitles_item in soup_subs.select('#elSub > tbody > tr'):
                        try:
                            subtitle_id = re.search(r'downloadMe\(\'(.*)\'\)',
                                                    subtitles_item.contents[2].contents[2].contents[0].attrs[
                                                        'onclick']).group(1)
                            page_link = self.server_url + 'dll/' + subtitle_id + '/0/' + secCode
                            language = Language.fromalpha2(subtitles_item.parent.find('img')['alt'])
                            version = subtitles_item.contents[2].contents[4].text.strip()
                            uploader = subtitles_item.contents[2].contents[5].contents[0].contents[
                                1].text.strip()
                            referer = page_link.encode('utf-8')

                            r = self.session.get(page_link,
                                                 headers={'Referer': referer},
                                                 timeout=30, allow_redirects=False)
                            r.raise_for_status()
                            soup_dll = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['html.parser'])
                            try:
                                langcode = soup_dll.find(attrs={"name": 'langcode'}).get('value')
                                uid = soup_dll.find(attrs={"name": 'uid'}).get('value')
                                output = soup_dll.find(attrs={"name": 'output'}).get('value')
                                dll = soup_dll.find(attrs={"name": 'dll'}).get('value')
                            except Exception as e:
                                logging.debug(e)
                            else:
                                download_req = self.session.post(page_link, data={'langcode': langcode,
                                                                                  'uid': uid,
                                                                                  'output': output,
                                                                                  'dll': dll},
                                                                 headers={'Referer': page_link}, timeout=10)
                        except Exception as e:
                            logging.debug(e)
                        else:
                            if language in languages:
                                subtitle = self.subtitle_class(language, page_link, version, uploader, referer)
                                if not download_req.content:
                                    logger.error('Unable to download subtitle. No data returned from provider')
                                    continue

                                subtitle.content = download_req.content

                                logger.debug('Found subtitle %r', subtitle)
                                subtitles.append(subtitle)
            except Exception as e:
                logging.debug(e)

        return subtitles

    def list_subtitles(self, video, languages):
        imdbId = None
        subtitles = []

        if isinstance(video, Episode):
            imdbId = video.series_imdb_id
        elif isinstance(video, Movie):
            imdbId = video.imdb_id

        if not imdbId:
            logger.debug('No imdb number available to search with provider')
            return subtitles

        # query for subtitles with the imdbId
        subtitles = []

        if isinstance(video, Episode):
            subtitles = self.query(video, languages, imdbId, season=video.season, episode=video.episode)
        elif isinstance(video, Movie):
            subtitles = self.query(video, languages, imdbId)

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, GreekSubsSubtitle):
            subtitle.content = fix_line_ending(subtitle.content)
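
The download step in Example #11 repeats one pattern twice: scrape the hidden inputs langcode, uid, output and dll from the page, then POST them back with the page itself as Referer. That round trip, reduced to a sketch (fetch_via_hidden_form is an illustrative helper; the field names mirror the source):

# Sketch of the hidden-form round trip from Example #11 (helper name illustrative).
def fetch_via_hidden_form(session, page_link, html):
    soup = ParserBeautifulSoup(html, ['html.parser'])
    data = {name: soup.find(attrs={'name': name}).get('value')
            for name in ('langcode', 'uid', 'output', 'dll')}
    return session.post(page_link, data=data,
                        headers={'Referer': page_link}, timeout=10)
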
Example #12
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page, to avoid
    hammering the site, so it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False

    search_throttle = 2  # seconds

    def __init__(self, only_foreign=False):
        self.only_foreign = only_foreign

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        self.filters = dict(HearingImpaired="2")
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            logger.info("Only searching for foreign/forced subtitles")

        self.filters["LanguageFilter"] = ",".join(
            (str(language_ids[l.alpha3]) for l in languages
             if l.alpha3 in language_ids))

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info(
                "Skipping search because we don't know the original release name"
            )
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = ZipFile(io.BytesIO(subtitle.pack_data))
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(
                    subtitle, archive)
                return
            except ProviderError:
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session),
                             timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)
        archive = ZipFile(archive_stream)

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            subtitle = SubsceneSubtitle.from_api(s)
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language,
                                                     forced=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def query(self, video):
        #vfn = get_video_filename(video)
        subtitles = []
        #logger.debug(u"Searching for: %s", vfn)
        # film = search(vfn, session=self.session)
        #
        # if film and film.subtitles:
        #     logger.debug('Release results found: %s', len(film.subtitles))
        #     subtitles = self.parse_results(video, film)
        # else:
        #     logger.debug('No release results found')

        #time.sleep(self.search_throttle)

        # re-search for episodes without explicit release name
        if isinstance(video, Episode):
            #term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
            more_than_one = len([video.series] + video.alternative_series) > 1
            for series in [video.series] + video.alternative_series:
                term = u"%s - %s Season" % (
                    series, p.number_to_words(
                        "%sth" % video.season).capitalize())
                logger.debug('Searching for alternative results: %s', term)
                film = search(term,
                              session=self.session,
                              release=False,
                              throttle=self.search_throttle)
                if film and film.subtitles:
                    logger.debug('Alternative results found: %s',
                                 len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No alternative results found')

                # packs
                # if video.season_fully_aired:
                #     term = u"%s S%02i" % (series, video.season)
                #     logger.debug('Searching for packs: %s', term)
                #     time.sleep(self.search_throttle)
                #     film = search(term, session=self.session, throttle=self.search_throttle)
                #     if film and film.subtitles:
                #         logger.debug('Pack results found: %s', len(film.subtitles))
                #         subtitles += self.parse_results(video, film)
                #     else:
                #         logger.debug('No pack results found')
                # else:
                #     logger.debug("Not searching for packs, because the season hasn't fully aired")
                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            more_than_one = len([video.title] + video.alternative_titles) > 1
            for title in [video.title] + video.alternative_titles:
                logger.debug('Searching for movie results: %s', title)
                film = search(title,
                              year=video.year,
                              session=self.session,
                              limit_to=None,
                              release=False,
                              throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)
                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
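
Subscene applies search filters through cookies, which is why _enable_filters() simply updates the session's cookie jar before querying. A minimal sketch of the same mechanism with a plain requests session; the cookie names come from the example above, their exact server-side semantics are assumed:

import requests

session = requests.Session()
session.cookies.update({
    "HearingImpaired": "2",     # value used by the example above
    "ForeignOnly": "True",      # only foreign/forced subtitles
    "LanguageFilter": "13,33",  # comma-separated subscene language ids (assumed values)
})
# every subsequent session.get(...) now sends the filter cookies
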
Example #13
0
    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()
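
RetryingCFSession itself is not shown in these examples; it is assumed to be a requests.Session subclass that retries failed requests and handles Cloudflare challenges. A minimal sketch of the retrying part only, using the stock requests/urllib3 APIs:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

class RetryingSession(requests.Session):
    """Session with automatic retries; the Cloudflare handling of the
    real RetryingCFSession is omitted from this sketch."""
    def __init__(self, total=3, backoff_factor=0.5):
        super(RetryingSession, self).__init__()
        retry = Retry(total=total, backoff_factor=backoff_factor,
                      status_forcelist=(500, 502, 503, 504))
        adapter = HTTPAdapter(max_retries=retry)
        self.mount('http://', adapter)
        self.mount('https://', adapter)
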
Example #14
0
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page to avoid
    massive hammering, thus it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False

    search_throttle = 2  # seconds

    def __init__(self, only_foreign=False):
        self.only_foreign = only_foreign

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        self.filters = dict(HearingImpaired="2")
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            logger.info("Only searching for foreign/forced subtitles")

        self.filters["LanguageFilter"] = ",".join((str(language_ids[l.alpha3]) for l in languages
                                                   if l.alpha3 in language_ids))

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = ZipFile(io.BytesIO(subtitle.pack_data))
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
                return
            except ProviderError:
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)
        archive = ZipFile(archive_stream)

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            subtitle = SubsceneSubtitle.from_api(s)
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def query(self, video):
        #vfn = get_video_filename(video)
        subtitles = []
        #logger.debug(u"Searching for: %s", vfn)
        # film = search(vfn, session=self.session)
        #
        # if film and film.subtitles:
        #     logger.debug('Release results found: %s', len(film.subtitles))
        #     subtitles = self.parse_results(video, film)
        # else:
        #     logger.debug('No release results found')

        #time.sleep(self.search_throttle)

        # re-search for episodes without explicit release name
        if isinstance(video, Episode):
            #term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
            more_than_one = len([video.series] + video.alternative_series) > 1
            for series in [video.series] + video.alternative_series:
                term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
                logger.debug('Searching for alternative results: %s', term)
                film = search(term, session=self.session, release=False, throttle=self.search_throttle)
                if film and film.subtitles:
                    logger.debug('Alternative results found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No alternative results found')

                # packs
                # if video.season_fully_aired:
                #     term = u"%s S%02i" % (series, video.season)
                #     logger.debug('Searching for packs: %s', term)
                #     time.sleep(self.search_throttle)
                #     film = search(term, session=self.session, throttle=self.search_throttle)
                #     if film and film.subtitles:
                #         logger.debug('Pack results found: %s', len(film.subtitles))
                #         subtitles += self.parse_results(video, film)
                #     else:
                #         logger.debug('No pack results found')
                # else:
                #     logger.debug("Not searching for packs, because the season hasn't fully aired")
                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            more_than_one = len([video.title] + video.alternative_titles) > 1
            for title in [video.title] + video.alternative_titles:
                logger.debug('Searching for movie results: %s', title)
                film = search(title, year=video.year, session=self.session, limit_to=None, release=False,
                              throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)
                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
Example #15
0
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page to avoid
    massive hammering, thus it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 5  # seconds

    def __init__(self, only_foreign=False, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

    def login(self):
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.content:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.content)

        if match:
            h = HTMLParser.HTMLParser()
            data = json.loads(h.unescape(match.group(1)))
            login_url = urlparse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(login_url,
                                  {
                                      "username": self.username,
                                      "password": self.password,
                                      data["antiForgery"]["name"]: data["antiForgery"]["value"]
                                  })
            pep_content = re.search(r"<form method=\"post\" action=\"https://subscene\.com/\">"
                                    r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                                    r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                                    r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                                    r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                                    r"scope.+?value=\"(?P<scope>.+?)\".+?"
                                    r"state.+?value=\"(?P<state>.+?)\".+?"
                                    r"session_state.+?value=\"(?P<session_state>.+?)\"",
                                    r.content, re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    raise ProviderError("Something went wrong when trying to log in: %s", traceback.format_exc())
                else:
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username")
                    for cn in self.session.cookies.keys():
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return
        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        self.filters = dict(HearingImpaired="2")
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            logger.info("Only searching for foreign/forced subtitles")

        self.filters["LanguageFilter"] = ",".join((str(language_ids[l.alpha3]) for l in languages
                                                   if l.alpha3 in language_ids))

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = ZipFile(io.BytesIO(subtitle.pack_data))
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
                return
            except ProviderError:
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)
        archive = ZipFile(archive_stream)

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles
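
The login flow above ends by pruning the cookie jar to a whitelist and storing it under "subscene_cookies2", so a later initialize() can skip logging in. A minimal sketch of that cache-or-login pattern, assuming region is a configured dogpile.cache region as in these providers:

from dogpile.cache import make_region
from dogpile.cache.api import NO_VALUE

region = make_region().configure('dogpile.cache.memory')  # assumed backend

def restore_or_login(session, login):
    cookies = region.get('subscene_cookies2')
    if cookies is not NO_VALUE:
        session.cookies.update(cookies)  # re-use the cached jar
    else:
        login(session)  # expected to call region.set('subscene_cookies2', ...)
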
Example #17
0
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page to avoid
    massive hammering, thus it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))
    video_types = (Episode, Movie)
    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 8  # seconds

    def __init__(self, only_foreign=False, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

        prev_cookies = region.get("subscene_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug("Re-using old subscene cookies: %r", prev_cookies)
            self.session.cookies.update(prev_cookies)

        else:
            logger.debug("Logging in")
            self.login()

    def login(self):
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.text:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        match = re.search(
            r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>",
            r.text)

        if match:
            data = json.loads(html.unescape(match.group(1)))
            login_url = parse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(
                login_url, {
                    "username": self.username,
                    "password": self.password,
                    data["antiForgery"]["name"]: data["antiForgery"]["value"]
                })
            pep_content = re.search(
                r"<form method=\"post\" action=\"https://subscene\.com/\">"
                r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                r"scope.+?value=\"(?P<scope>.+?)\".+?"
                r"state.+?value=\"(?P<state>.+?)\".+?"
                r"session_state.+?value=\"(?P<session_state>.+?)\"", r.text,
                re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    raise ProviderError(
                        "Something went wrong when trying to log in: %s" %
                        traceback.format_exc())
                else:
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf",
                                 "idsvr.clients", "idsvr.session",
                                 "idsvr.username")
                    for cn in self.session.cookies.keys():
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return
        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        self.filters = dict(HearingImpaired="2")
        acc_filters = self.filters.copy()
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            acc_filters["ForeignOnly"] = self.filters["ForeignOnly"].lower()
            logger.info("Only searching for foreign/forced subtitles")

        selected_ids = []
        for l in languages:
            lid = language_ids.get(l.basename,
                                   language_ids.get(l.alpha3, None))
            if lid:
                selected_ids.append(str(lid))

        acc_filters["SelectedIds"] = selected_ids
        self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"])

        last_filters = region.get("subscene_filters")
        if last_filters != acc_filters:
            region.set("subscene_filters", acc_filters)
            logger.debug("Setting account filters to %r", acc_filters)
            self.session.post("https://u.subscene.com/filter",
                              acc_filters,
                              allow_redirects=False)

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info(
                "Skipping search because we don't know the original release name"
            )
            return []

        self._create_filters(languages)
        self._enable_filters()

        if isinstance(video, Episode):
            international_titles = list(
                set([video.series] + video.alternative_series[:1]))
            subtitles = [
                s for s in self.query(video, international_titles)
                if s.language in languages
            ]
            if not subtitles:
                us_titles = [x + ' (US)' for x in international_titles]
                subtitles = [
                    s for s in self.query(video, us_titles)
                    if s.language in languages
                ]
            return subtitles
        else:
            titles = list(set([video.title] + video.alternative_titles[:1]))
            return [
                s for s in self.query(video, titles) if s.language in languages
            ]

    def download_subtitle(self, subtitle):
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            if rarfile.is_rarfile(io.BytesIO(subtitle.pack_data)):
                logger.debug('Identified rar archive')
                archive = rarfile.RarFile(io.BytesIO(subtitle.pack_data))
            elif zipfile.is_zipfile(io.BytesIO(subtitle.pack_data)):
                logger.debug('Identified zip archive')
                archive = zipfile.ZipFile(io.BytesIO(subtitle.pack_data))
            else:
                logger.error('Unsupported compressed format')
                return
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(
                    subtitle, archive)
                return
            except ProviderError:
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session),
                             timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)

        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            logger.error('Unsupported compressed format')
            return

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language,
                                                     forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if subtitle.hearing_impaired:
                subtitle.language = Language.rebuild(subtitle.language,
                                                     hi=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def do_search(self, *args, **kwargs):
        try:
            return search(*args, **kwargs)
        except requests.HTTPError:
            region.delete("subscene_cookies2")
            raise

    @reinitialize_on_error((RequestException, ), attempts=1)
    def query(self, video, titles):
        subtitles = []
        if isinstance(video, Episode):
            more_than_one = len(titles) > 1
            for series in titles:
                term = u"%s - %s Season" % (
                    series, p.number_to_words(
                        "%sth" % video.season).capitalize())
                logger.debug('Searching with series and season: %s', term)
                film = self.do_search(term,
                                      session=self.session,
                                      release=False,
                                      throttle=self.search_throttle,
                                      limit_to=SearchTypes.TvSerie)
                if not film and video.season == 1:
                    logger.debug('Searching with series name: %s', series)
                    film = self.do_search(series,
                                          session=self.session,
                                          release=False,
                                          throttle=self.search_throttle,
                                          limit_to=SearchTypes.TvSerie)

                if film and film.subtitles:
                    logger.debug('Searching found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No results found')

                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            more_than_one = len(titles) > 1
            for title in titles:
                logger.debug('Searching for movie results: %r', title)
                film = self.do_search(title,
                                      year=video.year,
                                      session=self.session,
                                      limit_to=None,
                                      release=False,
                                      throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)
                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
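
The @reinitialize_on_error decorator used above comes from subliminal_patch and is not shown in these examples. A sketch of the general pattern it implies: catch the listed exceptions, re-run initialize(), and retry the method a limited number of times. Names and semantics here are assumed, not the actual implementation:

import functools

def reinitialize_on_error(exc_types, attempts=1):
    # Hypothetical reconstruction of the decorator's behavior.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            for attempt in range(attempts + 1):
                try:
                    return func(self, *args, **kwargs)
                except exc_types:
                    if attempt >= attempts:
                        raise
                    self.initialize()  # rebuild the session, then retry
        return wrapper
    return decorator
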
Example #18
0
    def initialize(self):
        self.session = RetryingCFSession()
        self.session.headers['Referer'] = self.server_url

class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
    server_url = 'https://titlovi.com'
    search_url = server_url + '/titlovi/?'
    download_url = server_url + '/download/?type=1&mediaid='

    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)

    def terminate(self):
        self.session.close()

    def query(self, languages, title, season=None, episode=None, year=None, video=None):
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
            used_languages = [l for l in used_languages if l != Language.fromietf('sr-Latn')]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()
            except RequestException as e:
                logger.exception('RequestException %s', e)
                break
            else:
                try:
                    soup = BeautifulSoup(r.content, 'lxml')

                    # number of results
                    result_count = int(soup.select_one('.results_count b').string)
                except Exception:
                    result_count = None

                # exit if no results
                if not result_count:
                    if not subtitles:
                        logger.debug('No subtitles found')
                    else:
                        logger.debug("No more subtitles found")
                    break

                # number of pages with results
                pages = int(math.ceil(result_count / float(items_per_page)))

                # get current page
                if 'pg' in params:
                    current_page = int(params['pg'])

                try:
                    sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                    for sub in sublist:
                        # subtitle id
                        sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                        # get download link
                        download_link = self.download_url + sid
                        # title and alternate title
                        match = title_re.search(sub.a.string)
                        if match:
                            _title = match.group('title')
                            alt_title = match.group('altitle')
                        else:
                            continue

                        # page link
                        page_link = self.server_url + sub.a.attrs['href']
                        # subtitle language
                        match = lang_re.search(sub.select_one('.lang').attrs['src'])
                        if not match:
                            continue
                        try:
                            # decode language
                            lang = Language.fromtitlovi(match.group('lang') + match.group('script'))
                        except ValueError:
                            continue

                        # release year or series start year
                        r_year = None
                        match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                        if match:
                            r_year = int(match.group('year'))
                        # fps
                        fps = None
                        match = fps_re.search(sub.select_one('.fps').string)
                        if match:
                            fps = match.group('fps')
                        # releases
                        releases = str(sub.select_one('.fps').parent.contents[0].string)

                        # handle movies and series separately
                        if is_episode:
                            # season and episode info
                            sxe = sub.select_one('.s0xe0y').string
                            r_season = None
                            r_episode = None
                            if sxe:
                                match = season_re.search(sxe)
                                if match:
                                    r_season = int(match.group('season'))
                                match = episode_re.search(sxe)
                                if match:
                                    r_episode = int(match.group('episode'))

                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, season=r_season, episode=r_episode,
                                                           year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group,
                                                           asked_for_episode=episode)
                        else:
                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group)
                        logger.debug('Found subtitle %r', subtitle)

                        # prime our matches so we can use the values later
                        subtitle.get_matches(video)

                        # add found subtitles
                        subtitles.append(subtitle)

                finally:
                    soup.decompose()

                # stop on last page
                if current_page >= pages:
                    break

                # increment current page
                params['pg'] = current_page + 1
                logger.debug('Getting page %d', params['pg'])

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode, year=video.year,
                          video=video)

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            if not ('.cyr' in sub_name or '.cir' in sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in sub_name or '.cir' in sub_name) and '.lat' not in sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        if sub_to_extract is None:
            raise ProviderError('No matching subtitle found in the archive')

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
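
The download path above sniffs the payload before deciding how to open it: rar, zip, or a bare subtitle file. The same pattern as a small helper, mirroring the is_rarfile/is_zipfile calls used in the example and assuming the rarfile package:

import io
from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile

def open_archive(payload):
    # Returns an archive object, or None if the payload is not an archive
    # (the caller may then treat it as a bare subtitle, as above).
    stream = io.BytesIO(payload)
    if is_rarfile(stream):
        return RarFile(stream)
    if is_zipfile(stream):
        stream.seek(0)  # is_zipfile may advance the stream position
        return ZipFile(stream)
    return None
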