Code Example #1
def test_list_subtitles_single_language(video_single_language):
    with EmbeddedSubtitlesProvider() as provider:
        subs = provider.list_subtitles(video_single_language,
                                       {Language.fromalpha2("en")})

        for sub in subs:
            assert sub.language == Language.fromalpha2("en")
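Every example on this page centers on Language.fromalpha2. As a quick orientation, here is a minimal sketch, assuming the babelfish package that these subliminal/bazarr providers build on, of what the call returns:

from babelfish import Language

# Language.fromalpha2 converts a two-letter ISO 639-1 code into a full
# Language object, equivalent to constructing it from the ISO 639-3 code.
english = Language.fromalpha2("en")
assert english == Language("eng")
assert english.alpha2 == "en"

# Country-specific variants are constructed directly, as several examples
# below do for Brazilian Portuguese and Mexican Spanish.
brazilian = Language("por", "BR")
assert str(brazilian) == "pt-BR"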
Code Example #2
File: napiprojekt.py Project: zx900930/bazarr
class NapiProjektProvider(_NapiProjektProvider):
    languages = {Language.fromalpha2(l) for l in ['pl']}
    subtitle_class = NapiProjektSubtitle

    def query(self, language, hash):
        params = {
            'v': 'dreambox',
            'kolejka': 'false',
            'nick': '',
            'pass': '',
            'napios': 'Linux',
            'l': language.alpha2.upper(),
            'f': hash,
            't': get_subhash(hash)}
        logger.info('Searching subtitle %r', params)
        r = self.session.get(self.server_url, params=params, timeout=10)
        r.raise_for_status()

        # handle subtitles not found and errors
        if r.content[:4] == b'NPc0':
            logger.debug('No subtitles found')
            return None

        subtitle = self.subtitle_class(language, hash)
        subtitle.content = r.content
        logger.debug('Found subtitle %r', subtitle)

        return subtitle

    def list_subtitles(self, video, languages):
        return [s for s in [self.query(l, video.hashes['napiprojekt']) for l in languages] if s is not None]
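A hypothetical usage sketch for the provider above; the video object and its precomputed 'napiprojekt' hash are assumptions for illustration:

# Hypothetical usage; assumes `video` is a subliminal Video object whose
# hashes dict already carries the 'napiprojekt' hash computed elsewhere.
with NapiProjektProvider() as provider:
    subs = provider.list_subtitles(video, {Language.fromalpha2('pl')})
    # query() has already populated subtitle.content for each result,
    # and list_subtitles filters out the None (not found) entries.
    for sub in subs:
        assert sub.content is not None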
Code Example #3
def test_list_subtitles_also_forced(video_single_language):
    with EmbeddedSubtitlesProvider() as provider:
        language_1 = Language.fromalpha2("en")
        language_2 = Language.rebuild(language_1, forced=True)
        subs = provider.list_subtitles(video_single_language,
                                       {language_1, language_2})
        assert any(language_1 == sub.language for sub in subs)
        assert any(not sub.language.forced for sub in subs)
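Language.rebuild here comes from subliminal-patch: it copies an existing language while overriding flags such as forced. A minimal sketch of the relationship the test relies on:

plain = Language.fromalpha2("en")
forced = Language.rebuild(plain, forced=True)

# Same base language, different forced flag; that is why the test requests
# both variants in a single set and then checks each side separately.
assert not plain.forced
assert forced.forced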
Code Example #4
File: nekur.py Project: zx900930/bazarr
    def query(self, title):
        subtitles = []

        data = {
            'ajax': '1',
            'sSearch': title,
        }

        r = self.session.post(self.search_url, data=data, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('tbody > tr')
        for row in rows:
            # title
            title_anchor_el = row.select_one('.title > a')
            title_inner_text = [
                element for element in title_anchor_el
                if isinstance(element, NavigableString)
            ]
            title = title_inner_text[0].strip()

            # year
            year = row.select_one('.year').text.strip('()')

            # download link
            href = title_anchor_el.get('href')
            download_link = self.server_url + href

            # imdb id
            imdb_td = row.select_one('td:nth-of-type(4)')
            imdb_link = imdb_td.select_one('a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # fps
            fps = row.select_one('.fps').text.strip()

            # additional notes
            notes = row.select_one('.notes').text.strip()

            # page link = download link (there is no separate subtitle page link)
            page_link = download_link

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'),
                                           page_link, download_link, title,
                                           year, imdb_id, fps, notes)
            logger.debug('nekur: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
Code Example #5
def test_download_subtitle_multiple(video_multiple_languages):
    with EmbeddedSubtitlesProvider() as provider:
        languages = {Language.fromalpha2(code)
                     for code in ("en", "it", "fr")} | {Language("por", "BR")}

        subs = provider.list_subtitles(video_multiple_languages, languages)
        for sub in subs:
            provider.download_subtitle(sub)
            assert sub.content is not None
Code Example #6
def test_list_subtitles_multiple_languages(video_multiple_languages):
    with EmbeddedSubtitlesProvider() as provider:
        languages = {
            Language.fromalpha2(code)
            for code in ("en", "it", "fr", "es")
        } | {Language("por", "BR")}

        subs = provider.list_subtitles(video_multiple_languages, languages)
        for expected in languages:
            assert any(sub.language == expected for sub in subs)
Code Example #7
def test_download_invalid_subtitle(video_single_language):
    with EmbeddedSubtitlesProvider() as provider:
        subtitle = provider.list_subtitles(video_single_language,
                                           {Language.fromalpha2("en")})[0]

        provider._cached_paths[subtitle.container.path] = {
            subtitle.stream.index: "dummy.srt"
        }
        with pytest.raises(fese.InvalidFile):
            provider.download_subtitle(subtitle)
Code Example #8
    def _parse_subtitles_page(self, video, response, language):
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode("utf-8", "ignore"),
            ["lxml", "html.parser"])
        title_soups = page_soup.find_all("div",
                                         {"id": "menu_detalle_buscador"})
        body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})
        episode = isinstance(video, Episode)

        for title_soup, body_soup in zip(title_soups, body_soups):
            # title
            title = _clean_title(title_soup.find("a").text)

            # Forced subtitles are not supported
            if title.lower().rstrip().endswith(("forzado", "forzados")):
                logger.debug("Skipping forced subtitles: %s", title)
                continue

            # Check movie title (if the video is a movie)
            if not episode and not _check_movie(video, title):
                continue

            # Data
            datos = body_soup.find("div", {
                "id": "buscador_detalle_sub_datos"
            }).text
            # Ignore multi-disc and non-srt subtitles
            if not any(item in datos for item in ("Cds:</b> 1", "SubRip")):
                continue

            spain = "/pais/7.gif" in datos
            language = Language.fromalpha2("es") if spain else Language(
                "spa", "MX")

            # description
            sub_details = body_soup.find("div", {
                "id": "buscador_detalle_sub"
            }).text
            description = sub_details.replace(",", " ")

            # uploader
            uploader = body_soup.find("a", {"class": "link1"}).text
            download_url = _get_download_url(body_soup)
            page_link = title_soup.find("a")["href"]

            subtitle = self.subtitle_class(language, video, page_link, title,
                                           description, uploader, download_url)

            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

        return subtitles
Code Example #9
    def query(self, keyword, season=None, episode=None, year=None):
        params = keyword
        if season and episode:
            params += ' S{season:02d}E{episode:02d}'.format(season=season,
                                                            episode=episode)
        elif year:
            params += ' {:4d}'.format(year)

        logger.debug('Searching subtitles %r', params)
        subtitles = []
        search_link = self.server_url + text_type(
            self.search_url).format(params)
        while True:
            r = self.session.get(search_link, timeout=30)
            r.raise_for_status()

            if not r.content:
                logger.debug('No data returned from provider')
                return []

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['lxml', 'html.parser'])

            # loop over subtitle cells
            for cell in soup.select('td.latest_name > a:nth-of-type(1)'):
                # read the item
                subtitle_id = int(cell['href'].rsplit('/', 2)[1])
                page_link = cell['href']
                language = Language.fromalpha2(
                    cell.parent.find('img')['src'].split('/')[-1].split('.')
                    [0])
                version = cell.text.strip()

                subtitle = self.subtitle_class(
                    language, page_link, version,
                    self.download_url.format(subtitle_id))

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

            anchors = soup.select('td a')
            next_page_available = False
            for anchor in anchors:
                if 'Next' in anchor.text and 'search.php' in anchor['href']:
                    search_link = self.server_url + anchor['href']
                    next_page_available = True
                    break
            if not next_page_available:
                break

        return subtitles
Code Example #10
File: subtitriid.py Project: rickytin/bazarr
    def query(self, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('.eBlock')
        for row in rows:
            result_anchor_el = row.select_one('.eTitle > a')

            # page link
            page_link = result_anchor_el.get('href')

            # fetch/parse additional info
            r = self.session.get(page_link, timeout=10)
            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['lxml', 'html.parser'])

            # title
            movie_titles_string = soup.select_one('.main-header').text.strip()
            movie_titles_list = movie_titles_string.split(' / ')
            title = movie_titles_list[-1]

            # year
            year = soup.select_one('#film-page-year').text.strip()

            # imdb id
            imdb_link = soup.select_one('#actors-page > a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # download link
            href = soup.select_one('.hvr').get('href')
            download_link = self.server_url + href

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'),
                                           page_link, download_link, title,
                                           year, imdb_id)
            logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
Code Example #11
    def _parse_subtitles_page(self, video, response, language):
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode("utf-8", "ignore"),
            ["lxml", "html.parser"])
        title_soups = page_soup.find_all("div",
                                         {"id": "menu_detalle_buscador"})
        body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})

        for title_soup, body_soup in zip(title_soups, body_soups):
            # title
            title = self._clean_title(title_soup.find("a").text)
            # discard the subtitle if the title contains a year between
            # parentheses that doesn't match the one provided in the video object
            if re.match(r'(\(\d{4}\))', title):
                if video.year and str(video.year) not in title:
                    continue

            # Data
            datos = body_soup.find("div", {
                "id": "buscador_detalle_sub_datos"
            }).text
            # Ignore multi-disc and non-srt subtitles
            if not any(item in datos for item in ("Cds:</b> 1", "SubRip")):
                continue

            spain = "/pais/7.gif" in datos
            language = Language.fromalpha2("es") if spain else Language(
                "spa", "MX")

            # description
            sub_details = body_soup.find("div", {
                "id": "buscador_detalle_sub"
            }).text
            description = sub_details.replace(",", " ").lower()

            # uploader
            uploader = body_soup.find("a", {"class": "link1"}).text
            page_link = title_soup.find("a")["href"]

            subtitle = self.subtitle_class(language, video, page_link, title,
                                           description, uploader)

            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

        return subtitles
Code Example #12
File: subs4free.py Project: jonudewux/bazarr
    def query(self, movie_id, title, year):
        # get the season list of the show
        logger.info('Getting the subtitle list of show id %s', movie_id)
        if movie_id:
            page_link = self.server_url + '/' + movie_id
        else:
            page_link = self.server_url + text_type(self.search_url).format(
                ' '.join([title, str(year)]))

        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['html.parser'])

        year_num = None
        year_element = soup.select_one('td#dates_header > table div')
        matches = False
        if year_element:
            matches = year_re.match(str(year_element.contents[2]).strip())
        if matches:
            year_num = int(matches.group(1))

        title_element = soup.select_one('td#dates_header > table u')
        show_title = str(
            title_element.contents[0]).strip() if title_element else None

        subtitles = []
        # loop over episode rows
        for subtitle in soup.select(
                'table.table_border div[align="center"] > div'):
            # read common info
            version = subtitle.find('b').text
            download_link = self.server_url + subtitle.find('a')['href']
            language = Language.fromalpha2(
                subtitle.find('img')['src'].split('/')[-1].split('.')[0])

            subtitle = self.subtitle_class(language, page_link, show_title,
                                           year_num, version, download_link)

            logger.debug('Found subtitle {!r}'.format(subtitle))
            subtitles.append(subtitle)

        return subtitles
Code Example #13
    def query(self, show_id, series, season, episode, title):
        # get the season list of the show
        logger.info('Getting the subtitle list of show id %s', show_id)
        if all((show_id, season, episode)):
            page_link = self.server_url + self.episode_link.format(
                show_id=show_id, season=season, episode=episode)
        else:
            return []

        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        year = None
        matches = year_re.match(
            str(soup.select_one(
                '#dates_header_br > table div').contents[2]).strip())
        if matches:
            year = int(matches.group(1))
        show_title = str(
            soup.select_one('#dates_header_br > table div u').string).strip()

        subtitles = []
        # loop over episode rows
        for subs_tag in soup.select('table .seeDark,.seeMedium'):
            # read common info
            version = subs_tag.find_all('b')[0].text
            download_link = self.server_url + subs_tag.find('a')['href']
            uploader = subs_tag.find_all('b')[1].text
            language = Language.fromalpha2(
                subs_tag.find('img')['src'].split('/')[-1].split('.')[0])

            subtitle = self.subtitle_class(language, page_link, show_title,
                                           year, version, download_link,
                                           uploader)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles
Code Example #14
def test_get_matches_movie(movies):
    movie = movies["dune"]
    subtitle = ArgenteamSubtitle(
        Language.fromalpha2("es"),
        None,
        "https://argenteam.net/subtitles/86024/Dune.Part.One.%282021%29.WEB.H264.1080p-NAISU",
        "WEB H264 1080p",
        {"title", "year", "imdb_id"},
    )
    matches = subtitle.get_matches(movie)
    assert matches == {
        "title",
        "year",
        "imdb_id",
        "source",
        "resolution",
        "edition",
        "video_codec",
    }
Code Example #15
def test_get_matches_episode(episodes):
    episode = episodes["breaking_bad_s01e01"]
    subtitle = ArgenteamSubtitle(
        Language.fromalpha2("es"),
        None,
        "https://argenteam.net/subtitles/24002/Breaking.Bad.%282008%29.S01E01-Pilot.BluRay.x264.720p-REWARD",
        "BluRay x264 720p",
        {"title", "season", "episode", "imdb_id"},
    )
    matches = subtitle.get_matches(episode)
    assert matches == {
        "title",
        "season",
        "episode",
        "imdb_id",
        "source",
        "video_codec",
        "resolution",
        "edition",
        "streaming_service",
        "release_group",
        "series",
        "year",
    }
Code Example #16
File: podnapisi.py Project: rubicon/bazarr
class PodnapisiProvider(_PodnapisiProvider, ProviderSubtitleArchiveMixin):
    languages = ({Language('por', 'BR'), Language('srp', script='Latn'), Language('srp', script='Cyrl')} |
                 {Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    video_types = (Episode, Movie)

    server_url = 'https://podnapisi.net/subtitles/'
    only_foreign = False
    also_foreign = False
    verify_ssl = True
    subtitle_class = PodnapisiSubtitle
    hearing_impaired_verifiable = True

    def __init__(self, only_foreign=False, also_foreign=False, verify_ssl=True):
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign
        self.verify_ssl = verify_ssl

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

        super(PodnapisiProvider, self).__init__()

    def initialize(self):
        super().initialize()
        self.session.mount('https://', PodnapisiAdapter())
        self.session.verify = self.verify_ssl

    def list_subtitles(self, video, languages):
        if video.is_special:
            logger.info("%s can't search for specials right now, skipping", self)
            return []

        season = episode = None
        if isinstance(video, Episode):
            titles = [fix_inconsistent_naming(title) for title in [video.series] + video.alternative_series]
            season = video.season
            episode = video.episode
        else:
            titles = [video.title] + video.alternative_titles

        for title in titles:
            subtitles = [s for l in languages for s in
                         self.query(l, title, video, season=season, episode=episode, year=video.year,
                                    only_foreign=self.only_foreign, also_foreign=self.also_foreign)]
            if subtitles:
                return subtitles

        return []

    def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
              also_foreign=False):
        search_language = str(language).lower()

        # sr-Cyrl special case
        if search_language == "sr-cyrl":
            search_language = "sr"

        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': search_language, 'sK': keyword}

        is_episode = False
        if season and episode:
            is_episode = True
            params['sTS'] = season
            params['sTE'] = episode

        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []
        pids = set()
        while True:
            # query the server
            content = None
            try:
                content = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
                xml = etree.fromstring(content)
            except etree.ParseError:
                logger.error("Wrong data returned: %r", content)
                break

            # exit if no results
            if not int(xml.find('pagination/results').text):
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for subtitle_xml in xml.findall('subtitle'):
                # read xml elements
                pid = subtitle_xml.find('pid').text
                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in pids:
                    continue

                _language = Language.fromietf(subtitle_xml.find('language').text)
                hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
                foreign = 'f' in (subtitle_xml.find('flags').text or '')
                if only_foreign and not foreign:
                    continue

                elif not only_foreign and not also_foreign and foreign:
                    continue

                elif also_foreign and foreign:
                    _language = Language.rebuild(_language, forced=True)

                # set subtitle language to hi if it's hearing_impaired
                if hearing_impaired:
                    _language = Language.rebuild(_language, hi=True)

                if language != _language:
                    continue

                page_link = subtitle_xml.find('url').text
                releases = []
                if subtitle_xml.find('release').text:
                    for release in subtitle_xml.find('release').text.split():
                        releases.append(re.sub(r'\.+$', '', release))  # remove trailing dots
                title = subtitle_xml.find('title').text
                r_season = int(subtitle_xml.find('tvSeason').text)
                r_episode = int(subtitle_xml.find('tvEpisode').text)
                r_year = int(subtitle_xml.find('year').text)

                if is_episode:
                    subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                                   season=r_season, episode=r_episode, year=r_year,
                                                   asked_for_release_group=video.release_group,
                                                   asked_for_episode=episode)
                else:
                    subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                                   year=r_year, asked_for_release_group=video.release_group)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
                pids.add(pid)

            # stop on last page
            if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
                break

            # increment current page
            params['page'] = int(xml.find('pagination/current').text) + 1
            logger.debug('Getting page %d', params['page'])
            xml = None

        return subtitles

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + subtitle.pid + '/download', params={'container': 'zip'}, timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            subtitle.content = self.get_subtitle_from_archive(subtitle, zf)
Code Example #17
class SubdivxSubtitlesProvider(Provider):
    provider_name = "subdivx"
    hash_verifiable = False
    languages = {Language("spa", "MX")} | {Language.fromalpha2("es")}
    video_types = (Episode, Movie)
    subtitle_class = SubdivxSubtitle

    multi_result_throttle = 2
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"
        self.session.cookies.update({"iduser_cookie": _IDUSER_COOKIE})

    def terminate(self):
        self.session.close()

    def query(self, video, languages):
        if isinstance(video, Episode):
            query = f"{video.series} S{video.season:02}E{video.episode:02}"
        else:
            # Subdivx has problems searching foreign movies if the year is
            # appended. A proper solution would be filtering results with the
            # year in self._parse_subtitles_page.
            query = video.title

        params = {
            "buscar2": query,
            "accion": "5",
            "masdesc": "",
            "subtitulos": "1",
            "realiza_b": "1",
            "pg": "1",
        }

        logger.debug(f"Searching subtitles: {query}")
        subtitles = []
        language = self.language_list[0]
        search_link = f"{_SERVER_URL}/index.php"
        while True:
            response = self.session.get(search_link,
                                        params=params,
                                        allow_redirects=True,
                                        timeout=20)

            try:
                page_subtitles = self._parse_subtitles_page(
                    video, response, language)
            except Exception as e:
                logger.error(f"Error parsing subtitles list: {e}")
                break

            subtitles += page_subtitles

            if len(page_subtitles) < 100:
                break  # this is the last page

            params["pg"] += 1  # search next page
            time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        # download the subtitle
        logger.info("Downloading subtitle %r", subtitle)

        # download zip / rar file with the subtitle
        response = self.session.get(
            subtitle.download_url,
            headers={"Referer": subtitle.page_link},
            timeout=30,
        )
        response.raise_for_status()

        # open the compressed archive
        archive = _get_archive(response.content)

        # extract the subtitle
        subtitle_content = _get_subtitle_from_archive(archive, subtitle)
        subtitle.content = fix_line_ending(subtitle_content)

    def _parse_subtitles_page(self, video, response, language):
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode("utf-8", "ignore"),
            ["lxml", "html.parser"])
        title_soups = page_soup.find_all("div",
                                         {"id": "menu_detalle_buscador"})
        body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})
        episode = isinstance(video, Episode)

        for title_soup, body_soup in zip(title_soups, body_soups):
            # title
            title = _clean_title(title_soup.find("a").text)

            # Forced subtitles are not supported
            if title.lower().rstrip().endswith(("forzado", "forzados")):
                logger.debug("Skipping forced subtitles: %s", title)
                continue

            # Check movie title (if the video is a movie)
            if not episode and not _check_movie(video, title):
                continue

            # Data
            datos = body_soup.find("div", {
                "id": "buscador_detalle_sub_datos"
            }).text
            # Ignore multi-disc and non-srt subtitles
            if not any(item in datos for item in ("Cds:</b> 1", "SubRip")):
                continue

            spain = "/pais/7.gif" in datos
            language = Language.fromalpha2("es") if spain else Language(
                "spa", "MX")

            # description
            sub_details = body_soup.find("div", {
                "id": "buscador_detalle_sub"
            }).text
            description = sub_details.replace(",", " ")

            # uploader
            uploader = body_soup.find("a", {"class": "link1"}).text
            download_url = _get_download_url(body_soup)
            page_link = title_soup.find("a")["href"]

            subtitle = self.subtitle_class(language, video, page_link, title,
                                           description, uploader, download_url)

            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

        return subtitles
Code Example #18
File: betaseries.py Project: rickytin/bazarr
def _translateLanguageCodeToLanguage(languageCode):
    if languageCode.lower() == 'vo':
        return Language.fromalpha2('en')
    elif languageCode.lower() == 'vf':
        return Language.fromalpha2('fr')
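The mapping above covers BetaSeries' two audio-track codes (presumably 'version originale' and 'version française'); any other code falls through the if/elif chain and returns None:

assert _translateLanguageCodeToLanguage('VO') == Language.fromalpha2('en')
assert _translateLanguageCodeToLanguage('vf') == Language.fromalpha2('fr')
assert _translateLanguageCodeToLanguage('de') is None  # unmapped code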
Code Example #19
File: argenteam.py Project: paulverbeke/bazarr
class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
    provider_name = 'argenteam'
    languages = {Language.fromalpha2(l) for l in ['es']}
    video_types = (Episode, Movie)
    BASE_URL = "http://www.argenteam.net/"
    API_URL = BASE_URL + "api/v1/"
    subtitle_class = ArgenteamSubtitle
    hearing_impaired_verifiable = False
    language_list = list(languages)

    multi_result_throttle = 2  # seconds

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        self.session.close()

    def search_ids(self,
                   title,
                   year=None,
                   imdb_id=None,
                   season=None,
                   episode=None,
                   titles=None):
        """Search movie or episode id from the `title`, `season` and `episode`.

        :param imdb_id: imdb id of the given movie
        :param titles: all titles of the given series or movie
        :param year: release year of the given movie
        :param str title: series of the episode or movie name
        :param int season: season of the episode.
        :param int episode: episode number.
        :return: list of ids
        :rtype: list

        """
        # make the search
        query = title
        titles = titles or []

        is_episode = False
        if season and episode:
            is_episode = True
            query = '%s S%#02dE%#02d' % (title, season, episode)

        logger.info(u'Searching %s ID for %r',
                    "episode" if is_episode else "movie", query)
        r = self.session.get(self.API_URL + 'search',
                             params={'q': query},
                             timeout=10)
        r.raise_for_status()
        results = r.json()
        match_ids = []
        if results['total'] >= 1:
            for result in results["results"]:
                if (result['type'] == "episode"
                        and not is_episode) or (result['type'] == "movie"
                                                and is_episode):
                    continue

                # shortcut in case of matching imdb id
                if (not is_episode and imdb_id and "imdb" in result
                        and "tt%s" % result["imdb"] == str(imdb_id)):
                    logger.debug(
                        "Movie matched by IMDB ID %s, taking shortcut",
                        imdb_id)
                    match_ids = [result['id']]
                    break

                # advanced title check in case of multiple movie results
                if results['total'] > 1:
                    if not is_episode and year:
                        if result["title"] and not (sanitize(result["title"])
                                                    in (u"%s %s" %
                                                        (sanitize(name), year)
                                                        for name in titles)):
                            continue

                match_ids.append(result['id'])
        else:
            logger.error(u'No episode ID found for %r', query)

        if match_ids:
            logger.debug(u"Found matching IDs: %s",
                         ", ".join(str(id) for id in match_ids))

        return match_ids

    def query(self, title, video, titles=None):
        is_episode = isinstance(video, Episode)
        season = episode = None
        url = self.API_URL + 'movie'
        if is_episode:
            season = video.season
            episode = video.episode
            url = self.API_URL + 'episode'
            argenteam_ids = self.search_ids(title,
                                            season=season,
                                            episode=episode,
                                            titles=titles)

        else:
            argenteam_ids = self.search_ids(title,
                                            year=video.year,
                                            imdb_id=video.imdb_id,
                                            titles=titles)

        if not argenteam_ids:
            return []

        language = self.language_list[0]
        subtitles = []
        has_multiple_ids = len(argenteam_ids) > 1
        for aid in argenteam_ids:
            response = self.session.get(url, params={'id': aid}, timeout=10)

            response.raise_for_status()
            content = response.json()

            imdb_id = year = None
            returned_title = title
            if not is_episode and "info" in content:
                imdb_id = content["info"].get("imdb")
                year = content["info"].get("year")
                returned_title = content["info"].get("title", title)

            for r in content['releases']:
                for s in r['subtitles']:
                    movie_kind = "episode" if is_episode else "movie"
                    page_link = self.BASE_URL + movie_kind + "/" + str(aid)
                    sub = ArgenteamSubtitle(
                        language,
                        page_link,
                        s['uri'],
                        movie_kind,
                        returned_title,
                        season,
                        episode,
                        year,
                        r.get('team'),
                        r.get('tags'),
                        r.get('source'),
                        r.get('codec'),
                        content.get("tvdb"),
                        imdb_id,
                        asked_for_release_group=video.release_group,
                        asked_for_episode=episode)
                    subtitles.append(sub)

            if has_multiple_ids:
                time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        else:
            titles = [video.title] + video.alternative_titles

        for title in titles:
            subs = self.query(title, video, titles=titles)
            if subs:
                return subs

            time.sleep(self.multi_result_throttle)

        return []

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            subtitle.content = self.get_subtitle_from_archive(subtitle, zf)
Code Example #20
def test_list_subtitles_only_forced(video_single_language):
    with EmbeddedSubtitlesProvider() as provider:
        language = Language.fromalpha2("en")
        language = Language.rebuild(language, forced=True)
        subs = provider.list_subtitles(video_single_language, {language})
        assert len(subs) == 0
Code Example #21
    def query(self, show_id, series, season, year=None, country=None):
        # get the season list of the show
        logger.info('Getting the season list of show id %d', show_id)
        r = self.session.get(self.server_url + self.series_url.format(show_id),
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        series = soup.find('name').text

        # loop over season rows
        seasons = soup.findAll('series_group')
        season_id = None

        for season_row in seasons:
            try:
                parsed_season = int(season_row['ssnnum'])
                if parsed_season == season:
                    season_id = int(season_row['ssnid'])
                    break
            except (ValueError, TypeError):
                continue

        if season_id is None:
            logger.debug('Season not found in provider')
            return []

        # get the subtitle list of the season
        logger.info('Getting the subtitle list of season %d', season)
        r = self.session.get(
            self.server_url +
            self.season_url.format(show_id=show_id, season=season_id),
            timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        subtitles = []
        # loop over episode rows
        for subtitle_group in soup.findAll('subg'):
            # read the episode info
            episode_info = subtitle_group.find('etitle')
            if episode_info is None:
                continue

            episodes = []
            episode_match = episode_re.match(episode_info['number'])
            if episode_match:
                episodes = [
                    int(e)
                    for e in [episode_match.group(1),
                              episode_match.group(3)] if e
                ]

            subtitle_info = subtitle_group.find('sgt')
            if subtitle_info is None:
                continue

            season = int(subtitle_info['ssnnum'])
            episode_id = int(subtitle_info['epsid'])

            # filter out unreleased subtitles
            for subs_tag in subtitle_group.findAll('sr'):
                if subs_tag['published_on'] == '':
                    continue

                page_link = self.server_url + self.page_link.format(
                    show_id=show_id,
                    season_id=season_id,
                    season=season,
                    episode=episode_id)
                title = episode_info['title']
                version = subs_tag.fmt.text + ' ' + subs_tag.team.text
                download_link = self.server_url + self.download_link.format(
                    int(subs_tag['rlsid']))

                for episode in episodes:
                    subtitle = self.subtitle_class(Language.fromalpha2('el'),
                                                   page_link, series, season,
                                                   episode, year, title,
                                                   version, download_link)
                    logger.debug('Found subtitle %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles
Code Example #22
def test_download_subtitle_single(video_single_language):
    with EmbeddedSubtitlesProvider() as provider:
        subtitle = provider.list_subtitles(video_single_language,
                                           {Language.fromalpha2("en")})[0]
        provider.download_subtitle(subtitle)
        assert subtitle.content is not None
Code Example #23
def test_list_subtitles_wo_srt(video_multiple_languages):
    with EmbeddedSubtitlesProvider(include_srt=False) as provider:
        subs = provider.list_subtitles(video_multiple_languages,
                                       {Language.fromalpha2("en")})
        assert not subs
Code Example #24
def test_list_subtitles_wo_ass(video_single_language):
    with EmbeddedSubtitlesProvider(include_ass=False) as provider:
        subs = provider.list_subtitles(video_single_language,
                                       {Language.fromalpha2("en")})
        assert not subs
Code Example #25
class SuchaProvider(Provider):
    """Sucha Provider"""

    languages = {Language.fromalpha2(l) for l in ["es"]}
    language_list = list(languages)
    video_types = (Episode, Movie)

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        self.session.close()

    def query(self, languages, video):
        movie_year = video.year if video.year else "0"
        is_episode = isinstance(video, Episode)
        language = self.language_list[0]
        if is_episode:
            q = {
                "query":
                "{} S{:02}E{:02}".format(video.series, video.season,
                                         video.episode)
            }
        else:
            q = {"query": video.title, "year": movie_year}
        logger.debug("Searching subtitles: {}".format(q["query"]))
        res = self.session.get(server_url +
                               ("episode" if is_episode else "movie"),
                               params=q,
                               timeout=10)
        res.raise_for_status()
        result = res.json()
        subtitles = []
        for i in result:
            matches = set()
            try:
                if (video.title.lower() in i["title"].lower()
                        or video.title.lower() in i["alt_title"].lower()):
                    matches.add("title")
            except TypeError:
                logger.debug("No subtitles found")
                return []
            if is_episode:
                if (q["query"].lower() in i["title"].lower()
                        or q["query"].lower() in i["alt_title"].lower()):
                    matches.add("title")
                    matches.add("series")
                    matches.add("season")
                    matches.add("episode")
                    matches.add("year")
            if str(i["year"]) == video.year:
                matches.add("year")
            subtitles.append(
                SuchaSubtitle(
                    language,
                    i["release"],
                    i["filename"],
                    str(i["id"]),
                    "episode" if is_episode else "movie",
                    matches,
                ))
        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def _check_response(self, response):
        if response.status_code != 200:
            raise ServiceUnavailable("Bad status code: " +
                                     str(response.status_code))

    def _get_archive(self, content):
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled("Unsupported compressed format")
        return archive

    def get_file(self, archive):
        for name in archive.namelist():
            if os.path.split(name)[-1].startswith("."):
                continue
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue
            if ("[eng]" in name.lower() or ".en." in name.lower()
                    or ".eng." in name.lower()):
                continue
            logger.debug("Returning from archive: {}".format(name))
            return archive.read(name)
        raise APIThrottled("Can not find the subtitle in the compressed file")

    def download_subtitle(self, subtitle):
        logger.info("Downloading subtitle %r", subtitle)
        response = self.session.get(
            server_url + "download",
            params={
                "id": subtitle.download_id,
                "type": subtitle.download_type
            },
            timeout=10,
        )
        response.raise_for_status()
        self._check_response(response)
        archive = self._get_archive(response.content)
        subtitle_file = self.get_file(archive)
        subtitle.content = fix_line_ending(subtitle_file)
Code Example #26
class SubdivxSubtitlesProvider(Provider):
    provider_name = "subdivx"
    hash_verifiable = False
    languages = {Language.fromalpha2(lang) for lang in ["es"]}
    subtitle_class = SubdivxSubtitle

    server_url = "https://www.subdivx.com/"
    multi_result_throttle = 2
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"

    def terminate(self):
        self.session.close()

    def query(self, video, languages):
        if isinstance(video, Episode):
            query = f"{video.series} S{video.season:02}E{video.episode:02}"
        else:
            # Subdivx has problems searching foreign movies if the year is
            # appended. A proper solution would be filtering results with the
            # year in self._parse_subtitles_page.
            query = video.title

        params = {
            "q": query,  # search string
            "accion": 5,  # action search
            "oxdown": 1,  # order by downloads descending
            "pg": 1,  # page 1
        }

        logger.debug(f"Searching subtitles: {query}")
        subtitles = []
        language = self.language_list[0]
        search_link = self.server_url + "index.php"
        while True:
            response = self.session.get(search_link, params=params, timeout=20)
            self._check_response(response)

            try:
                page_subtitles = self._parse_subtitles_page(
                    video, response, language)
            except Exception as e:
                logger.error(f"Error parsing subtitles list: {e}")
                break

            subtitles += page_subtitles

            if len(page_subtitles) < 100:
                break  # this is the last page

            params["pg"] += 1  # search next page
            time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubdivxSubtitle):
            # download the subtitle
            logger.info("Downloading subtitle %r", subtitle)

            # get download link
            download_link = self._get_download_link(subtitle)

            # download zip / rar file with the subtitle
            response = self.session.get(
                self.server_url + download_link,
                headers={"Referer": subtitle.page_link},
                timeout=30,
            )
            self._check_response(response)

            # open the compressed archive
            archive = self._get_archive(response.content)

            # extract the subtitle
            subtitle_content = self._get_subtitle_from_archive(
                archive, subtitle)
            subtitle.content = fix_line_ending(subtitle_content)

    def _parse_subtitles_page(self, video, response, language):
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode("utf-8", "ignore"),
            ["lxml", "html.parser"])
        title_soups = page_soup.find_all("div",
                                         {"id": "menu_detalle_buscador"})
        body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})

        for title_soup, body_soup in zip(title_soups, body_soups):

            # title
            title = title_soup.find("a").text.replace("Subtitulos de ", "")

            # filter by year
            if video.year and str(video.year) not in title:
                continue

            page_link = title_soup.find("a")["href"]

            # description
            description = body_soup.find("div", {
                "id": "buscador_detalle_sub"
            }).text
            description = description.replace(",", " ").lower()

            # uploader
            uploader = body_soup.find("a", {"class": "link1"}).text

            subtitle = self.subtitle_class(language, video, page_link, title,
                                           description, uploader)

            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _get_download_link(self, subtitle):
        response = self.session.get(subtitle.page_link, timeout=20)
        self._check_response(response)
        try:
            page_soup = ParserBeautifulSoup(
                response.content.decode("utf-8", "ignore"),
                ["lxml", "html.parser"])
            links_soup = page_soup.find_all("a", {"class": "detalle_link"})
            for link_soup in links_soup:
                if link_soup["href"].startswith("bajar"):
                    return self.server_url + link_soup["href"]
            links_soup = page_soup.find_all("a", {"class": "link1"})
            for link_soup in links_soup:
                if "bajar.php" in link_soup["href"]:
                    return link_soup["href"]
        except Exception as e:
            raise APIThrottled(f"Error parsing download link: {e}")

        raise APIThrottled("Download link not found")

    @staticmethod
    def _check_response(response):
        if response.status_code != 200:
            raise ServiceUnavailable(
                f"Bad status code: {response.status_code}")

    @staticmethod
    def _get_archive(content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled("Unsupported compressed format")

        return archive

    @staticmethod
    def _get_subtitle_from_archive(archive, subtitle):
        _valid_names = []
        for name in archive.namelist():
            # discard hidden files
            # discard non-subtitle files
            if (not os.path.split(name)[-1].startswith(".")
                    and name.lower().endswith(SUBTITLE_EXTENSIONS)):
                _valid_names.append(name)

        # archive with only 1 subtitle
        if len(_valid_names) == 1:
            logger.debug(
                f"returning from archive: {_valid_names[0]} (single subtitle file)"
            )
            return archive.read(_valid_names[0])

        # in archives with more than 1 subtitle (season pack) we try to guess the best subtitle file
        _scores = get_scores(subtitle.video)
        _max_score = 0
        _max_name = ""
        for name in _valid_names:
            _guess = guessit(name)
            if "season" not in _guess:
                _guess["season"] = -1
            if "episode" not in _guess:
                _guess["episode"] = -1

            if isinstance(subtitle.video, Episode):
                logger.debug("guessing %s" % name)
                logger.debug(
                    f"subtitle S{_guess['season']}E{_guess['episode']} video "
                    f"S{subtitle.video.season}E{subtitle.video.episode}")

                if (subtitle.video.episode != _guess["episode"]
                        or subtitle.video.season != _guess["season"]):
                    logger.debug("subtitle does not match video, skipping")
                    continue

            matches = set()
            matches |= guess_matches(subtitle.video, _guess)
            _score = sum((_scores.get(match, 0) for match in matches))
            logger.debug("srt matches: %s, score %d" % (matches, _score))
            if _score > _max_score:
                _max_score = _score
                _max_name = name
                logger.debug(f"new max: {name} {_score}")

        if _max_score > 0:
            logger.debug(
                f"returning from archive: {_max_name} scored {_max_score}")
            return archive.read(_max_name)

        raise APIThrottled("Can not find the subtitle in the compressed file")
Code Example #27
File: sucha.py Project: GermanG/bazarr
class SuchaProvider(Provider):
    """Sucha Provider"""

    languages = {Language.fromalpha2(l) for l in ["es"]}
    language_list = list(languages)
    logger.debug(languages)
    video_types = (Episode, Movie)

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        self.session.close()

    def query(self, languages, video):
        movie_year = video.year if video.year else None
        is_episode = isinstance(video, Episode)
        imdb_id = video.imdb_id if video.imdb_id else None
        language = self.language_list[0]
        if is_episode:
            q = {
                "query":
                "{} S{:02}E{:02}".format(video.series, video.season,
                                         video.episode)
            }
        else:
            if imdb_id:
                q = {"query": imdb_id}
            else:
                q = {"query": video.title, "year": movie_year}

        logger.debug("Searching subtitles: {}".format(q["query"]))

        res = self.session.get(server_url + "search", params=q, timeout=10)
        res.raise_for_status()
        result = res.json()

        try:
            subtitles = []
            for i in result["results"]:
                matches = set()
                # We use 'in' instead of '==' since Subdivx titles are
                # irregular
                if video.title.lower() in i["title"].lower():
                    matches.add("title")
                if is_episode:
                    if q["query"].lower() in i["title"].lower():
                        matches.add("title")
                        matches.add("series")
                        matches.add("imdb_id")
                        matches.add("season")
                        matches.add("episode")
                        matches.add("year")
                if i["year"] == video.year:
                    matches.add("year")
                if imdb_id:
                    matches.add("imdb_id")

                # We'll add release group info (if found) to the pseudo filename
                # in order to show it in the manual search
                filename = i["pseudo_file"]
                if (video.release_group and str(video.release_group).lower()
                        in i["original_description"]):
                    filename = i["pseudo_file"].replace(
                        ".es.srt", "-" + str(video.release_group) + ".es.srt")

                subtitles.append(
                    SuchaSubtitle(
                        language,
                        i["referer"],
                        filename,
                        i["guessit"],
                        i["download_url"],
                        i["hearing_impaired"],
                        matches,
                    ))
            return subtitles
        except KeyError:
            logger.debug("No subtitles found")
            return []

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def _check_response(self, response):
        if response.status_code != 200:
            raise ServiceUnavailable("Bad status code: " +
                                     str(response.status_code))

    def _get_archive(self, content):
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled("Unsupported compressed format")
        return archive

    def get_file(self, archive):
        for name in archive.namelist():
            if os.path.split(name)[-1].startswith("."):
                continue
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue
            if ("[eng]" in name.lower() or ".en." in name.lower()
                    or ".eng." in name.lower()):
                continue
            logger.debug("Returning from archive: {}".format(name))
            return archive.read(name)
        raise APIThrottled("Can not find the subtitle in the compressed file")

    def download_subtitle(self, subtitle):
        logger.info("Downloading subtitle %r", subtitle)
        response = self.session.get(subtitle.download_link,
                                    headers={"Referer": subtitle.page_link},
                                    timeout=10)
        response.raise_for_status()
        self._check_response(response)
        archive = self._get_archive(response.content)
        subtitle_file = self.get_file(archive)
        subtitle.content = fix_line_ending(subtitle_file)
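For orientation, a hedged usage sketch of the provider above (the file path and metadata are made up; assumes subliminal's Movie and babelfish's Language, as used elsewhere in these examples):

from babelfish import Language
from subliminal.video import Movie

# hypothetical movie; real callers pass videos built by subliminal's scanner
video = Movie("/tmp/Contratiempo.2016.1080p.mkv", "Contratiempo", year=2016)

with SuchaProvider() as provider:
    subtitles = provider.list_subtitles(video, {Language.fromalpha2("es")})
    if subtitles:
        provider.download_subtitle(subtitles[0])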
コード例 #28
0
class SubdivxSubtitlesProvider(Provider):
    provider_name = 'subdivx'
    hash_verifiable = False
    languages = {Language.fromalpha2(l) for l in ['es']}
    subtitle_class = SubdivxSubtitle

    server_url = 'https://www.subdivx.com/'
    multi_result_throttle = 2
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(
            __short_version__)

    def terminate(self):
        self.session.close()

    def query(self, video, languages):

        if isinstance(video, Episode):
            query = "{} S{:02d}E{:02d}".format(video.series, video.season,
                                               video.episode)
        else:
            query = video.title
            if video.year:
                query += ' {:4d}'.format(video.year)

        params = {
            'q': query,  # search string
            'accion': 5,  # action search
            'oxdown': 1,  # order by downloads descending
            'pg': 1  # page 1
        }

        logger.debug('Searching subtitles %r', query)
        subtitles = []
        language = self.language_list[0]
        search_link = self.server_url + 'index.php'
        while True:
            response = self.session.get(search_link, params=params, timeout=20)
            self._check_response(response)

            try:
                page_subtitles = self._parse_subtitles_page(
                    video, response, language)
            except Exception as e:
                logger.error('Error parsing subtitles list: ' + str(e))
                break

            subtitles += page_subtitles

            if len(page_subtitles) < 20:
                break  # this is the last page

            params['pg'] += 1  # search next page
            time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubdivxSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)

            # get download link
            download_link = self._get_download_link(subtitle)

            # download zip / rar file with the subtitle
            response = self.session.get(
                download_link,
                headers={'Referer': subtitle.page_link},
                timeout=30)
            self._check_response(response)

            # open the compressed archive
            archive = self._get_archive(response.content)

            # extract the subtitle
            subtitle_content = self._get_subtitle_from_archive(
                archive, subtitle)
            subtitle.content = fix_line_ending(subtitle_content)

    def _check_response(self, response):
        if response.status_code != 200:
            raise ServiceUnavailable('Bad status code: ' +
                                     str(response.status_code))

    def _parse_subtitles_page(self, video, response, language):
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode('iso-8859-1', 'ignore'),
            ['lxml', 'html.parser'])
        title_soups = page_soup.find_all("div",
                                         {'id': 'menu_detalle_buscador'})
        body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})

        for title_soup, body_soup in zip(title_soups, body_soups):

            # title
            title = title_soup.find("a").text.replace("Subtitulos de ", "")
            page_link = title_soup.find("a")["href"]

            # description
            description = body_soup.find("div", {
                'id': 'buscador_detalle_sub'
            }).text
            description = description.replace(",", " ").lower()

            # uploader
            uploader = body_soup.find("a", {'class': 'link1'}).text

            subtitle = self.subtitle_class(language, video, page_link, title,
                                           description, uploader)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _get_download_link(self, subtitle):
        response = self.session.get(subtitle.page_link, timeout=20)
        self._check_response(response)
        try:
            page_soup = ParserBeautifulSoup(
                response.content.decode('iso-8859-1', 'ignore'),
                ['lxml', 'html.parser'])
            links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
            for link_soup in links_soup:
                if link_soup['href'].startswith('bajar'):
                    return self.server_url + link_soup['href']
            links_soup = page_soup.find_all("a", {'class': 'link1'})
            for link_soup in links_soup:
                if "bajar.php" in link_soup['href']:
                    return link_soup['href']
        except Exception as e:
            raise APIThrottled('Error parsing download link: ' + str(e))

        raise APIThrottled('Download link not found')

    def _get_archive(self, content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled('Unsupported compressed format')

        return archive

    def _get_subtitle_from_archive(self, archive, subtitle):
        _max_score = 0
        _scores = get_scores(subtitle.video)

        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            _guess = guessit(name)
            if isinstance(subtitle.video, Episode):
                logger.debug("guessing %s" % name)
                logger.debug("subtitle S{}E{} video S{}E{}".format(
                    _guess['season'], _guess['episode'], subtitle.video.season,
                    subtitle.video.episode))

                if subtitle.video.episode != _guess[
                        'episode'] or subtitle.video.season != _guess['season']:
                    logger.debug('subtitle does not match video, skipping')
                    continue

            matches = set()
            matches |= guess_matches(subtitle.video, _guess)
            _score = sum((_scores.get(match, 0) for match in matches))
            logger.debug('srt matches: %s, score %d' % (matches, _score))
            if _score > _max_score:
                _max_name = name
                _max_score = _score
                logger.debug("new max: {} {}".format(name, _score))

        if _max_score > 0:
            logger.debug("returning from archive: {} scored {}".format(
                _max_name, _max_score))
            return archive.read(_max_name)

        raise APIThrottled('Cannot find the subtitle in the compressed file')
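A similar hedged sketch for the Subdivx provider, this time ranking candidates (episode path and numbering are hypothetical; assumes SubdivxSubtitle implements subliminal's usual get_matches()):

from babelfish import Language
from subliminal.video import Episode

# hypothetical episode path and numbering
video = Episode("/tmp/Show.S01E02.720p.mkv", "Show", 1, 2)

with SubdivxSubtitlesProvider() as provider:
    subtitles = provider.list_subtitles(video, {Language.fromalpha2("es")})
    # prefer the candidate matching the most video properties
    subtitles.sort(key=lambda s: len(s.get_matches(video)), reverse=True)
    if subtitles:
        provider.download_subtitle(subtitles[0])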
コード例 #29
0
class NekurProvider(Provider, ProviderSubtitleArchiveMixin):
    """Nekur Provider."""
    subtitle_class = NekurSubtitle
    languages = {Language('lva', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
    server_url = 'http://subtitri.nekur.net/'
    search_url = server_url + 'modules/Subtitles.php'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

    def query(self, title):
        subtitles = []

        data = {
            'ajax': '1',
            'sSearch': title,
        }

        r = self.session.post(self.search_url, data=data, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('tbody > tr')
        for row in rows:
            # title
            title_anchor_el = row.select_one('.title > a')
            title_inner_text = [
                element for element in title_anchor_el
                if isinstance(element, NavigableString)
            ]
            title = title_inner_text[0].strip()

            # year
            year = row.select_one('.year').text.strip('()')
            # download link
            href = title_anchor_el.get('href')
            download_link = self.server_url + href

            # imdb id
            imdb_td = row.select_one('td:nth-of-type(4)')
            imdb_link = imdb_td.select_one('a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # fps
            fps = row.select_one('.fps').text.strip()

            # additional notes
            notes = row.select_one('.notes').text.strip()

            # page link = download link (there is no separate subtitle page link)
            page_link = download_link

            # create/add the subtitle
            subtitle = self.subtitle_class(
                Language.fromalpha2('lv'), page_link, download_link, title,
                year, imdb_id, fps, notes)
            logger.debug('nekur: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles
        for title in titles:
            if isinstance(video, Movie):
                subtitles += [s for s in self.query(title) if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, NekurSubtitle):
            # download the subtitle            
            r = self.session.get(subtitle.download_link, timeout=10)
            r.raise_for_status()

            # open the archive
            archive_stream = io.BytesIO(r.content)
            if is_rarfile(archive_stream):
                archive = RarFile(archive_stream)
            elif is_zipfile(archive_stream):
                archive = ZipFile(archive_stream)
            else:
                subtitle.content = r.content
                if subtitle.is_valid():
                    return
                subtitle.content = None

                raise ProviderError('Unidentified archive type')

            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
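The download path above falls back to treating the response as a bare subtitle when it is not an archive. A standalone sketch of that detection pattern (the function name is ours), using rarfile and the standard library:

import io
import zipfile

import rarfile  # third-party; the same dependency the providers use


def open_archive(content):
    """Open in-memory bytes as a rar/zip archive, or return None for a
    bare payload such as a plain .srt file."""
    stream = io.BytesIO(content)
    if rarfile.is_rarfile(stream):
        return rarfile.RarFile(stream)
    if zipfile.is_zipfile(stream):
        return zipfile.ZipFile(stream)
    return None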
コード例 #30
0
ファイル: napiprojekt.py プロジェクト: drikqlis/bazarr
class NapiProjektProvider(_NapiProjektProvider):
    languages = {Language.fromalpha2(l) for l in ['pl']}
    subtitle_class = NapiProjektSubtitle
    required_hash = 'napiprojekt'
    server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'

    def query(self, language, subq):
        # list_subtitles already builds complete subtitle objects, so this
        # is a pass-through kept for the provider interface
        logger.debug('Found subtitle %r', subq)
        return subq

    def get_length(self, filename):
        # probe the container duration (in seconds) with ffprobe
        result = subprocess.run(
            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
             "-of", "default=noprint_wrappers=1:nokey=1", filename],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return float(result.stdout)

    def list_subtitles(self, video, languages):
        season = episode = None
        year = video.year
        duration = self.get_length(video.original_path)
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
            v_type = "series"
        else:
            title = video.title
            v_type = "movie"

        subs = []

        url = 'https://www.napiprojekt.pl/ajax/search_catalog.php'
        # form field names; the original carried stray '&' prefixes copied
        # from a URL query string, which the endpoint would not recognize
        req = {'queryString': title, 'queryKind': v_type,
               'queryYear': year, 'associate': ''}
        searchsub = requests.post(url, data=req)
        soup2 = BeautifulSoup(searchsub.text, 'html.parser')
        result = soup2.find('a', {'class': 'movieTitleCat'})
        if result:
            sub_link = "https://www.napiprojekt.pl/" + result['href']
            sub_link = sub_link.replace("napisy-", "napisy1,1,1-dla-", 1)
            if v_type == "series":
                sub_link += "-s" + str(season).zfill(2) + "e" + str(episode).zfill(2)
            logger.debug("Checking subs on: " + sub_link)
            page = requests.get(sub_link)
            soup = BeautifulSoup(page.text, 'html.parser')
            # each numbered "napisyN,1,1-dla-" page holds one batch of results
            slider = soup.find('div', {'class': 'sliderContent _oF'})
            howmany = len(slider.findAll('a')) if slider else 1
            # providers are queried with a single language here
            lang = next(iter(languages))
            for x in range(1, howmany + 1):
                sub_link_loop = sub_link.replace(
                    "napisy1,1,1-dla-", "napisy" + str(x) + ",1,1-dla-", 1)
                page = requests.get(sub_link_loop)
                soup = BeautifulSoup(page.text, 'html.parser')
                table = soup.find('tbody')
                if table:
                    for row in table.findAll('tr'):
                        cells = row.findAll('td')
                        napid = cells[0].find(
                            'a', href=True)['href'].replace("napiprojekt:", "")
                        length = cells[3].text
                        downloads = cells[6].text
                        # parse the "H:MM:SS" runtime cell into seconds
                        if length == "":
                            floatlength = 0
                        else:
                            hours, minutes, seconds = length.split(":")
                            floatlength = (int(hours) * 3600
                                           + int(minutes) * 60 + float(seconds))
                        # keep candidates within a minute of the video runtime
                        if duration - 60 <= floatlength <= duration + 60:
                            subs.append(self.subtitle_class(
                                lang, napid, floatlength, downloads))
            # closest runtime match first
            sortedsubs = sorted(subs, key=lambda s: abs(s.duration - duration))
            return [s for s in (self.query(lang, subsrt) for subsrt in sortedsubs)
                    if s is not None]
        return []

    def download_subtitle(self, subtitle):
        hash = subtitle.hash
        params = {
            'v': 'dreambox',
            'kolejka': 'false',
            'nick': '',
            'pass': '',
            'napios': 'Linux',
            'l': 'PL',
            'f': hash,
            't': get_subhash(hash)}
        logger.info('Downloading subtitle %r', params)
        r = self.session.get(self.server_url, params=params, timeout=10)
        r.raise_for_status()

        # handle subtitles not found and errors
        if r.content[:4] == b'NPc0':
            logger.debug('No subtitles downloaded')
            return

        subtitle.content = r.content
        logger.debug('Downloaded subtitle %r', subtitle)
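The runtime filter is the distinctive part of this variant: candidates are kept only if their advertised duration is within a minute of the probed video length. A minimal standalone sketch of the same arithmetic (names are ours):

def runtime_to_seconds(text):
    """Parse an 'H:MM:SS' runtime cell into seconds (empty cell -> 0)."""
    if not text:
        return 0.0
    hours, minutes, seconds = text.split(":")
    return int(hours) * 3600 + int(minutes) * 60 + float(seconds)


def within_window(candidate_seconds, video_seconds, window=60):
    """Accept a subtitle whose runtime is within `window` seconds of the video."""
    return video_seconds - window <= candidate_seconds <= video_seconds + window


# e.g. a 1:52:30 subtitle (6750 s) against a 6745-second video passes
assert within_window(runtime_to_seconds("1:52:30"), 6745.0)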