Ejemplo n.º 1
0
    def parse_results(self, video, film):
        """
        Convert the subtitles of an API film result into provider subtitles.

        Entries that ``SubsceneSubtitle.from_api`` rejects with
        ``NotImplementedError`` are logged and skipped.

        :param video: video being searched for; its ``release_group`` (and its
            ``episode`` when it is an ``Episode``) is recorded on every subtitle
        :param film: API result exposing a ``subtitles`` iterable
        :return: list of the converted subtitles
        """
        found = []
        for api_entry in film.subtitles:
            try:
                sub = SubsceneSubtitle.from_api(api_entry)
            except NotImplementedError as exc:
                logger.info(exc)
                continue

            # remember what was asked for
            sub.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                sub.asked_for_episode = video.episode

            # foreign-only searches mark the language as forced
            if self.only_foreign:
                sub.language = Language.rebuild(sub.language, forced=True)

            # hearing impaired subtitles carry the hi flag on their language
            if sub.hearing_impaired:
                sub.language = Language.rebuild(sub.language, hi=True)

            found.append(sub)
            logger.debug('Found subtitle %r', sub)

        return found
Ejemplo n.º 2
0
def _get_language_obj(languages):
    language_set = set()

    if not isinstance(languages, (set, list)):
        languages = [languages]

    for language in languages:
        lang, hi_item, forced_item = language
        if hi_item == "True":
            hi = "force HI"
        else:
            hi = "force non-HI"

        # Always use alpha2 in API Request
        lang = alpha3_from_alpha2(lang)

        lang_obj = _get_lang_obj(lang)

        if forced_item == "True":
            lang_obj = Language.rebuild(lang_obj, forced=True)
        if hi == "force HI":
            lang_obj = Language.rebuild(lang_obj, hi=True)

        language_set.add(lang_obj)

    return language_set
Ejemplo n.º 3
0
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, skip_unknown=False):
    """
    Collect the text subtitle streams of a media part.

    ``update_stream_info(part)`` is called before the streams are inspected.

    :param part: media part whose ``streams`` are inspected
    :param requested_language: when given, collection stops at the first stream whose language equals it
    :param skip_duplicate_unknown: with ``config.treat_und_as_first`` enabled, ignore every stream without a
        language after the first one
    :param skip_unknown: when the requested language was not found, do not fall back to the streams without
        a language
    :return: list of dicts with the keys "stream", "is_unknown", "language" and "is_forced"
    """
    selected = []
    unknown_entries = []
    every_entry = []
    seen_unknown = False
    found_requested_language = False
    update_stream_info(part)

    for stream in part.streams:
        # only subtitle streams (type 3) without a stream key and with a text subtitle codec
        if stream.stream_type != 3 or stream.stream_key or stream.codec not in TEXT_SUBTITLE_EXTS:
            continue

        is_forced = is_stream_forced(stream)
        language = helpers.get_language_from_stream(stream.language_code)
        if language:
            language = Language.rebuild(language, forced=is_forced)

        found_requested_language = requested_language and requested_language == language
        is_unknown = False
        entry = None

        if not language:
            if not config.treat_und_as_first:
                language = None
            elif seen_unknown and skip_duplicate_unknown:
                # only consider first unknown subtitle stream
                Log.Debug("skipping duplicate unknown")
                continue
            else:
                # treat the stream as being in the first configured language
                language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)

            is_unknown = seen_unknown = True
            entry = {"stream": stream, "is_unknown": is_unknown, "language": language,
                     "is_forced": is_forced}
            unknown_entries.append(entry)

        if not requested_language or found_requested_language:
            entry = {"stream": stream, "is_unknown": is_unknown, "language": language,
                     "is_forced": is_forced}
            selected.append(entry)

            if found_requested_language:
                break

        if entry:
            every_entry.append(entry)

    if not requested_language:
        return every_entry
    if unknown_entries and not found_requested_language and not skip_unknown:
        return unknown_entries
    return selected
Ejemplo n.º 4
0
def _group_lines_into_blocks(lines, max_characters, separator):
    """Pack *lines*, in order, into blocks whose joined length fits *max_characters*.

    A line is never split: a single line longer than the limit gets a block of its own.
    """
    blocks = []
    current = []
    current_length = 0
    for line in lines:
        extra = len(line) + (len(separator) if current else 0)
        if current and current_length + extra > max_characters:
            blocks.append(current)
            current = []
            current_length = 0
            extra = len(line)
        current.append(line)
        current_length += extra
    if current:
        blocks.append(current)
    return blocks


def translate_subtitles_file(video_path, source_srt_file, to_lang, forced, hi):
    """Translate a subtitles file with Google Translate and save the result next to the video.

    :param video_path: path of the video, used to build the destination subtitles path
    :param source_srt_file: path of the subtitles file to translate (loaded as UTF-8)
    :param to_lang: target language code, converted with ``alpha3_from_alpha2``
    :param forced: truthy to mark the target language (and file name) as forced
    :param hi: truthy to mark the target language (and file name) as hearing impaired
    :return: path of the translated subtitles file, or ``False`` when the translation failed or did
        not return enough lines
    """
    to_lang = alpha3_from_alpha2(to_lang)
    lang_obj = Language(to_lang)
    if forced:
        lang_obj = Language.rebuild(lang_obj, forced=True)
    if hi:
        lang_obj = Language.rebuild(lang_obj, hi=True)

    logging.debug('BAZARR is translating in {0} this subtitles {1}'.format(lang_obj, source_srt_file))

    max_characters = 5000
    # lines are joined with this marker so the translated text can be split back into lines
    separator = '\n\n\n'

    dest_srt_file = get_subtitle_path(video_path, language=lang_obj, extension='.srt', forced_tag=forced, hi_tag=hi)

    subs = pysubs2.load(source_srt_file, encoding='utf-8')
    lines_list = [x.plaintext for x in subs]

    logging.debug('BAZARR splitting subtitles into {} characters blocks'.format(max_characters))
    # Blocks are built from the list of lines rather than by slicing the joined text: every block
    # then holds whole lines only, never starts with a separator, and the loop always terminates.
    lines_block_list = [separator.join(block)
                        for block in _group_lines_into_blocks(lines_list, max_characters, separator)]

    logging.debug('BAZARR is sending {} blocks to Google Translate'.format(len(lines_block_list)))
    translated_lines_list = []
    for block_str in lines_block_list:
        try:
            translated_partial_srt_text = GoogleTranslator(source='auto',
                                                           target=lang_obj.basename).translate(text=block_str)
        except Exception:
            logging.exception('BAZARR is unable to translate this subtitles: {}'.format(source_srt_file))
            return False
        translated_lines_list += translated_partial_srt_text.split(separator)

    # fewer translated lines than events would otherwise fail half-way with an IndexError
    if len(translated_lines_list) < len(lines_list):
        logging.error('BAZARR translation returned {0} lines but {1} were expected for this subtitles {2}'.format(
            len(translated_lines_list), len(lines_list), source_srt_file))
        return False

    logging.debug('BAZARR saving translated subtitles to {}'.format(dest_srt_file))
    for i, line in enumerate(subs):
        line.plaintext = translated_lines_list[i]
    subs.save(dest_srt_file)

    return dest_srt_file
Ejemplo n.º 5
0
def get_embedded_subtitle_streams(part,
                                  requested_language=None,
                                  skip_duplicate_unknown=True):
    """
    Collect the text subtitle streams of a media part.

    :param part: media part whose ``streams`` are inspected
    :param requested_language: when given, collection stops at the first
        stream whose language equals it
    :param skip_duplicate_unknown: with ``config.treat_und_as_first`` enabled,
        ignore every stream without a language after the first one
    :return: list of dicts with the keys "stream", "is_unknown", "language"
        and "is_forced"; the streams without a language are returned instead
        when there are any and the last inspected stream did not match the
        requested language
    """
    selected = []
    unknown_entries = []
    seen_unknown = False
    found_requested_language = False

    for stream in part.streams:
        # only subtitle streams (type 3) without a stream key and with a text subtitle codec
        if stream.stream_type != 3 or stream.stream_key or stream.codec not in TEXT_SUBTITLE_EXTS:
            continue

        is_forced = helpers.is_stream_forced(stream)
        language = helpers.get_language_from_stream(stream.language_code)
        if language:
            language = Language.rebuild(language, forced=is_forced)

        found_requested_language = requested_language and requested_language == language
        is_unknown = False

        if not language and config.treat_und_as_first:
            # only consider first unknown subtitle stream
            if seen_unknown and skip_duplicate_unknown:
                continue

            # treat the stream as being in the first configured language
            language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
            is_unknown = seen_unknown = True
            unknown_entries.append({
                "stream": stream,
                "is_unknown": is_unknown,
                "language": language,
                "is_forced": is_forced,
            })

        if requested_language and not found_requested_language:
            continue

        selected.append({
            "stream": stream,
            "is_unknown": is_unknown,
            "language": language,
            "is_forced": is_forced,
        })
        if found_requested_language:
            break

    if unknown_entries and not found_requested_language:
        return unknown_entries
    return selected
Ejemplo n.º 6
0
    def load(self, fn=None, content=None, language=None, encoding="utf-8"):
        """
        Parse a subtitle from a file or from already decoded content.

        :param fn: filename; takes precedence over content
        :param content: unicode subtitle content, used when no fn is given
        :param language: babelfish.Language language of the subtitle; stored with forced=False
        :param encoding: used for decoding the content when fn is given, not used in case content is given
        :return: truthiness of the parsed subtitle (self.f)
        """
        if language:
            self.language = Language.rebuild(language, forced=False)
        self.initialized_mods = {}

        try:
            if fn:
                self.f = pysubs2.load(fn, encoding=encoding)
            elif content:
                self.f = pysubs2.SSAFile.from_string(content)
        except (IOError, UnicodeDecodeError, pysubs2.exceptions.UnknownFPSError,
                pysubs2.exceptions.UnknownFormatIdentifierError, pysubs2.exceptions.FormatAutodetectionError):
            # load failures are logged, not raised; the return value reports the outcome
            trace = traceback.format_exc()
            if fn:
                logger.exception("Couldn't load subtitle: %s: %s", fn, trace)
            elif content:
                logger.exception("Couldn't load subtitle: %s", trace)

        return bool(self.f)
Ejemplo n.º 7
0
    def __init__(self, language, forced, hearing_impaired, page_link, file_id,
                 releases, uploader, title, year, hash_matched,
                 file_hash=None, season=None, episode=None):
        """
        Store the provider metadata of one subtitle.

        The language is rebuilt so that it carries the ``hi`` and ``forced``
        flags given by ``hearing_impaired`` and ``forced``.
        """
        language = Language.rebuild(language, hi=hearing_impaired, forced=forced)

        # what the subtitle is for
        self.title = title
        self.year = year
        self.season = season
        self.episode = episode

        # release information; both attributes hold the same value
        self.releases = releases
        self.release_info = releases

        # language and its flags
        self.language = language
        self.hearing_impaired = hearing_impaired
        self.forced = forced

        # where the subtitle comes from
        self.file_id = file_id
        self.page_link = page_link
        self.download_link = None
        self.uploader = uploader

        # matching and content state
        self.matches = None
        self.hash = file_hash
        self.encoding = 'utf-8'
        self.hash_matched = hash_matched
Ejemplo n.º 8
0
def guess_external_subtitles(dest_folder, subtitles):
    """Fill in the missing languages of external subtitles by analysing their content.

    Only entries whose language is falsy are examined, and only when the file exists in
    *dest_folder* and has one of ``core.SUBTITLE_EXTENSIONS``. Detection failures are logged
    and leave the entry untouched.

    NOTE(review): relies on ``dict.iteritems`` and ``str.decode``, i.e. Python 2 semantics.

    :param dest_folder: folder containing the subtitle files
    :param subtitles: mapping of subtitle file name to language; updated in place
    :return: the same *subtitles* mapping
    """
    for subtitle, language in subtitles.iteritems():
        if language:
            continue

        subtitle_path = os.path.join(dest_folder, subtitle)
        if not os.path.exists(subtitle_path):
            continue
        if os.path.splitext(subtitle_path)[1] not in core.SUBTITLE_EXTENSIONS:
            continue

        logging.debug(
            "BAZARR falling back to file content analysis to detect language."
        )

        # the first 100 lines are used as the detection sample
        with open(subtitle_path, 'r') as f:
            text = ' '.join(islice(f, 100))

        try:
            encoding = UnicodeDammit(text)
            text = text.decode(encoding.original_encoding)
            detected_language = langdetect.detect(text)
        except Exception:
            logging.exception(
                'BAZARR Error trying to detect language for this subtitles file: '
                + subtitle_path +
                ' You should try to delete this subtitles file manually and ask '
                'Bazarr to download it again.')
            continue

        if not detected_language:
            continue

        logging.debug(
            "BAZARR external subtitles detected and guessed this language: "
            + str(detected_language))
        try:
            subtitles[subtitle] = Language.rebuild(
                Language.fromietf(detected_language))
        except Exception:
            # best effort: keep the entry without a language, but leave a trace of the failure
            logging.debug(
                "BAZARR unable to convert the detected language into a Language object: "
                + str(detected_language))
    return subtitles
Ejemplo n.º 9
0
    def _parse_row(self, row, languages, server_url):
        td = row.findAll('td')
        rating = int(td[0].text)
        sub_lang = td[1].text
        release = re.sub(r'^subtitle ', '', td[2].text)
        sub_link = td[2].find('a').get('href')
        page_link = server_url + sub_link
        sub_link = re.sub(r'^/subtitles/', server_url + '/subtitle/',
                          sub_link) + '.zip'
        hi = True if td[3].find('span', {'class': 'hi-subtitle'}) else False
        uploader = td[4].text

        _, l, c = next(x for x in self.YifyLanguages if x[0] == sub_lang)
        lang = Language(l, c)

        # set subtitle language to hi if it's hearing_impaired
        if hi:
            lang = Language.rebuild(lang, hi=True)

        if languages & {lang}:
            return [
                YifySubtitle(lang, page_link, release, uploader, sub_link,
                             rating, hi)
            ]

        return []
Ejemplo n.º 10
0
    def load(self, fn=None, content=None, language=None, encoding="utf-8"):
        """
        Parse a subtitle from a file or from already decoded content.

        :param fn: filename; takes precedence over content
        :param content: unicode subtitle content, used when no fn is given
        :param language: babelfish.Language language of the subtitle; stored with forced=False
        :param encoding: used for decoding the content when fn is given, not used in case content is given
        :return: truthiness of the parsed subtitle (self.f)
        """
        if language:
            self.language = Language.rebuild(language, forced=False)
        self.initialized_mods = {}

        try:
            if fn:
                self.f = pysubs2.load(fn, encoding=encoding)
            elif content:
                self.f = pysubs2.SSAFile.from_string(content)
        except (IOError, UnicodeDecodeError, pysubs2.exceptions.UnknownFPSError,
                pysubs2.exceptions.UnknownFormatIdentifierError, pysubs2.exceptions.FormatAutodetectionError):
            # load failures are logged, not raised; the return value reports the outcome
            trace = traceback.format_exc()
            if fn:
                logger.exception("Couldn't load subtitle: %s: %s", fn, trace)
            elif content:
                logger.exception("Couldn't load subtitle: %s", trace)

        return bool(self.f)
Ejemplo n.º 11
0
def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False):
    """
    Find the subtitle files that sit next to a video and determine their language from the file name.

    :param path: path of the video; its directory is scanned and subtitle file names must start with
        its file name root (compared case-insensitively)
    :param languages: languages used when ``only_one`` is set and a file name carries no language code
    :param only_one: give the first entry of ``languages`` to subtitle files without a language code
    :param scandir_generic: scan with ``_scandir_generic`` instead of ``scandir``
    :return: dict mapping subtitle file name to its ``Language``; the value is ``None`` when the
        language code could not be parsed and the undefined language when there is no code
    """
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fileroot = os.path.splitext(filename)[0]
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir
    for entry in _scandir(dirpath):
        if not entry.name and not scandir_generic:
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
            return _search_external_subtitles(path, languages, only_one, True)
        if not entry.is_file(follow_symlinks=False):
            continue

        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().startswith(fileroot.lower()) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        p_root, p_ext = os.path.splitext(p)
        # compare the extension case-insensitively, like the file name checks above
        if not INCLUDE_EXOTIC_SUBS and p_ext.lower() not in (".srt", ".ass", ".ssa", ".vtt"):
            continue

        # extract potential forced/normal/default tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom']:
                p_root = split_tag[0]

        forced = False
        if adv_tag:
            forced = "forced" in adv_tag

        # extract the potential language code
        language_code = p_root[len(fileroot):].replace('_', '-')[1:]

        # default language is undefined
        language = Language('und')

        # attempt to parse
        if language_code:
            try:
                language = Language.fromietf(language_code)
                language.forced = forced
            except ValueError:
                logger.error('Cannot parse language code %r', language_code)
                language = None

        elif only_one and languages:
            # without any configured language the subtitle stays undefined instead of failing
            language = Language.rebuild(list(languages)[0], forced=forced)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
Ejemplo n.º 12
0
def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False):
    """
    Find the subtitle files that sit next to a video and determine their language from the file name.

    :param path: path of the video; its directory is scanned and subtitle file names must start with
        its file name root (compared case-insensitively)
    :param languages: languages used when ``only_one`` is set and a file name carries no language code
    :param only_one: give the first entry of ``languages`` to subtitle files without a language code
    :param scandir_generic: scan with ``_scandir_generic`` instead of ``scandir``
    :return: dict mapping subtitle file name to its ``Language``; the value is ``None`` when the
        language code could not be parsed and the undefined language when there is no code
    """
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fileroot = os.path.splitext(filename)[0]
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir
    for entry in _scandir(dirpath):
        if not entry.name and not scandir_generic:
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
            return _search_external_subtitles(path, languages, only_one, True)
        if not entry.is_file(follow_symlinks=False):
            continue

        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().startswith(fileroot.lower()) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        p_root, p_ext = os.path.splitext(p)
        # compare the extension case-insensitively, like the file name checks above
        if not INCLUDE_EXOTIC_SUBS and p_ext.lower() not in (".srt", ".ass", ".ssa", ".vtt"):
            continue

        # extract potential forced/normal/default tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom']:
                p_root = split_tag[0]

        forced = False
        if adv_tag:
            forced = "forced" in adv_tag

        # extract the potential language code
        language_code = p_root[len(fileroot):].replace('_', '-')[1:]

        # default language is undefined
        language = Language('und')

        # attempt to parse
        if language_code:
            try:
                language = Language.fromietf(language_code)
                language.forced = forced
            except ValueError:
                logger.error('Cannot parse language code %r', language_code)
                language = None

        elif only_one and languages:
            # without any configured language the subtitle stays undefined instead of failing
            language = Language.rebuild(list(languages)[0], forced=forced)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
Ejemplo n.º 13
0
def test_list_subtitles_also_forced(video_single_language):
    """Asking for English and forced English still lists non-forced English subtitles."""
    english = Language.fromalpha2("en")
    forced_english = Language.rebuild(english, forced=True)
    wanted = {english, forced_english}

    with EmbeddedSubtitlesProvider() as provider:
        subs = provider.list_subtitles(video_single_language, wanted)
        assert any(english == sub.language for sub in subs)
        assert any(not sub.language.forced for sub in subs)
Ejemplo n.º 14
0
    def __init__(self, stream, container, matches):
        """
        Wrap a subtitle stream of a video container.

        :param stream: subtitle stream; provides the language and the
            ``hearing_impaired`` / ``forced`` disposition flags
        :param container: container holding the stream; its ``path`` is used
            as page link and, reduced to its base name, as release info
        :param matches: stored as ``_matches``
        """
        disposition = stream.disposition
        super().__init__(stream.language, disposition.hearing_impaired)

        # a forced stream carries the forced flag on its language
        if disposition.forced:
            self.language = Language.rebuild(stream.language, forced=True)

        self.stream: FFprobeSubtitleStream = stream
        self.container: FFprobeVideoContainer = container
        self.forced = disposition.forced
        self._matches: set = matches

        self.page_link = self.container.path
        self.release_info = os.path.basename(self.page_link)
Ejemplo n.º 15
0
def guess_external_subtitles(dest_folder, subtitles):
    """Fill in the missing languages of external subtitles by analysing their content.

    Only entries whose language is falsy are examined, and only when the file exists in
    *dest_folder* and has one of ``core.SUBTITLE_EXTENSIONS``. Files larger than 5M are skipped.
    Detection failures are logged and leave the entry untouched.

    :param dest_folder: folder containing the subtitle files
    :param subtitles: mapping of subtitle file name to language; updated in place
    :return: the same *subtitles* mapping
    """
    for subtitle, language in six.iteritems(subtitles):
        if language:
            continue

        subtitle_path = os.path.join(dest_folder, subtitle)
        if not os.path.exists(subtitle_path):
            continue
        if os.path.splitext(subtitle_path)[1] not in core.SUBTITLE_EXTENSIONS:
            continue

        logging.debug(
            "BAZARR falling back to file content analysis to detect language."
        )

        # to improve performance, skip detection of files larger that 5M
        if os.path.getsize(subtitle_path) > 5 * 1024 * 1024:
            logging.debug(
                "BAZARR subtitles file is too large to be text based. Skipping this file: "
                + subtitle_path)
            continue

        with open(subtitle_path, 'rb') as f:
            text = f.read()

        try:
            # to improve performance, use only the first 32K to detect encoding
            guess = chardet.detect(text[:32768])
            logging.debug('BAZARR detected encoding %r', guess)
            if guess["confidence"] < 0.6:
                raise UnicodeError
            # a weak guess or plain ascii is decoded as utf-8
            if guess["confidence"] < 0.8 or guess["encoding"] == "ascii":
                guess["encoding"] = "utf-8"
            text = text.decode(guess["encoding"])
            detected_language = guess_language(text)
        except UnicodeError:
            logging.exception(
                "BAZARR subtitles file doesn't seems to be text based. Skipping this file: "
                + subtitle_path)
            continue
        except Exception:
            logging.exception(
                'BAZARR Error trying to detect language for this subtitles file: '
                + subtitle_path +
                ' You should try to delete this subtitles file manually and ask '
                'Bazarr to download it again.')
            continue

        if not detected_language:
            continue

        logging.debug(
            "BAZARR external subtitles detected and guessed this language: "
            + str(detected_language))
        try:
            subtitles[subtitle] = Language.rebuild(
                Language.fromietf(detected_language))
        except Exception:
            # best effort: keep the entry without a language, but leave a trace of the failure
            logging.debug(
                "BAZARR unable to convert the detected language into a Language object: "
                + str(detected_language))
    return subtitles
Ejemplo n.º 16
0
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, skip_unknown=False):
    """
    Collect the text subtitle streams of a media part.

    :param part: media part whose ``streams`` are inspected
    :param requested_language: when given, collection stops at the first stream whose language equals it
    :param skip_duplicate_unknown: with ``config.treat_und_as_first`` enabled, ignore every stream without a
        language after the first one
    :param skip_unknown: never return the streams without a language as a fallback
    :return: list of dicts with the keys "stream", "is_unknown", "language" and "is_forced"
    """
    selected = []
    unknown_entries = []
    seen_unknown = False
    found_requested_language = False

    for stream in part.streams:
        # only subtitle streams (type 3) without a stream key and with a text subtitle codec
        if stream.stream_type != 3 or stream.stream_key or stream.codec not in TEXT_SUBTITLE_EXTS:
            continue

        is_forced = helpers.is_stream_forced(stream)
        language = helpers.get_language_from_stream(stream.language_code)
        if language:
            language = Language.rebuild(language, forced=is_forced)

        found_requested_language = requested_language and requested_language == language
        is_unknown = False

        if not language and config.treat_und_as_first:
            # only consider first unknown subtitle stream
            if seen_unknown and skip_duplicate_unknown:
                continue

            # treat the stream as being in the first configured language
            language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
            is_unknown = seen_unknown = True
            unknown_entries.append({"stream": stream, "is_unknown": is_unknown, "language": language,
                                    "is_forced": is_forced})

        if requested_language and not found_requested_language:
            continue

        selected.append({"stream": stream, "is_unknown": is_unknown, "language": language,
                         "is_forced": is_forced})
        if found_requested_language:
            break

    use_unknown = unknown_entries and not found_requested_language and not skip_unknown
    return unknown_entries if use_unknown else selected
Ejemplo n.º 17
0
def _get_language_obj(profile_id):
    """
    Build the language sets of a languages profile.

    :param profile_id: id of the profile; converted with ``int()`` for the lookup
    :return: tuple ``(language_set, initial_language_set)``. The initial set
        holds one ``Language`` per profile item; ``language_set`` additionally
        holds, for every non-forced language, the same language with its hi
        flag inverted
    """
    # 'items' is a list of dicts with the keys 'language', 'forced' and 'hi'
    profile_items = get_profiles_list(profile_id=int(profile_id))['items']

    initial_language_set = set()
    for item in profile_items:
        forced_flag = item['forced']
        hi_flag = item['hi']
        code = item['language']

        lang_obj = _get_lang_obj(alpha3_from_alpha2(code))
        if forced_flag == "True":
            lang_obj = Language.rebuild(lang_obj, forced=True)
        if hi_flag == "True":
            lang_obj = Language.rebuild(lang_obj, hi=True)

        initial_language_set.add(lang_obj)

    language_set = initial_language_set.copy()
    for lang_obj in initial_language_set:
        # forced languages get no hi / non-hi counterpart
        if lang_obj.forced:
            continue
        language_set.add(Language.rebuild(lang_obj, hi=not lang_obj.hi))

    return language_set, initial_language_set
Ejemplo n.º 18
0
    def parse_results(self, video, film):
        """
        Convert the subtitles of an API film result into provider subtitles.

        :param video: video being searched for; its ``release_group`` (and its
            ``episode`` when it is an ``Episode``) is recorded on every subtitle
        :param film: API result exposing a ``subtitles`` iterable
        :return: list of the converted subtitles
        """
        found = []
        for api_entry in film.subtitles:
            sub = SubsceneSubtitle.from_api(api_entry)

            # remember what was asked for
            sub.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                sub.asked_for_episode = video.episode

            # foreign-only searches mark the language as forced
            if self.only_foreign:
                sub.language = Language.rebuild(sub.language, forced=True)

            found.append(sub)
            logger.debug('Found subtitle %r', sub)

        return found
Ejemplo n.º 19
0
    def parse_results(self, video, film):
        """
        Convert the subtitles of an API film result into provider subtitles.

        :param video: video being searched for; its ``release_group`` (and its
            ``episode`` when it is an ``Episode``) is recorded on every subtitle
        :param film: API result exposing a ``subtitles`` iterable
        :return: list of the converted subtitles
        """
        found = []
        for api_entry in film.subtitles:
            sub = SubsceneSubtitle.from_api(api_entry)

            # remember what was asked for
            sub.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                sub.asked_for_episode = video.episode

            # foreign-only searches mark the language as forced
            if self.only_foreign:
                sub.language = Language.rebuild(sub.language, forced=True)

            found.append(sub)
            logger.debug('Found subtitle %r', sub)

        return found
Ejemplo n.º 20
0
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
    """
    Download the best subtitles for every video that misses at least one language.

    :param video_part_map: mapping of video to media part
    :param min_score: minimum score, passed on to subliminal
    :param throttle_time: passed on to subliminal
    :param providers: providers to use; ``config.providers`` when falsy
    :return: the result of ``subliminal.download_best_subtitles``, or ``None``
        when no language is configured or nothing is missing
    """
    hearing_impaired = Prefs['subtitles.search.hearingImpaired']
    languages = {Language.rebuild(l) for l in config.lang_list}
    if not languages:
        return

    use_videos = []
    missing_languages = set()
    for video, part in video_part_map.iteritems():
        # videos flagged with ignore_all are searched for every configured language
        if video.ignore_all:
            p_missing_languages = languages
        else:
            p_missing_languages = get_missing_languages(video, part)

        if not p_missing_languages:
            continue

        Log.Info(u"%s has missing languages: %s",
                 os.path.basename(video.name), p_missing_languages)
        refine_video(video, refiner_settings=config.refiner_settings)
        use_videos.append(video)
        missing_languages.update(p_missing_languages)

    # prepare blacklist
    blacklist = get_blacklist_from_part_map(video_part_map, languages)

    if not (use_videos and missing_languages):
        Log.Debug("All languages for all requested videos exist. Doing nothing.")
        return

    Log.Debug(
        "Download best subtitles using settings: min_score: %s, hearing_impaired: %s, languages: %s"
        % (min_score, hearing_impaired, missing_languages))

    return subliminal.download_best_subtitles(
        set(use_videos),
        missing_languages,
        min_score,
        hearing_impaired,
        providers=providers or config.providers,
        provider_configs=config.provider_settings,
        pool_class=config.provider_pool,
        compute_score=compute_score,
        throttle_time=throttle_time,
        blacklist=blacklist,
        throttle_callback=config.provider_throttle,
        pre_download_hook=pre_download_hook,
        post_download_hook=post_download_hook,
        language_hook=language_hook)
Ejemplo n.º 21
0
    def __init__(self, language, hearing_impaired=False, page_link=None, encoding=None, mods=None):
        """
        Initialise the subtitle.

        :param language: language of the subtitle; rebuilt with ``hi=True``
            when the subtitle is hearing impaired
        :param hearing_impaired: whether the subtitle is hearing impaired
        :param page_link: passed on to the parent class
        :param encoding: passed on to the parent class
        :param mods: stored as ``mods``
        """
        # set subtitle language to hi if it's hearing_impaired
        effective_language = Language.rebuild(language, hi=True) if hearing_impaired else language

        super(Subtitle, self).__init__(effective_language, hearing_impaired=hearing_impaired,
                                       page_link=page_link, encoding=encoding)
        self._is_valid = False
        self.mods = mods
Ejemplo n.º 22
0
def guess_external_subtitles(dest_folder, subtitles):
    """Fill in the missing languages of external subtitles by analysing their content.

    Only entries whose language is falsy are examined, and only when the file exists in
    *dest_folder* and has one of ``core.SUBTITLE_EXTENSIONS``. Files reported as binary by
    ``is_binary`` are skipped. Detection failures are logged and leave the entry untouched.

    :param dest_folder: folder containing the subtitle files
    :param subtitles: mapping of subtitle file name to language; updated in place
    :return: the same *subtitles* mapping
    """
    for subtitle, language in six.iteritems(subtitles):
        if language:
            continue

        subtitle_path = os.path.join(dest_folder, subtitle)
        if not os.path.exists(subtitle_path):
            continue
        if os.path.splitext(subtitle_path)[1] not in core.SUBTITLE_EXTENSIONS:
            continue

        logging.debug(
            "BAZARR falling back to file content analysis to detect language."
        )
        if is_binary(subtitle_path):
            logging.debug(
                "BAZARR subtitles file doesn't seems to be text based. Skipping this file: "
                + subtitle_path)
            continue

        if six.PY3:
            with open(subtitle_path, 'r', errors='ignore') as f:
                text = f.read()
        else:
            with open(subtitle_path, 'r') as f:
                text = f.read()

        try:
            # the detected encoding is only used to decode the text on Python 2
            encoding = UnicodeDammit(text)
            if six.PY2:
                text = text.decode(encoding.original_encoding)
            detected_language = guess_language(text)
        except Exception:
            logging.exception(
                'BAZARR Error trying to detect language for this subtitles file: '
                + subtitle_path +
                ' You should try to delete this subtitles file manually and ask '
                'Bazarr to download it again.')
            continue

        if not detected_language:
            continue

        logging.debug(
            "BAZARR external subtitles detected and guessed this language: "
            + str(detected_language))
        try:
            subtitles[subtitle] = Language.rebuild(
                Language.fromietf(detected_language))
        except Exception:
            # best effort: keep the entry without a language, but leave a trace of the failure
            logging.debug(
                "BAZARR unable to convert the detected language into a Language object: "
                + str(detected_language))
    return subtitles
Ejemplo n.º 23
0
def test_list_subtitles_only_forced(video_single_language):
    """Listing with only a forced English language must return no subtitles
    for the ``video_single_language`` fixture."""
    with EmbeddedSubtitlesProvider() as provider:
        # build the forced variant of English in a single expression
        forced_english = Language.rebuild(Language.fromalpha2("en"), forced=True)
        found = provider.list_subtitles(video_single_language, {forced_english})
        assert len(found) == 0
Ejemplo n.º 24
0
    def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None, tag=None,
              use_tag_search=False, only_foreign=False, also_foreign=False):
        """Search the server and return the subtitles matching ``languages``.

        One search criterion is built per available piece of information
        (hash + size, tag, IMDB id, each free-text query); all of them are
        sent in a single ``SearchSubtitles`` call.

        :param languages: wanted languages; results in another language are dropped.
        :param hash: video hash, only used together with ``size``.
        :param size: video size in bytes.
        :param imdb_id: IMDB id; its first two characters are stripped before sending.
        :param query: iterable of free-text queries (single quotes are removed).
        :param season: season number, only sent when ``episode`` is set too.
        :param episode: episode number, only sent when ``season`` is set too.
        :param tag: tag criterion, only used when ``use_tag_search`` is true.
        :param use_tag_search: whether to add the ``tag`` criterion.
        :param only_foreign: keep only results flagged ``SubForeignPartsOnly``.
        :param also_foreign: keep foreign-parts-only results next to the regular ones.
        :return: list of ``self.subtitle_class`` instances.
        :raises ValueError: if no search criterion could be built.
        """
        # season/episode are only attached when both are known
        episode_filter = {'season': season, 'episode': episode} if season and episode else {}

        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            imdb_criterion = {'imdbid': imdb_id[2:]}
            imdb_criterion.update(episode_filter)
            criteria.append(imdb_criterion)
        if query:
            for text in query:
                text_criterion = {'query': text.replace('\'', '')}
                text_criterion.update(episode_filter)
                criteria.append(text_criterion)
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(sorted(lang.opensubtitles for lang in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)

        def search():
            return self.retry(lambda: checked(lambda: self.server.SearchSubtitles(self.token, criteria)))

        response = self.use_token_or_login(search)

        found = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return found

        # loop over subtitle items
        for entry in response['data']:
            # in case OS messes their API results up again: when the entry is a string it is a key
            # into the data mapping, otherwise it already is the item itself
            item = response["data"][entry] if hasattr(entry, "startswith") else entry

            # read the item
            language = Language.fromopensubtitles(item['SubLanguageID'])
            hearing_impaired = bool(int(item['SubHearingImpaired']))
            page_link = item['SubtitlesLink']
            subtitle_id = int(item['IDSubtitleFile'])
            matched_by = item['MatchedBy']
            movie_kind = item['MovieKind']
            movie_hash = item['MovieHash']
            movie_name = item['MovieName']
            movie_release_name = item['MovieReleaseName']
            movie_year = int(item['MovieYear']) if item['MovieYear'] else None
            movie_imdb_id = 'tt' + item['IDMovieImdb']
            movie_fps = item.get('MovieFPS')
            series_season = int(item['SeriesSeason']) if item['SeriesSeason'] else None
            series_episode = int(item['SeriesEpisode']) if item['SeriesEpisode'] else None
            filename = item['SubFileName']
            encoding = item.get('SubEncoding') or None
            foreign_parts_only = bool(int(item.get('SubForeignPartsOnly', 0)))

            if foreign_parts_only:
                # foreign/forced not wanted
                if not (only_foreign or also_foreign):
                    continue
                # set subtitle language to forced if it's foreign_parts_only
                language = Language.rebuild(language, forced=True)
            elif only_foreign:
                # foreign/forced subtitles only wanted
                continue

            if language not in languages:
                continue

            query_parameters = item.get("QueryParameters")

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                           movie_kind, movie_hash, movie_name, movie_release_name, movie_year,
                                           movie_imdb_id, series_season, series_episode, query_parameters,
                                           filename, encoding, movie_fps, skip_wrong_fps=self.skip_wrong_fps)
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            found.append(subtitle)

        return found
Ejemplo n.º 25
0
    def get_lang_list(self, provider=None, ordered=False):
        """Return the subtitle languages to look for.

        When ``provider`` has a non-empty ``languages`` entry in the advanced
        settings and at least one of its values parses, only those languages
        are returned (always as a set). Otherwise the list is built from the
        ``langPref1a``/``langPref2a``/``langPref3a``/``langPrefCustom``
        preferences and then adjusted for forced subtitles according to
        ``self.forced_also``, ``self.forced_only`` and ``self.normal_subs``.

        :param provider: optional provider name used to look up advanced settings.
        :param ordered: return a list in preference order instead of a set.
        :return: a set of languages, or a list when ``ordered`` is true.
        """
        def parse_language(value):
            # try the value as an IETF code first, then as a language name; None if neither works
            for factory in (Language.fromietf, Language.fromname):
                try:
                    return factory(value)
                except Exception:
                    # unparsable with this factory; try the next one
                    continue
            return None

        # advanced settings
        if provider and self.advanced.providers and provider in self.advanced.providers:
            adv_languages = self.advanced.providers[provider].get("languages", None)
            if adv_languages:
                adv_out = set()
                for adv_lang in adv_languages:
                    real_lang = parse_language(adv_lang.strip())
                    # compare against None explicitly: a language object itself may be falsy
                    if real_lang is not None:
                        adv_out.add(real_lang)

                # fallback to default languages if no valid language was found in advanced settings
                if adv_out:
                    return adv_out

        langs = [Language.fromietf(Prefs["langPref1a"])]
        lang_custom = Prefs["langPrefCustom"].strip()

        if Prefs['subtitles.only_one']:
            return set(langs) if not ordered else langs

        # secondary/tertiary languages are best-effort: an invalid value is skipped
        for pref_key in ("langPref2a", "langPref3a"):
            if Prefs[pref_key] != "None":
                try:
                    langs.append(Language.fromietf(Prefs[pref_key]))
                except Exception:
                    pass

        if len(lang_custom) and lang_custom != "None":
            for lang in lang_custom.split(u","):
                real_lang = parse_language(lang.strip())
                if real_lang is not None:
                    langs.append(real_lang)

        if self.forced_also:
            when_forced = Prefs["subtitles.when_forced"]
            if when_forced == "Always":
                # iterate over a copy, the forced variants are appended to the same list
                for lang in list(langs):
                    langs.append(Language.rebuild(lang, forced=True))

            else:
                for (setting, index) in (("Only for Subtitle Language (1)", 0),
                                         ("Only for Subtitle Language (2)", 1),
                                         ("Only for Subtitle Language (3)", 2)):
                    if when_forced == setting:
                        # the selected slot may not exist (e.g. language 3 not configured)
                        if index < len(langs):
                            langs.append(Language.rebuild(langs[index], forced=True))
                        break

        elif self.forced_only:
            for lang in langs:
                lang.forced = True

        if not self.normal_subs:
            # keep only the forced variants
            langs = [lang for lang in langs if lang.forced]

        return set(langs) if not ordered else langs
Ejemplo n.º 26
0
    def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
              also_foreign=False):
        """Search the ``search/old`` endpoint page by page and collect matching subtitles.

        :param language: wanted language; results whose (possibly forced/hi) language differs are skipped.
        :param keyword: search keyword sent as ``sK``.
        :param video: video being searched for; its ``release_group`` is passed on to each subtitle.
        :param season: season number, only sent when ``episode`` is set too.
        :param episode: episode number, only sent when ``season`` is set too.
        :param year: optional year sent as ``sY``.
        :param only_foreign: keep only results carrying the ``f`` flag.
        :param also_foreign: keep ``f``-flagged results too and mark their language as forced.
        :return: list of ``self.subtitle_class`` instances.
        """
        search_language = str(language).lower()

        # sr-Cyrl specialcase
        if search_language == "sr-cyrl":
            search_language = "sr"

        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': search_language, 'sK': keyword}

        is_episode = bool(season and episode)
        if is_episode:
            params['sTS'] = season
            params['sTE'] = episode

        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []
        pids = set()
        while True:
            # query the server
            raw = None
            try:
                raw = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
                page = etree.fromstring(raw)
            except etree.ParseError:
                logger.error("Wrong data returned: %r", raw)
                break

            # exit if no results
            if not int(page.find('pagination/results').text):
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for node in page.findall('subtitle'):
                # read xml elements
                pid = node.find('pid').text
                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in pids:
                    continue

                _language = Language.fromietf(node.find('language').text)
                flags = node.find('flags').text or ''
                hearing_impaired = 'n' in flags
                foreign = 'f' in flags

                if foreign:
                    if also_foreign:
                        _language = Language.rebuild(_language, forced=True)
                    elif not only_foreign:
                        # foreign result although none were asked for
                        continue
                elif only_foreign:
                    continue

                # set subtitle language to hi if it's hearing_impaired
                if hearing_impaired:
                    _language = Language.rebuild(_language, hi=True)

                if language != _language:
                    continue

                page_link = node.find('url').text
                release_text = node.find('release').text
                # remove trailing dots from every release name
                releases = [re.sub(r'\.+$', '', name) for name in release_text.split()] if release_text else []
                title = node.find('title').text
                r_season = int(node.find('tvSeason').text)
                r_episode = int(node.find('tvEpisode').text)
                r_year = int(node.find('year').text)

                # season/episode details are only passed on for episode searches
                details = {'year': r_year, 'asked_for_release_group': video.release_group}
                if is_episode:
                    details.update(season=r_season, episode=r_episode, asked_for_episode=episode)
                subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                               **details)

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
                pids.add(pid)

            # stop on last page
            current_page = int(page.find('pagination/current').text)
            if current_page >= int(page.find('pagination/count').text):
                break

            # increment current page
            params['page'] = current_page + 1
            logger.debug('Getting page %d', params['page'])

        return subtitles
Ejemplo n.º 27
0
class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
    """OpenSubtitlesCom Provider

    Talks to the REST API rooted at ``server_url``. A login token is obtained
    with username/password, stored in ``region`` under ``oscom_token`` and
    sent with every request of the session.
    """
    server_url = 'https://www.opensubtitles.com/api/v1/'

    languages = {Language.fromopensubtitles(l) for l in language_converters['szopensubtitles'].codes}
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    def __init__(self, username=None, password=None, use_hash=True, api_key=None):
        """Store the credentials and prepare the HTTP session.

        :param username: account user name (required).
        :param password: account password (required).
        :param use_hash: whether ``query`` sends the video hash.
        :param api_key: API key sent in the ``Api-Key`` header (required).
        :raises ConfigurationError: if the API key or the credentials are missing.
        """
        if not api_key:
            raise ConfigurationError('Api_key must be specified')

        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.session = Session()
        self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
                                'Api-Key': api_key,
                                'Content-Type': 'application/json'}
        self.token = None
        self.username = username
        self.password = password
        self.video = None
        self.use_hash = use_hash

    def initialize(self):
        """Reuse the cached token when there is one, otherwise log in.

        :return: ``True`` once a token is installed on the session.
        """
        self.token = region.get("oscom_token")
        if self.token:
            self.session.headers.update({'Authorization': 'Bearer ' + self.token})
            return True
        return self.login()

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def login(self):
        """Log in, install the token on the session and cache it.

        :return: ``True`` on success.
        :raises ServiceUnavailable: if the request could not be performed.
        :raises AuthenticationError: on a 401 answer.
        :raises TooManyRequests: on a 429 answer.
        :raises ProviderError: on invalid JSON or any other status code.
        """
        try:
            r = self.session.post(self.server_url + 'login',
                                  json={"username": self.username, "password": self.password},
                                  allow_redirects=False,
                                  timeout=10)
        except (ConnectionError, Timeout, ReadTimeout) as e:
            # no response object exists at this point, so only the error itself can be reported
            raise ServiceUnavailable('Unknown Error, empty response: %r' % (e,))

        # NOTE: there is deliberately no ``finally: return`` here; it would discard every
        # exception raised below and override the success value.
        if r.status_code == 200:
            try:
                self.token = r.json()['token']
            except ValueError:
                raise ProviderError('Invalid JSON returned by provider')
            self.session.headers.update({'Authorization': 'Bearer ' + self.token})
            region.set("oscom_token", self.token)
            return True
        elif r.status_code == 401:
            raise AuthenticationError('Login failed: {}'.format(r.reason))
        elif r.status_code == 429:
            raise TooManyRequests()
        else:
            raise ProviderError('Bad status code: {}'.format(r.status_code))

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_titles(self, title):
        """Resolve ``title`` to the provider's feature id.

        The search uses the IMDB id of ``self.video`` when it has one and the
        title text otherwise. A result matches when its title equals ``title``
        (case-insensitive) and, if ``self.video.year`` is set, its year too.
        On a 401 answer the cached token is dropped and the request is retried
        once after a fresh login.

        :param title: title to look for.
        :return: the matching id, or ``None`` when nothing matched.
        :raises TooManyRequests: on a 429 answer.
        :raises ProviderError: if the answer is not valid JSON.
        """
        title_id = None
        imdb_id = None

        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.video.series_imdb_id
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.video.imdb_id

        if imdb_id:
            parameters = {'imdb_id': imdb_id}
            logging.debug('Searching using this IMDB id: {}'.format(imdb_id))
        else:
            parameters = {'query': title}
            logging.debug('Searching using this title: {}'.format(title))

        results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

        # status codes with a dedicated meaning are handled before raise_for_status(),
        # which would otherwise turn them into a generic HTTP error
        if results.status_code == 401:
            logging.debug('Authentification failed: clearing cache and attempting to login.')
            region.delete("oscom_token")
            self.login()

            results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

        if results.status_code == 429:
            raise TooManyRequests()
        results.raise_for_status()

        # deserialize results
        try:
            results_dict = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            # loop over results
            for result in results_dict:
                if title.lower() == result['attributes']['title'].lower() and \
                        (not self.video.year or self.video.year == int(result['attributes']['year'])):
                    title_id = result['id']
                    break

            if title_id:
                logging.debug('Found this title ID: {}'.format(title_id))
                return title_id
        finally:
            if not title_id:
                logger.debug('No match found for {}'.format(title))

    def query(self, languages, video):
        """Return the subtitles the provider offers for ``video`` in ``languages``.

        :param languages: wanted languages, sent as a comma separated list.
        :param video: video to search subtitles for; stored on ``self.video``.
        :return: list of ``OpenSubtitlesComSubtitle``; empty when the title is unknown.
        :raises TooManyRequests: on a 429 answer.
        :raises ProviderError: if the answer is not valid JSON.
        """
        self.video = video
        if self.use_hash:
            moviehash = self.video.hashes.get('opensubtitlescom')
            logging.debug('Searching using this hash: {}'.format(moviehash))
        else:
            moviehash = None

        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        title_id = self.search_titles(title)
        if not title_id:
            return []
        lang_strings = [str(lang) for lang in languages]
        langs = ','.join(lang_strings)
        logging.debug('Searching for this languages: {}'.format(lang_strings))

        # query the server
        if isinstance(self.video, Episode):
            res = self.session.get(self.server_url + 'subtitles',
                                   params={'parent_feature_id': title_id,
                                           'languages': langs,
                                           'episode_number': self.video.episode,
                                           'season_number': self.video.season,
                                           'moviehash': moviehash},
                                   timeout=10)
        else:
            res = self.session.get(self.server_url + 'subtitles',
                                   params={'id': title_id,
                                           'languages': langs,
                                           'moviehash': moviehash},
                                   timeout=10)

        # check the rate limit first: raise_for_status() would mask it as a generic HTTP error
        if res.status_code == 429:
            raise TooManyRequests()
        res.raise_for_status()

        subtitles = []

        try:
            result = res.json()
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        logging.debug('Query returned {} subtitles'.format(len(result['data'])))

        for item in result['data']:
            attributes = item['attributes']
            feature_details = attributes['feature_details']

            # entries without any file cannot be downloaded
            if not len(attributes['files']):
                continue

            subtitle = OpenSubtitlesComSubtitle(
                language=Language.fromietf(attributes['language']),
                hearing_impaired=attributes['hearing_impaired'],
                page_link=attributes['url'],
                file_id=attributes['files'][0]['file_id'],
                releases=attributes['release'],
                uploader=attributes['uploader']['name'],
                title=feature_details['movie_name'],
                year=feature_details['year'],
                season=feature_details.get('season_number'),
                episode=feature_details.get('episode_number'),
                hash_matched=attributes.get('moviehash_match', False)
            )
            subtitle.get_matches(self.video)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Return the subtitles found by ``query`` for ``video`` in ``languages``."""
        return self.query(languages, video)

    def download_subtitle(self, subtitle):
        """Request a download link for ``subtitle`` and fetch its content.

        Sets ``subtitle.download_link`` and, when the download returned data,
        ``subtitle.content`` (with fixed line endings).

        :raises TooManyRequests: on a 429 answer to either request.
        :raises DownloadLimitExceeded: on a 406 answer to either request.
        :raises ProviderError: if the link answer is not valid JSON.
        """
        logger.info('Downloading subtitle %r', subtitle)

        headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
        res = self.session.post(self.server_url + 'download',
                                json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
                                headers=headers,
                                timeout=10)

        # dedicated status codes first, raise_for_status() would hide them
        if res.status_code == 429:
            raise TooManyRequests()
        elif res.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        res.raise_for_status()

        try:
            subtitle.download_link = res.json()['link']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        r = self.session.get(subtitle.download_link, timeout=10)

        # inspect the answer of the download itself, not the link request again
        if r.status_code == 429:
            raise TooManyRequests()
        elif r.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        r.raise_for_status()

        subtitle_content = r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
Ejemplo n.º 28
0
class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
    """XML-RPC OpenSubtitles provider with forced/hearing-impaired language variants,
    token caching in ``region`` and an optional VIP server."""
    only_foreign = False
    also_foreign = False
    subtitle_class = OpenSubtitlesSubtitle
    hash_verifiable = True
    hearing_impaired_verifiable = True
    skip_wrong_fps = True
    is_vip = False
    use_ssl = True
    timeout = 15

    # protocol-relative; the scheme is prepended per instance in __init__
    default_url = "//api.opensubtitles.org/xml-rpc"
    vip_url = "//vip-api.opensubtitles.org/xml-rpc"

    languages = {Language.fromopensubtitles(l) for l in language_converters['szopensubtitles'].codes}
    # add the forced variants first, then a hearing-impaired variant of everything collected so far
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    def __init__(self, username=None, password=None, use_tag_search=False, only_foreign=False,
                 also_foreign=False, skip_wrong_fps=True, is_vip=False, use_ssl=True, timeout=15):
        """Store the settings and build the absolute server URLs.

        :raises ConfigurationError: if only one of ``username``/``password`` is given.
        """
        # credentials are optional, but must come as a pair
        if bool(username) != bool(password):
            raise ConfigurationError('Username and password must be specified')

        self.username = username or ''
        self.password = password or ''
        self.use_tag_search = use_tag_search
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign
        self.skip_wrong_fps = skip_wrong_fps
        self.token = None
        self.is_vip = is_vip
        self.use_ssl = use_ssl
        self.timeout = timeout

        logger.debug("Using timeout: %d", timeout)

        if use_ssl:
            logger.debug("Using HTTPS connection")

        scheme = "https:" if use_ssl else "http:"
        self.default_url = scheme + self.default_url
        self.vip_url = scheme + self.vip_url

        if use_tag_search:
            logger.info("Using tag/exact filename search")

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

    def get_server_proxy(self, url, timeout=None):
        """Return a ``ServerProxy`` for ``url``; ``timeout`` falls back to ``self.timeout``."""
        transport = SubZeroRequestsTransport(use_https=self.use_ssl,
                                             timeout=timeout or self.timeout,
                                             user_agent=os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))
        return ServerProxy(url, transport)

    def log_in_url(self, server_url):
        """Log in against ``server_url`` and cache the token and the URL in ``region``."""
        self.token = None
        self.server = self.get_server_proxy(server_url)

        def call_login():
            return self.server.LogIn(self.username, self.password, 'eng',
                                     os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))

        response = self.retry(lambda: checked(call_login))

        self.token = response['token']
        # only the first ten characters of the token are written to the log
        masked_token = self.token[:10] + "X" * (len(self.token) - 10)
        logger.debug('Logged in with token %r', masked_token)

        region.set("os_token", bytearray(self.token, encoding='utf-8'))
        region.set("os_server_url", bytearray(server_url, encoding='utf-8'))

    def log_in(self):
        """Log in on the VIP or default server, falling back from VIP to default.

        :raises Unauthorized: if no token could be obtained.
        """
        logger.info('Logging in')

        first_choice = self.vip_url if self.is_vip else self.default_url
        try:
            self.log_in_url(first_choice)

        except Unauthorized:
            if self.is_vip:
                logger.info("VIP server login failed, falling back")
                try:
                    self.log_in_url(self.default_url)
                except Unauthorized:
                    pass

        if not self.token:
            logger.error("Login failed, please check your credentials")
            raise Unauthorized

    def use_token_or_login(self, func):
        """Call ``func`` with the current token; log in first (or again) when needed.

        Without a token a login is done before the call. With a token, an
        ``Unauthorized`` from ``func`` triggers one re-login and one more call.
        """
        if self.token:
            try:
                return func()
            except Unauthorized:
                # token rejected: fall through to a fresh login
                pass
        self.log_in()
        return func()

    def initialize(self):
        """Restore token and server from ``region`` when both are cached, else reset them."""
        token_cache = region.get("os_token")
        url_cache = region.get("os_server_url")

        if token_cache is NO_VALUE or url_cache is NO_VALUE:
            self.server = None
            self.token = None
            return

        self.token = token_cache.decode("utf-8")
        self.server = self.get_server_proxy(url_cache.decode("utf-8"))
        logger.debug("Using previous login token: %r",
                     self.token[:10] + "X" * (len(self.token) - 10))

    def terminate(self):
        """Forget the server proxy and the token."""
        self.server = None
        self.token = None

    def list_subtitles(self, video, languages):
        """Collect the search parameters from ``video`` and delegate to ``query``.

        Episodes are searched by series names plus season/episode (the lowest
        one when ``video.episode`` is a list); specials by "<series> <title>"
        without season/episode; everything else by title and alternative titles.

        :param video: video to search subtitles for.
        :param languages: wanted languages.
        :return: list of subtitles as returned by ``query``.
        """
        season = episode = None
        if isinstance(video, Episode):
            series_names = [video.series] + video.alternative_series
            imdb_id = video.series_imdb_id
            if video.is_special:
                # specials carry no usable season/episode numbers
                query = [u"%s %s" % (series, video.title) for series in series_names]
                logger.info("%s: Searching for special: %r", self.__class__,
                            query)
            else:
                query = series_names
                season = video.season
                episode = min(video.episode) if isinstance(video.episode, list) else video.episode
        else:
            query = [video.title] + video.alternative_titles
            imdb_id = video.imdb_id

        return self.query(video, languages,
                          hash=video.hashes.get('opensubtitles'), size=video.size,
                          imdb_id=imdb_id, query=query, season=season, episode=episode,
                          tag=video.original_name, use_tag_search=self.use_tag_search,
                          only_foreign=self.only_foreign, also_foreign=self.also_foreign)

    def query(self, video, languages, hash=None, size=None, imdb_id=None, query=None, season=None,
              episode=None, tag=None, use_tag_search=False, only_foreign=False, also_foreign=False):
        """Search the server and return the subtitles matching ``languages``.

        Criteria are built from hash + size, tag and IMDB id. The ``query``
        argument is accepted but currently not sent to the server.

        :return: list of ``self.subtitle_class`` instances.
        :raises ValueError: if no search criterion could be built.
        """
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            imdb_criterion = {'imdbid': imdb_id[2:]}
            if season and episode:
                imdb_criterion['season'] = season
                imdb_criterion['episode'] = episode
            criteria.append(imdb_criterion)
        # Free-text ``query`` criteria are intentionally not added: they were disabled after the
        # issue with episodes released after October 17th 2020.
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(sorted(lang.opensubtitles for lang in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)

        def search():
            return self.retry(lambda: checked(lambda: self.server.SearchSubtitles(self.token, criteria)))

        response = self.use_token_or_login(search)

        found = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return found

        # episode searches read the id from a different field than movie searches
        imdb_field = 'SeriesIMDBParent' if (season or episode) else 'IDMovieImdb'

        # loop over subtitle items
        for entry in response['data']:
            # in case OS messes their API results up again: when the entry is a string it is a key
            # into the data mapping, otherwise it already is the item itself
            item = response["data"][entry] if hasattr(entry, "startswith") else entry

            # read the item
            language = Language.fromopensubtitles(item['SubLanguageID'])
            hearing_impaired = bool(int(item['SubHearingImpaired']))
            page_link = item['SubtitlesLink']
            subtitle_id = int(item['IDSubtitleFile'])
            matched_by = item['MatchedBy']
            movie_kind = item['MovieKind']
            movie_hash = item['MovieHash']
            movie_name = item['MovieName']
            movie_release_name = item['MovieReleaseName']
            movie_year = int(item['MovieYear']) if item['MovieYear'] else None
            movie_imdb_id = 'tt' + item[imdb_field]
            movie_fps = item.get('MovieFPS')
            series_season = int(item['SeriesSeason']) if item['SeriesSeason'] else None
            series_episode = int(item['SeriesEpisode']) if item['SeriesEpisode'] else None
            filename = item['SubFileName']
            encoding = item.get('SubEncoding') or None
            foreign_parts_only = bool(int(item.get('SubForeignPartsOnly', 0)))

            if foreign_parts_only:
                # foreign/forced not wanted
                if not (only_foreign or also_foreign):
                    continue
                # set subtitle language to forced if it's foreign_parts_only
                language = Language.rebuild(language, forced=True)
            elif only_foreign:
                # foreign/forced subtitles only wanted
                continue

            # set subtitle language to hi if it's hearing_impaired
            if hearing_impaired:
                language = Language.rebuild(language, hi=True)

            if language not in languages:
                continue

            # skip results for another title; the video's id is compared after removing the zeros
            # matched by the pattern below
            if video.imdb_id and (movie_imdb_id != re.sub("(?<![^a-zA-Z])0+", "", video.imdb_id)):
                continue

            query_parameters = item.get("QueryParameters")

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                           movie_kind, movie_hash, movie_name, movie_release_name,
                                           movie_year, movie_imdb_id, series_season, series_episode,
                                           query_parameters, filename, encoding, movie_fps,
                                           skip_wrong_fps=self.skip_wrong_fps)
            subtitle.uploader = item['UserNickName'] or 'anonymous'
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            found.append(subtitle)

        return found

    def download_subtitle(self, subtitle):
        """Download ``subtitle`` and store its decoded, decompressed content on it."""
        logger.info('Downloading subtitle %r', subtitle)

        def download():
            return checked(lambda: self.server.DownloadSubtitles(self.token, [str(subtitle.subtitle_id)]))

        response = self.use_token_or_login(download)
        compressed = base64.b64decode(response['data'][0]['data'])
        subtitle.content = fix_line_ending(zlib.decompress(compressed, 47))
Ejemplo n.º 29
0
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page to avoid
    massive hammering, thus it can't determine whether a subtitle is only-foreign or not.

    A username and password are mandatory (see ``__init__``); ``login`` persists the resulting session cookies
    through ``region.set``.
    """
    subtitle_class = SubsceneSubtitle
    # NOTE: this is the same set object as ``supported_languages``; the update below extends it in place with
    # the forced variant of every language.
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 5  # seconds

    def __init__(self, only_foreign=False, username=None, password=None):
        """Store the credentials and the foreign-only flag.

        :raises ConfigurationError: if either username or password is missing/empty.
        """
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        """Create the HTTP session used for all requests."""
        logger.info("Creating session")
        self.session = RetryingCFSession()

    def login(self):
        """Log in with the configured credentials and store the relevant session cookies.

        Steps: fetch the login page, read the JSON model embedded in its ``modelJson`` script tag, post the
        credentials together with the anti-forgery field, then forward the token fields of the returned form
        to ``SITE_DOMAIN``. On success a filtered copy of the cookie jar is stored under ``subscene_cookies2``.

        :raises ServiceUnavailable: if the login page contains "Server Error".
        :raises ProviderError: if any of the steps above fails.
        """
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.content:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.content)

        if match:
            h = HTMLParser.HTMLParser()
            data = json.loads(h.unescape(match.group(1)))
            login_url = urlparse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(login_url,
                                  {
                                      "username": self.username,
                                      "password": self.password,
                                      data["antiForgery"]["name"]: data["antiForgery"]["value"]
                                  })
            pep_content = re.search(r"<form method=\"post\" action=\"https://subscene\.com/\">"
                                    r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                                    r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                                    r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                                    r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                                    r"scope.+?value=\"(?P<scope>.+?)\".+?"
                                    r"state.+?value=\"(?P<state>.+?)\".+?"
                                    r"session_state.+?value=\"(?P<session_state>.+?)\"",
                                    r.content, re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    # interpolate the traceback into the message; passing it as a second positional argument
                    # would leave the placeholder unformatted
                    raise ProviderError("Something went wrong when trying to log in: %s" % traceback.format_exc())
                else:
                    # keep only the whitelisted cookies in the copy that gets stored
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username")
                    for cn in self.session.cookies.iterkeys():
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return
        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        """Close the HTTP session."""
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        """Build ``self.filters`` (the site's filter values) for the given languages.

        Languages whose alpha3 code is not present in ``language_ids`` are left out of ``LanguageFilter``.
        """
        self.filters = dict(HearingImpaired="2")
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            logger.info("Only searching for foreign/forced subtitles")

        self.filters["LanguageFilter"] = ",".join((str(language_ids[l.alpha3]) for l in languages
                                                   if l.alpha3 in language_ids))

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        """Apply ``self.filters`` by writing them into the session's cookies."""
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        """Return the subtitles found for *video* whose language is in *languages*.

        Returns an empty list without searching when ``video.original_name`` is not set.
        """
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        """Fill ``subtitle.content`` from cached pack data or from a freshly downloaded archive.

        When ``subtitle.pack_data`` is set it is tried first (and cleared); if extraction from it raises
        ``ProviderError`` the archive is downloaded again. After a download the raw archive bytes are kept in
        ``subtitle.pack_data``.
        """
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = ZipFile(io.BytesIO(subtitle.pack_data))
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
                return
            except ProviderError:
                # cached pack did not yield a subtitle; fall through to a fresh download
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)
        archive = ZipFile(archive_stream)

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        """Convert the entries of ``film.subtitles`` into subtitle objects.

        Entries for which ``SubsceneSubtitle.from_api`` raises ``NotImplementedError`` are logged and skipped.
        When ``only_foreign`` is set, every subtitle's language is rebuilt as forced.
        """
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles
Ejemplo n.º 30
0
    def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
              also_foreign=False):
        """Search the ``search/old`` XML endpoint for *keyword* and return the matching subtitles.

        All result pages are walked. Results are dropped when their pid was already seen, when their
        foreign flag conflicts with *only_foreign* / *also_foreign*, or when their language differs from
        *language*. The search stops early if a response cannot be parsed as XML.
        """
        search_language = str(language).lower()

        # sr-Cyrl specialcase
        if search_language == "sr-cyrl":
            search_language = "sr"

        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': search_language, 'sK': keyword}

        is_episode = bool(season and episode)
        if is_episode:
            params['sTS'] = season
            params['sTE'] = episode

        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        found = []
        seen_pids = set()
        while True:
            # query the server
            content = None
            try:
                content = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
                xml = etree.fromstring(content)
            except etree.ParseError:
                logger.error("Wrong data returned: %r", content)
                break

            # exit if no results
            if not int(xml.find('pagination/results').text):
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for node in xml.findall('subtitle'):
                pid = node.find('pid').text
                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in seen_pids:
                    continue

                _language = Language.fromietf(node.find('language').text)
                flags = node.find('flags').text or ''
                hearing_impaired = 'n' in flags
                foreign = 'f' in flags

                if foreign:
                    if also_foreign:
                        # foreign results are reported with a forced language
                        _language = Language.rebuild(_language, forced=True)
                    elif not only_foreign:
                        # foreign subtitles were not asked for at all
                        continue
                elif only_foreign:
                    continue

                if language != _language:
                    continue

                page_link = node.find('url').text
                release_text = node.find('release').text
                # remove trailing dots from every release name
                releases = [re.sub(r'\.+$', '', name) for name in release_text.split()] if release_text else []
                title = node.find('title').text
                r_season = int(node.find('tvSeason').text)
                r_episode = int(node.find('tvEpisode').text)
                r_year = int(node.find('year').text)

                extra = dict(year=r_year, asked_for_release_group=video.release_group)
                if is_episode:
                    extra.update(season=r_season, episode=r_episode, asked_for_episode=episode)
                subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                               **extra)

                logger.debug('Found subtitle %r', subtitle)
                found.append(subtitle)
                seen_pids.add(pid)

            # stop on last page
            current_page = int(xml.find('pagination/current').text)
            if current_page >= int(xml.find('pagination/count').text):
                break

            # increment current page
            params['page'] = current_page + 1
            logger.debug('Getting page %d', params['page'])

        return found
Ejemplo n.º 31
0
class TitrariProvider(Provider, ProviderSubtitleArchiveMixin):
    """Subtitle provider that scrapes the advanced search page of titrari.ro."""
    subtitle_class = TitrariSubtitle
    languages = {Language(lang) for lang in ['ron', 'eng']}
    languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))
    video_types = (Episode, Movie)
    api_url = 'https://www.titrari.ro/'
    query_advanced_search = 'cautarepreaavansata'

    def __init__(self):
        self.session = None

    def initialize(self):
        """Create the HTTP session and set its User-Agent header."""
        self.session = Session()
        # Hardcoding the UA to bypass the 30s throttle that titrari.ro uses for IP/UA pair
        self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, ' \
                                             'like Gecko) Chrome/93.0.4535.2 Safari/537.36'
        # self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, language=None, title=None, imdb_id=None, video=None):
        """Run one search and return the found subtitles sorted by download count (descending).

        Each optional field of a result (downloads, year, comments, page link, uploader) is parsed
        best-effort: a failure is logged and the field keeps its default (``0``, ``None`` or ``''``).

        :param language: language passed on to the subtitle objects and to the search filter.
        :param title: title to search for; only used when *imdb_id* is ``None``.
        :param imdb_id: IMDB id to search for.
        :param video: the video searched for; decides whether results are treated as episodes.
        :return: list of ``subtitle_class`` instances, empty if the response had no content.
        """
        subtitles = []

        params = self.getQueryParams(imdb_id, title, language)

        search_response = self.session.get(self.api_url, params=params, timeout=15)
        search_response.raise_for_status()

        if not search_response.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(search_response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('td[rowspan="5"]')
        for index, row in enumerate(rows):
            result_anchor_el = row.select_one('a')

            # Download link
            href = result_anchor_el.get('href')
            download_link = self.api_url + href

            fullTitle = row.parent.select('h1 a')[0].text

            # Get title
            try:
                title = fullTitle.split("(")[0]
            except Exception:
                logger.error("Error parsing title")

            # Get downloads count
            downloads = 0
            try:
                downloads = int(row.parent.parent.select('span')[index].text[12:])
            except Exception:
                logger.error("Error parsing downloads")

            # Get year
            try:
                year = int(fullTitle.split("(")[1].split(")")[0])
            except Exception:
                year = None
                logger.error("Error parsing year")

            # Get imdbId
            sub_imdb_id = self.getImdbIdFromSubtitle(row)

            comments = ''
            try:
                comments = row.parent.parent.select('.comment')[1].text
            except Exception:
                logger.error("Error parsing comments")

            # Get page_link; reset per row so a failure never reuses the previous row's value
            page_link = None
            try:
                page_link = self.api_url + row.parent.select('h1 a')[0].get('href')
            except Exception:
                logger.error("Error parsing page_link")

            # Get uploader; reset per row for the same reason
            uploader = None
            try:
                uploader = row.parent.select('td.row1.stanga a')[-1].text
            except Exception:
                logger.error("Error parsing uploader")

            episode_number = video.episode if isinstance(video, Episode) else None
            subtitle = self.subtitle_class(language, download_link, index, comments, title, sub_imdb_id, page_link, uploader,
                                           year, downloads, isinstance(video, Episode), episode_number)
            logger.debug('Found subtitle %r', str(subtitle))
            subtitles.append(subtitle)

        ordered_subs = self.order(subtitles)

        return ordered_subs

    @staticmethod
    def order(subtitles):
        """Return a new list of *subtitles* sorted by ``download_count``, highest first."""
        logger.debug("Sorting by download count...")
        sorted_subs = sorted(subtitles, key=lambda s: s.download_count, reverse=True)
        return sorted_subs

    @staticmethod
    def getImdbIdFromSubtitle(row):
        """Return the ``tt``-prefixed IMDB id linked from the result containing *row*, or ``None``."""
        imdbId = None
        try:
            imdbId = row.parent.parent.find_all(src=re.compile("imdb"))[0].parent.get('href').split("tt")[-1]
        except Exception:
            logger.error("Error parsing imdb id")
        if imdbId is not None:
            return "tt" + imdbId
        else:
            return None

    # titrari.ro seems to require all parameters now
    #  z2 = comment (empty)
    #  z3 = fps (-1: any, 0: N/A, 1: 23.97 FPS etc.)
    #  z4 = CD count (-1: any)
    #  z5 = imdb_id (empty or integer)
    #  z6 = sort order (0: unsorted, 1: by date, 2: by name)
    #  z7 = title (empty or string)
    #  z8 = language (-1: all, 1: ron, 2: eng)
    #  z9 = genre (All: all, Action: action etc.)
    # z11 = type (0: any, 1: movie, 2: series)
    def getQueryParams(self, imdb_id, title, language):
        """Build the query-string parameters for the advanced search.

        *imdb_id* takes precedence over *title*; the language filter is only set when *language*
        compares equal to ``'ro'`` or ``'en'``.
        """
        queryParams = {
            'page': self.query_advanced_search,
            'z7': '',
            'z2': '',
            'z5': '',
            'z3': '-1',
            'z4': '-1',
            'z8': '-1',
            'z9': 'All',
            'z11': '0',
            'z6': '0'
        }
        if imdb_id is not None:
            queryParams["z5"] = imdb_id
        elif title is not None:
            queryParams["z7"] = title

        if language == 'ro':
            queryParams["z8"] = '1'
        elif language == 'en':
            queryParams["z8"] = '2'

        return queryParams

    def list_subtitles(self, video, languages):
        """Query the provider once per language in *languages* and return all results in one list."""
        title = fix_inconsistent_naming(video.title)
        imdb_id = None
        try:
            # drop the first two characters of the id (presumably the "tt" prefix)
            if isinstance(video, Episode):
                imdb_id = video.series_imdb_id[2:]
            else:
                imdb_id = video.imdb_id[2:]
        except Exception:
            logger.error('Error parsing imdb_id from video object {}'.format(str(video)))

        subtitles = [s for lang in languages for s in
                     self.query(lang, title, imdb_id, video)]
        return subtitles

    def download_subtitle(self, subtitle):
        """Download *subtitle* and set ``subtitle.content``.

        RAR and ZIP archives are unpacked; any other payload is accepted as-is only if
        ``subtitle.is_valid()`` says so.

        :raises ProviderError: if the payload is neither a known archive nor a valid subtitle.
        """
        r = self.session.get(subtitle.download_link, headers={'Referer': self.api_url}, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as RAR')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as ZIP')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        if subtitle.is_episode:
            subtitle.content = self._get_subtitle_from_archive(subtitle, archive)
        else:
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    @staticmethod
    def _get_subtitle_from_archive(subtitle, archive):
        """Return the content of the first subtitle file in *archive* matching ``subtitle.desired_episode``.

        Hidden files and files without a subtitle extension are ignored, as are files for which no
        episode number can be guessed. Returns ``None`` when nothing matches.
        """
        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            _guess = guessit(name)
            # a file name without a detectable episode must not abort the whole lookup
            if 'episode' in _guess and subtitle.desired_episode == _guess['episode']:
                return archive.read(name)

        return None
Ejemplo n.º 32
0
class SubtitrarinoiProvider(Provider, ProviderSubtitleArchiveMixin):
    """Subtitle provider that scrapes the paginated search endpoint of subtitrari-noi.ro."""
    subtitle_class = SubtitrarinoiSubtitle
    languages = {Language(lang) for lang in ['ron']}
    languages.update(
        set(Language.rebuild(lang, forced=True) for lang in languages))
    video_types = (Episode, Movie)
    server_url = 'https://www.subtitrari-noi.ro/'
    api_url = server_url + 'paginare_filme.php'

    def __init__(self):
        self.session = None

    def initialize(self):
        """Create the HTTP session and set the headers sent with every request."""
        self.session = Session()
        self.session.headers[
            'User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4535.2 Safari/537.36'
        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, languages=None, title=None, imdb_id=None, video=None):
        """Run one search and return the found subtitles sorted by download count (descending).

        Each optional field of a result (uploader, downloads, year, comments, page link) is parsed
        best-effort: a failure is logged and the field keeps its default (``None``, ``0`` or ``''``).

        :param languages: iterable of languages; its first element is assigned to every result.
        :param title: search string; only used when *imdb_id* is ``None``.
        :param imdb_id: IMDB id used as search string.
        :param video: the video searched for; decides whether results are treated as episodes.
        :return: list of ``subtitle_class`` instances, empty if no result rows were found.
        """
        subtitles = []

        params = self.getQueryParams(imdb_id, title)
        search_response = self.session.post(self.api_url,
                                            data=params,
                                            timeout=15)
        search_response.raise_for_status()

        soup = ParserBeautifulSoup(
            search_response.content.decode('utf-8', 'ignore'),
            ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('div[id="round"]')

        if len(rows) == 0:
            logger.debug('No data returned from provider')
            return []

        # release comments are outside of the parent for the sub details itself, so we just map it to another list
        comment_rows = soup.findAll('div',
                                    attrs={
                                        'class': None,
                                        'id': None,
                                        'align': None
                                    })

        for index, row in enumerate(rows):
            result_anchor_el = row.select_one('.buton').select('a')

            # Download link
            href = result_anchor_el[0]['href']
            download_link = self.server_url + href

            fullTitle = row.select_one('#content-main a').text

            # Get title
            try:
                title = fullTitle.split("(")[0]
            except Exception:
                logger.error("Error parsing title")

            # Get Uploader; reset per row so a failure never reuses the previous row's value
            uploader = None
            try:
                uploader = row.select('#content-main p')[4].text[10:]
            except Exception:
                logger.error("Error parsing uploader")

            # Get downloads count
            downloads = 0
            try:
                downloads = int(row.select_one('#content-right p').text[12:])
            except Exception:
                logger.error("Error parsing downloads")

            # Get year
            try:
                year = int(fullTitle.split("(")[1].split(")")[0])
            except Exception:
                year = None
                logger.error("Error parsing year")

            # Get imdbId
            sub_imdb_id = self.getImdbIdFromSubtitle(row)

            comments = ''
            try:
                comments = comment_rows[index].text
                logger.debug('Comments: {}'.format(comments))
            except Exception:
                logger.error("Error parsing comments")

            # Get Page Link; reset per row for the same reason as the uploader
            page_link = None
            try:
                page_link = row.select_one('#content-main a')['href']
            except Exception:
                logger.error("Error parsing page_link")

            episode_number = video.episode if isinstance(video,
                                                         Episode) else None
            subtitle = self.subtitle_class(next(iter(languages)),
                                           download_link, index, comments,
                                           title, sub_imdb_id, uploader,
                                           page_link, year, downloads,
                                           isinstance(video,
                                                      Episode), episode_number)
            logger.debug('Found subtitle %r', str(subtitle))
            subtitles.append(subtitle)

        ordered_subs = self.order(subtitles)

        return ordered_subs

    @staticmethod
    def order(subtitles):
        """Return a new list of *subtitles* sorted by ``download_count``, highest first."""
        logger.debug("Sorting by download count...")
        sorted_subs = sorted(subtitles,
                             key=lambda s: s.download_count,
                             reverse=True)
        return sorted_subs

    @staticmethod
    def getImdbIdFromSubtitle(row):
        """Return the ``tt``-prefixed IMDB id linked from *row*, or ``None`` if it cannot be found."""
        imdbId = None
        try:
            imdbId = row.select('div[id=content-right] a')[-1].find_all(
                src=re.compile("imdb"))[0].parent.get('href').split("tt")[-1]
        except Exception:
            logger.error("Error parsing imdb id")
        if imdbId is not None:
            return "tt" + imdbId
        else:
            return None

    # subtitrari-noi.ro params
    # info: there seems to be no way to do an advanced search by imdb_id or title
    # the page seems to populate both "search_q" and "cautare" with the same value
    # search_q = ?
    # cautare = search string
    # tip = type of search (0: premiere - doesn't return anything, 1: series only, 2: both, I think, not sure on that)
    # an = year
    # gen = genre

    def getQueryParams(self, imdb_id, title):
        """Build the form data for the search request.

        *imdb_id* takes precedence over *title* as the search string; when both are ``None`` an
        empty search string is sent instead of failing on a missing key.
        """
        queryParams = {
            'search_q': '1',
            'tip': '2',
            'an': 'Toti anii',
            'gen': 'Toate',
        }
        if imdb_id is not None:
            search_string = imdb_id
        elif title is not None:
            search_string = title
        else:
            search_string = ''

        queryParams["cautare"] = search_string
        queryParams["query_q"] = search_string

        return queryParams

    def list_subtitles(self, video, languages):
        """Run a single search for *video* and return the results as a list."""
        title = fix_inconsistent_naming(video.title)
        imdb_id = None
        try:
            # drop the first two characters of the id (presumably the "tt" prefix)
            if isinstance(video, Episode):
                imdb_id = video.series_imdb_id[2:]
            else:
                imdb_id = video.imdb_id[2:]
        except Exception:
            logger.error('Error parsing imdb_id from video object {}'.format(
                str(video)))

        return list(self.query(languages, title, imdb_id, video))

    def download_subtitle(self, subtitle):
        """Download *subtitle* and set ``subtitle.content``.

        RAR and ZIP archives are unpacked; any other payload is accepted as-is only if
        ``subtitle.is_valid()`` says so.

        :raises ProviderError: if the payload is neither a known archive nor a valid subtitle.
        """
        r = self.session.get(subtitle.download_link,
                             headers={'Referer': self.api_url},
                             timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        if subtitle.is_episode:
            subtitle.content = self._get_subtitle_from_archive(
                subtitle, archive)
        else:
            subtitle.content = self.get_subtitle_from_archive(
                subtitle, archive)

    @staticmethod
    def _get_subtitle_from_archive(subtitle, archive):
        """Return the content of the first subtitle file in *archive* matching ``subtitle.desired_episode``.

        Hidden files and files without a subtitle extension are ignored, as are files for which no
        episode number can be guessed. Returns ``None`` when nothing matches.
        """
        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            _guess = guessit(name)
            # a file name without a detectable episode must not abort the whole lookup
            if 'episode' in _guess and subtitle.desired_episode == _guess['episode']:
                return archive.read(name)

        return None
Ejemplo n.º 33
0
def manual_upload_subtitle(path, language, forced, hi, title, scene_name, media_type, subtitle, audio_language):
    """Save a manually uploaded subtitle next to a video and run the follow-up actions.

    The uploaded data is wrapped in a ``Subtitle``, written with ``save_subtitles``, synced with
    ``sync_subtitles``, optionally post-processed, and finally announced to Sonarr/Radarr and the
    event stream.

    :param path: path of the video file the subtitle belongs to.
    :param language: language code; converted with ``alpha3_from_alpha2`` (presumably an alpha2 code).
    :param forced: truthy if the subtitle is a forced one.
    :param hi: truthy if the subtitle is for the hearing impaired; takes precedence over *forced*
        in the generated message and language codes.
    :param title: not used by this function.
    :param scene_name: not used by this function.
    :param media_type: ``'series'`` for episodes; anything else is handled as a movie.
    :param subtitle: uploaded file object providing ``read()`` and ``filename``.
    :param audio_language: audio language, passed on to post-processing.
    :return: ``(message, reversed_path, reversed_subtitles_path)`` on success, ``None`` if saving
        failed or the video is not found in the database.
    """
    logging.debug('BAZARR Manually uploading subtitles for this file: ' + path)

    single = settings.general.getboolean('single_language')

    use_postprocessing = settings.general.getboolean('use_postprocessing')
    postprocessing_cmd = settings.general.postprocessing_cmd

    chmod = int(settings.general.chmod, 8) if not sys.platform.startswith(
        'win') and settings.general.getboolean('chmod_enabled') else None

    language = alpha3_from_alpha2(language)

    custom = CustomLanguage.from_value(language, "alpha3")
    if custom is None:
        lang_obj = Language(language)
    else:
        lang_obj = custom.subzero_language()

    if forced:
        lang_obj = Language.rebuild(lang_obj, forced=True)

    sub = Subtitle(
        lang_obj,
        mods=get_array_from(settings.general.subzero_mods)
    )

    sub.content = subtitle.read()
    if not sub.is_valid():
        # not inside an exception handler, so log a plain error instead of a bogus traceback
        logging.error('BAZARR Invalid subtitle file: ' + subtitle.filename)
        sub.mods = None

    if settings.general.getboolean('utf8_encode'):
        sub.set_encoding("utf-8")

    saved_subtitles = []
    try:
        saved_subtitles = save_subtitles(path,
                                         [sub],
                                         single=single,
                                         tags=None,  # fixme
                                         directory=get_target_folder(path),
                                         chmod=chmod,
                                         # formats=("srt", "vtt")
                                         path_decoder=force_unicode)
    except Exception:
        logging.exception('BAZARR Error saving Subtitles file to disk for this file:' + path)
        return

    if len(saved_subtitles) < 1:
        # nothing was raised here, so there is no traceback to attach
        logging.error('BAZARR Error saving Subtitles file to disk for this file:' + path)
        return

    subtitle_path = saved_subtitles[0].storage_path

    # human readable suffix and language-code suffix; HI wins over forced
    if hi:
        modifier_string, modifier_code = " HI", ":hi"
    elif forced:
        modifier_string, modifier_code = " forced", ":forced"
    else:
        modifier_string = modifier_code = ""

    uploaded_language = language_from_alpha3(language) + modifier_string
    message = uploaded_language + " Subtitles manually uploaded."
    uploaded_language_code3 = language + modifier_code
    uploaded_language_code2 = alpha2_from_alpha3(language) + modifier_code
    audio_language_code2 = alpha2_from_language(audio_language)
    audio_language_code3 = alpha3_from_language(audio_language)

    if media_type == 'series':
        episode_metadata = TableEpisodes.select(TableEpisodes.sonarrSeriesId, TableEpisodes.sonarrEpisodeId) \
            .where(TableEpisodes.path == path_mappings.path_replace_reverse(path)) \
            .dicts() \
            .get_or_none()
        if not episode_metadata:
            return
        series_id = episode_metadata['sonarrSeriesId']
        episode_id = episode_metadata['sonarrEpisodeId']
        sync_subtitles(video_path=path, srt_path=subtitle_path, srt_lang=uploaded_language_code2, media_type=media_type,
                       percent_score=100, sonarr_series_id=series_id, forced=forced,
                       sonarr_episode_id=episode_id)
    else:
        movie_metadata = TableMovies.select(TableMovies.radarrId) \
            .where(TableMovies.path == path_mappings.path_replace_reverse_movie(path)) \
            .dicts() \
            .get_or_none()
        if not movie_metadata:
            return
        # movies have no series id; the radarr id is passed in the episode slot
        series_id = ""
        episode_id = movie_metadata['radarrId']
        sync_subtitles(video_path=path, srt_path=subtitle_path, srt_lang=uploaded_language_code2, media_type=media_type,
                       percent_score=100, radarr_id=episode_id, forced=forced)

    if use_postprocessing:
        command = pp_replace(postprocessing_cmd, path, subtitle_path, uploaded_language,
                             uploaded_language_code2, uploaded_language_code3, audio_language,
                             audio_language_code2, audio_language_code3, forced, 100, "1", "manual", series_id,
                             episode_id, hi=hi)
        postprocessing(command, path)

    if media_type == 'series':
        reversed_path = path_mappings.path_replace_reverse(path)
        reversed_subtitles_path = path_mappings.path_replace_reverse(subtitle_path)
        notify_sonarr(episode_metadata['sonarrSeriesId'])
        event_stream(type='series', action='update', payload=episode_metadata['sonarrSeriesId'])
        event_stream(type='episode-wanted', action='delete', payload=episode_metadata['sonarrEpisodeId'])
    else:
        reversed_path = path_mappings.path_replace_reverse_movie(path)
        reversed_subtitles_path = path_mappings.path_replace_reverse_movie(subtitle_path)
        notify_radarr(movie_metadata['radarrId'])
        event_stream(type='movie', action='update', payload=movie_metadata['radarrId'])
        event_stream(type='movie-wanted', action='delete', payload=movie_metadata['radarrId'])

    return message, reversed_path, reversed_subtitles_path
Ejemplo n.º 34
0
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
    """
    Work out which of the wanted subtitle languages a Plex item is still missing.

    Every media object of the item is inspected on its own. A language counts as existing when
    - the subtitle storage knows an external subtitle for the part and a matching file is still present
      in the configured subtitle folder ("own_external"), or
    - one of the part's streams is a subtitle stream (stream_type 3) in that language.

    :param rating_key: key of the item; handed to get_item() and to the subtitle storage
    :param kind: "show" builds the display title with season/show information, anything else without it
    :param added_at: returned unchanged as the first element of the result
    :param section_title: forwarded to get_plex_item_display_title()
    :param internal: take subtitle streams that have a stream index into account
    :param external: take subtitle streams without a stream index into account
    :param languages: iterable of the wanted languages
    :return: tuple (added_at, item_id, item_title, item, missing) with missing being a set of languages,
             or None when no language is missing
    """
    item_id = int(rating_key)
    item = get_item(rating_key)

    if kind == "show":
        item_title = get_plex_item_display_title(item, kind, parent=item.season, section_title=section_title, parent_title=item.show.title)
    else:
        item_title = get_plex_item_display_title(item, kind, section_title=section_title)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir

    # read the preference once and always through cast_bool, so the file name lookup and the
    # "one subtitle is enough" shortcut further down interpret it the same way
    only_one = cast_bool(Prefs['subtitles.only_one'])

    missing = set()
    languages_set = {Language.rebuild(l) for l in languages}
    for media in item.media:
        existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
        for part in media.parts:

            # did we already download an external subtitle before?
            if subtitle_target_dir and stored_subs:
                for language in languages_set:
                    if has_external_subtitle(part.id, stored_subs, language):
                        # check the existence of the actual subtitle file

                        # get media filename without extension
                        part_basename = os.path.splitext(os.path.basename(part.file))[0]

                        # compute target directory for subtitle
                        # fixme: move to central location
                        if tdir_is_absolute:
                            possible_subtitle_path_base = subtitle_target_dir
                        else:
                            possible_subtitle_path_base = os.path.join(os.path.dirname(part.file), subtitle_target_dir)

                        possible_subtitle_path_base = os.path.realpath(possible_subtitle_path_base)

                        # folder actually exists?
                        if not os.path.isdir(possible_subtitle_path_base):
                            continue

                        found_any = False
                        for ext in config.subtitle_formats:
                            # with "only one" the file name carries no language suffix
                            if only_one:
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s" % (part_basename, ext))
                            else:
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s.%s" % (part_basename, language, ext))

                            # check for subtitle existence
                            if os.path.isfile(possible_subtitle_path):
                                found_any = True
                                Log.Debug(u"Found: %s", possible_subtitle_path)
                                break

                        if found_any:
                            existing_subs["own_external"].append(language)
                            existing_subs["count"] = existing_subs["count"] + 1

            for stream in part.streams:
                if stream.stream_type == 3:
                    is_forced = is_stream_forced(stream)
                    if stream.index:
                        key = "internal"
                    else:
                        key = "external"

                    # a stream without codec can't be a known text subtitle; test for the missing codec
                    # first, otherwise .lower() raises AttributeError on None
                    if not config.exotic_ext and (not stream.codec
                                                  or stream.codec.lower() not in TEXT_SUBTITLE_EXTS):
                        continue

                    # treat unknown language as lang1?
                    if not stream.language_code and config.treat_und_as_first:
                        lang = Language.rebuild(list(config.lang_list)[0])

                    # we can't parse empty language codes
                    elif not stream.language_code or not stream.codec:
                        continue

                    else:
                        # parse with internal language parser first
                        try:
                            lang = get_language_from_stream(stream.language_code)
                            if not lang:
                                if config.treat_und_as_first:
                                    lang = Language.rebuild(list(config.lang_list)[0])
                                else:
                                    continue

                        except (ValueError, LanguageReverseError):
                            continue

                    if lang:
                        # Log.Debug("Found babelfish language: %r", lang)
                        lang.forced = is_forced
                        existing_subs[key].append(lang)
                        existing_subs["count"] = existing_subs["count"] + 1

        # without any existing subtitle every wanted language is missing for this media
        missing_from_part = {Language.rebuild(l) for l in languages}
        if existing_subs["count"]:

            # fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
            # (pt instead of pt-BR) inside the database. So it might actually download pt-BR if there's a local pt-BR
            # subtitle but not our own.
            existing_flat = set((existing_subs["internal"] if internal else [])
                                + (existing_subs["external"] if external else [])
                                + existing_subs["own_external"])

            check_languages = {Language.rebuild(l) for l in languages}
            # remembers the stripped country per alpha3 code so it can be restored on the missing languages
            alpha3_map = {}
            if config.ietf_as_alpha3:
                for language in existing_flat:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

                for language in check_languages:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

            # compare sets of strings, not sets of different Language instances
            check_languages_str = set(str(l) for l in check_languages)
            existing_flat_str = set(str(l) for l in existing_flat)

            if check_languages_str.issubset(existing_flat_str) or \
                    (len(existing_flat) >= 1 and only_one):
                # all subs found
                #Log.Info(u"All subtitles exist for '%s'", item_title)
                continue

            missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
            if config.ietf_as_alpha3:
                for language in missing_from_part:
                    language.country = alpha3_map.get(language.alpha3, None)

        if missing_from_part:
            Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
                     missing_from_part)
            missing.update(missing_from_part)

    if missing:
        # deduplicate
        missing = set(Language.fromietf(la) for la in set(str(l) for l in missing))
        return added_at, item_id, item_title, item, missing
Ejemplo n.º 35
0
    def query(self,
              video,
              languages,
              hash=None,
              size=None,
              imdb_id=None,
              query=None,
              season=None,
              episode=None,
              tag=None,
              use_tag_search=False,
              only_foreign=False,
              also_foreign=False):
        """Search the server for subtitles and return the ones matching `languages`.

        One search criterion is sent per available lookup strategy: file hash plus size, release tag
        (only with `use_tag_search`) and IMDB id (narrowed by season/episode when both are given).

        :param video: video searched for; its `imdb_id` is used to drop results for other titles
        :param languages: wanted languages; results in any other language are dropped
        :param hash: file hash, only used together with `size`
        :param size: file size, only used together with `hash`
        :param imdb_id: IMDB id; the first two characters are cut off before sending
        :param query: accepted for compatibility, currently not sent to the server (see below)
        :param season: season number
        :param episode: episode number
        :param tag: release tag, only used when `use_tag_search` is set
        :param use_tag_search: enable the tag criterion
        :param only_foreign: keep nothing but foreign-parts-only subtitles
        :param also_foreign: keep foreign-parts-only subtitles next to the regular ones
        :return: list of `self.subtitle_class` instances
        :raises ValueError: when no search criterion could be built
        """
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            by_imdb = {'imdbid': imdb_id[2:]}
            if season and episode:
                by_imdb['season'] = season
                by_imdb['episode'] = episode
            criteria.append(by_imdb)
        # Title based criteria ('query', optionally with season/episode) were disabled after the issue
        # with episodes released after October 17th 2020, which is why `query` is not used here.
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        wanted_ids = ','.join(sorted(lang.opensubtitles for lang in languages))
        for criterion in criteria:
            criterion['sublanguageid'] = wanted_ids

        # query the server
        logger.info('Searching subtitles %r', criteria)

        def _search():
            return self.server.SearchSubtitles(self.token, criteria)

        def _checked_search():
            return checked(_search)

        def _retried_search():
            return self.retry(_checked_search)

        response = self.use_token_or_login(_retried_search)

        # exit if no data
        data = response['data']
        if not data:
            logger.info('No subtitles found')
            return []

        found = []

        # loop over subtitle items
        for entry in data:
            # in case OS messes their API results up again, check whether we've got a dict or a string as entry
            item = data[entry] if hasattr(entry, "startswith") else entry

            # read the item
            language = Language.fromopensubtitles(item['SubLanguageID'])
            hearing_impaired = bool(int(item['SubHearingImpaired']))
            page_link = item['SubtitlesLink']
            subtitle_id = int(item['IDSubtitleFile'])
            matched_by = item['MatchedBy']
            movie_kind = item['MovieKind']
            movie_hash = item['MovieHash']
            movie_name = item['MovieName']
            movie_release_name = item['MovieReleaseName']
            raw_year = item['MovieYear']
            movie_year = int(raw_year) if raw_year else None
            # episodes are identified by the IMDB id of their series
            imdb_key = 'SeriesIMDBParent' if (season or episode) else 'IDMovieImdb'
            movie_imdb_id = 'tt' + item[imdb_key]
            movie_fps = item.get('MovieFPS')
            raw_season = item['SeriesSeason']
            series_season = int(raw_season) if raw_season else None
            raw_episode = item['SeriesEpisode']
            series_episode = int(raw_episode) if raw_episode else None
            filename = item['SubFileName']
            encoding = item.get('SubEncoding') or None
            foreign_parts_only = bool(int(item.get('SubForeignPartsOnly', 0)))

            if foreign_parts_only:
                # foreign/forced not wanted
                if not (only_foreign or also_foreign):
                    continue
                # set subtitle language to forced if it's foreign_parts_only
                language = Language.rebuild(language, forced=True)
            elif only_foreign:
                # foreign/forced subtitles only wanted
                continue

            # set subtitle language to hi if it's hearing_impaired
            if hearing_impaired:
                language = Language.rebuild(language, hi=True)

            if language not in languages:
                continue

            # compare against the video's id with the zero padding after the prefix removed
            if video.imdb_id:
                unpadded_id = re.sub("(?<![^a-zA-Z])0+", "", video.imdb_id)
                if movie_imdb_id != unpadded_id:
                    continue

            query_parameters = item.get("QueryParameters")

            subtitle = self.subtitle_class(language,
                                           hearing_impaired,
                                           page_link,
                                           subtitle_id,
                                           matched_by,
                                           movie_kind,
                                           movie_hash,
                                           movie_name,
                                           movie_release_name,
                                           movie_year,
                                           movie_imdb_id,
                                           series_season,
                                           series_episode,
                                           query_parameters,
                                           filename,
                                           encoding,
                                           movie_fps,
                                           skip_wrong_fps=self.skip_wrong_fps)
            subtitle.uploader = item['UserNickName'] or 'anonymous'
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            found.append(subtitle)

        return found
Ejemplo n.º 36
0
class YifySubtitlesProvider(Provider):
    """YIFY Subtitles Provider.

    Looks movies up by IMDB id on the configured mirrors, parses the subtitle table of the movie
    page and downloads the chosen subtitle as a zip archive.
    """

    # (language name as shown on the site, alpha3 code, country or None)
    YifyLanguages = [('Albanian', 'sqi', None), ('Arabic', 'ara', None),
                     ('Bengali', 'ben', None),
                     ('Brazilian Portuguese', 'por', 'BR'),
                     ('Bulgarian', 'bul', None), ('Chinese', 'zho', None),
                     ('Croatian', 'hrv', None), ('Czech', 'ces', None),
                     ('Danish', 'dan', None), ('Dutch', 'nld', None),
                     ('English', 'eng', None), ('Farsi/Persian', 'fas', None),
                     ('Finnish', 'fin', None), ('French', 'fra', None),
                     ('German', 'deu', None), ('Greek', 'ell', None),
                     ('Hebrew', 'heb', None), ('Hungarian', 'hun', None),
                     ('Indonesian', 'ind', None), ('Italian', 'ita', None),
                     ('Japanese', 'jpn', None), ('Korean', 'kor', None),
                     ('Lithuanian', 'lit', None), ('Macedonian', 'mkd', None),
                     ('Malay', 'msa', None), ('Norwegian', 'nor', None),
                     ('Polish', 'pol', None), ('Portuguese', 'por', None),
                     ('Romanian', 'ron', None), ('Russian', 'rus', None),
                     ('Serbian', 'srp', None), ('Slovenian', 'slv', None),
                     ('Spanish', 'spa', None), ('Swedish', 'swe', None),
                     ('Thai', 'tha', None), ('Turkish', 'tur', None),
                     ('Urdu', 'urd', None), ('Vietnamese', 'vie', None)]

    # every listed language, plus its hearing-impaired variant
    languages = {Language(l, c) for (_, l, c) in YifyLanguages}
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))
    # mirrors, tried in order until one answers with status 200
    server_urls = [
        'https://yifysubtitles.org', 'https://www.yifysubtitles.com'
    ]
    video_types = (Movie, )

    def initialize(self):
        """Create the HTTP session with a randomly picked user agent and browser-like headers."""
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(
            0,
            len(AGENT_LIST) - 1)]
        self.session.headers[
            "Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
        self.session.headers["Accept-Encoding"] = "gzip, deflate"
        self.session.headers["DNT"] = "1"
        self.session.headers["Connection"] = "keep-alive"
        self.session.headers["Upgrade-Insecure-Requests"] = "1"
        self.session.headers["Cache-Control"] = "max-age=0"

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def _parse_row(self, row, languages, server_url):
        """Turn one row of the subtitle table into a list holding zero or one YifySubtitle.

        :param row: parsed `tr` element of the subtitle table
        :param languages: set of wanted languages
        :param server_url: base url of the mirror the row came from
        :return: `[YifySubtitle]` when the row's language is wanted, otherwise `[]`
        :raises StopIteration: when the row's language name is not listed in `YifyLanguages`;
            other exceptions are possible when the row does not have the expected cells
        """
        td = row.findAll('td')
        rating = int(td[0].text)
        sub_lang = td[1].text
        release = re.sub(r'^subtitle ', '', td[2].text)
        sub_link = td[2].find('a').get('href')
        page_link = server_url + sub_link
        # derive the archive url from the link of the subtitle page
        sub_link = re.sub(r'^/subtitles/', server_url + '/subtitle/',
                          sub_link) + '.zip'
        hi = bool(td[3].find('span', {'class': 'hi-subtitle'}))
        uploader = td[4].text

        _, l, c = next(x for x in self.YifyLanguages if x[0] == sub_lang)
        lang = Language(l, c)

        # set subtitle language to hi if it's hearing_impaired
        if hi:
            lang = Language.rebuild(lang, hi=True)

        if languages & {lang}:
            return [
                YifySubtitle(lang, page_link, release, uploader, sub_link,
                             rating, hi)
            ]

        return []

    def query(self, languages, imdb_id):
        """Return the subtitles listed for `imdb_id` in one of `languages`, best rated first.

        :param languages: set of wanted languages
        :param imdb_id: IMDB id of the movie
        :return: list of YifySubtitle, empty when the final response is not a 200
        :raises: whatever `response.raise_for_status()` raises for the last response received
        """
        subtitles = []

        logger.info('Searching subtitle %r', imdb_id)
        for server_url in self.server_urls:
            response = self.session.get(server_url + '/movie-imdb/' + imdb_id,
                                        allow_redirects=False,
                                        timeout=10,
                                        headers={'Referer': server_url})
            if response.status_code == 200:
                break

        response.raise_for_status()

        # redirects are not followed, so a non-error answer can still be something other than 200
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        tbl = soup.find('table', {'class': 'other-subs'})
        tbl_body = tbl.find('tbody') if tbl else None
        rows = tbl_body.findAll('tr') if tbl_body else []

        for row in rows:
            try:
                subtitles.extend(self._parse_row(row, languages, server_url))
            except Exception as e:
                # best effort: a single unparsable row must not abort the search, but leave a trace
                logger.debug('Skipping subtitle row that could not be parsed: %r', e)

        subtitles.sort(key=lambda x: x.rating, reverse=True)
        return subtitles

    def list_subtitles(self, video, languages):
        """Query for `video`; returns `[]` unless it is a Movie with an IMDB id."""
        return self.query(languages, video.imdb_id) if isinstance(
            video, Movie) and video.imdb_id else []

    def download_subtitle(self, subtitle):
        """Fetch the archive of `subtitle` (through the cache region) and extract its content.

        The response is cached under the SHA-1 of the archive url. A response that is not a zip
        archive is logged and removed from the cache again.
        """
        logger.info('Downloading subtitle %r', subtitle.sub_link)
        cache_key = sha1(subtitle.sub_link.encode("utf-8")).digest()
        request = region.get(cache_key)
        if request is NO_VALUE:
            request = self.session.get(subtitle.sub_link,
                                       headers={'Referer': subtitle.page_link})
            request.raise_for_status()
            region.set(cache_key, request)
        else:
            logger.info('Cache file: %s',
                        codecs.encode(cache_key, 'hex_codec').decode('utf-8'))

        archive_stream = io.BytesIO(request.content)
        if is_zipfile(archive_stream):
            # context manager makes sure the archive gets closed again
            with ZipFile(archive_stream) as archive:
                self._process_archive(archive, subtitle)
        else:
            logger.error('Ignore unsupported archive %r', request.headers)
            region.delete(cache_key)

    def _process_archive(self, archive_stream, subtitle):
        """Set `subtitle.content` from the archive's `.srt`/`.sub` members, stopping at the first valid one.

        :param archive_stream: opened archive offering `namelist()` and `read()`
        :param subtitle: subtitle whose `content` is set
        """
        for file_name in archive_stream.namelist():
            if file_name.lower().endswith(('.srt', '.sub')):
                logger.info('Found subtitle file %r', file_name)
                subtitle.content = fix_line_ending(
                    archive_stream.read(file_name))
                if subtitle.is_valid():
                    return
Ejemplo n.º 37
0
class PodnapisiProvider(_PodnapisiProvider, ProviderSubtitleArchiveMixin):
    """Podnapisi provider with support for forced (foreign parts only) and hearing-impaired languages."""

    # base languages, plus a forced variant of each, plus a hearing-impaired variant of all of those
    languages = ({Language('por', 'BR'), Language('srp', script='Latn'), Language('srp', script='Cyrl')} |
                 {Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    video_types = (Episode, Movie)

    server_url = 'https://podnapisi.net/subtitles/'
    only_foreign = False
    also_foreign = False
    verify_ssl = True
    subtitle_class = PodnapisiSubtitle
    hearing_impaired_verifiable = True

    def __init__(self, only_foreign=False, also_foreign=False, verify_ssl=True):
        """
        :param only_foreign: return nothing but foreign/forced subtitles
        :param also_foreign: return foreign/forced subtitles next to the regular ones
        :param verify_ssl: stored and applied to the session's `verify` setting in `initialize()`
        """
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign
        self.verify_ssl = verify_ssl

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

        super(PodnapisiProvider, self).__init__()

    def initialize(self):
        """Initialize the parent provider, then mount the Podnapisi adapter and apply `verify_ssl`."""
        super().initialize()
        self.session.mount('https://', PodnapisiAdapter())
        self.session.verify = self.verify_ssl

    def list_subtitles(self, video, languages):
        """Search every known title of `video` and return the results of the first title with any hit.

        :param video: Episode or Movie to search for; specials are skipped
        :param languages: wanted languages, each one is queried separately
        :return: list of subtitles, `[]` when no title produced a result
        """
        if video.is_special:
            logger.info("%s can't search for specials right now, skipping", self)
            return []

        season = episode = None
        if isinstance(video, Episode):
            titles = [fix_inconsistent_naming(title) for title in [video.series] + video.alternative_series]
            season = video.season
            episode = video.episode
        else:
            titles = [video.title] + video.alternative_titles

        for title in titles:
            subtitles = [s for l in languages for s in
                         self.query(l, title, video, season=season, episode=episode, year=video.year,
                                    only_foreign=self.only_foreign, also_foreign=self.also_foreign)]
            if subtitles:
                return subtitles

        return []

    def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
              also_foreign=False):
        """Run a paginated search and return the subtitles whose language equals `language`.

        :param language: wanted language; results are compared against it after the forced and
            hearing-impaired flags of the result have been applied
        :param keyword: title to search for
        :param video: video searched for; provides the release group the subtitle was asked for
        :param season: season number; an episode search needs both season and episode
        :param episode: episode number
        :param year: release year, sent along when given
        :param only_foreign: keep nothing but subtitles flagged as foreign
        :param also_foreign: keep subtitles flagged as foreign next to the regular ones
        :return: list of `self.subtitle_class` instances
        """
        search_language = str(language).lower()

        # sr-Cyrl specialcase
        if search_language == "sr-cyrl":
            search_language = "sr"

        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': search_language, 'sK': keyword}

        is_episode = False
        if season and episode:
            is_episode = True
            params['sTS'] = season
            params['sTE'] = episode

        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []
        pids = set()
        while True:
            # query the server
            content = None
            try:
                content = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
                xml = etree.fromstring(content)
            except etree.ParseError:
                logger.error("Wrong data returned: %r", content)
                break

            # exit if no results
            if not int(xml.find('pagination/results').text):
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for subtitle_xml in xml.findall('subtitle'):
                # read xml elements
                pid = subtitle_xml.find('pid').text
                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in pids:
                    continue

                _language = Language.fromietf(subtitle_xml.find('language').text)
                # flag letters: 'n' marks hearing impaired, 'f' marks foreign parts only
                flags = subtitle_xml.find('flags').text or ''
                hearing_impaired = 'n' in flags
                foreign = 'f' in flags
                if only_foreign and not foreign:
                    continue

                elif not only_foreign and not also_foreign and foreign:
                    continue

                # a foreign subtitle that is kept is a forced one, no matter whether it was let through
                # by also_foreign or by only_foreign; without the flag it could never equal a forced
                # `language` in the comparison below
                elif (also_foreign or only_foreign) and foreign:
                    _language = Language.rebuild(_language, forced=True)

                # set subtitle language to hi if it's hearing_impaired
                if hearing_impaired:
                    _language = Language.rebuild(_language, hi=True)

                if language != _language:
                    continue

                page_link = subtitle_xml.find('url').text
                releases = []
                if subtitle_xml.find('release').text:
                    for release in subtitle_xml.find('release').text.split():
                        releases.append(re.sub(r'\.+$', '', release))  # remove trailing dots
                title = subtitle_xml.find('title').text
                r_season = int(subtitle_xml.find('tvSeason').text)
                r_episode = int(subtitle_xml.find('tvEpisode').text)
                r_year = int(subtitle_xml.find('year').text)

                if is_episode:
                    subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                                   season=r_season, episode=r_episode, year=r_year,
                                                   asked_for_release_group=video.release_group,
                                                   asked_for_episode=episode)
                else:
                    subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                                   year=r_year, asked_for_release_group=video.release_group)

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
                pids.add(pid)

            # stop on last page
            if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
                break

            # increment current page
            params['page'] = int(xml.find('pagination/current').text) + 1
            logger.debug('Getting page %d', params['page'])

        return subtitles

    def download_subtitle(self, subtitle):
        """Download the zip archive of `subtitle` and store the extracted subtitle in `subtitle.content`."""
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + subtitle.pid + '/download', params={'container': 'zip'}, timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            subtitle.content = self.get_subtitle_from_archive(subtitle, zf)
Ejemplo n.º 38
0
def _read_external_subtitle_text(subtitle_path):
    """Return the decoded text of the subtitle file, or None when the file has to be skipped.

    Files larger than 1 MiB are skipped. The content is decoded as UTF-8 first; when that fails the
    encoding guessed by `Detector` is tried. Every reason for skipping is logged at debug level.
    """
    # to improve performance, skip detection of files larger than 1M
    if os.path.getsize(subtitle_path) > 1 * 1024 * 1024:
        logging.debug(
            "BAZARR subtitles file is too large to be text based. Skipping this file: "
            + subtitle_path)
        return None

    with open(subtitle_path, 'rb') as f:
        raw = f.read()

    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        pass

    detector = Detector()
    try:
        guess = detector.detect(raw)
    except Exception:
        logging.debug(
            "BAZARR skipping this subtitles because we can't guess the encoding. "
            "It's probably a binary file: " + subtitle_path)
        return None

    logging.debug('BAZARR detected encoding %r', guess)
    try:
        return raw.decode(guess)
    except Exception:
        logging.debug(
            "BAZARR skipping this subtitles because we can't decode the file using the "
            "guessed encoding. It's probably a binary file: " + subtitle_path)
        return None


def guess_external_subtitles(dest_folder, subtitles):
    """Fill in missing languages and hearing-impaired flags of external subtitles from their content.

    For every entry without a language, the language is guessed from the text of the file. Afterwards
    every entry that has a language which is not flagged hearing-impaired is searched for `hi_regex`
    and rebuilt with `hi=True` on a match. Entries whose language stays unknown, or whose file is
    missing, too large or undecodable, are left as they are.

    :param dest_folder: folder containing the subtitle files
    :param subtitles: dict mapping subtitle file name to its language (or a falsy value when unknown);
        updated in place
    :return: the same `subtitles` dict
    """
    for subtitle, language in subtitles.items():
        subtitle_path = os.path.join(dest_folder, subtitle)
        # decoded file content, shared between both detection steps so the file is read only once
        text = None

        if not language:
            if os.path.exists(subtitle_path) and os.path.splitext(
                    subtitle_path)[1] in core.SUBTITLE_EXTENSIONS:
                logging.debug(
                    "BAZARR falling back to file content analysis to detect language."
                )
                text = _read_external_subtitle_text(subtitle_path)
                if text is None:
                    continue

                detected_language = None
                try:
                    detected_language = guess_language(text)
                except Exception:
                    logging.debug(
                        'BAZARR was unable to detect encoding for this subtitles file: %r',
                        subtitle_path)

                if detected_language:
                    logging.debug(
                        "BAZARR external subtitles detected and guessed this language: "
                        + str(detected_language))
                    try:
                        subtitles[subtitle] = Language.rebuild(
                            Language.fromietf(detected_language),
                            forced=False,
                            hi=False)
                    except Exception as e:
                        # the guessed code can't be turned into a Language; keep the entry undetected
                        logging.debug(
                            'BAZARR was unable to use the guessed language %r: %r',
                            detected_language, e)

        language = subtitles[subtitle]

        # language is still unknown, so there is nothing a hearing-impaired flag could be set on
        if not language:
            continue

        # Detect hearing-impaired external subtitles not identified in filename
        if language.hi:
            continue

        if text is None:
            # the entry may point to a file that isn't there (anymore)
            if not os.path.isfile(subtitle_path):
                continue
            text = _read_external_subtitle_text(subtitle_path)
            if text is None:
                continue

        if re.search(hi_regex, text):
            subtitles[subtitle] = Language.rebuild(language,
                                                   forced=False,
                                                   hi=True)
    return subtitles
Ejemplo n.º 39
0
def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
    """
    Returns a subliminal/guessit-refined parsed video, enriched with the audio and subtitle languages already known
    for the given media part.

    :param pms_video_info: mapping describing the item; its "plex_part" entry is the media part to inspect
    :param ignore_all: if True, embedded/external subtitle scanning is disabled and existing languages are not set
                       on the video (forced refresh)
    :param hints: passed through to parse_video
    :param rating_key: key used to look up the item's media parts in the library and its stored subtitles
    :param providers: passed through to parse_video
    :param skip_hashing: skip hashing in parse_video; hashing is also skipped when config.low_impact_mode is set
    :return: the parsed video, or None if parse_video (or the follow-up handling) raised ValueError
    """
    embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
    external_subtitles = not ignore_all and Prefs['subtitles.scan.external']

    plex_part = pms_video_info["plex_part"]

    if ignore_all:
        Log.Debug("Force refresh intended.")

    Log.Debug("Detecting streams: %s, external_subtitles=%s, embedded_subtitles=%s" % (
        plex_part.file, external_subtitles, embedded_subtitles))

    known_embedded = []
    parts = []
    for media in list(Plex["library"].metadata(rating_key))[0].media:
        parts += media.parts

    # find the library-side part object matching the part we were asked about
    plexpy_part = None
    for part in parts:
        if int(part.id) == int(plex_part.id):
            plexpy_part = part

    # embedded subtitles
    # fixme: skip the whole scanning process if known_embedded == wanted languages?
    audio_languages = []
    if plexpy_part:
        for stream in plexpy_part.streams:
            # stream type 2 is handled as an audio stream here
            if stream.stream_type == 2:
                lang = None
                try:
                    lang = language_from_stream(stream.language_code)
                except LanguageError:
                    Log.Debug("Couldn't detect embedded audio stream language: %s", stream.language_code)

                # treat unknown language as lang1?
                if not lang and config.treat_und_as_first:
                    lang = Language.rebuild(list(config.lang_list)[0])

                # note: lang may still be None here; it is appended regardless
                audio_languages.append(lang)

            # subtitle stream
            elif stream.stream_type == 3 and embedded_subtitles:
                is_forced = helpers.is_stream_forced(stream)

                # forced streams are only considered when forced subtitles are wanted at all
                if ((config.forced_only or config.forced_also) and is_forced) or not is_forced:
                    # embedded subtitle
                    # fixme: tap into external subtitles here instead of scanning for ourselves later?
                    if stream.codec and getattr(stream, "index", None):
                        if config.exotic_ext or stream.codec.lower() in config.text_based_formats:
                            lang = None
                            try:
                                lang = language_from_stream(stream.language_code)
                            except LanguageError:
                                Log.Debug("Couldn't detect embedded subtitle stream language: %s", stream.language_code)

                            # treat unknown language as lang1?
                            if not lang and config.treat_und_as_first:
                                lang = Language.rebuild(list(config.lang_list)[0])

                            if lang:
                                if is_forced:
                                    lang.forced = True
                                known_embedded.append(lang)
    else:
        Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)

    # metadata subtitles
    known_metadata_subs = set()
    meta_subs = get_subtitles_from_metadata(plex_part)
    for language, subList in meta_subs.iteritems():
        try:
            lang = Language.fromietf(Locale.Language.Match(language))
        except LanguageError:
            if config.treat_und_as_first:
                lang = Language.rebuild(list(config.lang_list)[0])
            else:
                continue

        if subList:
            for key in subList:
                # derive the language per key instead of overwriting `lang`, otherwise a single forced entry
                # would mark every following entry of the same language as forced as well
                if key.startswith("subzero_md_forced"):
                    key_lang = Language.rebuild(lang, forced=True)
                else:
                    key_lang = lang

                known_metadata_subs.add(key_lang)
                Log.Debug("Found metadata subtitle %r:%s for %s", key_lang, key, plex_part.file)

    Log.Debug("Known metadata subtitles: %r", known_metadata_subs)
    Log.Debug("Known embedded subtitles: %r", known_embedded)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    try:
        # get basic video info scan (filename)
        video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
                            providers=providers)

        # set stream languages
        if audio_languages:
            video.audio_languages = audio_languages
            Log.Info("Found audio streams: %s" % ", ".join([str(l) for l in audio_languages]))

        if not ignore_all:
            set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
                                   embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
                                   stored_subs=stored_subs, languages=config.lang_list,
                                   only_one=config.only_one, known_metadata_subs=known_metadata_subs)

        # add video fps info
        video.fps = plex_part.fps
        return video

    except ValueError:
        # the failure is only logged; the function then falls through and returns None
        Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())
Ejemplo n.º 40
0
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None,
                    message=None):
    """
    Builds the details menu for an item without any deeper tree below it, such as a movie or an episode.

    The menu offers refresh/force-refresh actions, one entry per media part and configured language (manage the
    stored subtitle, or list available ones if none is stored), an optional embedded-subtitles entry per part
    and the include/exclude options.

    :param rating_key: key of the item to display
    :param title: menu title; prefixed with "<base_title> > " when base_title is given
    :param base_title: title of the parent menu, if any
    :param item_title: display title of the item, used in entry titles and passed on to the sub menus
    :param randomize: not read by this function
    :param header: header passed to the object container
    :param message: message passed to the object container
    :return: the populated object container
    """
    from interface.main import InclExclMenu

    if base_title:
        title = unicode(base_title) + " > " + unicode(title)
    else:
        title = unicode(title)

    plex_item = get_item(rating_key)
    item = plex_item
    current_kind = get_item_kind_from_rating_key(rating_key)

    # refresh timeout handed to RefreshItem, in milliseconds (30 seconds)
    refresh_timeout = 30 * 1000

    oc = SubFolderObjectContainer(title2=title, replace_parent=True, header=header, message=message)

    # nothing to show: offer a single entry that leads back to this very menu
    if not item:
        retry_key = Callback(ItemDetailsMenu, rating_key=rating_key, title=title, base_title=base_title,
                             item_title=item_title, randomize=timestamp())
        retry_title = _(u"Item not found: %s!", item_title)
        retry_summary = _("Plex didn't return any information about the item, please refresh it and come back later")
        oc.add(DirectoryObject(key=retry_key, title=retry_title, summary=retry_summary, thumb=default_thumb))
        return oc

    # episodes get a way back to their season
    if current_kind == "episode":
        from interface.menu import MetadataMenu
        show = get_item(item.show.rating_key)
        season = get_item(item.season.rating_key)

        back_key = Callback(MetadataMenu, rating_key=season.rating_key, title=season.title, base_title=show.title,
                            previous_item_type="show", previous_rating_key=show.rating_key, display_items=True,
                            randomize=timestamp())
        back_title = _(u"< Back to %s", season.title)
        back_summary = _("Back to %s > %s", show.title, season.title)
        oc.add(DirectoryObject(key=back_key, title=back_title, summary=back_summary,
                               thumb=season.thumb or default_thumb))

    # plain refresh
    refresh_key = Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
                           timeout=refresh_timeout)
    refresh_title = _(u"Refresh: %s", item_title)
    refresh_summary = _("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
                        "new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind))
    oc.add(DirectoryObject(key=refresh_key, title=refresh_title, summary=refresh_summary,
                           thumb=item.thumb or default_thumb))

    # forced refresh
    force_key = Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True,
                         randomize=timestamp(), timeout=refresh_timeout)
    force_title = _(u"Force-find subtitles: %(item_title)s", item_title=item_title)
    force_summary = _("Issues a forced refresh, ignoring known subtitles and searching for new ones")
    oc.add(DirectoryObject(key=force_key, title=force_title, summary=force_summary,
                           thumb=item.thumb or default_thumb))

    # stored subtitle info for this item
    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)

    # walk every part of every media and every configured language
    has_multiple_parts = len(plex_item.media) > 1
    part_index = 0
    every_part = (media_part for media in plex_item.media for media_part in media.parts)
    for part in every_part:
        filename = os.path.basename(part.file)
        if not os.path.exists(part.file):
            # the file is gone from disk; skip it without counting it
            continue

        part_id = str(part.id)
        part_index += 1

        # title/summary prefixes are only needed to tell multiple files apart
        if has_multiple_parts:
            part_index_addon = _(u"File %(file_part_index)s: ", file_part_index=part_index)
            part_summary_addon = u"%s " % filename
        else:
            part_index_addon = u""
            part_summary_addon = u""

        for lang in config.lang_list:
            # stored subtitle data of this part (physical media item) for this language, if any
            current_sub = stored_subs.get_any(part_id, lang)
            has_sub = bool(current_sub)

            summary = _(u"%(part_summary)sNo current subtitle in storage", part_summary=part_summary_addon)
            current_sub_id = current_sub_provider_name = current_score = None

            if has_sub:
                current_sub_id = current_sub.id
                current_sub_provider_name = current_sub.provider_name
                current_score = current_sub.score

                summary = _(u"%(part_summary)sCurrent subtitle: %(provider_name)s (added: %(date_added)s, "
                            u"%(mode)s), Language: %(language)s, Score: %(score)i, Storage: %(storage_type)s",
                            part_summary=part_summary_addon,
                            provider_name=_(current_sub.provider_name),
                            date_added=df(current_sub.date_added),
                            mode=_(current_sub.mode_verbose),
                            language=display_language(lang),
                            score=current_sub.score,
                            storage_type=current_sub.storage_type)

            # a stored subtitle leads to its management menu, otherwise to the list of available subtitles
            target_menu = SubtitleOptionsMenu if has_sub else ListAvailableSubsForItemMenu
            entry_key = Callback(target_menu, rating_key=rating_key, part_id=part_id, title=title,
                                 item_title=item_title, language=lang, language_name=display_language(lang),
                                 current_id=current_sub_id,
                                 item_type=plex_item.type, filename=filename, current_data=summary,
                                 randomize=timestamp(), current_provider=current_sub_provider_name,
                                 current_score=current_score)
            if has_sub:
                entry_title = _(u"%(part_summary)sManage %(language)s subtitle", part_summary=part_index_addon,
                                language=display_language(lang))
            else:
                entry_title = _(u"%(part_summary)sList %(language)s subtitles", part_summary=part_index_addon,
                                language=display_language(lang))

            oc.add(DirectoryObject(key=entry_key, title=entry_title, summary=summary))

        if not config.plex_transcoder:
            continue

        # embedded subtitles
        embedded_langs = []
        for stream in part.streams:
            # only subtitle streams (type 3) without a stream key and with a text based codec
            wanted = stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS
            if not wanted:
                continue

            stream_lang = get_language_from_stream(stream.language_code)
            is_forced = is_stream_forced(stream)

            # unknown language: optionally fall back to the first configured language
            if not stream_lang and config.treat_und_as_first:
                stream_lang = list(config.lang_list)[0]

            if stream_lang:
                embedded_langs.append(Language.rebuild(stream_lang, forced=is_forced))

        if embedded_langs:
            embedded_key = Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id,
                                    title=title, item_type=plex_item.type, item_title=item_title,
                                    base_title=base_title, randomize=timestamp())
            # de-duplicate the languages while keeping their order of appearance
            distinct_langs = list(OrderedDict.fromkeys(embedded_langs))
            embedded_title = _(u"%(part_summary)sEmbedded subtitles (%(languages)s)",
                               part_summary=part_index_addon,
                               languages=", ".join(display_language(l) for l in distinct_langs))
            oc.add(DirectoryObject(key=embedded_key, title=embedded_title,
                                   summary=_(u"Extract embedded subtitle streams")))

    # episodes use the item's own title for the include/exclude options
    if current_kind == "episode":
        ignore_title = get_item_title(item)
    else:
        ignore_title = item_title
    add_incl_excl_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=InclExclMenu)
    subtitle_storage.destroy()

    return oc