Example #1
def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False):
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fileroot, fileext = os.path.splitext(filename)
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir
    for entry in _scandir(dirpath):
        if not entry.name and not scandir_generic:
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
            return _search_external_subtitles(path, languages, only_one, True)
        if not entry.is_file(follow_symlinks=False):
            continue

        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().startswith(fileroot.lower()) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        p_root, p_ext = os.path.splitext(p)
        if not INCLUDE_EXOTIC_SUBS and p_ext.lower() not in (".srt", ".ass", ".ssa", ".vtt"):
            continue

        # extract potential forced/normal/default tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom']:
                p_root = split_tag[0]

        forced = False
        if adv_tag:
            forced = "forced" in adv_tag

        # extract the potential language code
        language_code = p_root[len(fileroot):].replace('_', '-')[1:]

        # default language is undefined
        language = Language('und')

        # attempt to parse
        if language_code:
            try:
                language = Language.fromietf(language_code)
                language.forced = forced
            except ValueError:
                logger.error('Cannot parse language code %r', language_code)
                language = None

        elif not language_code and only_one:
            language = Language.rebuild(list(languages)[0], forced=forced)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
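
For illustration, a hedged walk-through of the language-code slicing above, assuming babelfish is available and a sidecar file named 'movie.pt_BR.forced.srt' sits next to 'movie.mkv' (the filenames are made up):

from babelfish import Language

fileroot = 'movie'
p_root = 'movie.pt_BR'  # after the 'forced' tag has been split off
code = p_root[len(fileroot):].replace('_', '-')[1:]  # -> 'pt-BR'
language = Language.fromietf(code)  # -> <Language [pt-BR]>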
Example #2
def SelectStoredSubForItemMenu(**kwargs):
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])
    item_type = kwargs["item_type"]
    sub_key = tuple(kwargs.pop("sub_key").split("__"))

    plex_item = get_item(rating_key)
    storage = get_subtitle_storage()
    stored_subs = storage.load(plex_item.rating_key)

    subtitles = stored_subs.get_all(part_id, language)
    subtitle = subtitles[sub_key]

    save_stored_sub(subtitle, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
                    stored_subs=stored_subs)

    stored_subs.set_current(part_id, language, sub_key)
    storage.save(stored_subs)
    storage.destroy()

    kwa = {
        "header": _("Success"),
        "message": _("Subtitle saved to disk"),
        "title": kwargs["title"],
        "item_title": kwargs["item_title"],
        "base_title": kwargs.get("base_title")
    }

    # fixme: return to SubtitleOptionsMenu properly? (needs recomputation of current_data)

    return ItemDetailsMenu(rating_key, randomize=timestamp(), **kwa)
Example #3
def BlacklistAllPartsSubtitleMenu(**kwargs):
    rating_key = kwargs.get("rating_key")
    language = kwargs.get("language")
    if language:
        language = Language.fromietf(language)

    item = get_item(rating_key)

    if not item:
        return

    item_title = get_item_title(item)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)
    for part_id, languages in stored_subs.parts.iteritems():
        sub_dict = languages
        if language:
            key = str(language)
            if key not in sub_dict:
                continue

            sub_dict = {key: sub_dict[key]}

        for language, subs in sub_dict.iteritems():
            if "current" in subs:
                stored_subs.blacklist(part_id, language, subs["current"])
                Log.Info("Added %s to blacklist", subs["current"])

    subtitle_storage.save(stored_subs)
    subtitle_storage.destroy()

    return RefreshItem(rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(), timeout=30000)
Example #4
    def load(self, fn=None, content=None, language=None, encoding="utf-8"):
        """
        
        :param encoding: used for decoding the content when fn is given, not used in case content is given
        :param language: babelfish.Language language of the subtitle
        :param fn:  filename
        :param content: unicode 
        :return: 
        """
        if language:
            self.language = Language.rebuild(language, forced=False)
        self.initialized_mods = {}
        try:
            if fn:
                self.f = pysubs2.load(fn, encoding=encoding)
            elif content:
                self.f = pysubs2.SSAFile.from_string(content)
        except (IOError,
                UnicodeDecodeError,
                pysubs2.exceptions.UnknownFPSError,
                pysubs2.exceptions.UnknownFormatIdentifierError,
                pysubs2.exceptions.FormatAutodetectionError):
            if fn:
                logger.exception("Couldn't load subtitle: %s: %s", fn, traceback.format_exc())
            elif content:
                logger.exception("Couldn't load subtitle: %s", traceback.format_exc())

        return bool(self.f)
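
A minimal usage sketch of the two pysubs2 entry points the loader above relies on (the path and the SRT snippet are made up; it assumes pysubs2 is installed):

import pysubs2

from_file = pysubs2.load('/tmp/example.srt', encoding='utf-8')  # parse a subtitle file
from_text = pysubs2.SSAFile.from_string(u'1\n00:00:01,000 --> 00:00:02,000\nHello\n')  # parse unicode content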
Example #5
    def query(self, show_id, series, season, episode, year=None):
        # get the episode ids
        episode_ids = self.get_episode_ids(show_id, season)
        # Provider doesn't store multi-episode information
        episode = min(episode) if episode and isinstance(episode, list) else episode

        if episode not in episode_ids:
            logger.error('Episode %d not found', episode)
            return []

        # get the episode page
        logger.info('Getting the page for episode %d', episode_ids[episode])
        r = self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10)
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitles rows
        subtitles = []
        for row in soup.select('.subtitlen'):
            # read the item
            language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
            subtitle_id = int(row.parent['href'][10:-5])
            page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
            rip = row.find('p', title='rip').text.strip() or None
            release = row.find('h5').text.strip() or None

            subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip,
                                           release)
            logger.info('Found subtitle %s', subtitle)
            subtitles.append(subtitle)

        soup.decompose()
        soup = None

        return subtitles
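
The [13:-4] slice above strips a fixed prefix and the file extension from the flag image path; a hedged illustration, assuming the src looks like 'images/flags/en.gif' (the real markup may differ):

src = 'images/flags/en.gif'
code = src[13:-4]  # len('images/flags/') == 13 and '.gif' is dropped -> 'en'
# Language.fromtvsubtitles(code) then maps the site-specific code via subliminal's registered converter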
Example #6
    def query(self, show_id, series, season, year=None, country=None):
        # patch: fix logging

        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id, season)
        r = self.session.get(self.server_url + 'ajax_loadShow.php',
                             params={'show': show_id, 'season': season},
                             timeout=10,
                             headers={
                                 "referer": "%sshow/%s" % (self.server_url, show_id),
                                 "X-Requested-With": "XMLHttpRequest"
                             }
                             )

        r.raise_for_status()

        if r.status_code == 304:
            raise TooManyRequests()

        if not r.content:
            # The provider wrongfully returns 304 Not Modified with empty content;
            # raise_for_status won't raise an exception for that status code
            logger.error('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitle rows
        subtitles = []
        for row in soup.select('tr.epeven'):
            cells = row('td')

            # ignore incomplete subtitles
            status = cells[5].text
            if status != 'Completed':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # read the item
            language = Language.fromaddic7ed(cells[3].text)
            hearing_impaired = bool(cells[6].text)
            page_link = self.server_url + cells[2].a['href'][1:]
            season = int(cells[0].text)
            episode = int(cells[1].text)
            title = cells[2].text
            version = cells[4].text
            download_link = cells[9].a['href'][1:]

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title,
                                           year,
                                           version, download_link)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        soup.decompose()
        soup = None

        return subtitles
Example #7
def get_language_from_stream(lang_code):
    if lang_code:
        lang = Locale.Language.Match(lang_code)
        if lang and lang != "xx":
            # Log.Debug("Found language: %r", lang)
            return Language.fromietf(lang)
        elif lang:
            try:
                return language_from_stream(lang)
            except LanguageError:
                pass
Example #8
def SubtitleReapplyMods(**kwargs):
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    lang_a2 = kwargs["language"]
    item_type = kwargs["item_type"]

    language = Language.fromietf(lang_a2)

    set_mods_for_part(rating_key, part_id, language, item_type, [], mode="add")

    kwargs.pop("randomize")
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
Example #9
def ManageBlacklistMenu(**kwargs):
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    remove_sub_key = kwargs.pop("remove_sub_key", None)
    current_data = unicode(kwargs["current_data"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    current_bl, subs = stored_subs.get_blacklist(part_id, language)

    if remove_sub_key:
        remove_sub_key = tuple(remove_sub_key.split("__"))
        stored_subs.blacklist(part_id, language, remove_sub_key, add=False)
        storage.save(stored_subs)
        Log.Info("Removed %s from blacklist", remove_sub_key)

    kwargs.pop("randomize")

    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                     title=kwargs["title"], randomize=timestamp()),
        title=_(u"< Back to %s", kwargs["title"]),
        summary=current_data,
        thumb=default_thumb
    ))

    def sorter(pair):
        # thanks RestrictedModule parser for messing with lambda (x, y)
        return pair[1]["date_added"]

    for sub_key, data in sorted(current_bl.iteritems(), key=sorter, reverse=True):
        provider_name, subtitle_id = sub_key
        title = _(u"%(provider_name)s, %(subtitle_id)s (added: %(date_added)s, %(mode)s), Language: %(language)s, "
                  u"Score: %(score)i, Storage: %(storage_type)s",
                  provider_name=_(provider_name),
                  subtitle_id=subtitle_id,
                  date_added=df(data["date_added"]),
                  mode=_(current_sub.get_mode_verbose(data["mode"])),
                  language=display_language(Language.fromietf(language)),
                  score=data["score"],
                  storage_type=data["storage_type"])
        oc.add(DirectoryObject(
            key=Callback(ManageBlacklistMenu, remove_sub_key="__".join(sub_key), randomize=timestamp(), **kwargs),
            title=title,
            summary=_(u"Remove subtitle from blacklist")
        ))

    storage.destroy()

    return oc
Example #10
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, skip_unknown=False):
    streams = []
    streams_unknown = []
    has_unknown = False
    found_requested_language = False
    for stream in part.streams:
        # subtitle stream
        if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
            is_forced = helpers.is_stream_forced(stream)
            language = helpers.get_language_from_stream(stream.language_code)
            if language:
                language = Language.rebuild(language, forced=is_forced)

            is_unknown = False
            found_requested_language = requested_language and requested_language == language

            if not language and config.treat_und_as_first:
                # only consider first unknown subtitle stream
                if has_unknown and skip_duplicate_unknown:
                    continue

                language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
                is_unknown = True
                has_unknown = True
                streams_unknown.append({"stream": stream, "is_unknown": is_unknown, "language": language,
                                        "is_forced": is_forced})

            if not requested_language or found_requested_language:
                streams.append({"stream": stream, "is_unknown": is_unknown, "language": language,
                                "is_forced": is_forced})

                if found_requested_language:
                    break

    if streams_unknown and not found_requested_language and not skip_unknown:
        streams = streams_unknown

    return streams
Example #11
    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            subtitle = SubsceneSubtitle.from_api(s)
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles
Example #12
def SubtitleSetMods(mods=None, mode=None, **kwargs):
    if not isinstance(mods, types.ListType) and mods:
        mods = [mods]

    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    lang_a2 = kwargs["language"]
    item_type = kwargs["item_type"]

    language = Language.fromietf(lang_a2)

    set_mods_for_part(rating_key, part_id, language, item_type, mods, mode=mode)

    kwargs.pop("randomize")
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
Example #13
    def query(self, languages, video):
        # query the server
        keywords = []
        if isinstance(video, Movie):
            if video.title:
                keywords.append(video.title)
            if video.year:
                keywords.append(str(video.year))
        elif isinstance(video, Episode):
            if video.series:
                keywords.append(video.series)
            if video.season and video.episode:
                keywords.append('S%02dE%02d' % (video.season, video.episode))
            elif video.episode:
                keywords.append('E%02d' % video.episode)
        query = ' '.join(keywords)

        params = {'token': self.token, 'q': query, 'is_file': 1}
        logger.debug('Searching subtitles %r', params)
        res = self.session.get(server_url + '/sub/search', params=params, timeout=10)
        res.raise_for_status()
        result = res.json()

        if result['status'] != 0:
            logger.error('status error: %r', result['status'])
            return []

        if not result['sub']['subs']:
            logger.debug('No subtitle found')

        # parse the subtitles
        pattern = re.compile(ur'lang(?P<code>\w+)')
        subtitles = []
        for sub in result['sub']['subs']:
            if 'lang' not in sub:
                continue
            for key in sub['lang']['langlist'].keys():
                match = pattern.match(key)
                try:
                    language = Language.fromassrt(match.group('code'))
                    if language in languages:
                        subtitles.append(AssrtSubtitle(language, sub['id'], sub['videoname'], self.session, self.token))
                except:
                    pass

        return subtitles
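
A small worked example of the langlist key matching above (the key is made up; whether a given code is known is left to Language.fromassrt, which raises for unknown codes, hence the try/except):

import re

pattern = re.compile(r'lang(?P<code>\w+)')
match = pattern.match('langeng')
print(match.group('code'))  # -> 'eng'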
Example #14
def apply_default_mods(reapply_current=False, scandir_generic=False):
    storage = get_subtitle_storage()
    subs_applied = 0

    try:
        for fn in storage.get_all_files(scandir_generic=scandir_generic):
            data = storage.load(None, filename=fn)
            if data:
                video_id = data.video_id
                item_type = get_item_kind_from_rating_key(video_id)
                if not item_type:
                    continue

                for part_id, part in data.parts.iteritems():
                    for lang, subs in part.iteritems():
                        current_sub = subs.get("current")
                        if not current_sub:
                            continue
                        sub = subs[current_sub]

                        if not sub.content:
                            continue

                        current_mods = sub.mods or []
                        if not reapply_current:
                            add_mods = list(set(config.default_mods).difference(set(current_mods)))
                            if not add_mods:
                                continue
                        else:
                            if not current_mods:
                                continue
                            add_mods = []

                        try:
                            set_mods_for_part(video_id, part_id, Language.fromietf(lang), item_type, add_mods, mode="add")
                        except:
                            Log.Error("Couldn't set mods for %s:%s: %s", video_id, part_id, traceback.format_exc())
                            continue

                        subs_applied += 1
    except OSError:
        return apply_default_mods(reapply_current=reapply_current, scandir_generic=True)
    storage.destroy()
    Log.Debug("Applied mods to %i items" % subs_applied)
Example #15
    def _get_detail(self):
        if self._detail:
            return self._detail
        params = {'token': self.token, 'id': self.id}
        r = self.session.get(server_url + '/sub/detail', params=params, timeout=10)
        r.raise_for_status()

        result = r.json()
        sub = result['sub']['subs'][0]
        files = sub['filelist']

        # first pass: guessit
        for f in files:
            logger.info('File %r', f)
            guess = guessit(f['f'], self.guessit_options)
            logger.info('GuessIt %r', guess)
            langs = set()
            if 'language' in guess:
                langs.update(guess['language'])
            if 'subtitle_language' in guess:
                langs.update(guess['subtitle_language'])
            if self.language in langs:
                self._detail = f
                return f

        # second pass: keyword matching
        codes = language_converters['assrt'].codes
        for f in files:
            langs = set([ Language.fromassrt(k) for k in codes if k in f['f'] ])
            logger.info('%s: %r', f['f'], langs)
            if self.language in langs:
                self._detail = f
                return f

        # fallback: pick up first file if nothing matches
        return files[0]
Example #16
def ListStoredSubsForItemMenu(**kwargs):
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    all_subs = stored_subs.get_all(part_id, language)
    kwargs.pop("randomize")

    for key, subtitle in sorted(filter(lambda x: x[0] not in ("current", "blacklist"), all_subs.items()),
                                key=lambda x: x[1].date_added, reverse=True):
        is_current = key == all_subs["current"]

        summary = _(u"added: %(date_added)s, %(mode)s, Language: %(language)s, Score: %(score)i, Storage: "
                    u"%(storage_type)s",
                    date_added=df(subtitle.date_added),
                    mode=_(subtitle.mode_verbose),
                    language=display_language(language),
                    score=subtitle.score,
                    storage_type=subtitle.storage_type)

        sub_name = subtitle.provider_name
        if sub_name == "embedded":
            sub_name += " (%s)" % subtitle.id

        oc.add(DirectoryObject(
            key=Callback(SelectStoredSubForItemMenu, randomize=timestamp(), sub_key="__".join(key), **kwargs),
            title=_(u"%(current_state)s%(subtitle_name)s, Score: %(score)s",
                    current_state=_("Current: ") if is_current else _("Stored: "),
                    subtitle_name=sub_name,
                    score=subtitle.score),
            summary=summary
        ))

    return oc
Example #17
def test_search_language_in_list():
    assert search_language_in_list(Language('zho', None, 'Hant'),
                                   [Language('zho', None, 'Hant')])
    assert search_language_in_list(Language('zho', None, 'Hans'),
                                   [Language('zho', None, 'Hans')])
    assert search_language_in_list(Language('zho', None, 'Hant'),
                                   [Language('zho')])
    assert search_language_in_list(Language('zho', None, 'Hans'),
                                   [Language('zho')])
    assert search_language_in_list(Language('zho', None, 'Hant'),
                                   [Language('eng'), Language('zho')])
    assert not search_language_in_list(Language('zho', None, 'Hans'),
                                       [Language('zho', None, 'Hant')])
    assert search_language_in_list(Language('zho', None, 'Hans'),
                                   [Language('zho', None, 'Hant'),
                                    Language('zho')])
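
The assertions above suggest that a language with a specific script (e.g. Hant) matches either an identical entry or a bare-language entry, but never an entry pinned to a different script. A hedged, self-contained sketch of a matcher consistent with those assertions (the real implementation may differ):

from babelfish import Language

def search_language_in_list(language, languages):
    # assumed behaviour: exact match, or same base language when the
    # candidate does not specify a script
    for candidate in languages:
        if language == candidate:
            return candidate
        if language.alpha3 == candidate.alpha3 and candidate.script is None:
            return candidate
    return None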
Example #18
def SubtitleModificationsMenu(**kwargs):
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    lang_instance = Language.fromietf(language)
    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    kwargs.pop("randomize")

    current_mods = current_sub.mods or []

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)

    from interface.item_details import SubtitleOptionsMenu
    oc.add(DirectoryObject(
        key=Callback(SubtitleOptionsMenu, randomize=timestamp(), **kwargs),
        title=_(u"< Back to subtitle options for: %s", kwargs["title"]),
        summary=unicode(kwargs["current_data"]),
        thumb=default_thumb
    ))

    for identifier, mod in mod_registry.mods.iteritems():
        if mod.advanced:
            continue

        if mod.exclusive and identifier in current_mods:
            continue

        if mod.languages and lang_instance not in mod.languages:
            continue

        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=identifier, mode="add", randomize=timestamp(), **kwargs),
            title=pad_title(_(mod.description)), summary=_(mod.long_description) or ""
        ))

    fps_mod = SubtitleModifications.get_mod_class("change_FPS")
    oc.add(DirectoryObject(
        key=Callback(SubtitleFPSModMenu, randomize=timestamp(), **kwargs),
        title=pad_title(_(fps_mod.description)), summary=_(fps_mod.long_description) or ""
    ))

    shift_mod = SubtitleModifications.get_mod_class("shift_offset")
    oc.add(DirectoryObject(
        key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
        title=pad_title(_(shift_mod.description)), summary=_(shift_mod.long_description) or ""
    ))

    color_mod = SubtitleModifications.get_mod_class("color")
    oc.add(DirectoryObject(
        key=Callback(SubtitleColorModMenu, randomize=timestamp(), **kwargs),
        title=pad_title(_(color_mod.description)), summary=_(color_mod.long_description) or ""
    ))

    if current_mods:
        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=None, mode="remove_last", randomize=timestamp(), **kwargs),
            title=pad_title(_("Remove last applied mod (%s)", current_mods[-1])),
            summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
        ))
        oc.add(DirectoryObject(
            key=Callback(SubtitleListMods, randomize=timestamp(), **kwargs),
            title=pad_title(_("Manage applied mods")),
            summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods))
        ))
        oc.add(DirectoryObject(
            key=Callback(SubtitleReapplyMods, randomize=timestamp(), **kwargs),
            title=pad_title(_("Reapply applied mods")),
            summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
        ))

    oc.add(DirectoryObject(
        key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
        title=pad_title(_("Restore original version")),
        summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods) if current_mods else _("none"))
    ))

    storage.destroy()

    return oc
Example #19
class SubzProvider(Provider):
    """Subz Provider."""
    languages = {Language(l) for l in ['ell']}
    server_url = 'https://subz.xyz'
    sign_in_url = '/sessions'
    sign_out_url = '/logout'
    search_url = '/typeahead/{}'
    episode_link = '/series/{show_id}/seasons/{season:d}/episodes/{episode:d}'
    movie_link = '/movies/{}'
    subtitle_class = SubzSubtitle

    def __init__(self):
        self.logged_in = False
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(
            __short_version__)

    def terminate(self):
        self.session.close()

    def get_show_ids(self,
                     title,
                     year=None,
                     is_episode=True,
                     country_code=None):
        """Get the best matching show id for `series`, `year` and `country_code`.

        First search in the result of :meth:`_get_show_suggestions`.

        :param title: show title.
        :param year: year of the show, if any.
        :type year: int
        :param is_episode: if the search is for episode.
        :type is_episode: bool
        :param country_code: country code of the show, if any.
        :type country_code: str
        :return: the matched show ids, if any.
        :rtype: list

        """
        title_sanitized = sanitize(title).lower()
        show_ids = self._get_suggestions(title, is_episode)

        matched_show_ids = []
        for show in show_ids:
            show_id = None
            # attempt with country
            if not show_id and country_code:
                logger.debug('Getting show id with country')
                if sanitize(show['title']) == text_type(
                        '{title} {country}').format(
                            title=title_sanitized,
                            country=country_code.lower()):
                    show_id = show['link'].split('/')[-1]

            # attempt with year
            if not show_id and year:
                logger.debug('Getting show id with year')
                if sanitize(
                        show['title']) == text_type('{title} {year}').format(
                            title=title_sanitized, year=year):
                    show_id = show['link'].split('/')[-1]

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show['link'].split('/')[-1] if sanitize(
                    show['title']) == title_sanitized else None

            if show_id:
                matched_show_ids.append(show_id)

        return matched_show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME,
                               to_str=text_type,
                               should_cache_fn=lambda value: value)
    def _get_suggestions(self, title, is_episode=True):
        """Search the show or movie id from the `title` and `year`.

        :param str title: title of the show.
        :param is_episode: if the search is for episode.
        :type is_episode: bool
        :return: the show suggestions found.
        :rtype: list

        """
        # make the search
        logger.info('Searching show ids with %r', title)
        r = self.session.get(self.server_url +
                             text_type(self.search_url).format(title),
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        show_type = 'series' if is_episode else 'movie'
        parsed_suggestions = [
            s for s in json.loads(r.text)
            if 'type' in s and s['type'] == show_type
        ]
        logger.debug('Found suggestions: %r', parsed_suggestions)

        return parsed_suggestions

    def query(self, show_id, series, season, episode, title):
        # get the season list of the show
        logger.info('Getting the subtitle list of show id %s', show_id)
        is_episode = False
        if all((show_id, season, episode)):
            is_episode = True
            page_link = self.server_url + self.episode_link.format(
                show_id=show_id, season=season, episode=episode)
        elif all((show_id, title)):
            page_link = self.server_url + self.movie_link.format(show_id)
        else:
            return []

        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        year_num = None
        if not is_episode:
            year_num = int(soup.select_one('span.year').text)
        show_title = str(
            soup.select_one(
                '#summary-wrapper > div.summary h1').contents[0]).strip()

        subtitles = []
        # loop over episode rows
        for subtitle in soup.select('div[id="subtitles"] tr[data-id]'):
            # read common info
            version = subtitle.find('td', {'class': 'name'}).text
            download_link = subtitle.find(
                'a', {'class': 'btn-success'})['href'].strip('\'')

            # read the episode info
            if is_episode:
                episode_numbers = soup.select_one(
                    '#summary-wrapper > div.container.summary span.main-title-sxe'
                ).text
                season_num = None
                episode_num = None
                matches = episode_re.match(episode_numbers.strip())
                if matches:
                    season_num = int(matches.group(1))
                    episode_num = int(matches.group(2))

                episode_title = soup.select_one(
                    '#summary-wrapper > div.container.summary span.main-title'
                ).text

                subtitle = self.subtitle_class(Language.fromalpha2('el'),
                                               page_link, show_title,
                                               season_num, episode_num,
                                               episode_title, year_num,
                                               version, download_link)
            # read the movie info
            else:
                subtitle = self.subtitle_class(Language.fromalpha2('el'),
                                               page_link, None, None, None,
                                               show_title, year_num, version,
                                               download_link)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        # lookup show_id
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        elif isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        show_ids = []
        for title in titles:
            show_ids = self.get_show_ids(title, video.year,
                                         isinstance(video, Episode))
            if show_ids:
                break

        subtitles = []
        # query for subtitles with the show_id
        for show_id in show_ids:
            if isinstance(video, Episode):
                subtitles += [
                    s for s in self.query(show_id, video.series, video.season,
                                          video.episode, video.title)
                    if s.language in languages and s.season == video.season
                    and s.episode == video.episode
                ]
            elif isinstance(video, Movie):
                subtitles += [
                    s
                    for s in self.query(show_id, None, None, None, video.title)
                    if s.language in languages and s.year == video.year
                ]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubzSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link,
                                 headers={'Referer': subtitle.page_link},
                                 timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug(
                    'Unable to download subtitle. No data returned from provider'
                )
                return

            archive = _get_archive(r.content)

            subtitle_content = _get_subtitle_from_archive(archive)
            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
            else:
                logger.debug('Could not extract subtitle from %r', archive)
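
For reference, a hedged example of how the URL templates defined on the class above expand (the show id and numbers are made up):

server_url = 'https://subz.xyz'
episode_link = '/series/{show_id}/seasons/{season:d}/episodes/{episode:d}'
print(server_url + episode_link.format(show_id='example-show', season=1, episode=3))
# -> https://subz.xyz/series/example-show/seasons/1/episodes/3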
Example #20
class WizdomProvider(Provider):
    """Wizdom Provider."""
    languages = {Language(l) for l in ['heb']}
    server_url = 'wizdom.xyz'

    _tmdb_api_key = 'a51ee051bcd762543373903de296e0a3'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

    def terminate(self):
        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_imdb_id(self, title, year, is_movie):
        """Search the IMDB ID for the given `title` and `year`.

        :param str title: title to search for.
        :param int year: year to search for (or 0 if not relevant).
        :param bool is_movie: If True, IMDB ID will be searched for in TMDB instead of Wizdom.
        :return: the IMDB ID for the given title and year (or None if not found).
        :rtype: str

        """
        # make the search
        logger.info('Searching IMDB ID for %r%r', title, '' if not year else ' ({})'.format(year))
        category = 'movie' if is_movie else 'tv'
        title = title.replace('\'', '')
        # get TMDB ID first
        r = self.session.get('http://api.tmdb.org/3/search/{}?api_key={}&query={}{}&language=en'.format(
            category, self._tmdb_api_key, title, '' if not year else '&year={}'.format(year)))
        r.raise_for_status()
        tmdb_results = r.json().get('results')
        if tmdb_results:
            tmdb_id = tmdb_results[0].get('id')
            if tmdb_id:
                # get actual IMDB ID from TMDB
                r = self.session.get('http://api.tmdb.org/3/{}/{}{}?api_key={}&language=en'.format(
                    category, tmdb_id, '' if is_movie else '/external_ids', self._tmdb_api_key))
                r.raise_for_status()
                return str(r.json().get('imdb_id', '')) or None
        return None

    def query(self, title, season=None, episode=None, year=None, filename=None, imdb_id=None):
        # search for the IMDB ID if needed.
        is_movie = not (season and episode)
        imdb_id = imdb_id or self._search_imdb_id(title, year, is_movie)
        if not imdb_id:
            return {}

        # search
        logger.debug('Using IMDB ID %r', imdb_id)
        url = 'http://json.{}/{}.json'.format(self.server_url, imdb_id)
        page_link = 'http://{}/#/{}/{}'.format(self.server_url, 'movies' if is_movie else 'series', imdb_id)

        # get the list of subtitles
        logger.debug('Getting the list of subtitles')
        r = self.session.get(url)
        r.raise_for_status()
        try:
            results = r.json()
        except ValueError:
            return {}

        # filter irrelevant results
        if not is_movie:
            results = results.get('subs', [])
            # there are two formats of result jsons - seasons list and seasons dict
            if isinstance(results, list):
                results = results[season] if len(results) >= season else {}
            else:
                results = results.get(str(season), {})
            results = results.get(str(episode), [])
        else:
            results = results.get('subs', [])

        # loop over results
        subtitles = {}
        for result in results:
            language = Language('heb')
            hearing_impaired = False
            subtitle_id = result['id']
            release = result['version']

            # create the subtitle
            subtitle = WizdomSubtitle(language, hearing_impaired, page_link, title, season, episode, title, imdb_id,
                                      subtitle_id, release)
            logger.debug('Found subtitle %r', subtitle)
            subtitles[subtitle_id] = subtitle

        return subtitles.values()

    def list_subtitles(self, video, languages):
        season = episode = None
        year = video.year
        filename = video.name
        imdb_id = video.imdb_id

        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
            season = video.season
            episode = video.episode
            imdb_id = video.series_imdb_id
        else:
            titles = [video.title] + video.alternative_titles

        for title in titles:
            subtitles = [s for s in
                         self.query(title, season, episode, year, filename, imdb_id) if s.language in languages]
            if subtitles:
                return subtitles

        return []

    def download_subtitle(self, subtitle):
        # download
        url = 'http://zip.{}/{}.zip'.format(self.server_url, subtitle.subtitle_id)
        r = self.session.get(url, headers={'Referer': subtitle.page_link}, timeout=10)
        r.raise_for_status()

        if len(r.content) == 0:
            return

        # open the zip
        with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
            # remove some filenames from the namelist
            namelist = [n for n in zf.namelist() if os.path.splitext(n)[1] in ['.srt', '.sub']]
            if len(namelist) > 1:
                raise ProviderError('More than one file to unzip')

            subtitle.content = fix_line_ending(zf.read(namelist[0]))
Example #21
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {
        Language.fromtitlovi(l)
        for l in language_converters['titlovi'].codes
    } | {Language.fromietf('sr-Latn')}
    server_url = 'https://titlovi.com'
    search_url = server_url + '/titlovi/?'
    download_url = server_url + '/download/?type=1&mediaid='

    def initialize(self):
        self.session = Session()
        logger.debug("Using random user agents")
        self.session.headers['User-Agent'] = AGENT_LIST[randint(
            0,
            len(AGENT_LIST) - 1)]
        logger.debug('User-Agent set to %s',
                     self.session.headers['User-Agent'])
        self.session.headers['Referer'] = self.server_url
        logger.debug('Referer set to %s', self.session.headers['Referer'])
        load_verification("titlovi", self.session)

    def terminate(self):
        self.session.close()

    def query(self,
              languages,
              title,
              season=None,
              episode=None,
              year=None,
              video=None):
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info(
                'Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages'
            )
            used_languages = filter(
                lambda l: l != Language.fromietf('sr-Latn'), used_languages)
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url,
                                     params=params,
                                     timeout=10)
                r.raise_for_status()
            except RequestException as e:
                captcha_passed = False
                if e.response.status_code == 403 and "data-sitekey" in e.response.content:
                    logger.info(
                        'titlovi: Solving captcha. This might take a couple of minutes, but should only '
                        'happen once every so often')

                    site_key = re.search(r'data-sitekey="(.+?)"',
                                         e.response.content).group(1)
                    challenge_s = re.search(
                        r'type="hidden" name="s" value="(.+?)"',
                        e.response.content).group(1)
                    challenge_ray = re.search(r'data-ray="(.+?)"',
                                              e.response.content).group(1)
                    if not all([site_key, challenge_s, challenge_ray]):
                        raise Exception("titlovi: Captcha site-key not found!")

                    pitcher = pitchers.get_pitcher()(
                        "titlovi",
                        e.request.url,
                        site_key,
                        user_agent=self.session.headers["User-Agent"],
                        cookies=self.session.cookies.get_dict(),
                        is_invisible=True)

                    result = pitcher.throw()
                    if not result:
                        raise Exception("titlovi: Couldn't solve captcha!")

                    s_params = {
                        "s": challenge_s,
                        "id": challenge_ray,
                        "g-recaptcha-response": result,
                    }
                    r = self.session.get(self.server_url +
                                         "/cdn-cgi/l/chk_captcha",
                                         params=s_params,
                                         timeout=10,
                                         allow_redirects=False)
                    r.raise_for_status()
                    r = self.session.get(self.search_url,
                                         params=params,
                                         timeout=10)
                    r.raise_for_status()
                    store_verification("titlovi", self.session)
                    captcha_passed = True

                if not captcha_passed:
                    logger.exception('RequestException %s', e)
                    break
            else:
                try:
                    soup = BeautifulSoup(r.content, 'lxml')

                    # number of results
                    result_count = int(
                        soup.select_one('.results_count b').string)
                except:
                    result_count = None

                # exit if no results
                if not result_count:
                    if not subtitles:
                        logger.debug('No subtitles found')
                    else:
                        logger.debug("No more subtitles found")
                    break

                # number of pages with results
                pages = int(math.ceil(result_count / float(items_per_page)))

                # get current page
                if 'pg' in params:
                    current_page = int(params['pg'])

                try:
                    sublist = soup.select(
                        'section.titlovi > ul.titlovi > li.subtitleContainer.canEdit'
                    )
                    for sub in sublist:
                        # subtitle id
                        sid = sub.find(attrs={
                            'data-id': True
                        }).attrs['data-id']
                        # get download link
                        download_link = self.download_url + sid
                        # title and alternate title
                        match = title_re.search(sub.a.string)
                        if match:
                            _title = match.group('title')
                            alt_title = match.group('altitle')
                        else:
                            continue

                        # page link
                        page_link = self.server_url + sub.a.attrs['href']
                        # subtitle language
                        match = lang_re.search(
                            sub.select_one('.lang').attrs['src'])
                        if not match:
                            continue
                        try:
                            # decode language
                            lang = Language.fromtitlovi(
                                match.group('lang') + match.group('script'))
                        except ValueError:
                            continue

                        # release year or series start year
                        r_year = None
                        match = year_re.search(
                            sub.find(attrs={
                                'data-id': True
                            }).parent.i.string)
                        if match:
                            r_year = int(match.group('year'))
                        # fps
                        fps = None
                        match = fps_re.search(sub.select_one('.fps').string)
                        if match:
                            fps = match.group('fps')
                        # releases
                        releases = str(
                            sub.select_one('.fps').parent.contents[0].string)

                        # handle movies and series separately
                        if is_episode:
                            # season and episode info
                            sxe = sub.select_one('.s0xe0y').string
                            r_season = None
                            r_episode = None
                            if sxe:
                                match = season_re.search(sxe)
                                if match:
                                    r_season = int(match.group('season'))
                                match = episode_re.search(sxe)
                                if match:
                                    r_episode = int(match.group('episode'))

                            subtitle = self.subtitle_class(
                                lang,
                                page_link,
                                download_link,
                                sid,
                                releases,
                                _title,
                                alt_title=alt_title,
                                season=r_season,
                                episode=r_episode,
                                year=r_year,
                                fps=fps,
                                asked_for_release_group=video.release_group,
                                asked_for_episode=episode)
                        else:
                            subtitle = self.subtitle_class(
                                lang,
                                page_link,
                                download_link,
                                sid,
                                releases,
                                _title,
                                alt_title=alt_title,
                                year=r_year,
                                fps=fps,
                                asked_for_release_group=video.release_group)
                        logger.debug('Found subtitle %r', subtitle)

                        # prime our matches so we can use the values later
                        subtitle.get_matches(video)

                        # add found subtitles
                        subtitles.append(subtitle)

                finally:
                    soup.decompose()

                # stop on last page
                if current_page >= pages:
                    break

                # increment current page
                params['pg'] = current_page + 1
                logger.debug('Getting page %d', params['pg'])

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [
            s for s in self.query(languages,
                                  fix_inconsistent_naming(title),
                                  season=season,
                                  episode=episode,
                                  year=video.year,
                                  video=video)
        ]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr'
                                         or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive,
                                                   archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(
                subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive,
                                          archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            if not ('.cyr' in sub_name or '.cir' in sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in sub_name
                    or '.cir' in sub_name) and '.lat' not in sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
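
For illustration, a hedged mini-example of how the Cyrillic/Latin selection above buckets archive members (the filenames are made up):

subs_in_archive = ['Movie.2019.lat.srt', 'Movie.2019.cir.srt']
sr_lat_subs = [s for s in subs_in_archive if '.cyr' not in s and '.cir' not in s]
sr_cyr_subs = [s for s in subs_in_archive if ('.cyr' in s or '.cir' in s) and '.lat' not in s]
print(sr_lat_subs)  # -> ['Movie.2019.lat.srt']
print(sr_cyr_subs)  # -> ['Movie.2019.cir.srt']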
Example #22
class SubdivxSubtitlesProvider(Provider):
    provider_name = 'subdivx'
    hash_verifiable = False
    languages = {Language.fromalpha2(lang) for lang in ['es']}
    subtitle_class = SubdivxSubtitle

    server_url = 'https://www.subdivx.com/'
    multi_result_throttle = 2
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(
            __short_version__)

    def terminate(self):
        self.session.close()

    def query(self, video, languages):
        if isinstance(video, Episode):
            query = "{} S{:02d}E{:02d}".format(video.series, video.season,
                                               video.episode)
        else:
            # Subdivx has problems searching foreign movies if the year is
            # appended. For example: if we search "Memories of Murder 2003",
            # Subdivx won't return any results; but if we search "Memories of
            # Murder", it will. That's because in Subdivx foreign titles have
            # the year after the original title ("Salinui chueok (2003) aka
            # Memories of Murder").
            # A proper solution would be filtering results with the year in
            # _parse_subtitles_page.
            query = video.title

        params = {
            'q': query,  # search string
            'accion': 5,  # action search
            'oxdown': 1,  # order by downloads descending
            'pg': 1  # page 1
        }

        logger.debug('Searching subtitles %r', query)
        subtitles = []
        language = self.language_list[0]
        search_link = self.server_url + 'index.php'
        while True:
            response = self.session.get(search_link, params=params, timeout=20)
            self._check_response(response)

            try:
                page_subtitles = self._parse_subtitles_page(
                    video, response, language)
            except Exception as e:
                logger.error('Error parsing subtitles list: ' + str(e))
                break

            subtitles += page_subtitles

            if len(page_subtitles) < 100:
                break  # this is the last page

            params['pg'] += 1  # search next page
            time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubdivxSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)

            # get download link
            download_link = self._get_download_link(subtitle)

            # download zip / rar file with the subtitle
            response = self.session.get(
                self.server_url + download_link,
                headers={'Referer': subtitle.page_link},
                timeout=30)
            self._check_response(response)

            # open the compressed archive
            archive = self._get_archive(response.content)

            # extract the subtitle
            subtitle_content = self._get_subtitle_from_archive(
                archive, subtitle)
            subtitle.content = fix_line_ending(subtitle_content)

    def _parse_subtitles_page(self, video, response, language):
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode('utf-8', 'ignore'),
            ['lxml', 'html.parser'])
        title_soups = page_soup.find_all("div",
                                         {'id': 'menu_detalle_buscador'})
        body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})

        for title_soup, body_soup in zip(title_soups, body_soups):

            # title
            title = title_soup.find("a").text.replace("Subtitulos de ", "")

            # filter by year
            if video.year and str(video.year) not in title:
                continue

            page_link = title_soup.find("a")["href"]

            # description
            description = body_soup.find("div", {
                'id': 'buscador_detalle_sub'
            }).text
            description = description.replace(",", " ").lower()

            # uploader
            uploader = body_soup.find("a", {'class': 'link1'}).text

            subtitle = self.subtitle_class(language, video, page_link, title,
                                           description, uploader)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _get_download_link(self, subtitle):
        response = self.session.get(subtitle.page_link, timeout=20)
        self._check_response(response)
        try:
            page_soup = ParserBeautifulSoup(
                response.content.decode('utf-8', 'ignore'),
                ['lxml', 'html.parser'])
            links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
            for link_soup in links_soup:
                if link_soup['href'].startswith('bajar'):
                    return self.server_url + link_soup['href']
            links_soup = page_soup.find_all("a", {'class': 'link1'})
            for link_soup in links_soup:
                if "bajar.php" in link_soup['href']:
                    return link_soup['href']
        except Exception as e:
            raise APIThrottled('Error parsing download link: ' + str(e))

        raise APIThrottled('Download link not found')

    @staticmethod
    def _check_response(response):
        if response.status_code != 200:
            raise ServiceUnavailable('Bad status code: ' +
                                     str(response.status_code))

    @staticmethod
    def _get_archive(content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled('Unsupported compressed format')

        return archive

    @staticmethod
    def _get_subtitle_from_archive(archive, subtitle):
        _valid_names = []
        for name in archive.namelist():
            # discard hidden files
            # discard non-subtitle files
            if (not os.path.split(name)[-1].startswith('.')
                    and name.lower().endswith(SUBTITLE_EXTENSIONS)):
                _valid_names.append(name)

        # archive with only 1 subtitle
        if len(_valid_names) == 1:
            logger.debug(
                "returning from archive: {} (single subtitle file)".format(
                    _valid_names[0]))
            return archive.read(_valid_names[0])

        # in archives with more than 1 subtitle (season pack) we try to guess the best subtitle file
        _scores = get_scores(subtitle.video)
        _max_score = 0
        _max_name = ""
        for name in _valid_names:
            _guess = guessit(name)
            if 'season' not in _guess:
                _guess['season'] = -1
            if 'episode' not in _guess:
                _guess['episode'] = -1

            if isinstance(subtitle.video, Episode):
                logger.debug("guessing %s" % name)
                logger.debug("subtitle S{}E{} video S{}E{}".format(
                    _guess['season'], _guess['episode'], subtitle.video.season,
                    subtitle.video.episode))

                if subtitle.video.episode != _guess[
                        'episode'] or subtitle.video.season != _guess['season']:
                    logger.debug('subtitle does not match video, skipping')
                    continue

            matches = set()
            matches |= guess_matches(subtitle.video, _guess)
            _score = sum((_scores.get(match, 0) for match in matches))
            logger.debug('srt matches: %s, score %d' % (matches, _score))
            if _score > _max_score:
                _max_score = _score
                _max_name = name
                logger.debug("new max: {} {}".format(name, _score))

        if _max_score > 0:
            logger.debug("returning from archive: {} scored {}".format(
                _max_name, _max_score))
            return archive.read(_max_name)

        raise APIThrottled('Cannot find the subtitle in the compressed file')
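A minimal usage sketch for the provider above, assuming the module's usual imports and that subliminal's Video.fromname helper is available in this environment; the filename is invented and this block is not part of the original example:

from subliminal import Video

video = Video.fromname('Memories.of.Murder.2003.1080p.BluRay.x264.mkv')
provider = SubdivxSubtitlesProvider()
provider.initialize()
try:
    # list_subtitles delegates to query(); Subdivx only serves Spanish subtitles
    found = provider.list_subtitles(video, provider.languages)
    if found:
        provider.download_subtitle(found[0])  # fills found[0].content with the subtitle bytes
finally:
    provider.terminate()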
Example #23
0
    def list_subtitles(self,
                       rating_key,
                       item_type,
                       part_id,
                       language,
                       skip_wrong_fps=True,
                       metadata=None,
                       scanned_parts=None,
                       air_date_cutoff=None):

        if not metadata:
            metadata = get_plex_metadata(rating_key, part_id, item_type)

        if not metadata:
            return

        providers = config.get_providers(
            media_type="series" if item_type == "episode" else "movies")
        if not scanned_parts:
            scanned_parts = scan_videos([metadata],
                                        ignore_all=True,
                                        providers=providers)
            if not scanned_parts:
                Log.Error(u"%s: Couldn't list available subtitles for %s",
                          self.name, rating_key)
                return

        video, plex_part = scanned_parts.items()[0]
        refine_video(video, refiner_settings=config.refiner_settings)

        if air_date_cutoff is not None and metadata["item"].year and \
            metadata["item"].year + air_date_cutoff < datetime.date.today().year:
            Log.Debug(
                "Skipping searching for subtitles: %s, it aired over %s year(s) ago.",
                rating_key, air_date_cutoff)
            return

        config.init_subliminal_patches()

        provider_settings = config.provider_settings
        if not skip_wrong_fps:
            provider_settings["opensubtitles"]["skip_wrong_fps"] = False

        if item_type == "episode":
            min_score = 240
            if video.is_special:
                min_score = 180
        else:
            min_score = 60

        languages = {Language.fromietf(language)}

        available_subs = list_all_subtitles(
            [video],
            languages,
            providers=providers,
            provider_configs=provider_settings,
            pool_class=config.provider_pool,
            throttle_callback=config.provider_throttle,
            language_hook=language_hook)

        use_hearing_impaired = Prefs['subtitles.search.hearingImpaired'] in (
            "prefer", "force HI")

        # sort subtitles by score
        unsorted_subtitles = []
        for s in available_subs[video]:
            Log.Debug(u"%s: Starting score computation for %s", self.name, s)
            try:
                matches = s.get_matches(video)
            except AttributeError:
                Log.Error(u"%s: Match computation failed for %s: %s",
                          self.name, s, traceback.format_exc())
                continue

            # skip wrong season/episodes
            if item_type == "episode":
                can_verify_series = True
                if not s.hash_verifiable and "hash" in matches:
                    can_verify_series = False

                if can_verify_series and not {"series", "season", "episode"
                                              }.issubset(matches):
                    Log.Debug(
                        u"%s: Skipping %s, because it doesn't match our series/episode",
                        self.name, s)
                    continue

            unsorted_subtitles.append(
                (s,
                 compute_score(matches,
                               s,
                               video,
                               hearing_impaired=use_hearing_impaired),
                 matches))
        scored_subtitles = sorted(unsorted_subtitles,
                                  key=operator.itemgetter(1),
                                  reverse=True)

        subtitles = []
        for subtitle, score, matches in scored_subtitles:
            # check score
            if score < min_score:
                Log.Info(u'%s: Score %d is below min_score (%d)', self.name,
                         score, min_score)
                continue
            subtitle.score = score
            subtitle.matches = matches
            subtitle.part_id = part_id
            subtitle.item_type = item_type
            subtitles.append(subtitle)
        return subtitles
Example #24
0
class YavkaNetProvider(Provider):
    """YavkaNet Provider."""
    languages = {Language(l) for l in [
        'bul', 'eng', 'rus', 'spa', 'ita'
    ]}
    video_types = (Episode, Movie)

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
        self.session.headers["Accept-Encoding"] = "gzip, deflate, br"
        self.session.headers["DNT"] = "1"
        self.session.headers["Connection"] = "keep-alive"
        self.session.headers["Upgrade-Insecure-Requests"] = "1"
        self.session.headers["Cache-Control"] = "max-age=0"

    def terminate(self):
        self.session.close()

    def query(self, language, video):
        subtitles = []
        isEpisode = isinstance(video, Episode)
        params = {
            's': '',
            'y': '',
            'u': '',
            'l': 'BG',
            'i': ''
        }

        if isEpisode:
            params['s'] = "%s s%02de%02d" % (sanitize(fix_tv_naming(video.series), {'\''}), video.season, video.episode)
        else:
            params['y'] = video.year
            params['s'] = sanitize(fix_movie_naming(video.title), {'\''})

        if language == 'en' or language == 'eng':
            params['l'] = 'EN'
        elif language == 'ru' or language == 'rus':
            params['l'] = 'RU'
        elif language == 'es' or language == 'spa':
            params['l'] = 'ES'
        elif language == 'it' or language == 'ita':
            params['l'] = 'IT'

        logger.info('Searching subtitle %r', params)
        response = self.retry(self.session.get('https://yavka.net/subtitles.php', params=params, allow_redirects=False,
                                               timeout=10, headers={'Referer': 'https://yavka.net/'}))
        if not response:
            return subtitles
        response.raise_for_status()

        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        rows = soup.findAll('tr')
        
        # Search on first 25 rows only
        for row in rows[:25]:
            element = row.select_one('a.balon, a.selector')
            if element:
                link = element.get('href')
                notes = re.sub(r'(?s)<p.*><img [A-z0-9=\'/\. :;#]*>(.*)</p>', r"\1", element.get('content'))
                title = element.get_text()

                try:
                    year = int(element.find_next_sibling('span').text.strip('()'))
                except Exception:
                    year = None

                try:
                    fps = float(row.find('span', {'title': 'Кадри в секунда'}).text.strip())
                except Exception:
                    fps = None

                element = row.find('a', {'class': 'click'})
                uploader = element.get_text() if element else None
                logger.info('Found subtitle link %r', link)
                # slow down to prevent being throttled
                time.sleep(1)
                response = self.retry(self.session.get('https://yavka.net' + link))
                if not response:
                    continue
                soup = BeautifulSoup(response.content, 'lxml')
                subs_id = soup.find("input", {"name": "id"})
                if subs_id:
                    subs_id = subs_id['value']
                else:
                    continue
                sub = self.download_archive_and_add_subtitle_files('https://yavka.net' + link + '/', language, video,
                                                                   fps, subs_id)
                for s in sub:
                    s.title = title
                    s.notes = notes
                    s.year = year
                    s.uploader = uploader
                    s.single_file = len(sub) == 1
                subtitles = subtitles + sub
        return subtitles
        
    def list_subtitles(self, video, languages):
        return [s for lang in languages for s in self.query(lang, video)]

    def download_subtitle(self, subtitle):
        if not subtitle.content:
            seeking_subtitle_file = subtitle.filename
            arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video,
                                                                subtitle.fps, subtitle.subs_id)
            for s in arch:
                if s.filename == seeking_subtitle_file:
                    subtitle.content = s.content

    @staticmethod
    def process_archive_subtitle_files(archive_stream, language, video, link, fps, subs_id):
        subtitles = []
        media_type = 'episode' if isinstance(video, Episode) else 'movie'
        for file_name in archive_stream.namelist():
            if file_name.lower().endswith(('.srt', '.sub')):
                logger.info('Found subtitle file %r', file_name)
                subtitle = YavkaNetSubtitle(language, file_name, media_type, video, link, fps, subs_id)
                subtitle.content = fix_line_ending(archive_stream.read(file_name))
                subtitles.append(subtitle)
        return subtitles

    def download_archive_and_add_subtitle_files(self, link, language, video, fps, subs_id):
        logger.info('Downloading subtitle %r', link)
        cache_key = sha1(link.encode("utf-8")).digest()
        request = region.get(cache_key)
        if request is NO_VALUE:
            time.sleep(1)
            request = self.retry(self.session.post(link, data={
                'id': subs_id,
                'lng': language.basename.upper()
            }, headers={
                'referer': link
            }, allow_redirects=False))
            if not request:
                return []
            request.raise_for_status()
            region.set(cache_key, request)
        else:
            logger.info('Cache file: %s', codecs.encode(cache_key, 'hex_codec').decode('utf-8'))

        try:
            archive_stream = io.BytesIO(request.content)
            if is_rarfile(archive_stream):
                return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps, subs_id)
            elif is_zipfile(archive_stream):
                return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps, subs_id)
        except Exception:
            pass

        logger.error('Ignore unsupported archive %r', request.headers)
        region.delete(cache_key)
        return []

    @staticmethod
    def retry(func, limit=5, delay=5):
        for i in range(limit):
            response = func
            if response.content:
                return response
            else:
                logger.debug('Slowing down because we are getting throttled. Iteration {0} of {1}. Waiting {2} seconds '
                             'to retry...'.format(i + 1, limit, delay))
                time.sleep(delay)
        return None
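Note: retry above is handed an already-evaluated Response (the call sites pass self.session.get(...) directly), so each iteration only re-inspects the same object after a delay; no new request is issued. A callable-based variant that re-issues the request on every attempt might look like the sketch below; it reuses the module's logger and time imports and is an assumption, not the provider's actual behaviour:

def retry_request(make_request, limit=5, delay=5):
    # make_request is a zero-argument callable, e.g. lambda: session.get(url),
    # so every attempt performs a fresh HTTP request
    for attempt in range(1, limit + 1):
        response = make_request()
        if response.content:
            return response
        logger.debug('Empty response, attempt %d of %d; waiting %d seconds before retrying',
                     attempt, limit, delay)
        time.sleep(delay)
    return None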
Example #25
0
    def query(self, show_id, series, season, year=None, country=None):
        # get the season list of the show
        logger.info('Getting the season list of show id %d', show_id)
        r = self.session.get(self.server_url + self.series_url.format(show_id),
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        series = soup.find('name').text

        # loop over season rows
        seasons = soup.findAll('series_group')
        season_id = None

        for season_row in seasons:
            try:
                parsed_season = int(season_row['ssnnum'])
                if parsed_season == season:
                    season_id = int(season_row['ssnid'])
                    break
            except (ValueError, TypeError):
                continue

        if season_id is None:
            logger.debug('Season not found in provider')
            return []

        # get the subtitle list of the season
        logger.info('Getting the subtitle list of season %d', season)
        r = self.session.get(
            self.server_url +
            self.season_url.format(show_id=show_id, season=season_id),
            timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        subtitles = []
        # loop over episode rows
        for subtitle_group in soup.findAll('subg'):
            # read the episode info
            episode_info = subtitle_group.find('etitle')
            if episode_info is None:
                continue

            episodes = []
            episode_match = episode_re.match(episode_info['number'])
            if episode_match:
                episodes = [
                    int(e)
                    for e in [episode_match.group(1),
                              episode_match.group(3)] if e
                ]

            subtitle_info = subtitle_group.find('sgt')
            if subtitle_info is None:
                continue

            season = int(subtitle_info['ssnnum'])
            episode_id = int(subtitle_info['epsid'])

            # filter out unreleased subtitles
            for subs_tag in subtitle_group.findAll('sr'):
                if subs_tag['published_on'] == '':
                    continue

                page_link = self.server_url + self.page_link.format(
                    show_id=show_id,
                    season_id=season_id,
                    season=season,
                    episode=episode_id)
                title = episode_info['title']
                version = subs_tag.fmt.text + ' ' + subs_tag.team.text
                download_link = self.server_url + self.download_link.format(
                    int(subs_tag['rlsid']))

                for episode in episodes:
                    subtitle = self.subtitle_class(Language.fromalpha2('el'),
                                                   page_link, series, season,
                                                   episode, year, title,
                                                   version, download_link)
                    logger.debug('Found subtitle %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles
Example #26
0
class XSubsProvider(Provider):
    """XSubs Provider."""
    languages = {Language(l) for l in ['ell']}
    video_types = (Episode, )
    server_url = 'http://xsubs.tv'
    sign_in_url = '/xforum/account/signin/'
    sign_out_url = '/xforum/account/signout/'
    all_series_url = '/series/all.xml'
    series_url = '/series/{:d}/main.xml'
    season_url = '/series/{show_id:d}/{season:d}.xml'
    page_link = '/ice/xsw.xml?srsid={show_id:d}#{season_id:d};{season:d};{episode:d}'
    download_link = '/xthru/getsub/{:d}'
    subtitle_class = XSubsSubtitle

    def __init__(self, username=None, password=None):
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(
            __short_version__)

        # login
        if self.username and self.password:
            logger.info('Logging in')
            self.session.get(self.server_url + self.sign_in_url)
            data = {
                'username': self.username,
                'password': self.password,
                'csrfmiddlewaretoken': self.session.cookies['csrftoken']
            }
            r = self.session.post(self.server_url + self.sign_in_url,
                                  data,
                                  allow_redirects=False,
                                  timeout=10)

            if r.status_code != 302:
                raise AuthenticationError(self.username)

            logger.debug('Logged in')
            self.logged_in = True

    def terminate(self):
        # logout
        if self.logged_in:
            logger.info('Logging out')
            r = self.session.get(self.server_url + self.sign_out_url,
                                 timeout=10)
            r.raise_for_status()
            logger.debug('Logged out')
            self.logged_in = False

        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME,
                               should_cache_fn=lambda value: value)
    def _get_show_ids(self):
        # get the shows page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + self.all_series_url, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        for show_category in soup.findAll('seriesl'):
            if show_category.attrs['category'] == u'Σειρές':
                for show in show_category.findAll('series'):
                    show_ids[sanitize(show.text)] = int(show['srsid'])
                break
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids

    def get_show_id(self, series_names, year=None):
        series_sanitized_names = []
        for name in series_names:
            sanitized_name = sanitize(name)
            series_sanitized_names.append(sanitized_name)
            alternative_name = _get_alternative_name(sanitized_name)
            if alternative_name:
                series_sanitized_names.append(alternative_name)

        show_ids = self._get_show_ids()
        show_id = None

        for series_sanitized in series_sanitized_names:
            # attempt with year
            if year:
                logger.debug('Getting show id with year')
                show_id = show_ids.get('{series} {year:d}'.format(
                    series=series_sanitized, year=year))

            # attempt with article at the end
            if not show_id and year:
                logger.debug('Getting show id with year in brackets')
                show_id = show_ids.get('{series} [{year:d}]'.format(
                    series=series_sanitized, year=year))

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show_ids.get(series_sanitized)

            if show_id:
                break

        return int(show_id) if show_id else None

    def query(self, show_id, series, season, year=None, country=None):
        # get the season list of the show
        logger.info('Getting the season list of show id %d', show_id)
        r = self.session.get(self.server_url + self.series_url.format(show_id),
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        series = soup.find('name').text

        # loop over season rows
        seasons = soup.findAll('series_group')
        season_id = None

        for season_row in seasons:
            try:
                parsed_season = int(season_row['ssnnum'])
                if parsed_season == season:
                    season_id = int(season_row['ssnid'])
                    break
            except (ValueError, TypeError):
                continue

        if season_id is None:
            logger.debug('Season not found in provider')
            return []

        # get the subtitle list of the season
        logger.info('Getting the subtitle list of season %d', season)
        r = self.session.get(
            self.server_url +
            self.season_url.format(show_id=show_id, season=season_id),
            timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        subtitles = []
        # loop over episode rows
        for subtitle_group in soup.findAll('subg'):
            # read the episode info
            episode_info = subtitle_group.find('etitle')
            if episode_info is None:
                continue

            episodes = []
            episode_match = episode_re.match(episode_info['number'])
            if episode_match:
                episodes = [
                    int(e)
                    for e in [episode_match.group(1),
                              episode_match.group(3)] if e
                ]

            subtitle_info = subtitle_group.find('sgt')
            if subtitle_info is None:
                continue

            season = int(subtitle_info['ssnnum'])
            episode_id = int(subtitle_info['epsid'])

            # filter out unreleased subtitles
            for subs_tag in subtitle_group.findAll('sr'):
                if subs_tag['published_on'] == '':
                    continue

                page_link = self.server_url + self.page_link.format(
                    show_id=show_id,
                    season_id=season_id,
                    season=season,
                    episode=episode_id)
                title = episode_info['title']
                version = subs_tag.fmt.text + ' ' + subs_tag.team.text
                download_link = self.server_url + self.download_link.format(
                    int(subs_tag['rlsid']))

                for episode in episodes:
                    subtitle = self.subtitle_class(Language.fromalpha2('el'),
                                                   page_link, series, season,
                                                   episode, year, title,
                                                   version, download_link)
                    logger.debug('Found subtitle %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            # lookup show_id
            titles = [video.series] + video.alternative_series
            show_id = self.get_show_id(titles, video.year)

            # query for subtitles with the show_id
            if show_id:
                subtitles = [
                    s for s in self.query(show_id, video.series, video.season,
                                          video.year)
                    if s.language in languages and s.season == video.season
                    and s.episode == video.episode
                ]
                if subtitles:
                    return subtitles
            else:
                logger.error('No show id found for %r (%r)', video.series,
                             {'year': video.year})

        return []

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, XSubsSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link,
                                 headers={'Referer': subtitle.page_link},
                                 timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug(
                    'Unable to download subtitle. No data returned from provider'
                )
                return

            subtitle.content = fix_line_ending(r.content)
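A hedged usage sketch for the provider above; the credentials and filename are invented, and subliminal's Video.fromname helper is assumed to be available:

from subliminal import Video

video = Video.fromname('Some.Show.S01E01.720p.BluRay.x264-GRP.mkv')
provider = XSubsProvider(username='myuser', password='mypassword')  # both or neither must be given
provider.initialize()  # signs in and keeps the session cookies
try:
    found = provider.list_subtitles(video, provider.languages)  # Greek ('ell') only
    if found:
        provider.download_subtitle(found[0])
finally:
    provider.terminate()  # signs out and closes the session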
Example #27
0
class ZimukuProvider(Provider):
    """Zimuku Provider."""

    languages = {Language(l) for l in ["zho", "eng"]}

    server_url = "http://www.zimuku.la"
    search_url = "/search?q={}"
    download_url = "http://www.zimuku.la/"

    UserAgent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"

    subtitle_class = ZimukuSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers["User-Agent"] = "Subliminal/{}".format(__short_version__)

    def terminate(self):
        self.session.close()

    def _parse_episode_page(self, link, year):
        r = self.session.get(link)
        bs_obj = ParserBeautifulSoup(
            r.content.decode("utf-8", "ignore"), ["html.parser"]
        )
        subs_body = bs_obj.find("div", class_="subs box clearfix").find("tbody")
        subs = []
        for sub in subs_body.find_all("tr"):
            a = sub.find("a")
            name = _extract_name(a.text)
            name = os.path.splitext(name)[
                0
            ]  # remove ext because it can be an archive type

            language = Language("eng")
            for img in sub.find("td", class_="tac lang").find_all("img"):
                if (
                    "hongkong" in img.attrs["src"]
                    or "china" in img.attrs["src"]
                    or "jollyroger" in img.attrs["src"]
                ):
                    language = Language("zho")
                    break
            sub_page_link = urljoin(self.server_url, a.attrs["href"])
            backup_session = copy.deepcopy(self.session)
            backup_session.headers["Referer"] = link

            subs.append(
                self.subtitle_class(language, sub_page_link, name, backup_session, year)
            )

        return subs

    def query(self, keyword, season=None, episode=None, year=None):
        params = keyword
        if season:
            params += ".S{season:02d}".format(season=season)
        elif year:
            params += " {:4d}".format(year)

        logger.debug("Searching subtitles %r", params)
        subtitles = []
        search_link = self.server_url + text_type(self.search_url).format(params)

        r = self.session.get(search_link, timeout=30)
        r.raise_for_status()

        if not r.content:
            logger.debug("No data returned from provider")
            return []

        html = r.content.decode("utf-8", "ignore")
        # the search page may answer with a javascript redirect of the form
        # "url = '<part>' + url"; collect the parts, reverse and join them to
        # rebuild the real url, then follow it until no redirect script remains
        pattern = r"url\s*=\s*'([^']*)'\s*\+\s*url"
        parts = re.findall(pattern, html)
        redirect_url = search_link
        while parts:
            parts.reverse()
            redirect_url = urljoin(self.server_url, "".join(parts))
            r = self.session.get(redirect_url, timeout=30)
            html = r.content.decode("utf-8", "ignore")
            parts = re.findall(pattern, html)
        logger.debug("search url located: " + redirect_url)

        soup = ParserBeautifulSoup(
            r.content.decode("utf-8", "ignore"), ["lxml", "html.parser"]
        )

        # non-shooter result page
        if soup.find("div", {"class": "item"}):
            logger.debug("enter a non-shooter page")
            for item in soup.find_all("div", {"class": "item"}):
                title_a = item.find("p", class_="tt clearfix").find("a")
                subs_year = year
                if season:
                    # episode year in zimuku is the season's year not show's year
                    actual_subs_year = re.findall(r"\d{4}", title_a.text) or None
                    if actual_subs_year:
                        subs_year = int(actual_subs_year[0]) - season + 1
                    title = title_a.text
                    season_cn1 = re.search("第(.*)季", title)
                    if not season_cn1:
                        season_cn1 = "一"
                    else:
                        season_cn1 = season_cn1.group(1).strip()
                    season_cn2 = num_to_cn(str(season))
                    if season_cn1 != season_cn2:
                        continue
                episode_link = self.server_url + title_a.attrs["href"]
                new_subs = self._parse_episode_page(episode_link, subs_year)
                subtitles += new_subs

        # NOTE: shooter result pages are ignored due to the existence of assrt provider

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        elif isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles with the show_id
        for title in titles:
            if isinstance(video, Episode):
                subtitles += [
                    s
                    for s in self.query(
                        title,
                        season=video.season,
                        episode=video.episode,
                        year=video.year,
                    )
                    if s.language in languages
                ]
            elif isinstance(video, Movie):
                subtitles += [
                    s
                    for s in self.query(title, year=video.year)
                    if s.language in languages
                ]

        return subtitles

    def download_subtitle(self, subtitle):
        def _get_archive_download_link(session, sub_page_link):
            r = session.get(sub_page_link)
            bs_obj = ParserBeautifulSoup(
                r.content.decode("utf-8", "ignore"), ["html.parser"]
            )
            down_page_link = bs_obj.find("a", {"id": "down1"}).attrs["href"]
            down_page_link = urljoin(sub_page_link, down_page_link)
            r = session.get(down_page_link)
            bs_obj = ParserBeautifulSoup(
                r.content.decode("utf-8", "ignore"), ["html.parser"]
            )
            download_link = bs_obj.find("a", {"rel": "nofollow"})
            download_link = download_link.attrs["href"]
            download_link = urljoin(sub_page_link, download_link)
            return download_link

        # download the subtitle
        logger.info("Downloading subtitle %r", subtitle)
        self.session = subtitle.session
        download_link = _get_archive_download_link(self.session, subtitle.page_link)
        r = self.session.get(download_link, timeout=30)
        r.raise_for_status()
        filename = r.headers["Content-Disposition"]

        if not r.content:
            logger.debug("Unable to download subtitle. No data returned from provider")
            return

        archive_stream = io.BytesIO(r.content)
        archive = None
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            if ".rar" not in filename:
                logger.debug(
                    ".rar should be in the downloaded file name: {}".format(filename)
                )
                return
            archive = rarfile.RarFile(archive_stream)
            subtitle_content = _get_subtitle_from_archive(archive)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            if ".zip" not in filename:
                logger.debug(
                    ".zip should be in the downloaded file name: {}".format(filename)
                )
                return
            archive = zipfile.ZipFile(archive_stream)
            subtitle_content = _get_subtitle_from_archive(archive)
        else:
            is_sub = ""
            for sub_ext in SUBTITLE_EXTENSIONS:
                if sub_ext in filename:
                    is_sub = sub_ext
                    break
            if not is_sub:
                logger.debug(
                    "unknown subtitle ext int downloaded file name: {}".format(filename)
                )
                return
            logger.debug("Identified {} file".format(is_sub))
            subtitle_content = r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug("Could not extract subtitle from %r", archive)
Example #28
0
    def query(self,
              video,
              languages,
              hash=None,
              size=None,
              imdb_id=None,
              query=None,
              season=None,
              episode=None,
              tag=None,
              use_tag_search=False,
              only_foreign=False,
              also_foreign=False):
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            if season and episode:
                criteria.append({
                    'imdbid': imdb_id[2:],
                    'season': season,
                    'episode': episode
                })
            else:
                criteria.append({'imdbid': imdb_id[2:]})
        if query and season and episode:
            for q in query:
                criteria.append({
                    'query': q.replace('\'', ''),
                    'season': season,
                    'episode': episode
                })
        elif query:
            for q in query:
                criteria.append({'query': q.replace('\'', '')})
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(
                sorted(l.opensubtitles for l in languages))
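        # for illustration only (values invented): for an episode query with a known
        # hash/size, a tag, an imdb id and a title, the criteria built above would
        # roughly look like
        #   [{'moviehash': '<16 hex chars>', 'moviebytesize': '734003200'},
        #    {'tag': 'Show.S01E01.720p.BluRay.x264-GRP.mkv'},
        #    {'imdbid': '0306414', 'season': 1, 'episode': 1},
        #    {'query': 'Show Name', 'season': 1, 'episode': 1}]
        # with 'sublanguageid' (e.g. 'eng') added to every entry by the loop above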

        # query the server
        logger.info('Searching subtitles %r', criteria)
        response = self.use_token_or_login(lambda: self.retry(lambda: checked(
            lambda: self.server.SearchSubtitles(self.token, criteria))))

        subtitles = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return subtitles

        # loop over subtitle items
        for subtitle_item in response['data']:
            _subtitle_item = subtitle_item

            # in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
            if hasattr(_subtitle_item, "startswith"):
                _subtitle_item = response["data"][subtitle_item]

            # read the item
            language = Language.fromopensubtitles(
                _subtitle_item['SubLanguageID'])
            hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
            page_link = _subtitle_item['SubtitlesLink']
            subtitle_id = int(_subtitle_item['IDSubtitleFile'])
            matched_by = _subtitle_item['MatchedBy']
            movie_kind = _subtitle_item['MovieKind']
            hash = _subtitle_item['MovieHash']
            movie_name = _subtitle_item['MovieName']
            movie_release_name = _subtitle_item['MovieReleaseName']
            movie_year = int(_subtitle_item['MovieYear']
                             ) if _subtitle_item['MovieYear'] else None
            if season or episode:
                movie_imdb_id = 'tt' + _subtitle_item['SeriesIMDBParent']
            else:
                movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
            movie_fps = _subtitle_item.get('MovieFPS')
            series_season = int(_subtitle_item['SeriesSeason']
                                ) if _subtitle_item['SeriesSeason'] else None
            series_episode = int(_subtitle_item['SeriesEpisode']
                                 ) if _subtitle_item['SeriesEpisode'] else None
            filename = _subtitle_item['SubFileName']
            encoding = _subtitle_item.get('SubEncoding') or None
            foreign_parts_only = bool(
                int(_subtitle_item.get('SubForeignPartsOnly', 0)))

            # foreign/forced subtitles only wanted
            if only_foreign and not foreign_parts_only:
                continue

            # foreign/forced not wanted
            elif not only_foreign and not also_foreign and foreign_parts_only:
                continue

            # set subtitle language to forced if it's foreign_parts_only
            elif (also_foreign or only_foreign) and foreign_parts_only:
                language = Language.rebuild(language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if hearing_impaired:
                language = Language.rebuild(language, hi=True)

            if language not in languages:
                continue

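            # strip zero-padding from the video's imdb id (e.g. 'tt0468569' -> 'tt468569')
            # before comparing it with the id variant returned by opensubtitles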
            if video.imdb_id and (movie_imdb_id != re.sub(
                    "(?<![^a-zA-Z])0+", "", video.imdb_id)):
                continue

            query_parameters = _subtitle_item.get("QueryParameters")

            subtitle = self.subtitle_class(language,
                                           hearing_impaired,
                                           page_link,
                                           subtitle_id,
                                           matched_by,
                                           movie_kind,
                                           hash,
                                           movie_name,
                                           movie_release_name,
                                           movie_year,
                                           movie_imdb_id,
                                           series_season,
                                           series_episode,
                                           query_parameters,
                                           filename,
                                           encoding,
                                           movie_fps,
                                           skip_wrong_fps=self.skip_wrong_fps)
            subtitle.uploader = _subtitle_item['UserNickName'] or 'anonymous'
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            subtitles.append(subtitle)

        return subtitles
Example #29
0
class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
    only_foreign = False
    also_foreign = False
    subtitle_class = OpenSubtitlesSubtitle
    hash_verifiable = True
    hearing_impaired_verifiable = True
    skip_wrong_fps = True
    is_vip = False
    use_ssl = True
    timeout = 15

    default_url = "//api.opensubtitles.org/xml-rpc"
    vip_url = "//vip-api.opensubtitles.org/xml-rpc"

    languages = {
        Language.fromopensubtitles(l)
        for l in language_converters['szopensubtitles'].codes
    }
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    def __init__(self,
                 username=None,
                 password=None,
                 use_tag_search=False,
                 only_foreign=False,
                 also_foreign=False,
                 skip_wrong_fps=True,
                 is_vip=False,
                 use_ssl=True,
                 timeout=15):
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username or ''
        self.password = password or ''
        self.use_tag_search = use_tag_search
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign
        self.skip_wrong_fps = skip_wrong_fps
        self.token = None
        self.is_vip = is_vip
        self.use_ssl = use_ssl
        self.timeout = timeout

        logger.debug("Using timeout: %d", timeout)

        if use_ssl:
            logger.debug("Using HTTPS connection")

        self.default_url = ("https:"
                            if use_ssl else "http:") + self.default_url
        self.vip_url = ("https:" if use_ssl else "http:") + self.vip_url

        if use_tag_search:
            logger.info("Using tag/exact filename search")

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

    def get_server_proxy(self, url, timeout=None):
        return ServerProxy(
            url,
            SubZeroRequestsTransport(use_https=self.use_ssl,
                                     timeout=timeout or self.timeout,
                                     user_agent=os.environ.get(
                                         "SZ_USER_AGENT", "Sub-Zero/2")))

    def log_in_url(self, server_url):
        self.token = None
        self.server = self.get_server_proxy(server_url)

        response = self.retry(lambda: checked(lambda: self.server.LogIn(
            self.username, self.password, 'eng',
            os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))))

        self.token = response['token']
        logger.debug('Logged in with token %r',
                     self.token[:10] + "X" * (len(self.token) - 10))

        region.set("os_token", bytearray(self.token, encoding='utf-8'))
        region.set("os_server_url", bytearray(server_url, encoding='utf-8'))

    def log_in(self):
        logger.info('Logging in')

        try:
            self.log_in_url(self.vip_url if self.is_vip else self.default_url)

        except Unauthorized:
            if self.is_vip:
                logger.info("VIP server login failed, falling back")
                try:
                    self.log_in_url(self.default_url)
                except Unauthorized:
                    pass

        if not self.token:
            logger.error("Login failed, please check your credentials")
            raise Unauthorized

    def use_token_or_login(self, func):
        if not self.token:
            self.log_in()
            return func()
        try:
            return func()
        except Unauthorized:
            self.log_in()
            return func()

    def initialize(self):
        token_cache = region.get("os_token")
        url_cache = region.get("os_server_url")

        if token_cache is not NO_VALUE and url_cache is not NO_VALUE:
            self.token = token_cache.decode("utf-8")
            self.server = self.get_server_proxy(url_cache.decode("utf-8"))
            logger.debug("Using previous login token: %r",
                         self.token[:10] + "X" * (len(self.token) - 10))
        else:
            self.server = None
            self.token = None

    def terminate(self):
        self.server = None
        self.token = None

    def list_subtitles(self, video, languages):
        """
        :param video:
        :param languages:
        :return:

         patch: query movies even if hash is known; add tag parameter
        """

        season = episode = None
        if isinstance(video, Episode):
            query = [video.series] + video.alternative_series
            season = video.season
            episode = min(video.episode) if isinstance(
                video.episode, list) else video.episode

            if video.is_special:
                season = None
                episode = None
                query = [
                    u"%s %s" % (series, video.title)
                    for series in [video.series] + video.alternative_series
                ]
                logger.info("%s: Searching for special: %r", self.__class__,
                            query)
        # elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
        #    query = video.name.split(os.sep)[-1]
        else:
            query = [video.title] + video.alternative_titles

        return self.query(video,
                          languages,
                          hash=video.hashes.get('opensubtitles'),
                          size=video.size,
                          imdb_id=video.imdb_id,
                          query=query,
                          season=season,
                          episode=episode,
                          tag=video.original_name,
                          use_tag_search=self.use_tag_search,
                          only_foreign=self.only_foreign,
                          also_foreign=self.also_foreign)

    def query(self,
              video,
              languages,
              hash=None,
              size=None,
              imdb_id=None,
              query=None,
              season=None,
              episode=None,
              tag=None,
              use_tag_search=False,
              only_foreign=False,
              also_foreign=False):
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            if season and episode:
                criteria.append({
                    'imdbid': imdb_id[2:],
                    'season': season,
                    'episode': episode
                })
            else:
                criteria.append({'imdbid': imdb_id[2:]})
        if query and season and episode:
            for q in query:
                criteria.append({
                    'query': q.replace('\'', ''),
                    'season': season,
                    'episode': episode
                })
        elif query:
            for q in query:
                criteria.append({'query': q.replace('\'', '')})
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(
                sorted(l.opensubtitles for l in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)
        response = self.use_token_or_login(lambda: self.retry(lambda: checked(
            lambda: self.server.SearchSubtitles(self.token, criteria))))

        subtitles = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return subtitles

        # loop over subtitle items
        for subtitle_item in response['data']:
            _subtitle_item = subtitle_item

            # in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
            if hasattr(_subtitle_item, "startswith"):
                _subtitle_item = response["data"][subtitle_item]

            # read the item
            language = Language.fromopensubtitles(
                _subtitle_item['SubLanguageID'])
            hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
            page_link = _subtitle_item['SubtitlesLink']
            subtitle_id = int(_subtitle_item['IDSubtitleFile'])
            matched_by = _subtitle_item['MatchedBy']
            movie_kind = _subtitle_item['MovieKind']
            hash = _subtitle_item['MovieHash']
            movie_name = _subtitle_item['MovieName']
            movie_release_name = _subtitle_item['MovieReleaseName']
            movie_year = int(_subtitle_item['MovieYear']
                             ) if _subtitle_item['MovieYear'] else None
            if season or episode:
                movie_imdb_id = 'tt' + _subtitle_item['SeriesIMDBParent']
            else:
                movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
            movie_fps = _subtitle_item.get('MovieFPS')
            series_season = int(_subtitle_item['SeriesSeason']
                                ) if _subtitle_item['SeriesSeason'] else None
            series_episode = int(_subtitle_item['SeriesEpisode']
                                 ) if _subtitle_item['SeriesEpisode'] else None
            filename = _subtitle_item['SubFileName']
            encoding = _subtitle_item.get('SubEncoding') or None
            foreign_parts_only = bool(
                int(_subtitle_item.get('SubForeignPartsOnly', 0)))

            # foreign/forced subtitles only wanted
            if only_foreign and not foreign_parts_only:
                continue

            # foreign/forced not wanted
            elif not only_foreign and not also_foreign and foreign_parts_only:
                continue

            # set subtitle language to forced if it's foreign_parts_only
            elif (also_foreign or only_foreign) and foreign_parts_only:
                language = Language.rebuild(language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if hearing_impaired:
                language = Language.rebuild(language, hi=True)

            if language not in languages:
                continue

            if video.imdb_id and (movie_imdb_id != re.sub(
                    "(?<![^a-zA-Z])0+", "", video.imdb_id)):
                continue

            query_parameters = _subtitle_item.get("QueryParameters")

            subtitle = self.subtitle_class(language,
                                           hearing_impaired,
                                           page_link,
                                           subtitle_id,
                                           matched_by,
                                           movie_kind,
                                           hash,
                                           movie_name,
                                           movie_release_name,
                                           movie_year,
                                           movie_imdb_id,
                                           series_season,
                                           series_episode,
                                           query_parameters,
                                           filename,
                                           encoding,
                                           movie_fps,
                                           skip_wrong_fps=self.skip_wrong_fps)
            subtitle.uploader = _subtitle_item['UserNickName'] or 'anonymous'
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            subtitles.append(subtitle)

        return subtitles

    def download_subtitle(self, subtitle):
        logger.info('Downloading subtitle %r', subtitle)
        response = self.use_token_or_login(
            lambda: checked(lambda: self.server.DownloadSubtitles(
                self.token, [str(subtitle.subtitle_id)])))
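        # the payload is base64-encoded and compressed; wbits=47 (32 + 15) lets zlib
        # auto-detect a gzip or zlib header when decompressing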
        subtitle.content = fix_line_ending(
            zlib.decompress(base64.b64decode(response['data'][0]['data']), 47))
Example #30
0
    def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None, tag=None,
              use_tag_search=False, only_foreign=False, also_foreign=False):
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            if season and episode:
                criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
            else:
                criteria.append({'imdbid': imdb_id[2:]})
        if query and season and episode:
            for q in query:
                criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
        elif query:
            for q in query:
                criteria.append({'query': q.replace('\'', '')})
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)
        response = self.use_token_or_login(
            lambda: self.retry(lambda: checked(lambda: self.server.SearchSubtitles(self.token, criteria)))
        )

        subtitles = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return subtitles

        # loop over subtitle items
        for subtitle_item in response['data']:
            _subtitle_item = subtitle_item

            # in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
            if hasattr(_subtitle_item, "startswith"):
                _subtitle_item = response["data"][subtitle_item]

            # read the item
            language = Language.fromopensubtitles(_subtitle_item['SubLanguageID'])
            hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
            page_link = _subtitle_item['SubtitlesLink']
            subtitle_id = int(_subtitle_item['IDSubtitleFile'])
            matched_by = _subtitle_item['MatchedBy']
            movie_kind = _subtitle_item['MovieKind']
            hash = _subtitle_item['MovieHash']
            movie_name = _subtitle_item['MovieName']
            movie_release_name = _subtitle_item['MovieReleaseName']
            movie_year = int(_subtitle_item['MovieYear']) if _subtitle_item['MovieYear'] else None
            movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
            movie_fps = _subtitle_item.get('MovieFPS')
            series_season = int(_subtitle_item['SeriesSeason']) if _subtitle_item['SeriesSeason'] else None
            series_episode = int(_subtitle_item['SeriesEpisode']) if _subtitle_item['SeriesEpisode'] else None
            filename = _subtitle_item['SubFileName']
            encoding = _subtitle_item.get('SubEncoding') or None
            foreign_parts_only = bool(int(_subtitle_item.get('SubForeignPartsOnly', 0)))

            # foreign/forced subtitles only wanted
            if only_foreign and not foreign_parts_only:
                continue

            # foreign/forced not wanted
            elif not only_foreign and not also_foreign and foreign_parts_only:
                continue

            # set subtitle language to forced if it's foreign_parts_only
            elif (also_foreign or only_foreign) and foreign_parts_only:
                language = Language.rebuild(language, forced=True)

            if language not in languages:
                continue

            query_parameters = _subtitle_item.get("QueryParameters")

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                           movie_kind,
                                           hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
                                           series_season, series_episode, query_parameters, filename, encoding,
                                           movie_fps, skip_wrong_fps=self.skip_wrong_fps)
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            subtitles.append(subtitle)

        return subtitles
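To make the criteria assembly at the top of query easier to follow, here is a hedged sketch of what the list sent to SearchSubtitles looks like for a combined hash and IMDB episode lookup; the concrete values are invented and plain OpenSubtitles language codes stand in for the Language objects used above:

# invented example inputs
languages = ['eng', 'fre']

criteria = [
    # hash + size criterion (used when both values are known)
    {'moviehash': '18379ac9af039390', 'moviebytesize': '366876694'},
    # imdb criterion with season/episode (the leading 'tt' is stripped via imdb_id[2:])
    {'imdbid': '0903747', 'season': 2, 'episode': 5},
]

# every criterion additionally carries the wanted languages
for criterion in criteria:
    criterion['sublanguageid'] = ','.join(sorted(languages))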
Beispiel #31
0
    def get_language(self, text):
        if text == '1.gif':
            return Language.fromhosszupuska('hu')
        if text == '2.gif':
            return Language.fromhosszupuska('en')
        return None
Beispiel #32
0
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page to avoid
    massive hammering, thus it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))
    video_types = (Episode, Movie)
    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 8  # seconds

    def __init__(self, only_foreign=False, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

        prev_cookies = region.get("subscene_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug("Re-using old subscene cookies: %r", prev_cookies)
            self.session.cookies.update(prev_cookies)

        else:
            logger.debug("Logging in")
            self.login()

    def login(self):
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.text:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        match = re.search(
            r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>",
            r.text)

        if match:
            h = html
            data = json.loads(h.unescape(match.group(1)))
            login_url = parse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(
                login_url, {
                    "username": self.username,
                    "password": self.password,
                    data["antiForgery"]["name"]: data["antiForgery"]["value"]
                })
            pep_content = re.search(
                r"<form method=\"post\" action=\"https://subscene\.com/\">"
                r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                r"scope.+?value=\"(?P<scope>.+?)\".+?"
                r"state.+?value=\"(?P<state>.+?)\".+?"
                r"session_state.+?value=\"(?P<session_state>.+?)\"", r.text,
                re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    raise ProviderError(
                        "Something went wrong when trying to log in: %s" %
                        traceback.format_exc())
                else:
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf",
                                 "idsvr.clients", "idsvr.session",
                                 "idsvr.username")
                    for cn in self.session.cookies.keys():
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return
        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        self.filters = dict(HearingImpaired="2")
        acc_filters = self.filters.copy()
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            acc_filters["ForeignOnly"] = self.filters["ForeignOnly"].lower()
            logger.info("Only searching for foreign/forced subtitles")

        selected_ids = []
        for l in languages:
            lid = language_ids.get(l.basename,
                                   language_ids.get(l.alpha3, None))
            if lid:
                selected_ids.append(str(lid))

        acc_filters["SelectedIds"] = selected_ids
        self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"])

        last_filters = region.get("subscene_filters")
        if last_filters != acc_filters:
            region.set("subscene_filters", acc_filters)
            logger.debug("Setting account filters to %r", acc_filters)
            self.session.post("https://u.subscene.com/filter",
                              acc_filters,
                              allow_redirects=False)

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info(
                "Skipping search because we don't know the original release name"
            )
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            if rarfile.is_rarfile(io.BytesIO(subtitle.pack_data)):
                logger.debug('Identified rar archive')
                archive = rarfile.RarFile(io.BytesIO(subtitle.pack_data))
            elif zipfile.is_zipfile(io.BytesIO(subtitle.pack_data)):
                logger.debug('Identified zip archive')
                archive = zipfile.ZipFile(io.BytesIO(subtitle.pack_data))
            else:
                logger.error('Unsupported compressed format')
                return
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(
                    subtitle, archive)
                return
            except ProviderError:
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session),
                             timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)

        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            logger.error('Unsupported compressed format')
            return

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language,
                                                     forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if subtitle.hearing_impaired:
                subtitle.language = Language.rebuild(subtitle.language,
                                                     hi=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def do_search(self, *args, **kwargs):
        try:
            return search(*args, **kwargs)
        except requests.HTTPError:
            region.delete("subscene_cookies2")

    def query(self, video):
        subtitles = []
        if isinstance(video, Episode):
            titles = list(set([video.series] + video.alternative_series[:1]))
            more_than_one = len(titles) > 1
            for series in titles:
                term = u"%s - %s Season" % (
                    series, p.number_to_words(
                        "%sth" % video.season).capitalize())
                logger.debug('Searching with series and season: %s', term)
                film = self.do_search(term,
                                      session=self.session,
                                      release=False,
                                      throttle=self.search_throttle,
                                      limit_to=SearchTypes.TvSerie)
                if not film and video.season == 1:
                    logger.debug('Searching with series name: %s', series)
                    film = self.do_search(series,
                                          session=self.session,
                                          release=False,
                                          throttle=self.search_throttle,
                                          limit_to=SearchTypes.TvSerie)

                if film and film.subtitles:
                    logger.debug('Searching found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No results found')

                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            titles = list(set([video.title] + video.alternative_titles[:1]))
            more_than_one = len(titles) > 1
            for title in titles:
                logger.debug('Searching for movie results: %r', title)
                film = self.do_search(title,
                                      year=video.year,
                                      session=self.session,
                                      limit_to=None,
                                      release=False,
                                      throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)
                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
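The season search term built in query above deserves a small illustration: assuming p is an inflect engine (its construction is not shown in the snippet), the season number is spelled out as an ordinal word so the term matches Subscene's listing style. The series name below is invented:

import inflect

p = inflect.engine()
series = "Breaking Bad"   # invented example
season = 2

# mirrors the construction in query(); expected to yield something like
# u'Breaking Bad - Second Season'
term = u"%s - %s Season" % (series,
                            p.number_to_words("%sth" % season).capitalize())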
Beispiel #33
0
    @classmethod
    def from_api(cls, s):
        return cls(Language.fromsubscene(s.language.strip()), s.title, hearing_impaired=s.hearing_impaired,
                   page_link=s.url)
Beispiel #34
0
class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
    """OpenSubtitlesCom Provider"""
    server_url = 'https://api.opensubtitles.com/api/v1/'

    languages = {
        Language.fromopensubtitles(lang)
        for lang in language_converters['szopensubtitles'].codes
    }
    languages.update(
        set(Language.rebuild(lang, forced=True) for lang in languages))

    video_types = (Episode, Movie)

    def __init__(self,
                 username=None,
                 password=None,
                 use_hash=True,
                 api_key=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        if not api_key:
            raise ConfigurationError('Api_key must be specified')

        self.session = Session()
        self.session.headers = {
            'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
            'Api-Key': api_key,
            'Content-Type': 'application/json'
        }
        self.token = None
        self.username = username
        self.password = password
        self.video = None
        self.use_hash = use_hash

    def initialize(self):
        self.token = region.get("oscom_token",
                                expiration_time=TOKEN_EXPIRATION_TIME)
        if self.token is NO_VALUE:
            self.login()

    def terminate(self):
        self.session.close()

    def login(self):
        try:
            r = self.session.post(self.server_url + 'login',
                                  json={
                                      "username": self.username,
                                      "password": self.password
                                  },
                                  allow_redirects=False,
                                  timeout=30)
        except (ConnectionError, Timeout, ReadTimeout) as e:
            raise ServiceUnavailable('Unknown Error, empty response: %r' % e)
        else:
            if r.status_code == 200:
                try:
                    self.token = r.json()['token']
                except ValueError:
                    raise ProviderError('Invalid JSON returned by provider')
                else:
                    region.set("oscom_token", self.token)
                    return
            elif r.status_code == 401:
                raise AuthenticationError('Login failed: {}'.format(r.reason))
            elif r.status_code == 429:
                raise TooManyRequests()
            elif r.status_code == 503:
                raise ProviderError(r.reason)
            else:
                raise ProviderError('Bad status code: {}'.format(
                    r.status_code))

    @staticmethod
    def sanitize_external_ids(external_id):
        if isinstance(external_id, str):
            external_id = external_id.lower().lstrip('tt').lstrip('0')
        sanitized_id = external_id[:-1].lstrip('0') + external_id[-1]
        return int(sanitized_id)
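
    # Descriptive trace of the sanitizer above: for the string 'tt0137523',
    # lower().lstrip('tt') removes the leading 't' characters leaving '0137523',
    # lstrip('0') removes the leading zeros leaving '137523', and the
    # [:-1] / [-1] split reassembles the same digits, so the method returns the
    # integer 137523.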

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_titles(self, title):
        title_id = None

        parameters = {'query': title.lower()}
        logging.debug('Searching using this title: {}'.format(title))

        results = self.session.get(self.server_url + 'features',
                                   params=parameters,
                                   timeout=30)

        if results.status_code == 401:
            logging.debug(
                'Authentication failed: clearing cache and attempting to login.'
            )
            region.delete("oscom_token")
            self.login()

            results = self.session.get(self.server_url + 'features',
                                       params=parameters,
                                       timeout=30)

            if results.status_code == 429:
                raise TooManyRequests()
            elif results.status_code == 503:
                raise ProviderError(results.reason)
        elif results.status_code == 429:
            raise TooManyRequests()
        elif results.status_code == 503:
            raise ProviderError(results.reason)

        # deserialize results
        try:
            results_dict = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            # loop over results
            for result in results_dict:
                if 'title' in result['attributes']:
                    if isinstance(self.video, Episode):
                        if fix_tv_naming(title).lower() == result['attributes']['title'].lower() and \
                                (not self.video.year or self.video.year == int(result['attributes']['year'])):
                            title_id = result['id']
                            break
                    else:
                        if fix_movie_naming(title).lower() == result['attributes']['title'].lower() and \
                                (not self.video.year or self.video.year == int(result['attributes']['year'])):
                            title_id = result['id']
                            break
                else:
                    continue

            if title_id:
                logging.debug('Found this title ID: {}'.format(title_id))
                return self.sanitize_external_ids(title_id)
        finally:
            if not title_id:
                logger.debug('No match found for {}'.format(title))

    def query(self, languages, video):
        self.video = video
        if self.use_hash:
            file_hash = self.video.hashes.get('opensubtitlescom')
            logging.debug('Searching using this hash: {}'.format(file_hash))
        else:
            file_hash = None

        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        imdb_id = None
        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.series_imdb_id)
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.imdb_id)

        title_id = None
        if not imdb_id:
            title_id = self.search_titles(title)
            if not title_id:
                return []

        lang_strings = [str(lang.basename) for lang in languages]
        only_foreign = all([lang.forced for lang in languages])
        also_foreign = any([lang.forced for lang in languages])
        if only_foreign:
            forced = 'only'
        elif also_foreign:
            forced = 'include'
        else:
            forced = 'exclude'

        langs = ','.join(lang_strings)
        logging.debug('Searching for these languages: {}'.format(lang_strings))

        # query the server
        if isinstance(self.video, Episode):
            res = self.session.get(
                self.server_url + 'subtitles',
                params=(('episode_number',
                         self.video.episode), ('foreign_parts_only', forced),
                        ('languages', langs.lower()), ('moviehash', file_hash),
                        ('parent_feature_id', title_id) if title_id else
                        ('imdb_id', imdb_id), ('season_number',
                                               self.video.season),
                        ('query', os.path.basename(self.video.name))),
                timeout=30)
        else:
            res = self.session.get(
                self.server_url + 'subtitles',
                params=(('foreign_parts_only', forced),
                        ('id', title_id) if title_id else ('imdb_id', imdb_id),
                        ('languages', langs.lower()), ('moviehash', file_hash),
                        ('query', os.path.basename(self.video.name))),
                timeout=30)

        if res.status_code == 429:
            raise TooManyRequests()

        elif res.status_code == 503:
            raise ProviderError(res.reason)

        subtitles = []

        try:
            result = res.json()
            if 'data' not in result:
                raise ValueError
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            logging.debug('Query returned {} subtitles'.format(
                len(result['data'])))

            if len(result['data']):
                for item in result['data']:
                    if 'season_number' in item['attributes'][
                            'feature_details']:
                        season_number = item['attributes']['feature_details'][
                            'season_number']
                    else:
                        season_number = None

                    if 'episode_number' in item['attributes'][
                            'feature_details']:
                        episode_number = item['attributes']['feature_details'][
                            'episode_number']
                    else:
                        episode_number = None

                    if 'moviehash_match' in item['attributes']:
                        moviehash_match = item['attributes']['moviehash_match']
                    else:
                        moviehash_match = False

                    if len(item['attributes']['files']):
                        subtitle = OpenSubtitlesComSubtitle(
                            language=Language.fromietf(
                                item['attributes']['language']),
                            forced=item['attributes']['foreign_parts_only'],
                            hearing_impaired=item['attributes']
                            ['hearing_impaired'],
                            page_link=item['attributes']['url'],
                            file_id=item['attributes']['files'][0]['file_id'],
                            releases=item['attributes']['release'],
                            uploader=item['attributes']['uploader']['name'],
                            title=item['attributes']['feature_details']
                            ['movie_name'],
                            year=item['attributes']['feature_details']['year'],
                            season=season_number,
                            episode=episode_number,
                            hash_matched=moviehash_match)
                        subtitle.get_matches(self.video)
                        subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def download_subtitle(self, subtitle):
        if self.token is NO_VALUE:
            logger.debug("No cached token, we'll try to login again.")
            self.login()
        if self.token is NO_VALUE:
            logger.debug(
                "Unable to obtain an authentication token right now, we'll try again later."
            )
            raise ProviderError("Unable to obtain an authentication token")

        logger.info('Downloading subtitle %r', subtitle)

        headers = {
            'Accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + self.token
        }
        res = self.session.post(self.server_url + 'download',
                                json={
                                    'file_id': subtitle.file_id,
                                    'sub_format': 'srt'
                                },
                                headers=headers,
                                timeout=30)
        if res.status_code == 429:
            raise TooManyRequests()
        elif res.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        elif res.status_code == 503:
            raise ProviderError(res.reason)
        else:
            try:
                subtitle.download_link = res.json()['link']
            except ValueError:
                raise ProviderError('Invalid JSON returned by provider')
            else:
                r = self.session.get(subtitle.download_link, timeout=30)

                if r.status_code == 429:
                    raise TooManyRequests()
                elif r.status_code == 406:
                    raise DownloadLimitExceeded("Daily download limit reached")
                elif r.status_code == 503:
                    raise ProviderError(r.reason)

                subtitle_content = r.content

                if subtitle_content:
                    subtitle.content = fix_line_ending(subtitle_content)
                else:
                    logger.debug('Could not download subtitle from {}'.format(
                        subtitle.download_link))
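The foreign_parts_only request parameter derived in query above follows a simple three-way rule. The helper below is a hypothetical restatement of that logic, not part of the provider:

def foreign_parts_param(languages):
    # mirrors the only_foreign / also_foreign flags in query():
    # all forced -> 'only', some forced -> 'include', none forced -> 'exclude'
    forced = [getattr(lang, 'forced', False) for lang in languages]
    if all(forced):
        return 'only'
    if any(forced):
        return 'include'
    return 'exclude'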
Beispiel #35
0
def test_get_matches_movie_name(movies):
    subtitle = AssrtSubtitle(
        Language('zho'), 618185,
        'Man.Of.Steel.2013.BluRay.720p.x264.AC3.2Audios-CMCT', None, None)
    matches = subtitle.get_matches(movies['man_of_steel'])
    assert matches == {'title', 'format', 'year', 'video_codec', 'resolution'}
Beispiel #36
0
def test_query_movie_zh(movies):
    languages = [Language('zho')]
    video = movies['man_of_steel']
    with AssrtProvider(TOKEN) as provider:
        subtitles = provider.query(languages, video)
        assert len(subtitles) == 16
Beispiel #37
0
    def query(self, languages, video):
        self.video = video
        if self.use_hash:
            file_hash = self.video.hashes.get('opensubtitlescom')
            logging.debug('Searching using this hash: {}'.format(file_hash))
        else:
            file_hash = None

        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        imdb_id = None
        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.series_imdb_id)
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.imdb_id)

        title_id = None
        if not imdb_id:
            title_id = self.search_titles(title)
            if not title_id:
                return []

        lang_strings = [str(lang.basename) for lang in languages]
        only_foreign = all([lang.forced for lang in languages])
        also_foreign = any([lang.forced for lang in languages])
        if only_foreign:
            forced = 'only'
        elif also_foreign:
            forced = 'include'
        else:
            forced = 'exclude'

        langs = ','.join(lang_strings)
        logging.debug('Searching for these languages: {}'.format(lang_strings))

        # query the server
        if isinstance(self.video, Episode):
            res = self.session.get(
                self.server_url + 'subtitles',
                params=(('episode_number',
                         self.video.episode), ('foreign_parts_only', forced),
                        ('languages', langs.lower()), ('moviehash', file_hash),
                        ('parent_feature_id', title_id) if title_id else
                        ('imdb_id', imdb_id), ('season_number',
                                               self.video.season),
                        ('query', os.path.basename(self.video.name))),
                timeout=30)
        else:
            res = self.session.get(
                self.server_url + 'subtitles',
                params=(('foreign_parts_only', forced),
                        ('id', title_id) if title_id else ('imdb_id', imdb_id),
                        ('languages', langs.lower()), ('moviehash', file_hash),
                        ('query', os.path.basename(self.video.name))),
                timeout=30)

        if res.status_code == 429:
            raise TooManyRequests()

        elif res.status_code == 503:
            raise ProviderError(res.reason)

        subtitles = []

        try:
            result = res.json()
            if 'data' not in result:
                raise ValueError
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            logging.debug('Query returned {} subtitles'.format(
                len(result['data'])))

            if len(result['data']):
                for item in result['data']:
                    if 'season_number' in item['attributes'][
                            'feature_details']:
                        season_number = item['attributes']['feature_details'][
                            'season_number']
                    else:
                        season_number = None

                    if 'episode_number' in item['attributes'][
                            'feature_details']:
                        episode_number = item['attributes']['feature_details'][
                            'episode_number']
                    else:
                        episode_number = None

                    if 'moviehash_match' in item['attributes']:
                        moviehash_match = item['attributes']['moviehash_match']
                    else:
                        moviehash_match = False

                    if len(item['attributes']['files']):
                        subtitle = OpenSubtitlesComSubtitle(
                            language=Language.fromietf(
                                item['attributes']['language']),
                            forced=item['attributes']['foreign_parts_only'],
                            hearing_impaired=item['attributes']
                            ['hearing_impaired'],
                            page_link=item['attributes']['url'],
                            file_id=item['attributes']['files'][0]['file_id'],
                            releases=item['attributes']['release'],
                            uploader=item['attributes']['uploader']['name'],
                            title=item['attributes']['feature_details']
                            ['movie_name'],
                            year=item['attributes']['feature_details']['year'],
                            season=season_number,
                            episode=episode_number,
                            hash_matched=moviehash_match)
                        subtitle.get_matches(self.video)
                        subtitles.append(subtitle)

        return subtitles
Beispiel #38
0
    def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
              also_foreign=False):
        search_language = str(language).lower()

        # sr-Cyrl specialcase
        if search_language == "sr-cyrl":
            search_language = "sr"

        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': search_language, 'sK': keyword}

        is_episode = False
        if season and episode:
            is_episode = True
            params['sTS'] = season
            params['sTE'] = episode

        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []
        pids = set()
        while True:
            # query the server
            content = None
            try:
                content = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
                xml = etree.fromstring(content)
            except etree.ParseError:
                logger.error("Wrong data returned: %r", content)
                break

            # exit if no results
            if not int(xml.find('pagination/results').text):
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for subtitle_xml in xml.findall('subtitle'):
                # read xml elements
                pid = subtitle_xml.find('pid').text
                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in pids:
                    continue

                _language = Language.fromietf(subtitle_xml.find('language').text)
                hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
                foreign = 'f' in (subtitle_xml.find('flags').text or '')
                if only_foreign and not foreign:
                    continue

                elif not only_foreign and not also_foreign and foreign:
                    continue

                elif also_foreign and foreign:
                    _language = Language.rebuild(_language, forced=True)

                if language != _language:
                    continue

                page_link = subtitle_xml.find('url').text
                releases = []
                if subtitle_xml.find('release').text:
                    for release in subtitle_xml.find('release').text.split():
                        releases.append(re.sub(r'\.+$', '', release))  # remove trailing dots
                title = subtitle_xml.find('title').text
                r_season = int(subtitle_xml.find('tvSeason').text)
                r_episode = int(subtitle_xml.find('tvEpisode').text)
                r_year = int(subtitle_xml.find('year').text)

                if is_episode:
                    subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                                   season=r_season, episode=r_episode, year=r_year,
                                                   asked_for_release_group=video.release_group,
                                                   asked_for_episode=episode)
                else:
                    subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                                   year=r_year, asked_for_release_group=video.release_group)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
                pids.add(pid)

            # stop on last page
            if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
                break

            # increment current page
            params['page'] = int(xml.find('pagination/current').text) + 1
            logger.debug('Getting page %d', params['page'])
            xml = None

        return subtitles
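For orientation, a hedged example of the parameters the pagination loop above sends to the search/old endpoint when looking for an English episode with a year; the values are invented:

params = {
    'sXML': 1,             # request XML output
    'sL': 'en',            # search language ('sr-cyrl' is special-cased to 'sr')
    'sK': 'Breaking Bad',  # keyword
    'sTS': 2,              # season
    'sTE': 5,              # episode
    'sY': 2008,            # year
}
# later pages simply add: params['page'] = <current page> + 1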
Beispiel #39
0
class SubtitriIdProvider(Provider, ProviderSubtitleArchiveMixin):
    """subtitri.id.lv Provider."""
    subtitle_class = SubtitriIdSubtitle
    languages = {Language('lav', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
    video_types = (Movie, )
    server_url = 'http://subtitri.id.lv'
    search_url = server_url + '/search/'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(
            0,
            len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

    def query(self, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('.eBlock')
        for row in rows:
            result_anchor_el = row.select_one('.eTitle > a')

            # page link
            page_link = result_anchor_el.get('href')

            # fetch/parse additional info
            r = self.session.get(page_link, timeout=10)
            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['lxml', 'html.parser'])

            # title
            movie_titles_string = soup.select_one('.main-header').text.strip()
            movie_titles_list = movie_titles_string.split(' / ')
            title = movie_titles_list[-1]

            # year
            year = soup.select_one('#film-page-year').text.strip()

            # imdb id
            imdb_link = soup.select_one('#actors-page > a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # download link
            href = soup.select_one('.hvr').get('href')
            download_link = self.server_url + href

            # create/add the subitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'),
                                           page_link, download_link, title,
                                           year, imdb_id)
            logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles
        for title in titles:
            if isinstance(video, Movie):
                subtitles += [
                    s for s in self.query(title) if s.language in languages
                ]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubtitriIdSubtitle):
            # download the subtitle
            r = self.session.get(subtitle.download_link, timeout=10)
            r.raise_for_status()

            # open the archive
            archive_stream = io.BytesIO(r.content)
            if is_rarfile(archive_stream):
                archive = RarFile(archive_stream)
            elif is_zipfile(archive_stream):
                archive = ZipFile(archive_stream)
            else:
                subtitle.content = r.content
                if subtitle.is_valid():
                    return
                subtitle.content = None

                raise ProviderError('Unidentified archive type')

            subtitle.content = self.get_subtitle_from_archive(
                subtitle, archive)
Beispiel #40
0
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None,
                    message=None):
    """
    displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
    :param rating_key:
    :param title:
    :param base_title:
    :param item_title:
    :param randomize:
    :return:
    """
    from interface.main import InclExclMenu

    title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
    item = plex_item = get_item(rating_key)
    current_kind = get_item_kind_from_rating_key(rating_key)

    timeout = 30

    oc = SubFolderObjectContainer(
            title2=title,
            replace_parent=True,
            header=header,
            message=message)

    if not item:
        oc.add(DirectoryObject(
            key=Callback(
                    ItemDetailsMenu,
                    rating_key=rating_key,
                    title=title,
                    base_title=base_title,
                    item_title=item_title,
                    randomize=timestamp()),
            title=_(u"Item not found: %s!", item_title),
            summary=_("Plex didn't return any information about the item, please refresh it and come back later"),
            thumb=default_thumb
        ))
        return oc

    # add back to season for episode
    if current_kind == "episode":
        from interface.menu import MetadataMenu
        show = get_item(item.show.rating_key)
        season = get_item(item.season.rating_key)

        oc.add(DirectoryObject(
            key=Callback(
                    MetadataMenu,
                    rating_key=season.rating_key,
                    title=season.title,
                    base_title=show.title,
                    previous_item_type="show",
                    previous_rating_key=show.rating_key,
                    display_items=True,
                    randomize=timestamp()),
            title=_(u"< Back to %s", season.title),
            summary=_("Back to %s > %s", show.title, season.title),
            thumb=season.thumb or default_thumb
        ))

    oc.add(DirectoryObject(
        key=Callback(
                RefreshItem,
                rating_key=rating_key,
                item_title=item_title,
                randomize=timestamp(),
                timeout=timeout * 1000),
        title=_(u"Refresh: %s", item_title),
        summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
                  "new subtitles on disk", the_movie_series_season_episode=_(u"the %s" % current_kind)),
        thumb=item.thumb or default_thumb
    ))
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
                     timeout=timeout * 1000),
        title=_(u"Force-find subtitles: %(item_title)s", item_title=item_title),
        summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones"),
        thumb=item.thumb or default_thumb
    ))

    # get stored subtitle info for item id
    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)

    # look for subtitles for all available media parts and all of their languages
    has_multiple_parts = len(plex_item.media) > 1
    part_index = 0
    for media in plex_item.media:
        for part in media.parts:
            filename = os.path.basename(part.file)
            if not os.path.exists(part.file):
                continue

            part_id = str(part.id)
            part_index += 1

            part_index_addon = u""
            part_summary_addon = u""
            if has_multiple_parts:
                part_index_addon = _(u"File %(file_part_index)s: ", file_part_index=part_index)
                part_summary_addon = u"%s " % filename

            # iterate through all configured languages
            for lang in config.lang_list:
                # get corresponding stored subtitle data for that media part (physical media item), for language
                current_sub = stored_subs.get_any(part_id, lang)
                current_sub_id = None
                current_sub_provider_name = None

                summary = _(u"%(part_summary)sNo current subtitle in storage", part_summary=part_summary_addon)
                current_score = None
                if current_sub:
                    current_sub_id = current_sub.id
                    current_sub_provider_name = current_sub.provider_name
                    current_score = current_sub.score

                    summary = _(u"%(part_summary)sCurrent subtitle: %(provider_name)s (added: %(date_added)s, "
                                u"%(mode)s), Language: %(language)s, Score: %(score)i, Storage: %(storage_type)s",
                                part_summary=part_summary_addon,
                                provider_name=_(current_sub.provider_name),
                                date_added=df(current_sub.date_added),
                                mode=_(current_sub.mode_verbose),
                                language=display_language(lang),
                                score=current_sub.score,
                                storage_type=current_sub.storage_type)

                    oc.add(DirectoryObject(
                        key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
                                     item_title=item_title, language=lang, language_name=display_language(lang),
                                     current_id=current_sub_id,
                                     item_type=plex_item.type, filename=filename, current_data=summary,
                                     randomize=timestamp(), current_provider=current_sub_provider_name,
                                     current_score=current_score),
                        title=_(u"%(part_summary)sManage %(language)s subtitle", part_summary=part_index_addon,
                                language=display_language(lang)),
                        summary=summary
                    ))
                else:
                    oc.add(DirectoryObject(
                        key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
                                     item_title=item_title, language=lang, language_name=display_language(lang),
                                     current_id=current_sub_id,
                                     item_type=plex_item.type, filename=filename, current_data=summary,
                                     randomize=timestamp(), current_provider=current_sub_provider_name,
                                     current_score=current_score),
                        title=_(u"%(part_summary)sList %(language)s subtitles", part_summary=part_index_addon,
                                language=display_language(lang)),
                        summary=summary
                    ))

            if config.plex_transcoder:
                # embedded subtitles
                embedded_count = 0
                embedded_langs = []
                for stream in part.streams:
                    # subtitle stream
                    if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
                        lang = get_language_from_stream(stream.language_code)
                        is_forced = is_stream_forced(stream)

                        if not lang and config.treat_und_as_first:
                            lang = list(config.lang_list)[0]

                        if lang:
                            lang = Language.rebuild(lang, forced=is_forced)
                            embedded_langs.append(lang)
                            embedded_count += 1

                if embedded_count:
                    oc.add(DirectoryObject(
                        key=Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
                                     item_type=plex_item.type, item_title=item_title, base_title=base_title,
                                     randomize=timestamp()),
                        title=_(u"%(part_summary)sEmbedded subtitles (%(languages)s)",
                                part_summary=part_index_addon,
                                languages=", ".join(display_language(l)
                                                    for l in list(OrderedDict.fromkeys(embedded_langs)))),
                        summary=_(u"Extract embedded subtitle streams")
                    ))

    ignore_title = item_title
    if current_kind == "episode":
        ignore_title = get_item_title(item)
    add_incl_excl_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=InclExclMenu)
    subtitle_storage.destroy()

    return oc
Beispiel #41
0
def test_query_list_subtitles(movies):
    languages = [Language('zho', None, 'Hant'), Language('zho', None, 'Hans')]
    video = movies['man_of_steel']
    with AssrtProvider(TOKEN) as provider:
        subtitles = provider.list_subtitles(video, languages)
        assert len(subtitles) == 16
Beispiel #42
0
class SubdivxSubtitlesProvider(Provider):
    provider_name = 'subdivx'
    hash_verifiable = False
    languages = {Language.fromalpha2(l) for l in ['es']}
    subtitle_class = SubdivxSubtitle

    server_url = 'https://www.subdivx.com/'
    multi_result_throttle = 2
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(
            __short_version__)

    def terminate(self):
        self.session.close()

    def query(self, keyword, season=None, episode=None, year=None):
        query = keyword
        if season and episode:
            query += ' S{season:02d}E{episode:02d}'.format(season=season,
                                                           episode=episode)
        elif year:
            query += ' {:4d}'.format(year)

        params = {
            'buscar': query,  # search string
            'accion': 5,  # action search
            'oxdown': 1,  # order by downloads descending
            'pg': 1  # page 1
        }

        logger.debug('Searching subtitles %r', query)
        subtitles = []
        language = self.language_list[0]
        search_link = self.server_url + 'index.php'
        while True:
            response = self.session.get(search_link, params=params, timeout=10)
            self._check_response(response)

            try:
                page_subtitles = self._parse_subtitles_page(response, language)
            except Exception as e:
                raise ParseResponseError('Error parsing subtitles list: ' +
                                         str(e))

            subtitles += page_subtitles

            if len(page_subtitles) >= 20:
                params['pg'] += 1  # search next page
                time.sleep(self.multi_result_throttle)
            else:
                break

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        elif isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        for title in titles:
            if isinstance(video, Episode):
                subtitles += [
                    s for s in self.query(title,
                                          season=video.season,
                                          episode=video.episode,
                                          year=video.year)
                    if s.language in languages
                ]
            elif isinstance(video, Movie):
                subtitles += [
                    s for s in self.query(title, year=video.year)
                    if s.language in languages
                ]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubdivxSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)

            # get download link
            download_link = self._get_download_link(subtitle)

            # download zip / rar file with the subtitle
            response = self.session.get(
                download_link,
                headers={'Referer': subtitle.page_link},
                timeout=30)
            self._check_response(response)

            # open the compressed archive
            archive = self._get_archive(response.content)

            # extract the subtitle
            subtitle_content = self._get_subtitle_from_archive(archive)
            subtitle.content = fix_line_ending(subtitle_content)

    def _check_response(self, response):
        if response.status_code != 200:
            raise ServiceUnavailable('Bad status code: ' +
                                     str(response.status_code))

    def _parse_subtitles_page(self, response, language):
        subtitles = []

        page_soup = ParserBeautifulSoup(
            response.content.decode('iso-8859-1', 'ignore'),
            ['lxml', 'html.parser'])
        title_soups = page_soup.find_all("div",
                                         {'id': 'menu_detalle_buscador'})
        body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})

        for subtitle in range(0, len(title_soups)):
            title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]

            # title
            title = title_soup.find("a").text.replace("Subtitulo de ", "")
            page_link = title_soup.find("a")["href"].replace(
                'http://', 'https://')

            # body
            description = body_soup.find("div", {
                'id': 'buscador_detalle_sub'
            }).text

            subtitle = self.subtitle_class(language, page_link, description,
                                           title)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _get_download_link(self, subtitle):
        response = self.session.get(subtitle.page_link, timeout=10)
        self._check_response(response)
        try:
            page_soup = ParserBeautifulSoup(
                response.content.decode('iso-8859-1', 'ignore'),
                ['lxml', 'html.parser'])
            links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
            for link_soup in links_soup:
                if link_soup['href'].startswith('bajar'):
                    return self.server_url + link_soup['href']
        except Exception as e:
            raise ParseResponseError('Error parsing download link: ' + str(e))

        raise ParseResponseError('Download link not found')

    def _get_archive(self, content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise ParseResponseError('Unsupported compressed format')

        return archive

    def _get_subtitle_from_archive(self, archive):
        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            return archive.read(name)

        raise ParseResponseError(
            'Can not find the subtitle in the compressed file')
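The archive handling above (_get_archive plus _get_subtitle_from_archive) can be hard to follow out of context. Below is a minimal, self-contained sketch of the same idea, limited to zip files (the rar branch works the same way through rarfile); the function name and the extension list are made up for illustration:

import io
import zipfile


def first_subtitle_from_zip_bytes(content, extensions=('.srt', '.ass', '.ssa')):
    """Return the bytes of the first subtitle-like member of a zip archive, or None."""
    stream = io.BytesIO(content)
    if not zipfile.is_zipfile(stream):
        return None
    with zipfile.ZipFile(stream) as archive:
        for name in archive.namelist():
            # skip hidden files and non-subtitle members, as the provider above does
            if name.split('/')[-1].startswith('.'):
                continue
            if not name.lower().endswith(extensions):
                continue
            return archive.read(name)
    return None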
Beispiel #43
0
def extract_embedded_sub(**kwargs):
    rating_key = kwargs["rating_key"]
    part_id = kwargs.pop("part_id")
    stream_index = kwargs.pop("stream_index")
    with_mods = kwargs.pop("with_mods", False)
    language = Language.fromietf(kwargs.pop("language"))
    refresh = kwargs.pop("refresh", True)
    set_current = kwargs.pop("set_current", True)

    plex_item = kwargs.pop("plex_item", get_item(rating_key))
    item_type = get_item_kind_from_item(plex_item)
    part = kwargs.pop("part", get_part(plex_item, part_id))
    scanned_videos = kwargs.pop("scanned_videos", None)
    extract_mode = kwargs.pop("extract_mode", "a")

    any_successful = False

    if part:
        if not scanned_videos:
            metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
            scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)

        for stream in part.streams:
            # subtitle stream
            if str(stream.index) == stream_index:
                is_forced = is_stream_forced(stream)
                bn = os.path.basename(part.file)

                set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
                                         stream_index=stream_index,
                                         filename=bn))
                Log.Info(u"Extracting stream %s (%s) of %s", stream_index, str(language), bn)

                out_codec = stream.codec if stream.codec != "mov_text" else "srt"

                args = [
                    config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
                ]

                cmdline = quote_args(args)
                Log.Debug(u"Calling: %s", cmdline)
                if mswindows:
                    Log.Debug("MSWindows: Fixing encoding")
                    cmdline = cmdline.encode("mbcs")

                output = None
                try:
                    output = subprocess.check_output(cmdline, stderr=subprocess.PIPE, shell=True)
                except:
                    Log.Error("Extraction failed: %s", traceback.format_exc())

                if output:
                    subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
                    subtitle.content = output
                    subtitle.provider_name = "embedded"
                    subtitle.id = "stream_%s" % stream_index
                    subtitle.score = 0
                    subtitle.set_encoding("utf-8")

                    # fixme: speedup video; only video.name is needed
                    video = scanned_videos.keys()[0]
                    save_successful = save_subtitles(scanned_videos, {video: [subtitle]}, mode="m",
                                                     set_current=set_current)
                    set_refresh_menu_state(None)

                    if save_successful and refresh:
                        refresh_item(rating_key)

                    # add item to history
                    item_title = get_title_for_video_metadata(video.plexapi_metadata,
                                                              add_section_title=False, add_episode_title=True)

                    history = get_history()
                    history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
                                thumb=video.plexapi_metadata["super_thumb"],
                                subtitle=subtitle, mode=extract_mode)
                    history.destroy()

                    any_successful = True

    return any_successful
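The heart of the extraction above is a single transcoder call that writes the chosen subtitle stream to stdout. A stripped-down sketch, assuming a plain ffmpeg binary on the PATH instead of config.plex_transcoder and skipping the Windows quoting workaround:

import subprocess


def extract_stream_as_srt(video_path, stream_index):
    # dump subtitle stream <stream_index> of <video_path> to stdout as SRT
    args = ["ffmpeg", "-loglevel", "error", "-i", video_path,
            "-map", "0:%d" % int(stream_index), "-f", "srt", "-"]
    return subprocess.check_output(args)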
Beispiel #44
0
    def from_api(cls, s):
        return cls(Language.fromsubscene(s.language.strip()),
                   s.title,
                   hearing_impaired=s.hearing_impaired,
                   page_link=s.url)
Beispiel #45
0
def test_query_episode(episodes):
    video = episodes['bbt_s07e05']
    languages = [Language('zho', None, 'Hant'), Language('zho', None, 'Hans')]
    with AssrtProvider(TOKEN) as provider:
        subtitles = provider.query(languages, video)
        assert len(subtitles) == 11
Beispiel #46
0
class SubsynchroProvider(Provider):
    """Subsynchro Provider"""

    languages = {Language.fromalpha2(l) for l in ["fr"]}
    language_list = list(languages)
    video_types = (Movie, )

    def initialize(self):
        self.session = Session()
        self.session.headers = {"User-Agent": "Bazarr", "Referer": PAGE_URL}

    def terminate(self):
        self.session.close()

    def query(self, languages, video):
        language = self.language_list[0]

        params = {"title": video.title, "year": video.year}

        logger.debug("Searching subtitles from params: %s", params)

        result = self.session.get(SERVER_URL, params=params, timeout=10)
        result.raise_for_status()

        subtitles = []
        results = result.json() or {}

        status_ = results.get("status")

        if status_ != 200:
            logger.debug(f"No subtitles found (status {status_})")
            return subtitles

        for i in results.get("data", []):
            matches = set()
            if any(video.title.lower() in title.lower()
                   for title in (i.get("titre", "n/a"),
                                 i.get("titre_original", "n/a"))):
                # Year is already set on query
                matches.update(["title", "year"])

            subtitles.append(
                SubsynchroSubtitle(
                    language,
                    i.get("release", "n/a"),
                    i.get("filename", "n/a"),
                    i.get("telechargement"),
                    i.get("fichier"),
                    matches,
                ))
        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def get_file(self, archive):
        for name in archive.namelist():
            if os.path.split(name)[-1].startswith("."):
                continue

            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            logger.debug(f"Returning from archive: {name}")
            return archive.read(name)

        raise APIThrottled("Can not find the subtitle in the zip file")

    def download_subtitle(self, subtitle):
        logger.debug(f"Downloading subtitle {subtitle.download_url}")

        response = self.session.get(subtitle.download_url,
                                    allow_redirects=True,
                                    timeout=10)
        response.raise_for_status()

        stream = io.BytesIO(response.content)
        if is_zipfile(stream):
            logger.debug("Zip file found")
            subtitle_ = self.get_file(ZipFile(stream))
            subtitle.content = fix_line_ending(subtitle_)
        else:
            raise APIThrottled(f"Unknown file type: {subtitle.download_url}")
Beispiel #47
0
def test_language_contains():
    assert language_contains(Language('zho'), Language('zho'))
    assert language_contains(Language('zho', 'TW', None), Language('zho'))
    assert language_contains(Language('zho', 'CN', None), Language('zho'))
    assert language_contains(Language('zho', None, 'Hant'), Language('zho'))
    assert language_contains(Language('zho', None, 'Hans'), Language('zho'))
    assert language_contains(Language('zho', 'TW', 'Hant'), Language('zho'))
    assert language_contains(Language('zho', 'CN', 'Hans'), Language('zho'))
    assert language_contains(Language('zho', None, 'Hant'),
                             Language('zho', None, 'Hant'))
    assert language_contains(Language('zho', None, 'Hans'),
                             Language('zho', None, 'Hans'))
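The test only pins down the behaviour of language_contains; its implementation is not shown here. A sketch consistent with the assertions above (not necessarily the real code behind the imported function): a candidate is contained in a target when the alpha3 codes match and the target's country and script, where set, match as well.

def language_contains_sketch(candidate, target):
    if candidate.alpha3 != target.alpha3:
        return False
    if target.country is not None and candidate.country != target.country:
        return False
    if target.script is not None and candidate.script != target.script:
        return False
    return True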
Beispiel #48
0
    def get_lang_list(self, provider=None, ordered=False):
        # advanced settings
        if provider and self.advanced.providers and provider in self.advanced.providers:
            adv_languages = self.advanced.providers[provider].get("languages", None)
            if adv_languages:
                adv_out = set()
                for adv_lang in adv_languages:
                    adv_lang = adv_lang.strip()
                    try:
                        real_lang = Language.fromietf(adv_lang)
                    except:
                        try:
                            real_lang = Language.fromname(adv_lang)
                        except:
                            continue
                    adv_out.update({real_lang})

                # use the languages from the advanced settings if at least one of them was valid;
                # otherwise fall through to the default languages below
                if adv_out:
                    return adv_out

        l = [Language.fromietf(Prefs["langPref1a"])]
        lang_custom = Prefs["langPrefCustom"].strip()

        if Prefs['subtitles.only_one']:
            return set(l) if not ordered else l

        if Prefs["langPref2a"] != "None":
            try:
                l.append(Language.fromietf(Prefs["langPref2a"]))
            except:
                pass

        if Prefs["langPref3a"] != "None":
            try:
                l.append(Language.fromietf(Prefs["langPref3a"]))
            except:
                pass

        if len(lang_custom) and lang_custom != "None":
            for lang in lang_custom.split(u","):
                lang = lang.strip()
                try:
                    real_lang = Language.fromietf(lang)
                except:
                    try:
                        real_lang = Language.fromname(lang)
                    except:
                        continue
                l.append(real_lang)

        if self.forced_also:
            if Prefs["subtitles.when_forced"] == "Always":
                for lang in list(l):
                    l.append(Language.rebuild(lang, forced=True))

            else:
                for (setting, index) in (("Only for Subtitle Language (1)", 0),
                                         ("Only for Subtitle Language (2)", 1),
                                         ("Only for Subtitle Language (3)", 2)):
                    if Prefs["subtitles.when_forced"] == setting:
                        try:
                            l.append(Language.rebuild(list(l)[index], forced=True))
                            break
                        except:
                            pass

        elif self.forced_only:
            for lang in l:
                lang.forced = True

        if not self.normal_subs:
            for lang in l[:]:
                if not lang.forced:
                    l.remove(lang)

        return set(l) if not ordered else l
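For reference, the forced-subtitle handling above relies on Language.rebuild() keeping the language itself and only setting the extra flag; an illustrative one-off (Language imported from subzero.language, as in the other examples):

eng = Language.fromietf("en")
eng_forced = Language.rebuild(eng, forced=True)  # same language, forced flag set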
Beispiel #49
0
def get_language(lang_short):
    return Language.fromietf(lang_short)
Beispiel #50
0
    def query(self, languages, title, season=None, episode=None, year=None, video=None):
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
            used_languages = list(filter(lambda l: l != Language.fromietf('sr-Latn'), used_languages))
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()
            except RequestException as e:
                logger.exception('RequestException %s', e)
                break
            else:
                try:
                    soup = BeautifulSoup(r.content, 'lxml')

                    # number of results
                    result_count = int(soup.select_one('.results_count b').string)
                except:
                    result_count = None

                # exit if no results
                if not result_count:
                    if not subtitles:
                        logger.debug('No subtitles found')
                    else:
                        logger.debug("No more subtitles found")
                    break

                # number of pages with results
                pages = int(math.ceil(result_count / float(items_per_page)))

                # get current page
                if 'pg' in params:
                    current_page = int(params['pg'])

                try:
                    sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                    for sub in sublist:
                        # subtitle id
                        sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                        # get download link
                        download_link = self.download_url + sid
                        # title and alternate title
                        match = title_re.search(sub.a.string)
                        if match:
                            _title = match.group('title')
                            alt_title = match.group('altitle')
                        else:
                            continue

                        # page link
                        page_link = self.server_url + sub.a.attrs['href']
                        # subtitle language
                        match = lang_re.search(sub.select_one('.lang').attrs['src'])
                        if match:
                            try:
                                # decode language
                                lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
                            except ValueError:
                                continue

                        # release year or series start year
                        r_year = None
                        match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                        if match:
                            r_year = int(match.group('year'))
                        # fps
                        fps = None
                        match = fps_re.search(sub.select_one('.fps').string)
                        if match:
                            fps = match.group('fps')
                        # releases
                        releases = str(sub.select_one('.fps').parent.contents[0].string)

                        # handle movies and series separately
                        if is_episode:
                            # season and episode info
                            sxe = sub.select_one('.s0xe0y').string
                            r_season = None
                            r_episode = None
                            if sxe:
                                match = season_re.search(sxe)
                                if match:
                                    r_season = int(match.group('season'))
                                match = episode_re.search(sxe)
                                if match:
                                    r_episode = int(match.group('episode'))

                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, season=r_season, episode=r_episode,
                                                           year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group,
                                                           asked_for_episode=episode)
                        else:
                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group)
                        logger.debug('Found subtitle %r', subtitle)

                        # prime our matches so we can use the values later
                        subtitle.get_matches(video)

                        # add found subtitles
                        subtitles.append(subtitle)

                finally:
                    soup.decompose()

                # stop on last page
                if current_page >= pages:
                    break

                # increment current page
                params['pg'] = current_page + 1
                logger.debug('Getting page %d', params['pg'])

        return subtitles
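The pagination above is plain ceiling division over the reported result count, e.g. with the provider's 10 items per page:

import math

items_per_page = 10
result_count = 37  # hypothetical result count
pages = int(math.ceil(result_count / float(items_per_page)))  # -> 4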
Beispiel #51
0
import logging
import sys

from subzero.language import Language
from subliminal_patch import Subtitle
from subliminal_patch.subtitle import ftfy_defaults

logger = logging.getLogger(__name__)

from subzero.modification import SubMod

fn = sys.argv[1]
debug = "--debug" in sys.argv

if debug:
    logging.basicConfig(level=logging.DEBUG)

#sub = Subtitle(Language.fromietf("eng:forced"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=-500)", "shift_offset(ms=500)", "shift_offset(s=2,ms=800)"])
sub = Subtitle(Language.fromietf("eng:forced"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=0,s=1)"])
sub.content = open(fn).read()
sub.normalize()
content = sub.get_modified_content(debug=True)

#submod = SubMod(debug=debug)
#submod.load(fn, language=Language.fromietf("pol"), encoding="utf-8")
#submod.modify("OCR_fixes", "common", "remove_tags", "OCR_fixes", "OCR_fixes")
#submod.modify("shift_offset(s=20)", "OCR_fixes")
#submod.modify("remove_HI", "OCR_fixes", "common", "OCR_fixes", "shift_offset(s=20)", "OCR_fixes", "color(name=white)", "shift_offset(s=-5, ms=-350)")

#srt = Subtitle.pysubs2_to_unicode(submod.f)
#content = fix_text(Subtitle.pysubs2_to_unicode(submod.f, format=format), **ftfy_defaults)\
#                .encode(encoding="utf-8")
#print submod.f.to_string("srt", encoding="utf-8")
#print repr(content)
Beispiel #52
0
def _search_external_subtitles(path,
                               languages=None,
                               only_one=False,
                               scandir_generic=False,
                               match_strictness="strict"):
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fn_no_ext, fileext = os.path.splitext(filename)
    fn_no_ext_lower = fn_no_ext.lower()
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir

    for entry in _scandir(dirpath):
        if (not entry.name or entry.name
                in ('\x0c', '$', ',', '\x7f')) and not scandir_generic:
            logger.debug(
                'Could not determine the name of the file, retrying with scandir_generic'
            )
        return _search_external_subtitles(path, languages, only_one, True, match_strictness)
        if not entry.is_file(follow_symlinks=False):
            continue

        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        p_root, p_ext = os.path.splitext(p)
        if not INCLUDE_EXOTIC_SUBS and p_ext not in (".srt", ".ass", ".ssa",
                                                     ".vtt"):
            continue

        if p_root.lower() == fn_no_ext_lower:
            # skip check for language code if the subtitle file name is the same as the video name
            subtitles[p] = None
            continue

        # extract potential forced/normal/default/hi tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in [
                    'forced', 'normal', 'default', 'embedded',
                    'embedded-forced', 'custom', 'hi', 'cc', 'sdh'
            ]:
                p_root = split_tag[0]

        forced = False
        if adv_tag:
            forced = "forced" in adv_tag

        hi = False
        if adv_tag:
            hi_tag = ["hi", "cc", "sdh"]
            hi = any(i for i in hi_tag if i in adv_tag)

        #add simplified/traditional chinese detection
        simplified_chinese = [
            "chs", "sc", "zhs", "hans", "zh-hans", "gb", "简", "简中", "简体",
            "简体中文", "中英双语", "中日双语", "中法双语", "简体&英文"
        ]
        traditional_chinese = [
            "cht", "tc", "zht", "hant", "zh-hant", "big5", "繁", "繁中", "繁体",
            "繁體", "繁体中文", "繁體中文", "正體中文", "中英雙語", "中日雙語", "中法雙語", "繁体&英文"
        ]
        p_root = p_root.replace('zh-TW', 'zht')

        # remove possible language code for matching
        p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub(
            lambda m: ""
            if str(m.group(1)).lower() in FULL_LANGUAGE_LIST else m.group(0),
            p_root)

        p_root_lower = p_root_bare.lower()

        filename_matches = p_root_lower == fn_no_ext_lower
        filename_contains = p_root_lower in fn_no_ext_lower

        if not filename_matches:
            if match_strictness == "strict" or (match_strictness == "loose"
                                                and not filename_contains):
                continue

        language = None

        # extract the potential language code
        try:
            language_code = p_root.rsplit(".", 1)[1].replace('_', '-')
            try:
                language = Language.fromietf(language_code)
                language.forced = forced
                language.hi = hi
            except (ValueError, LanguageReverseError):
                #add simplified/traditional chinese detection
                if any(ext in str(language_code)
                       for ext in simplified_chinese):
                    language = Language.fromietf('zh')
                    language.forced = forced
                    language.hi = hi
                elif any(ext in str(language_code)
                         for ext in traditional_chinese):
                    language = Language.fromietf('zh')
                    language.forced = forced
                    language.hi = hi
                else:
                    logger.error('Cannot parse language code %r',
                                 language_code)
                    language_code = None
        except IndexError:
            language_code = None

        if not language and not language_code and only_one:
            language = Language.rebuild(list(languages)[0],
                                        forced=forced,
                                        hi=hi)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
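Traced by hand for a hypothetical file name, the tag and language parsing above proceeds like this:

import os

p = "Movie.2019.en.forced.srt"             # hypothetical external subtitle next to Movie.2019.mkv
p_root, p_ext = os.path.splitext(p)        # "Movie.2019.en.forced", ".srt"
split_tag = p_root.rsplit('.', 1)          # ["Movie.2019.en", "forced"]
adv_tag = split_tag[1].lower()             # "forced" -> forced = True, hi stays False
p_root = split_tag[0]                      # "Movie.2019.en"
language_code = p_root.rsplit(".", 1)[1]   # "en" -> Language.fromietf("en") with forced=True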
Beispiel #53
0
class Subs4SeriesProvider(Provider):
    """Subs4Series Provider."""
    languages = {Language(l) for l in ['ell', 'eng']}
    video_types = (Episode, )
    server_url = 'https://www.subs4series.com'
    search_url = '/search_report.php?search={}&searchType=1'
    episode_link = '/tv-series/{show_id}/season-{season:d}/episode-{episode:d}'
    subtitle_class = Subs4SeriesSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(
            __short_version__)

    def terminate(self):
        self.session.close()

    def get_show_ids(self, title, year=None):
        """Get the best matching show id for `series` and `year`.

        First search in the result of :meth:`_get_show_suggestions`.

        :param title: show title.
        :param year: year of the show, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: str

        """
        title_sanitized = sanitize(title).lower()
        show_ids = self._get_suggestions(title)

        matched_show_ids = []
        for show in show_ids:
            show_id = None
            show_title = sanitize(show['title'])
            # attempt with year
            if not show_id and year:
                logger.debug('Getting show id with year')
                show_id = '/'.join(show['link'].rsplit(
                    '/', 2)[1:]) if show_title == '{title} {year:d}'.format(
                        title=title_sanitized, year=year) else None

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = '/'.join(show['link'].rsplit(
                    '/', 2)[1:]) if show_title == title_sanitized else None

            if show_id:
                matched_show_ids.append(show_id)

        return matched_show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME,
                               to_str=text_type,
                               should_cache_fn=lambda value: value)
    def _get_suggestions(self, title):
        """Search the show or movie id from the `title` and `year`.

        :param str title: title of the show.
        :return: the show suggestions found.
        :rtype: dict

        """
        # make the search
        logger.info('Searching show ids with %r', title)
        r = self.session.get(self.server_url +
                             text_type(self.search_url).format(title),
                             headers={'Referer': self.server_url},
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return {}

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        series = [{
            'link': l.attrs['value'],
            'title': l.text
        } for l in soup.select('select[name="Mov_sel"] > option[value]')]
        logger.debug('Found suggestions: %r', series)

        return series

    def query(self, show_id, series, season, episode, title):
        # get the season list of the show
        logger.info('Getting the subtitle list of show id %s', show_id)
        if all((show_id, season, episode)):
            page_link = self.server_url + self.episode_link.format(
                show_id=show_id, season=season, episode=episode)
        else:
            return []

        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        year_num = None
        matches = year_re.match(
            str(soup.select_one(
                '#dates_header_br > table div').contents[2]).strip())
        if matches:
            year_num = int(matches.group(1))
        show_title = str(
            soup.select_one('#dates_header_br > table u').contents[0]).strip()

        subtitles = []
        # loop over episode rows
        for subtitle in soup.select(
                'table.table_border div[align="center"] > div'):
            # read common info
            version = subtitle.find('b').text
            download_link = self.server_url + subtitle.find('a')['href']
            language = Language.fromalpha2(
                subtitle.find('img')['src'].split('/')[-1].split('.')[0])

            subtitle = self.subtitle_class(language, page_link, show_title,
                                           year_num, version, download_link)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        # lookup show_id
        titles = [video.series] + video.alternative_series if isinstance(
            video, Episode) else []

        show_ids = None
        for title in titles:
            show_ids = self.get_show_ids(title, video.year)
            if show_ids and len(show_ids) > 0:
                break

        subtitles = []
        # query for subtitles with the show_id
        for show_id in show_ids:
            subtitles += [
                s for s in self.query(show_id, video.series, video.season,
                                      video.episode, video.title)
                if s.language in languages
            ]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, Subs4SeriesSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link,
                                 headers={'Referer': subtitle.page_link},
                                 timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug(
                    'Unable to download subtitle. No data returned from provider'
                )
                return

            soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
            download_element = soup.select_one('a.style55ws')
            if not download_element:
                download_element = soup.select_one('form[method="post"]')
                target = download_element[
                    'action'] if download_element else None
            else:
                target = download_element['href']

            if not target:
                logger.debug(
                    'Unable to download subtitle. No download link found')
                return

            download_url = self.server_url + target
            r = self.session.get(download_url,
                                 headers={'Referer': subtitle.download_link},
                                 timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug(
                    'Unable to download subtitle. No data returned from provider'
                )
                return

            archive = _get_archive(r.content)
            subtitle_content = _get_subtitle_from_archive(
                archive) if archive else r.content

            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
            else:
                logger.debug('Could not extract subtitle from %r', archive)
Beispiel #54
0
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False, external=True, languages=()):
    item_id = int(rating_key)
    item = get_item(rating_key)

    if kind == "show":
        item_title = get_plex_item_display_title(item, kind, parent=item.season, section_title=section_title, parent_title=item.show.title)
    else:
        item_title = get_plex_item_display_title(item, kind, section_title=section_title)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir

    missing = set()
    languages_set = set([Language.rebuild(l) for l in languages])
    for media in item.media:
        existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
        for part in media.parts:

            # did we already download an external subtitle before?
            if subtitle_target_dir and stored_subs:
                for language in languages_set:
                    if has_external_subtitle(part.id, stored_subs, language):
                        # check the existence of the actual subtitle file

                        # get media filename without extension
                        part_basename = os.path.splitext(os.path.basename(part.file))[0]

                        # compute target directory for subtitle
                        # fixme: move to central location
                        if tdir_is_absolute:
                            possible_subtitle_path_base = subtitle_target_dir
                        else:
                            possible_subtitle_path_base = os.path.join(os.path.dirname(part.file), subtitle_target_dir)

                        possible_subtitle_path_base = os.path.realpath(possible_subtitle_path_base)

                        # folder actually exists?
                        if not os.path.isdir(possible_subtitle_path_base):
                            continue

                        found_any = False
                        for ext in config.subtitle_formats:
                            if cast_bool(Prefs['subtitles.only_one']):
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s" % (part_basename, ext))
                            else:
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s.%s" % (part_basename, language, ext))

                            # check for subtitle existence
                            if os.path.isfile(possible_subtitle_path):
                                found_any = True
                                Log.Debug(u"Found: %s", possible_subtitle_path)
                                break

                        if found_any:
                            existing_subs["own_external"].append(language)
                            existing_subs["count"] = existing_subs["count"] + 1

            for stream in part.streams:
                if stream.stream_type == 3:
                    is_forced = is_stream_forced(stream)
                    if stream.index:
                        key = "internal"
                    else:
                        key = "external"

                    if not config.exotic_ext and stream.codec.lower() not in TEXT_SUBTITLE_EXTS:
                        continue

                    # treat unknown language as lang1?
                    if not stream.language_code and config.treat_und_as_first:
                        lang = Language.rebuild(list(config.lang_list)[0])

                    # we can't parse empty language codes
                    elif not stream.language_code or not stream.codec:
                        continue

                    else:
                        # parse with internal language parser first
                        try:
                            lang = get_language_from_stream(stream.language_code)
                            if not lang:
                                if config.treat_und_as_first:
                                    lang = Language.rebuild(list(config.lang_list)[0])
                                else:
                                    continue

                        except (ValueError, LanguageReverseError):
                            continue

                    if lang:
                        # Log.Debug("Found babelfish language: %r", lang)
                        lang.forced = is_forced
                        existing_subs[key].append(lang)
                        existing_subs["count"] = existing_subs["count"] + 1

        missing_from_part = set([Language.rebuild(l) for l in languages])
        if existing_subs["count"]:

            # fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
            # (pt instead of pt-BR) inside the database. So it might actually download pt-BR if there's a local pt-BR
            # subtitle but not our own.
            existing_flat = set((existing_subs["internal"] if internal else [])
                                + (existing_subs["external"] if external else [])
                                + existing_subs["own_external"])

            check_languages = set([Language.rebuild(l) for l in languages])
            alpha3_map = {}
            if config.ietf_as_alpha3:
                for language in existing_flat:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

                for language in check_languages:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

            # compare sets of strings, not sets of different Language instances
            check_languages_str = set(str(l) for l in check_languages)
            existing_flat_str = set(str(l) for l in existing_flat)

            if check_languages_str.issubset(existing_flat_str) or \
                    (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
                # all subs found
                #Log.Info(u"All subtitles exist for '%s'", item_title)
                continue

            missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
            if config.ietf_as_alpha3:
                for language in missing_from_part:
                    language.country = alpha3_map.get(language.alpha3, None)

        if missing_from_part:
            Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
                     missing_from_part)
            missing.update(missing_from_part)

    if missing:
        # deduplicate
        missing = set(Language.fromietf(la) for la in set(str(l) for l in missing))
        return added_at, item_id, item_title, item, missing
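The existing-versus-wanted comparison above deliberately works on string representations so that distinct Language instances of the same language compare equal. Reduced to plain sets with hypothetical IETF codes:

check_languages_str = {"en", "pt-BR"}   # wanted
existing_flat_str = {"en"}              # already present
missing_str = check_languages_str - existing_flat_str  # -> {"pt-BR"}
# each remaining code is then turned back into a Language via Language.fromietf()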
Beispiel #55
0
class SubsSabBzProvider(Provider):
    """SubsSabBz Provider."""
    languages = {Language(l) for l in [
        'bul', 'eng'
    ]}
    video_types = (Episode, Movie)

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
        self.session.headers["Accept-Encoding"] = "gzip, deflate, br"
        self.session.headers["DNT"] = "1"
        self.session.headers["Connection"] = "keep-alive"
        self.session.headers["Upgrade-Insecure-Requests"] = "1"
        self.session.headers["Cache-Control"] = "max-age=0"

    def terminate(self):
        self.session.close()

    def query(self, language, video):
        subtitles = []
        isEpisode = isinstance(video, Episode)

        params = {
            'act': 'search',
            'movie': '',
            'select-language': '2',
            'upldr': '',
            'yr': '',
            'release': ''
        }

        if isEpisode:
            params['movie'] = "%s %02d %02d" % (sanitize(fix_tv_naming(video.series), {'\''}), video.season, video.episode)
        else:
            params['yr'] = video.year
            params['movie'] = sanitize(fix_movie_naming(video.title), {'\''})

        if language == 'en' or language == 'eng':
            params['select-language'] = 1

        logger.info('Searching subtitle %r', params)
        response = self.session.post('http://subs.sab.bz/index.php?', params=params, allow_redirects=False, timeout=10, headers={
            'Referer': 'http://subs.sab.bz/',
            })

        response.raise_for_status()

        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        rows = soup.findAll('tr', {'class': 'subs-row'})

        # Search on first 25 rows only
        for row in rows[:25]:
            a_element_wrapper = row.find('td', { 'class': 'c2field' })
            if a_element_wrapper:
                element = a_element_wrapper.find('a')
                if element:
                    link = element.get('href')
                    notes = re.sub(r'ddrivetip\(\'<div.*/></div>(.*)\',\'#[0-9]+\'\)', r'\1', element.get('onmouseover'))
                    title = element.get_text()

                    try:
                        year = int(str(element.next_sibling).strip(' ()'))
                    except:
                        year = None

                    td = row.findAll('td')

                    try:
                        num_cds = int(td[6].get_text())
                    except:
                        num_cds = None

                    try:
                        fps = float(td[7].get_text())
                    except:
                        fps = None

                    try:
                        uploader = td[8].get_text()
                    except:
                        uploader = None

                    try:
                        imdb_id = re.findall(r'imdb.com/title/(tt\d+)/?$', td[9].find('a').get('href'))[0]
                    except:
                        imdb_id = None

                    logger.info('Found subtitle link %r', link)
                    sub = self.download_archive_and_add_subtitle_files(link, language, video, fps, num_cds)
                    for s in sub:
                        s.title = title
                        s.notes = notes
                        s.year = year
                        s.uploader = uploader
                        s.imdb_id = imdb_id
                        s.single_file = True if len(sub) == 1 and num_cds == 1 else False
                    subtitles = subtitles + sub
        return subtitles

    def list_subtitles(self, video, languages):
        return [s for l in languages for s in self.query(l, video)]

    def download_subtitle(self, subtitle):
        if subtitle.content:
            pass
        else:
            seeking_subtitle_file = subtitle.filename
            arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video,
                                                                subtitle.fps, subtitle.num_cds)
            for s in arch:
                if s.filename == seeking_subtitle_file:
                    subtitle.content = s.content

    def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds):
        subtitles = []
        type = 'episode' if isinstance(video, Episode) else 'movie'
        for file_name in sorted(archiveStream.namelist()):
            if file_name.lower().endswith(('.srt', '.sub')):
                logger.info('Found subtitle file %r', file_name)
                subtitle = SubsSabBzSubtitle(language, file_name, type, video, link, fps, num_cds)
                subtitle.content = fix_line_ending(archiveStream.read(file_name))
                subtitles.append(subtitle)
        return subtitles

    def download_archive_and_add_subtitle_files(self, link, language, video, fps, num_cds):
        logger.info('Downloading subtitle %r', link)
        cache_key = sha1(link.encode("utf-8")).digest()
        request = region.get(cache_key)
        if request is NO_VALUE:
            request = self.session.get(link, headers={
                'Referer': 'http://subs.sab.bz/index.php?'
                })
            request.raise_for_status()
            region.set(cache_key, request)
        else:
            logger.info('Cache file: %s', codecs.encode(cache_key, 'hex_codec').decode('utf-8'))

        try:
            archive_stream = io.BytesIO(request.content)
            if is_rarfile(archive_stream):
                return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps, num_cds)
            elif is_zipfile(archive_stream):
                return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps, num_cds)
        except:
            pass

        logger.error('Ignore unsupported archive %r', request.headers)
        region.delete(cache_key)
        return []
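download_archive_and_add_subtitle_files above caches the raw HTTP response in a dogpile.cache region so that repeated downloads of the same archive are served from the cache. The pattern, reduced to its essentials (region is assumed to be an already configured dogpile.cache region and session a requests.Session):

from dogpile.cache.api import NO_VALUE


def cached_get(region, session, url):
    key = "download::" + url
    response = region.get(key)
    if response is NO_VALUE:
        response = session.get(url, timeout=10)
        response.raise_for_status()
        region.set(key, response)
    return response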
Beispiel #56
0
class LegendasTVProvider(_LegendasTVProvider):
    languages = {Language(*l) for l in language_converters['legendastv'].to_legendastv.keys()}
    subtitle_class = LegendasTVSubtitle

    def __init__(self, username=None, password=None):

        # Provider needs UNRAR installed. If not available raise ConfigurationError
        try:
            rarfile.custom_check([rarfile.UNRAR_TOOL], True)
        except rarfile.RarExecError:
            raise ConfigurationError('UNRAR tool not available')

        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.session = None

    @staticmethod
    def is_valid_title(title, title_id, sanitized_title, season, year, imdb_id):
        """Check if is a valid title."""
        if title["imdb_id"] and title["imdb_id"] == imdb_id:
            logger.debug(u'Matched title "%s" as IMDB ID %s', sanitized_title, title["imdb_id"])
            return True

        if title["title2"] and sanitize(title['title2']) == sanitized_title:
            logger.debug(u'Matched title "%s" as "%s"', sanitized_title, title["title2"])
            return True

        return _LegendasTVProvider.is_valid_title(title, title_id, sanitized_title, season, year)

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
    def search_titles(self, title, season, title_year, imdb_id):
        """Search for titles matching the `title`.

        For episodes, each season has its own title
        :param str title: the title to search for.
        :param int season: season of the title
        :param int title_year: year of the title
        :return: found titles.
        :rtype: dict
        """
        titles = {}
        sanitized_titles = [sanitize(title)]
        ignore_characters = {'\'', '.'}
        if any(c in title for c in ignore_characters):
            sanitized_titles.append(sanitize(title, ignore_characters=ignore_characters))

        for sanitized_title in sanitized_titles:
            # make the query
            if season:
                logger.info('Searching episode title %r for season %r', sanitized_title, season)
            else:
                logger.info('Searching movie title %r', sanitized_title)

            r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10)
            raise_for_status(r)
            results = json.loads(r.text)

            # loop over results
            for result in results:
                source = result['_source']

                # extract id
                title_id = int(source['id_filme'])

                # extract type
                title = {'type': type_map[source['tipo']], 'title2': None, 'imdb_id': None}

                # extract title, year and country
                name, year, country = title_re.match(source['dsc_nome']).groups()
                title['title'] = name

                if "dsc_nome_br" in source:
                    name2, ommit1, ommit2 = title_re.match(source['dsc_nome_br']).groups()
                    title['title2'] = name2

                # extract imdb_id
                if source['id_imdb'] != '0':
                    if not source['id_imdb'].startswith('tt'):
                        title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
                    else:
                        title['imdb_id'] = source['id_imdb']

                # extract season
                if title['type'] == 'episode':
                    if source['temporada'] and source['temporada'].isdigit():
                        title['season'] = int(source['temporada'])
                    else:
                        match = season_re.search(source['dsc_nome_br'])
                        if match:
                            title['season'] = int(match.group('season'))
                        else:
                            logger.debug('No season detected for title %d (%s)', title_id, name)

                # extract year
                if year:
                    title['year'] = int(year)
                elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
                    # year is based on season air date hence the adjustment
                    title['year'] = int(source['dsc_data_lancamento']) - title.get('season', 1) + 1

                # add title only if is valid
                # Check against title without ignored chars
                if self.is_valid_title(title, title_id, sanitized_titles[0], season, title_year, imdb_id):
                    logger.debug(u'Found title: %s', title)
                    titles[title_id] = title

            logger.debug('Found %d titles', len(titles))

        return titles

    def query(self, language, title, season=None, episode=None, year=None, imdb_id=None):
        # search for titles
        titles = self.search_titles(title, season, year, imdb_id)

        subtitles = []
        # iterate over titles
        for title_id, t in titles.items():

            logger.info('Getting archives for title %d and language %d', title_id, language.legendastv)
            archives = self.get_archives(title_id, language.legendastv, t['type'], season, episode)
            if not archives:
                logger.info('No archives found for title %d and language %d', title_id, language.legendastv)

            # iterate over title's archives
            for a in archives:

                # compute an expiration time based on the archive timestamp
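                # (i.e. a cached entry only counts as valid if it was stored after the archive's
                # timestamp; anything cached before the archive last changed is treated as expired)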
                expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds()

                # attempt to get the releases from the cache
                cache_key = str(a.id + "|" + a.name)
                releases = region.get(cache_key, expiration_time=expiration_time)

                # the releases are not in cache or cache is expired
                if releases == NO_VALUE:
                    logger.info('Releases not found in cache')

                    # download archive
                    self.download_archive(a)

                    # extract the releases
                    releases = []
                    for name in a.content.namelist():
                        # discard the legendastv file
                        if name.startswith('Legendas.tv'):
                            continue

                        # discard hidden files
                        if os.path.split(name)[-1].startswith('.'):
                            continue

                        # discard non-subtitle files
                        if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                            continue

                        releases.append(name)

                    # cache the releases
                    region.set(cache_key, releases)

                # iterate over releases
                for r in releases:
                    subtitle = self.subtitle_class(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
                                                   t.get('season'), a, r)
                    logger.debug('Found subtitle %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
            season = video.season
            episode = video.episode
        else:
            titles = [video.title] + video.alternative_titles

        for title in titles:
            subtitles = [s for l in languages for s in
                         self.query(l, title, season=season, episode=episode, year=video.year, imdb_id=video.imdb_id)]
            if subtitles:
                return subtitles

        return []

    def download_subtitle(self, subtitle):
        super(LegendasTVProvider, self).download_subtitle(subtitle)
        subtitle.archive.content = None

    def get_archives(self, title_id, language_code, title_type, season, episode):
        return super(LegendasTVProvider, self).get_archives.original(self, title_id, language_code, title_type,
                                                                     season, episode)
Beispiel #57
0
    'tel': 63,
    'tha': 40,
    'tur': 41,
    'ukr': 56,
    'urd': 42,
    'yor': 71
}

# TODO: specify codes for unspecified_languages
unspecified_languages = [
    'Big 5 code', 'Brazillian Portuguese', 'Bulgarian/ English',
    'Chinese BG code', 'Dutch/ English', 'English/ German',
    'Hungarian/ English', 'Rohingya'
]

supported_languages = {Language(l) for l in exact_languages_alpha3}

alpha3_of_code = {l.name: l.alpha3 for l in supported_languages}

supported_languages.update({Language(l) for l in to_subscene})


class SubsceneConverter(LanguageReverseConverter):
    codes = {l.name for l in supported_languages}

    def convert(self, alpha3, country=None, script=None):
        if alpha3 in exact_languages_alpha3:
            return Language(alpha3).name

        if alpha3 in to_subscene:
            return to_subscene[alpha3]
Beispiel #58
0
def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None, skip_hashing=False):
    """
    returns a subliminal/guessit-refined parsed video
    :param pms_video_info:
    :param ignore_all:
    :param hints:
    :param rating_key:
    :param providers:
    :param skip_hashing:
    :return:
    """
    embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
    external_subtitles = not ignore_all and Prefs['subtitles.scan.external']

    plex_part = pms_video_info["plex_part"]

    if ignore_all:
        Log.Debug("Force refresh intended.")

    Log.Debug("Detecting streams: %s, external_subtitles=%s, embedded_subtitles=%s" % (
        plex_part.file, external_subtitles, embedded_subtitles))

    known_embedded = []
    parts = []
    for media in list(Plex["library"].metadata(rating_key))[0].media:
        parts += media.parts

    plexpy_part = None
    for part in parts:
        if int(part.id) == int(plex_part.id):
            plexpy_part = part

    # embedded subtitles
    # fixme: skip the whole scanning process if known_embedded == wanted languages?
    audio_languages = []
    if plexpy_part:
        for stream in plexpy_part.streams:
            if stream.stream_type == 2:
                lang = None
                try:
                    lang = language_from_stream(stream.language_code)
                except LanguageError:
                    Log.Debug("Couldn't detect embedded audio stream language: %s", stream.language_code)

                # treat unknown language as lang1?
                if not lang and config.treat_und_as_first:
                    lang = Language.rebuild(list(config.lang_list)[0])

                audio_languages.append(lang)

            # subtitle stream
            elif stream.stream_type == 3 and embedded_subtitles:
                is_forced = helpers.is_stream_forced(stream)

                if ((config.forced_only or config.forced_also) and is_forced) or not is_forced:
                    # embedded subtitle
                    # fixme: tap into external subtitles here instead of scanning for ourselves later?
                    if stream.codec and getattr(stream, "index", None):
                        if config.exotic_ext or stream.codec.lower() in config.text_based_formats:
                            lang = None
                            try:
                                lang = language_from_stream(stream.language_code)
                            except LanguageError:
                                Log.Debug("Couldn't detect embedded subtitle stream language: %s", stream.language_code)

                            # treat unknown language as lang1?
                            if not lang and config.treat_und_as_first:
                                lang = Language.rebuild(list(config.lang_list)[0])

                            if lang:
                                if is_forced:
                                    lang.forced = True
                                known_embedded.append(lang)
    else:
        Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)

    # metadata subtitles
    known_metadata_subs = set()
    meta_subs = get_subtitles_from_metadata(plex_part)
    for language, subList in meta_subs.iteritems():
        try:
            lang = Language.fromietf(Locale.Language.Match(language))
        except LanguageError:
            if config.treat_und_as_first:
                lang = Language.rebuild(list(config.lang_list)[0])
            else:
                continue

        if subList:
            for key in subList:
                if key.startswith("subzero_md_forced"):
                    lang = Language.rebuild(lang, forced=True)

                known_metadata_subs.add(lang)
                Log.Debug("Found metadata subtitle %r:%s for %s", lang, key, plex_part.file)

    Log.Debug("Known metadata subtitles: %r", known_metadata_subs)
    Log.Debug("Known embedded subtitles: %r", known_embedded)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    try:
        # get basic video info scan (filename)
        video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
                            providers=providers)

        # set stream languages
        if audio_languages:
            video.audio_languages = audio_languages
            Log.Info("Found audio streams: %s" % ", ".join([str(l) for l in audio_languages]))

        if not ignore_all:
            set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
                                   embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
                                   stored_subs=stored_subs, languages=config.lang_list,
                                   only_one=config.only_one, known_metadata_subs=known_metadata_subs)

        # add video fps info
        video.fps = plex_part.fps
        return video

    except ValueError:
        Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())
Example #59
0
class GreekSubtitlesProvider(Provider):
    """GreekSubtitles Provider."""
    languages = {Language(l) for l in ['ell', 'eng']}
    server_url = 'http://gr.greek-subtitles.com/'
    search_url = 'search.php?name={}'
    download_url = 'http://www.greeksubtitles.info/getp.php?id={:d}'
    subtitle_class = GreekSubtitlesSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(
            __short_version__)

    def terminate(self):
        self.session.close()

    def query(self, keyword, season=None, episode=None, year=None):
        params = keyword
        if season and episode:
            params += ' S{season:02d}E{episode:02d}'.format(season=season,
                                                            episode=episode)
        elif year:
            params += ' {:4d}'.format(year)

        logger.debug('Searching subtitles %r', params)
        subtitles = []
        search_link = self.server_url + self.search_url.format(params)
        while True:
            r = self.session.get(search_link, timeout=30)
            r.raise_for_status()

            if not r.content:
                logger.debug('No data returned from provider')
                return []

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['lxml', 'html.parser'])

            # loop over subtitles cells
            for cell in soup.select('td.latest_name > a:nth-of-type(1)'):
                # read the item
                subtitle_id = int(cell['href'].rsplit('/', 2)[1])
                page_link = cell['href']
                language = Language.fromalpha2(
                    cell.parent.find('img')['src'].split('/')[-1].split('.')[0])
                version = cell.text.strip()

                subtitle = self.subtitle_class(
                    language, page_link, version,
                    self.download_url.format(subtitle_id))

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

            anchors = soup.select('td a')
            next_page_available = False
            for anchor in anchors:
                if 'Next' in anchor.text and 'search.php' in anchor['href']:
                    search_link = self.server_url + anchor['href']
                    next_page_available = True
                    break
            if not next_page_available:
                break

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        elif isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles with the show_id
        for title in titles:
            if isinstance(video, Episode):
                subtitles += [
                    s for s in self.query(title,
                                          season=video.season,
                                          episode=video.episode,
                                          year=video.year)
                    if s.language in languages
                ]
            elif isinstance(video, Movie):
                subtitles += [
                    s for s in self.query(title, year=video.year)
                    if s.language in languages
                ]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, GreekSubtitlesSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link,
                                 headers={'Referer': subtitle.page_link},
                                 timeout=30)
            r.raise_for_status()

            if not r.content:
                logger.debug(
                    'Unable to download subtitle. No data returned from provider'
                )
                return

            archive = _get_archive(r.content)

            subtitle_content = _get_subtitle_from_archive(archive)
            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
            else:
                logger.debug('Could not extract subtitle from %r', archive)