def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False):
    """Scan the video's directory for external subtitle files.

    :param path: path of the video file.
    :param languages: languages to fall back to when `only_one` is set.
    :param only_one: if True, an un-tagged subtitle is assumed to be the first
        configured language instead of 'und'.
    :param scandir_generic: use the generic (non-native) scandir fallback.
    :return: dict mapping subtitle filename -> Language (or None when the
        language code could not be parsed).
    """
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fileroot, fileext = os.path.splitext(filename)
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir
    for entry in _scandir(dirpath):
        # native scandir may fail to decode a filename; retry once with the
        # generic implementation
        if not entry.name and not scandir_generic:
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
            return _search_external_subtitles(path, languages, only_one, True)
        if not entry.is_file(follow_symlinks=False):
            continue

        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().startswith(fileroot.lower()) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        p_root, p_ext = os.path.splitext(p)
        # fix: compare the extension case-insensitively, matching the
        # lowercased startswith/endswith checks above (".SRT" was dropped)
        if not INCLUDE_EXOTIC_SUBS and p_ext.lower() not in (".srt", ".ass", ".ssa", ".vtt"):
            continue

        # extract potential forced/normal/default tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom']:
                p_root = split_tag[0]

        forced = False
        if adv_tag:
            forced = "forced" in adv_tag

        # extract the potential language code
        language_code = p_root[len(fileroot):].replace('_', '-')[1:]

        # default language is undefined
        language = Language('und')

        # attempt to parse
        if language_code:
            try:
                language = Language.fromietf(language_code)
                language.forced = forced
            except ValueError:
                logger.error('Cannot parse language code %r', language_code)
                language = None
        elif not language_code and only_one:
            language = Language.rebuild(list(languages)[0], forced=forced)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
def SelectStoredSubForItemMenu(**kwargs):
    """Activate a previously stored subtitle for a media part.

    Saves the selected stored subtitle to disk/metadata, marks it as the
    current one for the part/language, then returns to the item details menu.
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])
    item_type = kwargs["item_type"]
    # sub_key is serialized as "provider__id"; popped so it isn't forwarded
    sub_key = tuple(kwargs.pop("sub_key").split("__"))

    plex_item = get_item(rating_key)
    storage = get_subtitle_storage()
    stored_subs = storage.load(plex_item.rating_key)

    subtitles = stored_subs.get_all(part_id, language)
    subtitle = subtitles[sub_key]

    # writes the subtitle out and updates the storage state passed in
    save_stored_sub(subtitle, rating_key, part_id, language, item_type, plex_item=plex_item, storage=storage,
                    stored_subs=stored_subs)

    stored_subs.set_current(part_id, language, sub_key)
    storage.save(stored_subs)
    storage.destroy()

    kwa = {
        "header": _("Success"),
        "message": _("Subtitle saved to disk"),
        "title": kwargs["title"],
        "item_title": kwargs["item_title"],
        "base_title": kwargs.get("base_title")
    }

    # fixme: return to SubtitleOptionsMenu properly? (needs recomputation of current_data
    return ItemDetailsMenu(rating_key, randomize=timestamp(), **kwa)
def BlacklistAllPartsSubtitleMenu(**kwargs):
    """Blacklist the current subtitle of every part of an item.

    :param rating_key: item to process.
    :param language: optional IETF code; when given, only that language's
        current subtitles are blacklisted.

    Fix: the inner loop used to rebind ``language`` (the outer filter), so
    with no language filter the first part's language string leaked into the
    filtering of subsequent parts; a distinct loop variable is used now.
    """
    rating_key = kwargs.get("rating_key")
    language = kwargs.get("language")
    if language:
        language = Language.fromietf(language)

    item = get_item(rating_key)
    if not item:
        return

    item_title = get_item_title(item)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)
    for part_id, languages in stored_subs.parts.iteritems():
        sub_dict = languages
        if language:
            key = str(language)
            if key not in sub_dict:
                continue
            sub_dict = {key: sub_dict[key]}

        # don't clobber the outer `language` filter between parts
        for sub_language, subs in sub_dict.iteritems():
            if "current" in subs:
                stored_subs.blacklist(part_id, sub_language, subs["current"])
                Log.Info("Added %s to blacklist", subs["current"])

    subtitle_storage.save(stored_subs)
    subtitle_storage.destroy()

    return RefreshItem(rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
                       timeout=30000)
def load(self, fn=None, content=None, language=None, encoding="utf-8"):
    """
    Load a subtitle into self.f (a pysubs2 SSAFile), from a file or a string.

    :param encoding: used for decoding the content when fn is given, not used
        in case content is given
    :param language: babelfish.Language language of the subtitle
    :param fn: filename
    :param content: unicode
    :return: True when the subtitle was parsed successfully, else False
    """
    if language:
        # mods always operate on the non-forced variant of the language
        self.language = Language.rebuild(language, forced=False)
    self.initialized_mods = {}
    try:
        if fn:
            self.f = pysubs2.load(fn, encoding=encoding)
        elif content:
            self.f = pysubs2.SSAFile.from_string(content)
    except (IOError,
            UnicodeDecodeError,
            pysubs2.exceptions.UnknownFPSError,
            pysubs2.exceptions.UnknownFormatIdentifierError,
            pysubs2.exceptions.FormatAutodetectionError):
        # parse failures are logged but swallowed; caller checks the return
        if fn:
            logger.exception("Couldn't load subtitle: %s: %s", fn, traceback.format_exc())
        elif content:
            logger.exception("Couldn't load subtitle: %s", traceback.format_exc())

    return bool(self.f)
def query(self, show_id, series, season, episode, year=None):
    """Query tvsubtitles for an episode's subtitles.

    :return: list of subtitles (empty when the episode is unknown).
    """
    # get the episode ids
    episode_ids = self.get_episode_ids(show_id, season)

    # Provider doesn't store multi episode information
    episode = min(episode) if episode and isinstance(episode, list) else episode

    if episode not in episode_ids:
        logger.error('Episode %d not found', episode)
        return []

    # get the episode page
    logger.info('Getting the page for episode %d', episode_ids[episode])
    r = self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10)
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # loop over subtitles rows
    subtitles = []
    for row in soup.select('.subtitlen'):
        # read the item
        # slice offsets strip the fixed path prefix/suffix of the site's
        # flag image URL / subtitle link to recover the code and id
        language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
        subtitle_id = int(row.parent['href'][10:-5])
        page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
        rip = row.find('p', title='rip').text.strip() or None
        release = row.find('h5').text.strip() or None

        subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip,
                                       release)
        logger.info('Found subtitle %s', subtitle)
        subtitles.append(subtitle)

    # free the parse tree eagerly; these pages can be large
    soup.decompose()
    soup = None

    return subtitles
def query(self, show_id, series, season, year=None, country=None):
    """Query addic7ed for a whole season's subtitle listing.

    :return: list of subtitles (may be empty).
    """
    # patch: fix logging

    # get the page of the season of the show
    logger.info('Getting the page of show id %d, season %d', show_id, season)
    r = self.session.get(self.server_url + 'ajax_loadShow.php',
                         params={'show': show_id, 'season': season},
                         timeout=10,
                         headers={
                             "referer": "%sshow/%s" % (self.server_url, show_id),
                             "X-Requested-With": "XMLHttpRequest"
                         }
                         )

    r.raise_for_status()

    # raise_for_status does not raise on 304; the provider uses it to signal
    # rate limiting
    if r.status_code == 304:
        raise TooManyRequests()

    if not r.content:
        # Provider wrongful return a status of 304 Not Modified with an empty content
        # raise_for_status won't raise exception for that status code
        logger.error('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # loop over subtitle rows
    subtitles = []
    for row in soup.select('tr.epeven'):
        cells = row('td')

        # ignore incomplete subtitles
        status = cells[5].text
        if status != 'Completed':
            logger.debug('Ignoring subtitle with status %s', status)
            continue

        # read the item; column layout is fixed by the site's markup
        language = Language.fromaddic7ed(cells[3].text)
        hearing_impaired = bool(cells[6].text)
        page_link = self.server_url + cells[2].a['href'][1:]
        season = int(cells[0].text)
        episode = int(cells[1].text)
        title = cells[2].text
        version = cells[4].text
        download_link = cells[9].a['href'][1:]

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title, year,
                                       version, download_link)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    # free the parse tree eagerly; these pages can be large
    soup.decompose()
    soup = None

    return subtitles
def get_language_from_stream(lang_code):
    """Map a Plex stream language code to a Language instance, or None."""
    if not lang_code:
        return None

    matched = Locale.Language.Match(lang_code)
    if not matched:
        return None

    if matched != "xx":
        # Log.Debug("Found language: %r", matched)
        return Language.fromietf(matched)

    # "xx" is Plex's unknown marker; try the stream-based fallback
    try:
        return language_from_stream(matched)
    except LanguageError:
        return None
def SubtitleReapplyMods(**kwargs):
    """Re-run the currently stored mods on a part's subtitle and go back to the mods menu."""
    language = Language.fromietf(kwargs["language"])

    # an empty mod list with mode="add" re-applies whatever is already stored
    set_mods_for_part(kwargs["rating_key"], kwargs["part_id"], language,
                      kwargs["item_type"], [], mode="add")

    kwargs.pop("randomize")
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
def ManageBlacklistMenu(**kwargs):
    """Show (and optionally edit) the subtitle blacklist for a part/language.

    When `remove_sub_key` is supplied, that entry is removed from the
    blacklist before the menu is rendered.
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    # popped so it isn't forwarded to the self-referencing callbacks below
    remove_sub_key = kwargs.pop("remove_sub_key", None)
    current_data = unicode(kwargs["current_data"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    current_bl, subs = stored_subs.get_blacklist(part_id, language)

    if remove_sub_key:
        # keys are serialized as "provider__id"
        remove_sub_key = tuple(remove_sub_key.split("__"))
        stored_subs.blacklist(part_id, language, remove_sub_key, add=False)
        storage.save(stored_subs)
        Log.Info("Removed %s from blacklist", remove_sub_key)

    kwargs.pop("randomize")

    oc.add(DirectoryObject(
        key=Callback(ItemDetailsMenu, rating_key=kwargs["rating_key"], item_title=kwargs["item_title"],
                     title=kwargs["title"], randomize=timestamp()),
        title=_(u"< Back to %s", kwargs["title"]),
        summary=current_data,
        thumb=default_thumb
    ))

    def sorter(pair):
        # thanks RestrictedModule parser for messing with lambda (x, y)
        return pair[1]["date_added"]

    # newest blacklist entries first
    for sub_key, data in sorted(current_bl.iteritems(), key=sorter, reverse=True):
        provider_name, subtitle_id = sub_key

        title = _(u"%(provider_name)s, %(subtitle_id)s (added: %(date_added)s, %(mode)s), Language: %(language)s, "
                  u"Score: %(score)i, Storage: %(storage_type)s",
                  provider_name=_(provider_name),
                  subtitle_id=subtitle_id,
                  date_added=df(data["date_added"]),
                  mode=_(current_sub.get_mode_verbose(data["mode"])),
                  language=display_language(Language.fromietf(language)),
                  score=data["score"],
                  storage_type=data["storage_type"])
        oc.add(DirectoryObject(
            key=Callback(ManageBlacklistMenu, remove_sub_key="__".join(sub_key), randomize=timestamp(), **kwargs),
            title=title,
            summary=_(u"Remove subtitle from blacklist")
        ))

    storage.destroy()

    return oc
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, skip_unknown=False):
    """Collect text subtitle streams embedded in a media part.

    :param part: Plex media part whose streams are scanned.
    :param requested_language: when set, only that language is collected and
        scanning stops at the first match.
    :param skip_duplicate_unknown: only consider the first unknown-language stream.
    :param skip_unknown: when True, unknown-language streams are never used as
        a fallback result.
    :return: list of dicts with keys "stream", "is_unknown", "language", "is_forced".
    """
    streams = []
    streams_unknown = []
    has_unknown = False
    found_requested_language = False
    for stream in part.streams:
        # subtitle stream: type 3, text-based codec, not an external stream
        if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
            is_forced = helpers.is_stream_forced(stream)
            language = helpers.get_language_from_stream(stream.language_code)

            if language:
                language = Language.rebuild(language, forced=is_forced)

            is_unknown = False
            found_requested_language = requested_language and requested_language == language

            if not language and config.treat_und_as_first:
                # only consider first unknown subtitle stream
                if has_unknown and skip_duplicate_unknown:
                    continue
                # treat the unknown stream as the first configured language
                language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
                is_unknown = True
                has_unknown = True
                streams_unknown.append({"stream": stream, "is_unknown": is_unknown, "language": language,
                                        "is_forced": is_forced})

            if not requested_language or found_requested_language:
                streams.append({"stream": stream, "is_unknown": is_unknown, "language": language,
                                "is_forced": is_forced})

                if found_requested_language:
                    break

    # fall back to unknown-language streams when the requested language was
    # not found (unless explicitly skipped)
    if streams_unknown and not found_requested_language and not skip_unknown:
        streams = streams_unknown

    return streams
def parse_results(self, video, film):
    """Turn the film's API subtitle entries into SubsceneSubtitle objects."""
    collected = []
    is_episode = isinstance(video, Episode)
    for api_entry in film.subtitles:
        candidate = SubsceneSubtitle.from_api(api_entry)
        candidate.asked_for_release_group = video.release_group
        if is_episode:
            candidate.asked_for_episode = video.episode

        if self.only_foreign:
            # foreign-only mode flags every found subtitle as forced
            candidate.language = Language.rebuild(candidate.language, forced=True)

        collected.append(candidate)
        logger.debug('Found subtitle %r', candidate)

    return collected
def SubtitleSetMods(mods=None, mode=None, **kwargs):
    """Apply, remove or clear mods for a part's subtitle, then return to the mods menu."""
    # normalize a single mod identifier into a list; None passes through
    if mods and not isinstance(mods, types.ListType):
        mods = [mods]

    language = Language.fromietf(kwargs["language"])

    set_mods_for_part(kwargs["rating_key"], kwargs["part_id"], language,
                      kwargs["item_type"], mods, mode=mode)

    kwargs.pop("randomize")
    return SubtitleModificationsMenu(randomize=timestamp(), **kwargs)
def query(self, languages, video): # query the server keywords = [] if isinstance(video, Movie): if video.title: keywords.append(video.title) if video.year: keywords.append(str(video.year)) elif isinstance(video, Episode): if video.series: keywords.append(video.series) if video.season and video.episode: keywords.append('S%02dE%02d' % (video.season, video.episode)) elif video.episode: keywords.append('E%02d' % video.episode) query = ' '.join(keywords) params = {'token': self.token, 'q': query, 'is_file': 1} logger.debug('Searching subtitles %r', params) res = self.session.get(server_url + '/sub/search', params=params, timeout=10) res.raise_for_status() result = res.json() if result['status'] != 0: logger.error('status error: %r', r) return [] if not result['sub']['subs']: logger.debug('No subtitle found') # parse the subtitles pattern = re.compile(ur'lang(?P<code>\w+)') subtitles = [] for sub in result['sub']['subs']: if 'lang' not in sub: continue for key in sub['lang']['langlist'].keys(): match = pattern.match(key) try: language = Language.fromassrt(match.group('code')) if language in languages: subtitles.append(AssrtSubtitle(language, sub['id'], sub['videoname'], self.session, self.token)) except: pass return subtitles
def apply_default_mods(reapply_current=False, scandir_generic=False):
    """Apply the configured default mods to every stored current subtitle.

    :param reapply_current: when True, re-apply the mods a subtitle already
        has instead of adding missing default mods.
    :param scandir_generic: use the generic scandir fallback (set on retry
        after an OSError from the native implementation).
    """
    storage = get_subtitle_storage()
    subs_applied = 0

    try:
        for fn in storage.get_all_files(scandir_generic=scandir_generic):
            data = storage.load(None, filename=fn)
            if data:
                video_id = data.video_id
                item_type = get_item_kind_from_rating_key(video_id)
                if not item_type:
                    continue

                for part_id, part in data.parts.iteritems():
                    for lang, subs in part.iteritems():
                        current_sub = subs.get("current")
                        if not current_sub:
                            continue
                        sub = subs[current_sub]
                        if not sub.content:
                            continue

                        current_mods = sub.mods or []

                        if not reapply_current:
                            # only add the default mods the sub doesn't have yet
                            add_mods = list(set(config.default_mods).difference(set(current_mods)))
                            if not add_mods:
                                continue
                        else:
                            # re-apply whatever is already set; nothing new to add
                            if not current_mods:
                                continue
                            add_mods = []

                        try:
                            set_mods_for_part(video_id, part_id, Language.fromietf(lang), item_type, add_mods,
                                              mode="add")
                        except:
                            # best-effort: one broken item must not stop the sweep
                            Log.Error("Couldn't set mods for %s:%s: %s", video_id, part_id,
                                      traceback.format_exc())
                            continue

                        subs_applied += 1
    except OSError:
        # native scandir failed; retry the whole sweep once with the
        # generic implementation
        return apply_default_mods(reapply_current=reapply_current, scandir_generic=True)

    storage.destroy()
    Log.Debug("Applied mods to %i items" % subs_applied)
def _get_detail(self):
    """Resolve which file in the remote subtitle package matches self.language.

    The result is cached on self._detail. Fix: the cache attribute was
    misspelled `_defail` on both assignments, so the network lookup was
    repeated on every call.

    :return: the matching file dict, or the first file when nothing matches.
    """
    if self._detail:
        return self._detail
    params = {'token': self.token, 'id': self.id}
    r = self.session.get(server_url + '/sub/detail', params=params, timeout=10)
    r.raise_for_status()

    result = r.json()
    sub = result['sub']['subs'][0]
    files = sub['filelist']

    # first pass: guessit
    for f in files:
        logger.info('File %r', f)
        guess = guessit(f['f'], self.guessit_options)
        logger.info('GuessIt %r', guess)
        langs = set()
        if 'language' in guess:
            langs.update(guess['language'])
        if 'subtitle_language' in guess:
            langs.update(guess['subtitle_language'])
        if self.language in langs:
            self._detail = f
            return f

    # second pass: keyword matching
    codes = language_converters['assrt'].codes
    for f in files:
        langs = set([Language.fromassrt(k) for k in codes if k in f['f']])
        logger.info('%s: %r', f['f'], langs)
        if self.language in langs:
            self._detail = f
            return f

    # fallback: pick up first file if nothing matches
    return files[0]
def ListStoredSubsForItemMenu(**kwargs):
    """List all stored subtitles for a part/language, newest first.

    Selecting an entry activates it via SelectStoredSubForItemMenu.
    """
    oc = SubFolderObjectContainer(title2=unicode(kwargs["title"]), replace_parent=True)
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = Language.fromietf(kwargs["language"])

    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    all_subs = stored_subs.get_all(part_id, language)
    kwargs.pop("randomize")

    # skip the bookkeeping entries ("current" pointer and blacklist)
    for key, subtitle in sorted(filter(lambda x: x[0] not in ("current", "blacklist"), all_subs.items()),
                                key=lambda x: x[1].date_added, reverse=True):
        is_current = key == all_subs["current"]

        summary = _(u"added: %(date_added)s, %(mode)s, Language: %(language)s, Score: %(score)i, Storage: "
                    u"%(storage_type)s",
                    date_added=df(subtitle.date_added),
                    mode=_(subtitle.mode_verbose),
                    language=display_language(language),
                    score=subtitle.score,
                    storage_type=subtitle.storage_type)
        sub_name = subtitle.provider_name
        if sub_name == "embedded":
            # disambiguate embedded streams by their stream id
            sub_name += " (%s)" % subtitle.id

        oc.add(DirectoryObject(
            key=Callback(SelectStoredSubForItemMenu, randomize=timestamp(), sub_key="__".join(key), **kwargs),
            title=_(u"%(current_state)s%(subtitle_name)s, Score: %(score)s",
                    current_state=_("Current: ") if is_current else _("Stored: "),
                    subtitle_name=sub_name,
                    score=subtitle.score),
            summary=summary
        ))

    return oc
def test_search_language_in_list():
    """search_language_in_list must honour script-specific matching rules."""
    hant = Language('zho', None, 'Hant')
    hans = Language('zho', None, 'Hans')
    generic = Language('zho')

    # exact script match
    assert search_language_in_list(hant, [hant])
    assert search_language_in_list(hans, [hans])

    # a scriptless list entry matches any script of the same language
    assert search_language_in_list(hant, [generic])
    assert search_language_in_list(hans, [generic])
    assert search_language_in_list(hant, [Language('eng'), generic])

    # different scripts must not match each other
    assert not search_language_in_list(hans, [Language('zho', None, 'Hant')])

    # but a generic fallback later in the list still matches
    assert search_language_in_list(hans, [Language('zho', None, 'Hant'), generic])
def SubtitleModificationsMenu(**kwargs):
    """Build the subtitle modifications menu for a part's current subtitle.

    Offers the available mods (filtered by exclusivity and language), the
    FPS/shift/color sub-menus and, when mods are applied, management entries.
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs["part_id"]
    language = kwargs["language"]
    lang_instance = Language.fromietf(language)
    current_sub, stored_subs, storage = get_current_sub(rating_key, part_id, language)
    kwargs.pop("randomize")

    current_mods = current_sub.mods or []

    oc = SubFolderObjectContainer(title2=kwargs["title"], replace_parent=True)
    # imported here to avoid a circular import at module load time —
    # NOTE(review): presumed; confirm against module layout
    from interface.item_details import SubtitleOptionsMenu
    oc.add(DirectoryObject(
        key=Callback(SubtitleOptionsMenu, randomize=timestamp(), **kwargs),
        title=_(u"< Back to subtitle options for: %s", kwargs["title"]),
        summary=unicode(kwargs["current_data"]),
        thumb=default_thumb
    ))

    for identifier, mod in mod_registry.mods.iteritems():
        if mod.advanced:
            continue

        # exclusive mods can only be applied once
        if mod.exclusive and identifier in current_mods:
            continue

        # some mods only apply to certain languages
        if mod.languages and lang_instance not in mod.languages:
            continue

        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=identifier, mode="add", randomize=timestamp(), **kwargs),
            title=pad_title(_(mod.description)), summary=_(mod.long_description) or ""
        ))

    # mods with their own parameter sub-menus
    fps_mod = SubtitleModifications.get_mod_class("change_FPS")
    oc.add(DirectoryObject(
        key=Callback(SubtitleFPSModMenu, randomize=timestamp(), **kwargs),
        title=pad_title(_(fps_mod.description)), summary=_(fps_mod.long_description) or ""
    ))

    shift_mod = SubtitleModifications.get_mod_class("shift_offset")
    oc.add(DirectoryObject(
        key=Callback(SubtitleShiftModUnitMenu, randomize=timestamp(), **kwargs),
        title=pad_title(_(shift_mod.description)), summary=_(shift_mod.long_description) or ""
    ))

    color_mod = SubtitleModifications.get_mod_class("color")
    oc.add(DirectoryObject(
        key=Callback(SubtitleColorModMenu, randomize=timestamp(), **kwargs),
        title=pad_title(_(color_mod.description)), summary=_(color_mod.long_description) or ""
    ))

    if current_mods:
        oc.add(DirectoryObject(
            key=Callback(SubtitleSetMods, mods=None, mode="remove_last", randomize=timestamp(), **kwargs),
            title=pad_title(_("Remove last applied mod (%s)", current_mods[-1])),
            summary=_(u"Currently applied mods: %(mod_list)s",
                      mod_list=", ".join(current_mods) if current_mods else _("none"))
        ))
        oc.add(DirectoryObject(
            key=Callback(SubtitleListMods, randomize=timestamp(), **kwargs),
            title=pad_title(_("Manage applied mods")),
            summary=_(u"Currently applied mods: %(mod_list)s", mod_list=", ".join(current_mods))
        ))

    oc.add(DirectoryObject(
        key=Callback(SubtitleReapplyMods, randomize=timestamp(), **kwargs),
        title=pad_title(_("Reapply applied mods")),
        summary=_(u"Currently applied mods: %(mod_list)s",
                  mod_list=", ".join(current_mods) if current_mods else _("none"))
    ))

    oc.add(DirectoryObject(
        key=Callback(SubtitleSetMods, mods=None, mode="clear", randomize=timestamp(), **kwargs),
        title=pad_title(_("Restore original version")),
        summary=_(u"Currently applied mods: %(mod_list)s",
                  mod_list=", ".join(current_mods) if current_mods else _("none"))
    ))

    storage.destroy()

    return oc
class SubzProvider(Provider):
    """Subz Provider."""
    # only Greek subtitles are offered by this site
    languages = {Language(l) for l in ['ell']}
    server_url = 'https://subz.xyz'
    sign_in_url = '/sessions'
    sign_out_url = '/logout'
    search_url = '/typeahead/{}'
    episode_link = '/series/{show_id}/seasons/{season:d}/episodes/{episode:d}'
    movie_link = '/movies/{}'
    subtitle_class = SubzSubtitle

    def __init__(self):
        self.logged_in = False
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

    def terminate(self):
        self.session.close()

    def get_show_ids(self, title, year=None, is_episode=True, country_code=None):
        """Get the best matching show ids for `title`, `year` and `country_code`.

        First search in the result of :meth:`_get_suggestions`.

        :param title: show title.
        :param year: year of the show, if any.
        :type year: int
        :param is_episode: if the search is for episode.
        :type is_episode: bool
        :param country_code: country code of the show, if any.
        :type country_code: str
        :return: list of matched show ids.
        """
        title_sanitized = sanitize(title).lower()
        show_ids = self._get_suggestions(title, is_episode)

        matched_show_ids = []
        for show in show_ids:
            show_id = None
            # attempt with country
            if not show_id and country_code:
                logger.debug('Getting show id with country')
                if sanitize(show['title']) == text_type('{title} {country}').format(
                        title=title_sanitized, country=country_code.lower()):
                    show_id = show['link'].split('/')[-1]

            # attempt with year
            if not show_id and year:
                logger.debug('Getting show id with year')
                if sanitize(show['title']) == text_type('{title} {year}').format(
                        title=title_sanitized, year=year):
                    show_id = show['link'].split('/')[-1]

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show['link'].split('/')[-1] if sanitize(show['title']) == title_sanitized else None

            if show_id:
                matched_show_ids.append(show_id)

        return matched_show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, to_str=text_type,
                               should_cache_fn=lambda value: value)
    def _get_suggestions(self, title, is_episode=True):
        """Search the show or movie id from the `title` and `year`.

        :param str title: title of the show.
        :param is_episode: if the search is for episode.
        :type is_episode: bool
        :return: the show suggestions found (list of dicts).
        """
        # make the search
        logger.info('Searching show ids with %r', title)
        r = self.session.get(self.server_url + text_type(self.search_url).format(title), timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return {}

        # the typeahead endpoint mixes series and movies; keep the requested kind
        show_type = 'series' if is_episode else 'movie'
        parsed_suggestions = [s for s in json.loads(r.text) if 'type' in s and s['type'] == show_type]
        logger.debug('Found suggestions: %r', parsed_suggestions)

        return parsed_suggestions

    def query(self, show_id, series, season, episode, title):
        # get the season list of the show
        logger.info('Getting the subtitle list of show id %s', show_id)
        is_episode = False
        if all((show_id, season, episode)):
            is_episode = True
            page_link = self.server_url + self.episode_link.format(show_id=show_id, season=season, episode=episode)
        elif all((show_id, title)):
            page_link = self.server_url + self.movie_link.format(show_id)
        else:
            return []

        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        year_num = None
        if not is_episode:
            year_num = int(soup.select_one('span.year').text)
        show_title = str(soup.select_one('#summary-wrapper > div.summary h1').contents[0]).strip()

        subtitles = []
        # loop over episode rows
        for subtitle in soup.select('div[id="subtitles"] tr[data-id]'):
            # read common info
            version = subtitle.find('td', {'class': 'name'}).text
            download_link = subtitle.find('a', {'class': 'btn-success'})['href'].strip('\'')

            # read the episode info
            if is_episode:
                episode_numbers = soup.select_one(
                    '#summary-wrapper > div.container.summary span.main-title-sxe').text
                season_num = None
                episode_num = None
                matches = episode_re.match(episode_numbers.strip())
                if matches:
                    season_num = int(matches.group(1))
                    episode_num = int(matches.group(2))

                episode_title = soup.select_one(
                    '#summary-wrapper > div.container.summary span.main-title').text

                subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, show_title, season_num,
                                               episode_num, episode_title, year_num, version, download_link)
            # read the movie info
            else:
                subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, None, None, None, show_title,
                                               year_num, version, download_link)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        # lookup show_id
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        elif isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        # NOTE(review): if `titles` is empty, show_ids stays None and the
        # loop below would raise TypeError — verify this path is unreachable
        show_ids = None
        for title in titles:
            show_ids = self.get_show_ids(title, video.year, isinstance(video, Episode))
            if show_ids is not None and len(show_ids) > 0:
                break

        subtitles = []
        # query for subtitles with the show_id
        for show_id in show_ids:
            if isinstance(video, Episode):
                subtitles += [s for s in self.query(show_id, video.series, video.season, video.episode, video.title)
                              if s.language in languages and s.season == video.season and s.episode == video.episode]
            elif isinstance(video, Movie):
                subtitles += [s for s in self.query(show_id, None, None, None, video.title)
                              if s.language in languages and s.year == video.year]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubzSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug('Unable to download subtitle. No data returned from provider')
                return

            archive = _get_archive(r.content)

            subtitle_content = _get_subtitle_from_archive(archive)
            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
            else:
                logger.debug('Could not extract subtitle from %r', archive)
class WizdomProvider(Provider):
    """Wizdom Provider."""
    # Hebrew-only subtitle site
    languages = {Language(l) for l in ['heb']}
    server_url = 'wizdom.xyz'
    _tmdb_api_key = 'a51ee051bcd762543373903de296e0a3'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

    def terminate(self):
        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_imdb_id(self, title, year, is_movie):
        """Search the IMDB ID for the given `title` and `year`.

        :param str title: title to search for.
        :param int year: year to search for (or 0 if not relevant).
        :param bool is_movie: If True, IMDB ID will be searched for in TMDB instead of Wizdom.
        :return: the IMDB ID for the given title and year (or None if not found).
        :rtype: str
        """
        # make the search
        logger.info('Searching IMDB ID for %r%r', title, '' if not year else ' ({})'.format(year))
        category = 'movie' if is_movie else 'tv'
        title = title.replace('\'', '')
        # get TMDB ID first
        r = self.session.get('http://api.tmdb.org/3/search/{}?api_key={}&query={}{}&language=en'.format(
            category, self._tmdb_api_key, title, '' if not year else '&year={}'.format(year)))
        r.raise_for_status()
        tmdb_results = r.json().get('results')
        if tmdb_results:
            tmdb_id = tmdb_results[0].get('id')
            if tmdb_id:
                # get actual IMDB ID from TMDB
                r = self.session.get('http://api.tmdb.org/3/{}/{}{}?api_key={}&language=en'.format(
                    category, tmdb_id, '' if is_movie else '/external_ids', self._tmdb_api_key))
                r.raise_for_status()
                return str(r.json().get('imdb_id', '')) or None
        return None

    def query(self, title, season=None, episode=None, year=None, filename=None, imdb_id=None):
        # search for the IMDB ID if needed.
        is_movie = not (season and episode)
        imdb_id = imdb_id or self._search_imdb_id(title, year, is_movie)
        if not imdb_id:
            return {}

        # search
        logger.debug('Using IMDB ID %r', imdb_id)
        url = 'http://json.{}/{}.json'.format(self.server_url, imdb_id)
        page_link = 'http://{}/#/{}/{}'.format(self.server_url, 'movies' if is_movie else 'series', imdb_id)

        # get the list of subtitles
        logger.debug('Getting the list of subtitles')
        r = self.session.get(url)
        r.raise_for_status()
        try:
            results = r.json()
        except ValueError:
            return {}

        # filter irrelevant results
        if not is_movie:
            results = results.get('subs', [])
            # there are two formats of result jsons - seasons list and seasons dict
            if isinstance(results, list):
                # NOTE(review): `season` is 1-based but used directly as a
                # list index; len(results) == season would raise IndexError —
                # TODO confirm the payload's indexing convention
                results = results[season] if len(results) >= season else {}
            else:
                results = results.get(str(season), {})
            results = results.get(str(episode), [])
        else:
            results = results.get('subs', [])

        # loop over results
        subtitles = {}
        for result in results:
            language = Language('heb')
            hearing_impaired = False
            subtitle_id = result['id']
            release = result['version']

            # otherwise create it
            # NOTE(review): `title` is passed for both the series and title
            # slots — verify against WizdomSubtitle's signature
            subtitle = WizdomSubtitle(language, hearing_impaired, page_link, title, season, episode, title, imdb_id,
                                      subtitle_id, release)
            logger.debug('Found subtitle %r', subtitle)
            subtitles[subtitle_id] = subtitle

        return subtitles.values()

    def list_subtitles(self, video, languages):
        season = episode = None
        year = video.year
        filename = video.name
        imdb_id = video.imdb_id

        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
            season = video.season
            episode = video.episode
            imdb_id = video.series_imdb_id
        else:
            titles = [video.title] + video.alternative_titles

        # try each known title until one yields results
        for title in titles:
            subtitles = [s for s in self.query(title, season, episode, year, filename, imdb_id)
                         if s.language in languages]
            if subtitles:
                return subtitles

        return []

    def download_subtitle(self, subtitle):
        # download
        url = 'http://zip.{}/{}.zip'.format(self.server_url, subtitle.subtitle_id)
        r = self.session.get(url, headers={'Referer': subtitle.page_link}, timeout=10)
        r.raise_for_status()

        if len(r.content) == 0:
            return

        # open the zip
        with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
            # remove some filenames from the namelist
            namelist = [n for n in zf.namelist() if os.path.splitext(n)[1] in ['.srt', '.sub']]
            if len(namelist) > 1:
                raise ProviderError('More than one file to unzip')

            subtitle.content = fix_line_ending(zf.read(namelist[0]))
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    """Provider for titlovi.com (ex-Yugoslav languages).

    Scrapes the paginated HTML search results and downloads subtitles as
    rar/zip archives (or bare files).  Handles the site's Cloudflare
    captcha challenge by delegating to an anti-captcha "pitcher".
    """

    subtitle_class = TitloviSubtitle
    # Languages supported by the titlovi converter, plus Serbian Latin
    # which the converter does not emit on its own.
    languages = {
        Language.fromtitlovi(l)
        for l in language_converters['titlovi'].codes
    } | {Language.fromietf('sr-Latn')}
    server_url = 'https://titlovi.com'
    search_url = server_url + '/titlovi/?'
    download_url = server_url + '/download/?type=1&mediaid='

    def initialize(self):
        """Create the HTTP session with a randomized User-Agent and restore
        any previously stored Cloudflare verification cookies."""
        self.session = Session()
        logger.debug("Using random user agents")
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        logger.debug('User-Agent set to %s', self.session.headers['User-Agent'])
        self.session.headers['Referer'] = self.server_url
        logger.debug('Referer set to %s', self.session.headers['Referer'])
        load_verification("titlovi", self.session)

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, languages, title, season=None, episode=None, year=None, video=None):
        """Search titlovi.com and return a list of TitloviSubtitle objects.

        Walks all result pages (10 items per page).  On a 403 containing a
        captcha widget, solves it once via the configured pitcher, stores the
        verification cookies, and lets the while-loop re-issue the search.
        """
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
            # NOTE(review): on Python 3 filter() returns a lazy iterator; it is
            # consumed exactly once below, but the log line would then print a
            # filter object — presumably this code targets Python 2. Confirm.
            used_languages = filter(lambda l: l != Language.fromietf('sr-Latn'), used_languages)
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()
            except RequestException as e:
                captcha_passed = False
                # NOTE(review): e.response may be None for connection-level
                # errors, and .content is bytes on Python 3 (str-in-bytes
                # would raise TypeError there) — confirm Python 2 runtime.
                if e.response.status_code == 403 and "data-sitekey" in e.response.content:
                    logger.info('titlovi: Solving captcha. This might take a couple of minutes, but should only '
                                'happen once every so often')

                    site_key = re.search(r'data-sitekey="(.+?)"', e.response.content).group(1)
                    challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', e.response.content).group(1)
                    challenge_ray = re.search(r'data-ray="(.+?)"', e.response.content).group(1)
                    if not all([site_key, challenge_s, challenge_ray]):
                        raise Exception("titlovi: Captcha site-key not found!")

                    pitcher = pitchers.get_pitcher()("titlovi", e.request.url, site_key,
                                                     user_agent=self.session.headers["User-Agent"],
                                                     cookies=self.session.cookies.get_dict(),
                                                     is_invisible=True)

                    result = pitcher.throw()
                    if not result:
                        raise Exception("titlovi: Couldn't solve captcha!")

                    s_params = {
                        "s": challenge_s,
                        "id": challenge_ray,
                        "g-recaptcha-response": result,
                    }
                    # submit the captcha answer, then verify the search works
                    r = self.session.get(self.server_url + "/cdn-cgi/l/chk_captcha", params=s_params, timeout=10,
                                         allow_redirects=False)
                    r.raise_for_status()
                    r = self.session.get(self.search_url, params=params, timeout=10)
                    r.raise_for_status()
                    store_verification("titlovi", self.session)
                    captcha_passed = True

                if not captcha_passed:
                    logger.exception('RequestException %s', e)
                    break
                # captcha passed: this `r` is discarded; the loop re-fetches
                # the same page on the next iteration.
            else:
                try:
                    soup = BeautifulSoup(r.content, 'lxml')

                    # number of results
                    result_count = int(soup.select_one('.results_count b').string)
                except:
                    result_count = None

                # exit if no results
                if not result_count:
                    if not subtitles:
                        logger.debug('No subtitles found')
                    else:
                        logger.debug("No more subtitles found")
                    break

                # number of pages with results
                pages = int(math.ceil(result_count / float(items_per_page)))

                # get current page
                if 'pg' in params:
                    current_page = int(params['pg'])

                try:
                    sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                    for sub in sublist:
                        # subtitle id
                        sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                        # get download link
                        download_link = self.download_url + sid
                        # title and alternate title
                        match = title_re.search(sub.a.string)
                        if match:
                            _title = match.group('title')
                            alt_title = match.group('altitle')
                        else:
                            continue

                        # page link
                        page_link = self.server_url + sub.a.attrs['href']
                        # subtitle language
                        # NOTE(review): if lang_re does not match, `lang` (and
                        # likewise `r_year`/`fps` below) keeps the value from a
                        # previous loop iteration or is unbound on the first
                        # one — looks like a latent NameError; confirm upstream.
                        match = lang_re.search(sub.select_one('.lang').attrs['src'])
                        if match:
                            try:
                                # decode language
                                lang = Language.fromtitlovi(match.group('lang') + match.group('script'))
                            except ValueError:
                                continue

                        # relase year or series start year
                        match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                        if match:
                            r_year = int(match.group('year'))
                        # fps
                        match = fps_re.search(sub.select_one('.fps').string)
                        if match:
                            fps = match.group('fps')

                        # releases
                        releases = str(sub.select_one('.fps').parent.contents[0].string)

                        # handle movies and series separately
                        if is_episode:
                            # season and episode info
                            sxe = sub.select_one('.s0xe0y').string
                            r_season = None
                            r_episode = None
                            if sxe:
                                match = season_re.search(sxe)
                                if match:
                                    r_season = int(match.group('season'))
                                match = episode_re.search(sxe)
                                if match:
                                    r_episode = int(match.group('episode'))

                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, season=r_season, episode=r_episode,
                                                           year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group,
                                                           asked_for_episode=episode)
                        else:
                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group)
                        logger.debug('Found subtitle %r', subtitle)

                        # prime our matches so we can use the values later
                        subtitle.get_matches(video)

                        # add found subtitles
                        subtitles.append(subtitle)
                finally:
                    # free the parse tree even if parsing a row raised
                    soup.decompose()

                # stop on last page
                if current_page >= pages:
                    break

                # increment current page
                params['pg'] = current_page + 1
                logger.debug('Getting page %d', params['pg'])

        return subtitles

    def list_subtitles(self, video, languages):
        """List subtitles for an Episode or Movie after naming cleanup."""
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode,
                                      year=video.year, video=video)]

    def download_subtitle(self, subtitle):
        """Download the subtitle payload; unwrap rar/zip archives, or accept a
        bare subtitle file if the response validates as one."""
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            # not an archive: maybe the server sent the subtitle directly
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        """Pick the Latin or Cyrillic variant out of a mixed Serbian archive,
        keyed on '.cyr'/'.cir'/'.lat' markers in the member file names."""
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            # anything not marked Cyrillic counts as Latin
            if not ('.cyr' in sub_name or '.cir' in sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in sub_name or '.cir' in sub_name) and not '.lat' in sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        # NOTE(review): if neither list matched, sub_to_extract is None and
        # archive.read(None) will raise — presumably unreachable given the
        # caller's language guard; confirm.
        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
class SubdivxSubtitlesProvider(Provider):
    """Provider for subdivx.com (Spanish subtitles).

    Searches the site's paginated HTML listing, then downloads each subtitle
    as a rar/zip archive and picks the best member file by guessit scoring.
    """

    provider_name = 'subdivx'
    hash_verifiable = False
    languages = {Language.fromalpha2(lang) for lang in ['es']}
    subtitle_class = SubdivxSubtitle

    server_url = 'https://www.subdivx.com/'
    # seconds to sleep between result pages to avoid throttling
    multi_result_throttle = 2
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        """Create the HTTP session used for all requests."""
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, video, languages):
        """Search subdivx and return all parsed SubdivxSubtitle results,
        walking result pages until a short (last) page is seen."""
        if isinstance(video, Episode):
            query = "{} S{:02d}E{:02d}".format(video.series, video.season, video.episode)
        else:
            # Subdvix has problems searching foreign movies if the year is
            # appended. For example: if we search "Memories of Murder 2003",
            # Subdix won't return any results; but if we search "Memories of
            # Murder", it will. That's because in Subdvix foreign titles have
            # the year after the original title ("Salinui chueok (2003) aka
            # Memories of Murder").
            # A proper solution would be filtering results with the year in
            # _parse_subtitles_page.
            query = video.title

        params = {
            'q': query,  # search string
            'accion': 5,  # action search
            'oxdown': 1,  # order by downloads descending
            'pg': 1  # page 1
        }

        logger.debug('Searching subtitles %r', query)
        subtitles = []
        language = self.language_list[0]
        search_link = self.server_url + 'index.php'
        while True:
            response = self.session.get(search_link, params=params, timeout=20)
            self._check_response(response)

            try:
                page_subtitles = self._parse_subtitles_page(video, response, language)
            except Exception as e:
                logger.error('Error parsing subtitles list: ' + str(e))
                break

            subtitles += page_subtitles

            # NOTE(review): assumes the site serves exactly 100 results per
            # full page — confirm against the live site.
            if len(page_subtitles) < 100:
                break  # this is the last page

            params['pg'] += 1  # search next page
            time.sleep(self.multi_result_throttle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Delegate straight to query (language filtering happens upstream)."""
        return self.query(video, languages)

    def download_subtitle(self, subtitle):
        """Resolve the download link, fetch the archive, and store the
        extracted subtitle text on the subtitle object."""
        if isinstance(subtitle, SubdivxSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)

            # get download link
            download_link = self._get_download_link(subtitle)

            # download zip / rar file with the subtitle
            response = self.session.get(self.server_url + download_link,
                                        headers={'Referer': subtitle.page_link},
                                        timeout=30)
            self._check_response(response)

            # open the compressed archive
            archive = self._get_archive(response.content)

            # extract the subtitle
            subtitle_content = self._get_subtitle_from_archive(archive, subtitle)
            subtitle.content = fix_line_ending(subtitle_content)

    def _parse_subtitles_page(self, video, response, language):
        """Parse one search-results page into SubdivxSubtitle objects.

        Relies on title divs ('menu_detalle_buscador') and body divs
        ('buscador_detalle') appearing in matching order and count.
        """
        subtitles = []

        page_soup = ParserBeautifulSoup(response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
        title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
        body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})

        # NOTE(review): `subtitle` is first the loop index, then rebound to
        # the subtitle object below — works, but confusing to readers.
        for subtitle in range(0, len(title_soups)):
            title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]

            # title
            title = title_soup.find("a").text.replace("Subtitulos de ", "")

            # filter by year
            if video.year and str(video.year) not in title:
                continue

            page_link = title_soup.find("a")["href"]

            # description
            description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text
            # normalize for later keyword matching
            description = description.replace(",", " ").lower()

            # uploader
            uploader = body_soup.find("a", {'class': 'link1'}).text

            subtitle = self.subtitle_class(language, video, page_link, title, description, uploader)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _get_download_link(self, subtitle):
        """Scrape the subtitle's detail page for its download URL.

        Tries the 'detalle_link' anchors first (relative 'bajar...' links),
        then falls back to absolute 'bajar.php' links in 'link1' anchors.
        """
        response = self.session.get(subtitle.page_link, timeout=20)
        self._check_response(response)
        try:
            page_soup = ParserBeautifulSoup(response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
            links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
            for link_soup in links_soup:
                if link_soup['href'].startswith('bajar'):
                    return self.server_url + link_soup['href']
            links_soup = page_soup.find_all("a", {'class': 'link1'})
            for link_soup in links_soup:
                if "bajar.php" in link_soup['href']:
                    return link_soup['href']
        except Exception as e:
            raise APIThrottled('Error parsing download link: ' + str(e))

        raise APIThrottled('Download link not found')

    @staticmethod
    def _check_response(response):
        """Raise ServiceUnavailable for any non-200 response."""
        if response.status_code != 200:
            raise ServiceUnavailable('Bad status code: ' + str(response.status_code))

    @staticmethod
    def _get_archive(content):
        """Wrap raw response bytes in a RarFile/ZipFile, or raise."""
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise APIThrottled('Unsupported compressed format')

        return archive

    @staticmethod
    def _get_subtitle_from_archive(archive, subtitle):
        """Pick and return the best subtitle member from the archive.

        A single candidate wins outright; otherwise each member's filename is
        guessit-parsed, season/episode-filtered (for episodes), and scored
        against the video — the highest positive score wins.
        Raises APIThrottled when nothing usable is found.
        """
        _valid_names = []
        for name in archive.namelist():
            # discard hidden files
            # discard non-subtitle files
            if not os.path.split(name)[-1].startswith('.') and name.lower().endswith(SUBTITLE_EXTENSIONS):
                _valid_names.append(name)

        # archive with only 1 subtitle
        if len(_valid_names) == 1:
            logger.debug("returning from archive: {} (single subtitle file)".format(_valid_names[0]))
            return archive.read(_valid_names[0])

        # in archives with more than 1 subtitle (season pack) we try to guess the best subtitle file
        _scores = get_scores(subtitle.video)
        _max_score = 0
        _max_name = ""
        for name in _valid_names:
            _guess = guessit(name)
            # sentinel values so missing keys never equal a real season/episode
            if 'season' not in _guess:
                _guess['season'] = -1
            if 'episode' not in _guess:
                _guess['episode'] = -1

            if isinstance(subtitle.video, Episode):
                logger.debug("guessing %s" % name)
                logger.debug("subtitle S{}E{} video S{}E{}".format(
                    _guess['season'], _guess['episode'], subtitle.video.season, subtitle.video.episode))

                if subtitle.video.episode != _guess['episode'] or subtitle.video.season != _guess['season']:
                    logger.debug('subtitle does not match video, skipping')
                    continue

            matches = set()
            matches |= guess_matches(subtitle.video, _guess)
            _score = sum((_scores.get(match, 0) for match in matches))
            logger.debug('srt matches: %s, score %d' % (matches, _score))
            if _score > _max_score:
                _max_score = _score
                _max_name = name
                logger.debug("new max: {} {}".format(name, _score))

        if _max_score > 0:
            logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score))
            return archive.read(_max_name)

        raise APIThrottled('Can not find the subtitle in the compressed file')
def list_subtitles(self, rating_key, item_type, part_id, language, skip_wrong_fps=True, metadata=None,
                   scanned_parts=None, air_date_cutoff=None):
    """List, score, and filter available subtitles for one Plex media part.

    Fetches Plex metadata and scans the video unless pre-computed values are
    supplied, queries all configured providers for `language`, computes a
    match score per subtitle, drops wrong-series/episode and below-min-score
    candidates, and returns the surviving subtitles sorted by score
    (descending).  Returns None on any early bail-out.

    NOTE(review): `scanned_parts.items()[0]` indexes the list returned by
    dict.items() — Python 2 only; on Python 3 this would need list(...).
    """
    if not metadata:
        metadata = get_plex_metadata(rating_key, part_id, item_type)

    if not metadata:
        return

    providers = config.get_providers(media_type="series" if item_type == "episode" else "movies")
    if not scanned_parts:
        scanned_parts = scan_videos([metadata], ignore_all=True, providers=providers)
        if not scanned_parts:
            Log.Error(u"%s: Couldn't list available subtitles for %s", self.name, rating_key)
            return

    # exactly one (video, part) pair is expected here
    video, plex_part = scanned_parts.items()[0]
    refine_video(video, refiner_settings=config.refiner_settings)

    # optionally skip items that aired too long ago
    if air_date_cutoff is not None and metadata["item"].year and \
            metadata["item"].year + air_date_cutoff < datetime.date.today().year:
        Log.Debug("Skipping searching for subtitles: %s, it aired over %s year(s) ago.", rating_key, air_date_cutoff)
        return

    config.init_subliminal_patches()

    provider_settings = config.provider_settings
    if not skip_wrong_fps:
        provider_settings["opensubtitles"]["skip_wrong_fps"] = False

    # minimum acceptable score: stricter for episodes, relaxed for specials
    if item_type == "episode":
        min_score = 240
        if video.is_special:
            min_score = 180
    else:
        min_score = 60

    languages = {Language.fromietf(language)}

    available_subs = list_all_subtitles([video], languages,
                                        providers=providers,
                                        provider_configs=provider_settings,
                                        pool_class=config.provider_pool,
                                        throttle_callback=config.provider_throttle,
                                        language_hook=language_hook)

    use_hearing_impaired = Prefs['subtitles.search.hearingImpaired'] in ("prefer", "force HI")

    # sort subtitles by score
    unsorted_subtitles = []
    for s in available_subs[video]:
        Log.Debug(u"%s: Starting score computation for %s", self.name, s)
        try:
            matches = s.get_matches(video)
        except AttributeError:
            Log.Error(u"%s: Match computation failed for %s: %s", self.name, s, traceback.format_exc())
            continue

        # skip wrong season/episodes
        if item_type == "episode":
            # a hash match from a non-hash-verifiable provider can't prove
            # the series/season/episode, so don't require those matches then
            can_verify_series = True
            if not s.hash_verifiable and "hash" in matches:
                can_verify_series = False

            if can_verify_series and not {"series", "season", "episode"}.issubset(matches):
                Log.Debug(u"%s: Skipping %s, because it doesn't match our series/episode", self.name, s)
                continue

        unsorted_subtitles.append((s, compute_score(matches, s, video, hearing_impaired=use_hearing_impaired),
                                   matches))
    scored_subtitles = sorted(unsorted_subtitles, key=operator.itemgetter(1), reverse=True)

    subtitles = []
    for subtitle, score, matches in scored_subtitles:
        # check score
        if score < min_score:
            Log.Info(u'%s: Score %d is below min_score (%d)', self.name, score, min_score)
            continue
        # annotate the subtitle for downstream consumers
        subtitle.score = score
        subtitle.matches = matches
        subtitle.part_id = part_id
        subtitle.item_type = item_type
        subtitles.append(subtitle)
    return subtitles
class YavkaNetProvider(Provider):
    """YavkaNet Provider.

    Scrapes yavka.net search results, follows each result's detail page to
    obtain a POST token, downloads the rar/zip archive, and caches archive
    responses keyed by the SHA-1 of the download link.
    """

    languages = {Language(l) for l in [
        'bul', 'eng', 'rus', 'spa', 'ita'
    ]}
    video_types = (Episode, Movie)

    def initialize(self):
        """Create the HTTP session with browser-like headers and a random
        User-Agent."""
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
        self.session.headers["Accept-Encoding"] = "gzip, deflate, br"
        self.session.headers["DNT"] = "1"
        self.session.headers["Connection"] = "keep-alive"
        self.session.headers["Upgrade-Insecure-Requests"] = "1"
        self.session.headers["Cache-Control"] = "max-age=0"

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def query(self, language, video):
        """Search yavka.net for `video` in `language` and return subtitles.

        Only the first 25 result rows are examined; each row's detail page is
        fetched (with a 1s delay to avoid throttling) to extract the archive
        download token.
        """
        subtitles = []
        isEpisode = isinstance(video, Episode)
        params = {
            's': '',
            'y': '',
            'u': '',
            'l': 'BG',  # default language filter: Bulgarian
            'i': ''
        }
        if isEpisode:
            params['s'] = "%s s%02de%02d" % (sanitize(fix_tv_naming(video.series), {'\''}), video.season,
                                             video.episode)
        else:
            params['y'] = video.year
            params['s'] = sanitize(fix_movie_naming(video.title), {'\''})

        # map requested language onto the site's filter code
        if language == 'en' or language == 'eng':
            params['l'] = 'EN'
        elif language == 'ru' or language == 'rus':
            params['l'] = 'RU'
        elif language == 'es' or language == 'spa':
            params['l'] = 'ES'
        elif language == 'it' or language == 'ita':
            params['l'] = 'IT'

        logger.info('Searching subtitle %r', params)
        response = self.retry(self.session.get('https://yavka.net/subtitles.php', params=params,
                                               allow_redirects=False, timeout=10,
                                               headers={'Referer': 'https://yavka.net/'}))
        if not response:
            return subtitles
        response.raise_for_status()

        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        rows = soup.findAll('tr')

        # Search on first 25 rows only
        for row in rows[:25]:
            element = row.select_one('a.balon, a.selector')
            if element:
                link = element.get('href')
                # strip the markup wrapper from the tooltip to get the notes text
                notes = re.sub(r'(?s)<p.*><img [A-z0-9=\'/\. :;#]*>(.*)</p>', r"\1", element.get('content'))
                title = element.get_text()
                try:
                    year = int(element.find_next_sibling('span').text.strip('()'))
                except:
                    year = None
                try:
                    fps = float(row.find('span', {'title': 'Кадри в секунда'}).text.strip())
                except:
                    fps = None
                element = row.find('a', {'class': 'click'})
                uploader = element.get_text() if element else None
                logger.info('Found subtitle link %r', link)
                # slow down to prevent being throttled
                time.sleep(1)
                # fetch the detail page to read the hidden download token
                response = self.retry(self.session.get('https://yavka.net' + link))
                if not response:
                    continue
                soup = BeautifulSoup(response.content, 'lxml')
                subs_id = soup.find("input", {"name": "id"})
                if subs_id:
                    subs_id = subs_id['value']
                else:
                    continue
                sub = self.download_archive_and_add_subtitle_files('https://yavka.net' + link + '/', language,
                                                                   video, fps, subs_id)
                for s in sub:
                    s.title = title
                    s.notes = notes
                    s.year = year
                    s.uploader = uploader
                    s.single_file = True if len(sub) == 1 else False
                subtitles = subtitles + sub
        return subtitles

    def list_subtitles(self, video, languages):
        """Run one query per requested language and flatten the results."""
        return [s for lang in languages for s in self.query(lang, video)]

    def download_subtitle(self, subtitle):
        """Re-download the archive (cache permitting) and pick out the file
        matching this subtitle's filename, unless content is already set."""
        if subtitle.content:
            pass
        else:
            seeking_subtitle_file = subtitle.filename
            arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language,
                                                                subtitle.video, subtitle.fps, subtitle.subs_id)
            for s in arch:
                if s.filename == seeking_subtitle_file:
                    subtitle.content = s.content

    @staticmethod
    def process_archive_subtitle_files(archive_stream, language, video, link, fps, subs_id):
        """Build a YavkaNetSubtitle per .srt/.sub member of an open archive."""
        subtitles = []
        media_type = 'episode' if isinstance(video, Episode) else 'movie'
        for file_name in archive_stream.namelist():
            if file_name.lower().endswith(('.srt', '.sub')):
                logger.info('Found subtitle file %r', file_name)
                subtitle = YavkaNetSubtitle(language, file_name, media_type, video, link, fps, subs_id)
                subtitle.content = fix_line_ending(archive_stream.read(file_name))
                subtitles.append(subtitle)
        return subtitles

    def download_archive_and_add_subtitle_files(self, link, language, video, fps, subs_id):
        """POST for the subtitle archive (or reuse the cached response) and
        return the subtitles extracted from it.

        On an unrecognized archive the cache entry is evicted and [] returned.
        """
        logger.info('Downloading subtitle %r', link)
        cache_key = sha1(link.encode("utf-8")).digest()
        request = region.get(cache_key)
        if request is NO_VALUE:
            time.sleep(1)
            request = self.retry(self.session.post(link, data={
                'id': subs_id,
                'lng': language.basename.upper()
            }, headers={
                'referer': link
            }, allow_redirects=False))
            if not request:
                return []
            request.raise_for_status()
            # cache the whole Response object for later re-extraction
            region.set(cache_key, request)
        else:
            logger.info('Cache file: %s', codecs.encode(cache_key, 'hex_codec').decode('utf-8'))

        try:
            archive_stream = io.BytesIO(request.content)
            if is_rarfile(archive_stream):
                return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps,
                                                           subs_id)
            elif is_zipfile(archive_stream):
                return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps,
                                                           subs_id)
        except:
            pass

        logger.error('Ignore unsupported archive %r', request.headers)
        region.delete(cache_key)
        return []

    @staticmethod
    def retry(func, limit=5, delay=5):
        """Return `func` if it has a non-empty body, else None after retries.

        NOTE(review): despite the name, `func` is an *already-evaluated*
        Response (callers pass `self.session.get(...)`), so every iteration
        re-checks the same object and no new request is ever issued — the
        loop only delays up to limit*delay seconds before giving up.
        Confirm whether a callable was intended here.
        """
        for i in range(limit):
            response = func
            if response.content:
                return response
            else:
                logging.debug('Slowing down because we are getting throttled. Iteration {0} of {1}.Waiting {2} '
                              'seconds to retry...'.format(i + 1, limit, delay))
                time.sleep(delay)
def query(self, show_id, series, season, year=None, country=None):
    """Return all subtitles for one season of the show identified by `show_id`.

    Fetches the show's season index to map `season` onto the provider's
    internal season id, then fetches that season's subtitle listing and
    builds one subtitle object per (released entry x covered episode).
    Returns an empty list when the provider has no data for the request.
    """
    # Step 1: fetch the season index for this show.
    logger.info('Getting the season list of show id %d', show_id)
    resp = self.session.get(self.server_url + self.series_url.format(show_id), timeout=10)
    resp.raise_for_status()

    if not resp.content:
        logger.debug('No data returned from provider')
        return []

    index_doc = ParserBeautifulSoup(resp.content, ['lxml', 'html.parser'])

    # Prefer the provider's own title for the show over the caller's.
    series = index_doc.find('name').text

    # Step 2: resolve the requested season number to the internal season id.
    season_id = None
    for row in index_doc.findAll('series_group'):
        try:
            if int(row['ssnnum']) == season:
                season_id = int(row['ssnid'])
                break
        except (ValueError, TypeError):
            # malformed/missing attributes on this row: keep scanning
            continue

    if season_id is None:
        logger.debug('Season not found in provider')
        return []

    # Step 3: fetch the subtitle listing for that season.
    logger.info('Getting the subtitle list of season %d', season)
    resp = self.session.get(self.server_url + self.season_url.format(show_id=show_id, season=season_id),
                            timeout=10)
    resp.raise_for_status()

    if not resp.content:
        logger.debug('No data returned from provider')
        return []

    listing_doc = ParserBeautifulSoup(resp.content, ['lxml', 'html.parser'])

    subtitles = []
    # Step 4: walk the episode groups and collect released subtitles.
    for group in listing_doc.findAll('subg'):
        episode_info = group.find('etitle')
        if episode_info is None:
            continue

        # An entry may cover a single episode or a range like "3-5".
        episodes = []
        number_match = episode_re.match(episode_info['number'])
        if number_match:
            episodes = [int(num) for num in (number_match.group(1), number_match.group(3)) if num]

        subtitle_info = group.find('sgt')
        if subtitle_info is None:
            continue

        season = int(subtitle_info['ssnnum'])
        episode_id = int(subtitle_info['epsid'])

        for subs_tag in group.findAll('sr'):
            # filter out unreleased subtitles
            if subs_tag['published_on'] == '':
                continue

            page_link = self.server_url + self.page_link.format(show_id=show_id, season_id=season_id,
                                                                season=season, episode=episode_id)
            title = episode_info['title']
            version = subs_tag.fmt.text + ' ' + subs_tag.team.text
            download_link = self.server_url + self.download_link.format(int(subs_tag['rlsid']))

            # one subtitle object per episode the entry covers
            for episode in episodes:
                subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, series, season, episode,
                                               year, title, version, download_link)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

    return subtitles
class XSubsProvider(Provider):
    """XSubs Provider.

    Greek-subtitles provider for xsubs.tv.  Optionally authenticates with a
    Django-style CSRF login, resolves show titles to numeric show ids via a
    cached listing, and queries per-season XML documents for subtitles.
    """

    languages = {Language(l) for l in ['ell']}
    video_types = (Episode, )
    server_url = 'http://xsubs.tv'
    sign_in_url = '/xforum/account/signin/'
    sign_out_url = '/xforum/account/signout/'
    all_series_url = '/series/all.xml'
    series_url = '/series/{:d}/main.xml'
    season_url = '/series/{show_id:d}/{season:d}.xml'
    page_link = '/ice/xsw.xml?srsid={show_id:d}#{season_id:d};{season:d};{episode:d}'
    download_link = '/xthru/getsub/{:d}'
    subtitle_class = XSubsSubtitle

    def __init__(self, username=None, password=None):
        # credentials must be given together or not at all
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.session = None

    def initialize(self):
        """Create the HTTP session and, if credentials are set, log in using
        the CSRF token from the sign-in page's cookie."""
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

        # login
        if self.username and self.password:
            logger.info('Logging in')
            # initial GET primes the csrftoken cookie
            self.session.get(self.server_url + self.sign_in_url)
            data = {'username': self.username,
                    'password': self.password,
                    'csrfmiddlewaretoken': self.session.cookies['csrftoken']}
            r = self.session.post(self.server_url + self.sign_in_url, data, allow_redirects=False, timeout=10)

            # a successful login redirects (302); anything else is a failure
            if r.status_code != 302:
                raise AuthenticationError(self.username)

            logger.debug('Logged in')
            self.logged_in = True

    def terminate(self):
        """Log out (when logged in) and close the HTTP session."""
        # logout
        if self.logged_in:
            logger.info('Logging out')
            r = self.session.get(self.server_url + self.sign_out_url, timeout=10)
            r.raise_for_status()
            logger.debug('Logged out')
            self.logged_in = False

        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
    def _get_show_ids(self):
        """Fetch and cache the mapping of sanitized show title -> show id.

        Only the series category (Greek label) is scanned.  An empty/falsy
        result is not cached (see should_cache_fn).
        """
        # get the shows page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + self.all_series_url, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        for show_category in soup.findAll('seriesl'):
            if show_category.attrs['category'] == u'Σειρές':
                for show in show_category.findAll('series'):
                    show_ids[sanitize(show.text)] = int(show['srsid'])
                break
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids

    def get_show_id(self, series_names, year=None):
        """Resolve a list of candidate series names to a show id, or None.

        For each sanitized name (plus any alternative form), tries in order:
        '<name> <year>', '<name> [<year>]', then the bare name.
        """
        series_sanitized_names = []
        for name in series_names:
            sanitized_name = sanitize(name)
            series_sanitized_names.append(sanitized_name)
            alternative_name = _get_alternative_name(sanitized_name)
            if alternative_name:
                series_sanitized_names.append(alternative_name)

        show_ids = self._get_show_ids()
        show_id = None

        for series_sanitized in series_sanitized_names:
            # attempt with year
            if year:
                logger.debug('Getting show id with year')
                show_id = show_ids.get('{series} {year:d}'.format(series=series_sanitized, year=year))

            # attempt with year in brackets
            if not show_id and year:
                logger.debug('Getting show id with year in brackets')
                show_id = show_ids.get('{series} [{year:d}]'.format(series=series_sanitized, year=year))

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show_ids.get(series_sanitized)

            if show_id:
                break

        return int(show_id) if show_id else None

    def query(self, show_id, series, season, year=None, country=None):
        """Return all subtitles for one season of the given show.

        Maps `season` onto the provider's internal season id via the show's
        season index, then parses the season's subtitle listing into one
        subtitle per released entry per covered episode.
        """
        # get the season list of the show
        logger.info('Getting the season list of show id %d', show_id)
        r = self.session.get(self.server_url + self.series_url.format(show_id), timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # the provider's name for the show replaces the caller's
        series = soup.find('name').text

        # loop over season rows
        seasons = soup.findAll('series_group')
        season_id = None

        for season_row in seasons:
            try:
                parsed_season = int(season_row['ssnnum'])
                if parsed_season == season:
                    season_id = int(season_row['ssnid'])
                    break
            except (ValueError, TypeError):
                continue

        if season_id is None:
            logger.debug('Season not found in provider')
            return []

        # get the subtitle list of the season
        logger.info('Getting the subtitle list of season %d', season)
        r = self.session.get(self.server_url + self.season_url.format(show_id=show_id, season=season_id),
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        subtitles = []
        # loop over episode rows
        for subtitle_group in soup.findAll('subg'):
            # read the episode info
            episode_info = subtitle_group.find('etitle')
            if episode_info is None:
                continue

            # an entry may cover a range of episodes
            episodes = []
            episode_match = episode_re.match(episode_info['number'])
            if episode_match:
                episodes = [int(e) for e in [episode_match.group(1), episode_match.group(3)] if e]

            subtitle_info = subtitle_group.find('sgt')
            if subtitle_info is None:
                continue

            season = int(subtitle_info['ssnnum'])
            episode_id = int(subtitle_info['epsid'])

            # filter out unreleased subtitles
            for subs_tag in subtitle_group.findAll('sr'):
                if subs_tag['published_on'] == '':
                    continue

                page_link = self.server_url + self.page_link.format(show_id=show_id, season_id=season_id,
                                                                    season=season, episode=episode_id)
                title = episode_info['title']
                version = subs_tag.fmt.text + ' ' + subs_tag.team.text
                download_link = self.server_url + self.download_link.format(int(subs_tag['rlsid']))

                for episode in episodes:
                    subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, series, season, episode,
                                                   year, title, version, download_link)
                    logger.debug('Found subtitle %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        """List subtitles matching the episode's language/season/episode."""
        if isinstance(video, Episode):
            # lookup show_id
            titles = [video.series] + video.alternative_series
            show_id = self.get_show_id(titles, video.year)

            # query for subtitles with the show_id
            if show_id:
                subtitles = [s for s in self.query(show_id, video.series, video.season, video.year)
                             if s.language in languages and s.season == video.season and
                             s.episode == video.episode]
                if subtitles:
                    return subtitles
            else:
                logger.error('No show id found for %r (%r)', video.series, {'year': video.year})

        return []

    def download_subtitle(self, subtitle):
        """Download the subtitle payload and store it (with normalized line
        endings) on the subtitle object; no-op on empty responses."""
        if isinstance(subtitle, XSubsSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug('Unable to download subtitle. No data returned from provider')
                return

            subtitle.content = fix_line_ending(r.content)
class ZimukuProvider(Provider):
    """Zimuku Provider.

    Scrapes zimuku.la search/detail pages for Chinese/English subtitles.
    Subtitles are usually delivered as rar/zip archives; plain subtitle
    files are accepted as well.
    """

    # only Chinese and English subtitles are listed on the site
    languages = {Language(l) for l in ["zho", "eng"]}

    server_url = "http://www.zimuku.la"
    search_url = "/search?q={}"
    download_url = "http://www.zimuku.la/"

    UserAgent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"

    subtitle_class = ZimukuSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers["User-Agent"] = "Subliminal/{}".format(__short_version__)

    def terminate(self):
        self.session.close()

    def _parse_episode_page(self, link, year):
        """Parse a zimuku detail page and return a list of ZimukuSubtitle.

        :param str link: absolute URL of the detail page.
        :param year: year to attach to each resulting subtitle (may be None).
        """
        r = self.session.get(link)
        bs_obj = ParserBeautifulSoup(
            r.content.decode("utf-8", "ignore"), ["html.parser"]
        )
        subs_body = bs_obj.find("div", class_="subs box clearfix").find("tbody")
        subs = []
        for sub in subs_body.find_all("tr"):
            a = sub.find("a")
            name = _extract_name(a.text)
            # remove ext because it can be an archive type
            name = os.path.splitext(name)[0]

            # language flags are rendered as images; default to English
            # unless a Chinese flag (or the "jollyroger" icon) is present
            language = Language("eng")
            for img in sub.find("td", class_="tac lang").find_all("img"):
                if (
                    "hongkong" in img.attrs["src"]
                    or "china" in img.attrs["src"]
                    or "jollyroger" in img.attrs["src"]
                ):
                    language = Language("zho")
                    break

            sub_page_link = urljoin(self.server_url, a.attrs["href"])
            # give every subtitle its own session carrying the proper Referer,
            # since the download must come from the detail page context
            backup_session = copy.deepcopy(self.session)
            backup_session.headers["Referer"] = link

            subs.append(
                self.subtitle_class(language, sub_page_link, name, backup_session, year)
            )

        return subs

    def query(self, keyword, season=None, episode=None, year=None):
        """Search zimuku for ``keyword`` and collect subtitles from result pages.

        :param str keyword: series or movie title to search for.
        :param season: optional season number; appended as ``.SXX`` to the query.
        :param episode: unused by the search itself (episode filtering is done upstream).
        :param year: optional year, appended when no season is given.
        """
        params = keyword
        if season:
            params += ".S{season:02d}".format(season=season)
        elif year:
            params += " {:4d}".format(year)

        logger.debug("Searching subtitles %r", params)
        subtitles = []
        search_link = self.server_url + text_type(self.search_url).format(params)

        r = self.session.get(search_link, timeout=30)
        r.raise_for_status()

        if not r.content:
            logger.debug("No data returned from provider")
            return []

        html = r.content.decode("utf-8", "ignore")
        # the site sometimes answers with a JS redirect stub; follow the
        # reconstructed window.location until a real result page comes back
        pattern = r"url\s*=\s*'([^']*)'\s*\+\s*url"
        parts = re.findall(pattern, html)
        redirect_url = search_link
        while parts:
            parts.reverse()
            redirect_url = urljoin(self.server_url, "".join(parts))
            r = self.session.get(redirect_url, timeout=30)
            html = r.content.decode("utf-8", "ignore")
            parts = re.findall(pattern, html)
        logger.debug("search url located: " + redirect_url)

        soup = ParserBeautifulSoup(
            r.content.decode("utf-8", "ignore"), ["lxml", "html.parser"]
        )

        # non-shooter result page
        if soup.find("div", {"class": "item"}):
            logger.debug("enter a non-shooter page")
            for item in soup.find_all("div", {"class": "item"}):
                title_a = item.find("p", class_="tt clearfix").find("a")
                subs_year = year
                if season:
                    # episode year in zimuku is the season's year not show's year
                    actual_subs_year = re.findall(r"\d{4}", title_a.text) or None
                    if actual_subs_year:
                        subs_year = int(actual_subs_year[0]) - season + 1
                    title = title_a.text
                    # match the requested season against the Chinese-numeral
                    # season marker in the result title ("第...季")
                    season_cn1 = re.search("第(.*)季", title)
                    if not season_cn1:
                        season_cn1 = "一"
                    else:
                        season_cn1 = season_cn1.group(1).strip()
                    season_cn2 = num_to_cn(str(season))
                    if season_cn1 != season_cn2:
                        continue
                episode_link = self.server_url + title_a.attrs["href"]
                new_subs = self._parse_episode_page(episode_link, subs_year)
                subtitles += new_subs

        # NOTE: shooter result pages are ignored due to the existence of assrt provider

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        elif isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles with the show_id
        for title in titles:
            if isinstance(video, Episode):
                subtitles += [
                    s
                    for s in self.query(
                        title,
                        season=video.season,
                        episode=video.episode,
                        year=video.year,
                    )
                    if s.language in languages
                ]
            elif isinstance(video, Movie):
                subtitles += [
                    s
                    for s in self.query(title, year=video.year)
                    if s.language in languages
                ]

        return subtitles

    def download_subtitle(self, subtitle):
        def _get_archive_download_link(session, sub_page_link):
            # two hops: detail page -> download page -> actual file link
            r = session.get(sub_page_link)
            bs_obj = ParserBeautifulSoup(
                r.content.decode("utf-8", "ignore"), ["html.parser"]
            )
            down_page_link = bs_obj.find("a", {"id": "down1"}).attrs["href"]
            down_page_link = urljoin(sub_page_link, down_page_link)
            r = session.get(down_page_link)
            bs_obj = ParserBeautifulSoup(
                r.content.decode("utf-8", "ignore"), ["html.parser"]
            )
            download_link = bs_obj.find("a", {"rel": "nofollow"})
            download_link = download_link.attrs["href"]
            download_link = urljoin(sub_page_link, download_link)
            return download_link

        # download the subtitle
        logger.info("Downloading subtitle %r", subtitle)
        # reuse the per-subtitle session created in _parse_episode_page so the
        # Referer header matches the detail page
        self.session = subtitle.session
        download_link = _get_archive_download_link(self.session, subtitle.page_link)
        r = self.session.get(download_link, timeout=30)
        r.raise_for_status()
        # header may be absent on malformed responses; an empty filename
        # simply falls through to the "unknown extension" branch below
        filename = r.headers.get("Content-Disposition", "")

        if not r.content:
            logger.debug("Unable to download subtitle. No data returned from provider")
            return

        archive_stream = io.BytesIO(r.content)
        archive = None
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Identified rar archive")
            if ".rar" not in filename:
                logger.debug(
                    ".rar should be in the downloaded file name: {}".format(filename)
                )
                return
            archive = rarfile.RarFile(archive_stream)
            subtitle_content = _get_subtitle_from_archive(archive)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Identified zip archive")
            if ".zip" not in filename:
                logger.debug(
                    ".zip should be in the downloaded file name: {}".format(filename)
                )
                return
            archive = zipfile.ZipFile(archive_stream)
            subtitle_content = _get_subtitle_from_archive(archive)
        else:
            is_sub = ""
            for sub_ext in SUBTITLE_EXTENSIONS:
                if sub_ext in filename:
                    is_sub = sub_ext
                    break
            if not is_sub:
                logger.debug(
                    "unknown subtitle ext in downloaded file name: {}".format(filename)
                )
                return
            logger.debug("Identified {} file".format(is_sub))
            subtitle_content = r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug("Could not extract subtitle from %r", archive)
    def query(self, video, languages, hash=None, size=None, imdb_id=None, query=None,
              season=None, episode=None, tag=None, use_tag_search=False,
              only_foreign=False, also_foreign=False):
        """Search OpenSubtitles via XML-RPC and build subtitle objects.

        Builds one or more search criteria (hash+size, tag, imdb id, free-text
        query) and posts them in a single ``SearchSubtitles`` call; results are
        filtered by forced/hearing-impaired preferences, requested languages
        and the video's imdb id before being wrapped in ``self.subtitle_class``.

        :param video: the video being searched for (used for imdb id filtering).
        :param languages: set of babelfish Languages to accept.
        :param hash: OpenSubtitles moviehash; only used together with ``size``.
        :param size: file size in bytes, paired with ``hash``.
        :param imdb_id: imdb id string including the ``tt`` prefix.
        :param query: list of title strings for free-text search.
        :param season: season number for episode searches.
        :param episode: episode number for episode searches.
        :param tag: exact file name, used when ``use_tag_search`` is set.
        :param use_tag_search: enable tag/exact-filename criteria.
        :param only_foreign: keep only foreign/forced subtitles.
        :param also_foreign: keep foreign/forced subtitles in addition to normal ones.
        :raises ValueError: if no criterion could be built from the arguments.
        :return: list of subtitle objects.
        """
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            if season and episode:
                criteria.append({
                    'imdbid': imdb_id[2:],
                    'season': season,
                    'episode': episode
                })
            else:
                criteria.append({'imdbid': imdb_id[2:]})
        if query and season and episode:
            for q in query:
                # apostrophes break the OS search backend, strip them
                criteria.append({
                    'query': q.replace('\'', ''),
                    'season': season,
                    'episode': episode
                })
        elif query:
            for q in query:
                criteria.append({'query': q.replace('\'', '')})
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(
                sorted(l.opensubtitles for l in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)
        response = self.use_token_or_login(lambda: self.retry(lambda: checked(
            lambda: self.server.SearchSubtitles(self.token, criteria))))
        subtitles = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return subtitles

        # loop over subtitle items
        for subtitle_item in response['data']:
            _subtitle_item = subtitle_item

            # in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
            if hasattr(_subtitle_item, "startswith"):
                _subtitle_item = response["data"][subtitle_item]

            # read the item
            language = Language.fromopensubtitles(
                _subtitle_item['SubLanguageID'])
            hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
            page_link = _subtitle_item['SubtitlesLink']
            subtitle_id = int(_subtitle_item['IDSubtitleFile'])
            matched_by = _subtitle_item['MatchedBy']
            movie_kind = _subtitle_item['MovieKind']
            hash = _subtitle_item['MovieHash']
            movie_name = _subtitle_item['MovieName']
            movie_release_name = _subtitle_item['MovieReleaseName']
            movie_year = int(_subtitle_item['MovieYear']
                             ) if _subtitle_item['MovieYear'] else None
            # for episodes the show's imdb id (SeriesIMDBParent) is the one
            # comparable to video.imdb_id below
            if season or episode:
                movie_imdb_id = 'tt' + _subtitle_item['SeriesIMDBParent']
            else:
                movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
            movie_fps = _subtitle_item.get('MovieFPS')
            series_season = int(_subtitle_item['SeriesSeason']
                                ) if _subtitle_item['SeriesSeason'] else None
            series_episode = int(_subtitle_item['SeriesEpisode']
                                 ) if _subtitle_item['SeriesEpisode'] else None
            filename = _subtitle_item['SubFileName']
            encoding = _subtitle_item.get('SubEncoding') or None
            foreign_parts_only = bool(
                int(_subtitle_item.get('SubForeignPartsOnly', 0)))

            # foreign/forced subtitles only wanted
            if only_foreign and not foreign_parts_only:
                continue

            # foreign/forced not wanted
            elif not only_foreign and not also_foreign and foreign_parts_only:
                continue

            # set subtitle language to forced if it's foreign_parts_only
            elif (also_foreign or only_foreign) and foreign_parts_only:
                language = Language.rebuild(language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if hearing_impaired:
                language = Language.rebuild(language, hi=True)

            if language not in languages:
                continue

            # compare imdb ids with leading zeros stripped from the numeric part
            if video.imdb_id and (movie_imdb_id != re.sub(
                    "(?<![^a-zA-Z])0+", "", video.imdb_id)):
                continue

            query_parameters = _subtitle_item.get("QueryParameters")

            subtitle = self.subtitle_class(language, hearing_impaired, page_link,
                                           subtitle_id, matched_by, movie_kind,
                                           hash, movie_name, movie_release_name,
                                           movie_year, movie_imdb_id,
                                           series_season, series_episode,
                                           query_parameters, filename, encoding,
                                           movie_fps,
                                           skip_wrong_fps=self.skip_wrong_fps)
            subtitle.uploader = _subtitle_item[
                'UserNickName'] if _subtitle_item[
                'UserNickName'] else 'anonymous'
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            subtitles.append(subtitle)

        return subtitles
class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
    """OpenSubtitles.org XML-RPC provider with token caching, VIP-server
    fallback and forced/hearing-impaired language variants."""

    only_foreign = False
    also_foreign = False
    subtitle_class = OpenSubtitlesSubtitle
    hash_verifiable = True
    hearing_impaired_verifiable = True
    skip_wrong_fps = True
    is_vip = False
    use_ssl = True
    timeout = 15

    # scheme-less; "http:"/"https:" is prepended in __init__
    default_url = "//api.opensubtitles.org/xml-rpc"
    vip_url = "//vip-api.opensubtitles.org/xml-rpc"

    languages = {
        Language.fromopensubtitles(l)
        for l in language_converters['szopensubtitles'].codes
    }
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    def __init__(self,
                 username=None,
                 password=None,
                 use_tag_search=False,
                 only_foreign=False,
                 also_foreign=False,
                 skip_wrong_fps=True,
                 is_vip=False,
                 use_ssl=True,
                 timeout=15):
        # either both credentials or neither
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username or ''
        self.password = password or ''
        self.use_tag_search = use_tag_search
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign
        self.skip_wrong_fps = skip_wrong_fps
        self.token = None
        self.is_vip = is_vip
        self.use_ssl = use_ssl
        self.timeout = timeout

        logger.debug("Using timeout: %d", timeout)

        if use_ssl:
            logger.debug("Using HTTPS connection")

        self.default_url = ("https:" if use_ssl else "http:") + self.default_url
        self.vip_url = ("https:" if use_ssl else "http:") + self.vip_url

        if use_tag_search:
            logger.info("Using tag/exact filename search")

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

    def get_server_proxy(self, url, timeout=None):
        """Return an XML-RPC ServerProxy for ``url`` using the SZ transport."""
        return ServerProxy(
            url,
            SubZeroRequestsTransport(use_https=self.use_ssl,
                                     timeout=timeout or self.timeout,
                                     user_agent=os.environ.get(
                                         "SZ_USER_AGENT", "Sub-Zero/2")))

    def log_in_url(self, server_url):
        """Log in against ``server_url`` and cache token and URL in the region."""
        self.token = None
        self.server = self.get_server_proxy(server_url)

        response = self.retry(lambda: checked(lambda: self.server.LogIn(
            self.username, self.password, 'eng',
            os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))))

        self.token = response['token']
        # mask the token tail when logging
        logger.debug('Logged in with token %r',
                     self.token[:10] + "X" * (len(self.token) - 10))

        region.set("os_token", bytearray(self.token, encoding='utf-8'))
        region.set("os_server_url", bytearray(server_url, encoding='utf-8'))

    def log_in(self):
        """Log in, falling back from the VIP server to the default one.

        :raises Unauthorized: if no token could be obtained.
        """
        logger.info('Logging in')

        try:
            self.log_in_url(self.vip_url if self.is_vip else self.default_url)
        except Unauthorized:
            if self.is_vip:
                logger.info("VIP server login failed, falling back")
                try:
                    self.log_in_url(self.default_url)
                except Unauthorized:
                    pass

        if not self.token:
            logger.error("Login failed, please check your credentials")
            raise Unauthorized

    def use_token_or_login(self, func):
        """Run ``func``, logging in first (or again on Unauthorized)."""
        if not self.token:
            self.log_in()
            return func()
        try:
            return func()
        except Unauthorized:
            self.log_in()
            return func()

    def initialize(self):
        # try to resume a previous session from the cache region
        token_cache = region.get("os_token")
        url_cache = region.get("os_server_url")

        if token_cache is not NO_VALUE and url_cache is not NO_VALUE:
            self.token = token_cache.decode("utf-8")
            self.server = self.get_server_proxy(url_cache.decode("utf-8"))
            logger.debug("Using previous login token: %r",
                         self.token[:10] + "X" * (len(self.token) - 10))
        else:
            self.server = None
            self.token = None

    def terminate(self):
        self.server = None
        self.token = None

    def list_subtitles(self, video, languages):
        """
        :param video:
        :param languages:
        :return:

        patch: query movies even if hash is known; add tag parameter
        """
        season = episode = None

        if isinstance(video, Episode):
            query = [video.series] + video.alternative_series
            season = video.season
            # multi-episode files carry a list; search for the first episode
            episode = min(video.episode) if isinstance(
                video.episode, list) else video.episode

            if video.is_special:
                season = None
                episode = None
                query = [
                    u"%s %s" % (series, video.title)
                    for series in [video.series] + video.alternative_series
                ]
                logger.info("%s: Searching for special: %r", self.__class__,
                            query)
        # elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
        #     query = video.name.split(os.sep)[-1]
        else:
            query = [video.title] + video.alternative_titles

        return self.query(video,
                          languages,
                          hash=video.hashes.get('opensubtitles'),
                          size=video.size,
                          imdb_id=video.imdb_id,
                          query=query,
                          season=season,
                          episode=episode,
                          tag=video.original_name,
                          use_tag_search=self.use_tag_search,
                          only_foreign=self.only_foreign,
                          also_foreign=self.also_foreign)

    def query(self, video, languages, hash=None, size=None, imdb_id=None,
              query=None, season=None, episode=None, tag=None,
              use_tag_search=False, only_foreign=False, also_foreign=False):
        """Search OpenSubtitles and return filtered subtitle objects.

        Criteria (hash+size, tag, imdb id, free-text query) are combined in a
        single ``SearchSubtitles`` call; results are filtered by forced and
        hearing-impaired preferences, requested languages and imdb id.

        :raises ValueError: if no criterion could be built from the arguments.
        """
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            if season and episode:
                criteria.append({
                    'imdbid': imdb_id[2:],
                    'season': season,
                    'episode': episode
                })
            else:
                criteria.append({'imdbid': imdb_id[2:]})
        if query and season and episode:
            for q in query:
                # apostrophes break the OS search backend, strip them
                criteria.append({
                    'query': q.replace('\'', ''),
                    'season': season,
                    'episode': episode
                })
        elif query:
            for q in query:
                criteria.append({'query': q.replace('\'', '')})
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(
                sorted(l.opensubtitles for l in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)
        response = self.use_token_or_login(lambda: self.retry(lambda: checked(
            lambda: self.server.SearchSubtitles(self.token, criteria))))
        subtitles = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return subtitles

        # loop over subtitle items
        for subtitle_item in response['data']:
            _subtitle_item = subtitle_item

            # in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
            if hasattr(_subtitle_item, "startswith"):
                _subtitle_item = response["data"][subtitle_item]

            # read the item
            language = Language.fromopensubtitles(
                _subtitle_item['SubLanguageID'])
            hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
            page_link = _subtitle_item['SubtitlesLink']
            subtitle_id = int(_subtitle_item['IDSubtitleFile'])
            matched_by = _subtitle_item['MatchedBy']
            movie_kind = _subtitle_item['MovieKind']
            hash = _subtitle_item['MovieHash']
            movie_name = _subtitle_item['MovieName']
            movie_release_name = _subtitle_item['MovieReleaseName']
            movie_year = int(_subtitle_item['MovieYear']
                             ) if _subtitle_item['MovieYear'] else None
            # for episodes the show's imdb id is the one comparable to the video's
            if season or episode:
                movie_imdb_id = 'tt' + _subtitle_item['SeriesIMDBParent']
            else:
                movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
            movie_fps = _subtitle_item.get('MovieFPS')
            series_season = int(_subtitle_item['SeriesSeason']
                                ) if _subtitle_item['SeriesSeason'] else None
            series_episode = int(_subtitle_item['SeriesEpisode']
                                 ) if _subtitle_item['SeriesEpisode'] else None
            filename = _subtitle_item['SubFileName']
            encoding = _subtitle_item.get('SubEncoding') or None
            foreign_parts_only = bool(
                int(_subtitle_item.get('SubForeignPartsOnly', 0)))

            # foreign/forced subtitles only wanted
            if only_foreign and not foreign_parts_only:
                continue

            # foreign/forced not wanted
            elif not only_foreign and not also_foreign and foreign_parts_only:
                continue

            # set subtitle language to forced if it's foreign_parts_only
            elif (also_foreign or only_foreign) and foreign_parts_only:
                language = Language.rebuild(language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if hearing_impaired:
                language = Language.rebuild(language, hi=True)

            if language not in languages:
                continue

            # compare imdb ids with leading zeros stripped from the numeric part
            if video.imdb_id and (movie_imdb_id != re.sub(
                    "(?<![^a-zA-Z])0+", "", video.imdb_id)):
                continue

            query_parameters = _subtitle_item.get("QueryParameters")

            subtitle = self.subtitle_class(language, hearing_impaired,
                                           page_link, subtitle_id, matched_by,
                                           movie_kind, hash, movie_name,
                                           movie_release_name, movie_year,
                                           movie_imdb_id, series_season,
                                           series_episode, query_parameters,
                                           filename, encoding, movie_fps,
                                           skip_wrong_fps=self.skip_wrong_fps)
            subtitle.uploader = _subtitle_item[
                'UserNickName'] if _subtitle_item[
                'UserNickName'] else 'anonymous'
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            subtitles.append(subtitle)

        return subtitles

    def download_subtitle(self, subtitle):
        """Download and decompress the subtitle body for ``subtitle``."""
        logger.info('Downloading subtitle %r', subtitle)
        response = self.use_token_or_login(
            lambda: checked(lambda: self.server.DownloadSubtitles(
                self.token, [str(subtitle.subtitle_id)])))
        # payload is base64-encoded gzip; wbits=47 auto-detects the header
        subtitle.content = fix_line_ending(
            zlib.decompress(base64.b64decode(response['data'][0]['data']), 47))
    def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None,
              tag=None, use_tag_search=False, only_foreign=False, also_foreign=False):
        """Search OpenSubtitles via XML-RPC and build subtitle objects.

        Legacy variant of the search: unlike the sibling ``query(video, ...)``
        implementation, it takes no video argument and therefore performs no
        imdb-id filtering, does not use ``SeriesIMDBParent`` for episodes, and
        does not set the hearing-impaired flag or the uploader on results —
        NOTE(review): presumably intentional for this class; confirm against
        its callers.

        :param languages: set of babelfish Languages to accept.
        :param hash: OpenSubtitles moviehash; only used together with ``size``.
        :param size: file size in bytes, paired with ``hash``.
        :param imdb_id: imdb id string including the ``tt`` prefix.
        :param query: list of title strings for free-text search.
        :param season: season number for episode searches.
        :param episode: episode number for episode searches.
        :param tag: exact file name, used when ``use_tag_search`` is set.
        :param use_tag_search: enable tag/exact-filename criteria.
        :param only_foreign: keep only foreign/forced subtitles.
        :param also_foreign: keep foreign/forced subtitles in addition to normal ones.
        :raises ValueError: if no criterion could be built from the arguments.
        :return: list of subtitle objects.
        """
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            if season and episode:
                criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
            else:
                criteria.append({'imdbid': imdb_id[2:]})
        if query and season and episode:
            for q in query:
                # apostrophes break the OS search backend, strip them
                criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
        elif query:
            for q in query:
                criteria.append({'query': q.replace('\'', '')})
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)
        response = self.use_token_or_login(
            lambda: self.retry(lambda: checked(lambda: self.server.SearchSubtitles(self.token, criteria)))
        )
        subtitles = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return subtitles

        # loop over subtitle items
        for subtitle_item in response['data']:
            _subtitle_item = subtitle_item

            # in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
            if hasattr(_subtitle_item, "startswith"):
                _subtitle_item = response["data"][subtitle_item]

            # read the item
            language = Language.fromopensubtitles(_subtitle_item['SubLanguageID'])
            hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
            page_link = _subtitle_item['SubtitlesLink']
            subtitle_id = int(_subtitle_item['IDSubtitleFile'])
            matched_by = _subtitle_item['MatchedBy']
            movie_kind = _subtitle_item['MovieKind']
            hash = _subtitle_item['MovieHash']
            movie_name = _subtitle_item['MovieName']
            movie_release_name = _subtitle_item['MovieReleaseName']
            movie_year = int(_subtitle_item['MovieYear']) if _subtitle_item['MovieYear'] else None
            movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
            movie_fps = _subtitle_item.get('MovieFPS')
            series_season = int(_subtitle_item['SeriesSeason']) if _subtitle_item['SeriesSeason'] else None
            series_episode = int(_subtitle_item['SeriesEpisode']) if _subtitle_item['SeriesEpisode'] else None
            filename = _subtitle_item['SubFileName']
            encoding = _subtitle_item.get('SubEncoding') or None
            foreign_parts_only = bool(int(_subtitle_item.get('SubForeignPartsOnly', 0)))

            # foreign/forced subtitles only wanted
            if only_foreign and not foreign_parts_only:
                continue

            # foreign/forced not wanted
            elif not only_foreign and not also_foreign and foreign_parts_only:
                continue

            # set subtitle language to forced if it's foreign_parts_only
            elif (also_foreign or only_foreign) and foreign_parts_only:
                language = Language.rebuild(language, forced=True)

            if language not in languages:
                continue

            query_parameters = _subtitle_item.get("QueryParameters")

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                           movie_kind, hash, movie_name, movie_release_name, movie_year,
                                           movie_imdb_id, series_season, series_episode, query_parameters,
                                           filename, encoding, movie_fps, skip_wrong_fps=self.skip_wrong_fps)
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            subtitles.append(subtitle)

        return subtitles
def get_language(self, text): if text == '1.gif': return Language.fromhosszupuska('hu') if text == '2.gif': return Language.fromhosszupuska('en') return None
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page to avoid
    massive hammering, thus it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    # copy() so the forced/hi variants added below don't mutate the
    # module-level supported_languages set shared with other code
    languages = supported_languages.copy()
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))
    video_types = (Episode, Movie)
    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 8  # seconds

    def __init__(self, only_foreign=False, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

        # reuse cached login cookies when available to avoid re-logging in
        prev_cookies = region.get("subscene_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug("Re-using old subscene cookies: %r", prev_cookies)
            self.session.cookies.update(prev_cookies)
        else:
            logger.debug("Logging in")
            self.login()

    def login(self):
        """Perform the multi-step subscene login and cache the session cookies.

        :raises ServiceUnavailable: if the login page reports a server error.
        :raises ProviderError: if any step of the login handshake fails.
        """
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.text:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        # the login form parameters are embedded as JSON in the page
        match = re.search(
            r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>",
            r.text)

        if match:
            h = html
            data = json.loads(h.unescape(match.group(1)))
            login_url = parse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(
                login_url, {
                    "username": self.username,
                    "password": self.password,
                    data["antiForgery"]["name"]: data["antiForgery"]["value"]
                })
            # the response embeds an OpenID-Connect style token form that has
            # to be posted back to the site to complete the login
            pep_content = re.search(
                r"<form method=\"post\" action=\"https://subscene\.com/\">"
                r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                r"scope.+?value=\"(?P<scope>.+?)\".+?"
                r"state.+?value=\"(?P<state>.+?)\".+?"
                r"session_state.+?value=\"(?P<session_state>.+?)\"",
                r.text, re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    raise ProviderError(
                        "Something went wrong when trying to log in: %s",
                        traceback.format_exc())
                else:
                    # only persist the identity-server cookies
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf",
                                 "idsvr.clients", "idsvr.session",
                                 "idsvr.username")
                    for cn in self.session.cookies.keys():
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return
        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        """Build the cookie filters and sync the account filters for ``languages``."""
        self.filters = dict(HearingImpaired="2")
        acc_filters = self.filters.copy()
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            acc_filters["ForeignOnly"] = self.filters["ForeignOnly"].lower()
            logger.info("Only searching for foreign/forced subtitles")

        selected_ids = []
        for l in languages:
            lid = language_ids.get(l.basename,
                                   language_ids.get(l.alpha3, None))
            if lid:
                selected_ids.append(str(lid))

        acc_filters["SelectedIds"] = selected_ids
        self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"])

        # only push the filters to the account when they changed
        last_filters = region.get("subscene_filters")
        if last_filters != acc_filters:
            region.set("subscene_filters", acc_filters)
            logger.debug("Setting account filters to %r", acc_filters)
            self.session.post("https://u.subscene.com/filter", acc_filters,
                              allow_redirects=False)

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info(
                "Skipping search because we don't know the original release name"
            )
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        """Download the subtitle archive (or reuse cached pack data) and extract it."""
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            if rarfile.is_rarfile(io.BytesIO(subtitle.pack_data)):
                logger.debug('Identified rar archive')
                archive = rarfile.RarFile(io.BytesIO(subtitle.pack_data))
            elif zipfile.is_zipfile(io.BytesIO(subtitle.pack_data)):
                logger.debug('Identified zip archive')
                archive = zipfile.ZipFile(io.BytesIO(subtitle.pack_data))
            else:
                logger.error('Unsupported compressed format')
                return
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(
                    subtitle, archive)
                return
            except ProviderError:
                # fall through to a fresh download
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session),
                             timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)

        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            logger.error('Unsupported compressed format')
            return

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        """Convert a film's API subtitles into SubsceneSubtitle objects."""
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language,
                                                     forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if subtitle.hearing_impaired:
                subtitle.language = Language.rebuild(subtitle.language,
                                                     hi=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def do_search(self, *args, **kwargs):
        """Run a subscene search; on HTTP errors drop the cached cookies and return None."""
        try:
            return search(*args, **kwargs)
        except requests.HTTPError:
            region.delete("subscene_cookies2")

    def query(self, video):
        """Query subscene for ``video``, throttling between multiple title searches."""
        subtitles = []
        if isinstance(video, Episode):
            titles = list(set([video.series] + video.alternative_series[:1]))
            more_than_one = len(titles) > 1
            for series in titles:
                # subscene uses "<series> - <Nth> Season" naming
                term = u"%s - %s Season" % (series, p.number_to_words(
                    "%sth" % video.season).capitalize())
                logger.debug('Searching with series and season: %s', term)
                film = self.do_search(term,
                                      session=self.session,
                                      release=False,
                                      throttle=self.search_throttle,
                                      limit_to=SearchTypes.TvSerie)
                if not film and video.season == 1:
                    # first seasons are often listed under the bare series name
                    logger.debug('Searching with series name: %s', series)
                    film = self.do_search(series,
                                          session=self.session,
                                          release=False,
                                          throttle=self.search_throttle,
                                          limit_to=SearchTypes.TvSerie)

                if film and film.subtitles:
                    logger.debug('Searching found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No results found')

                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            titles = list(set([video.title] + video.alternative_titles[:1]))
            more_than_one = len(titles) > 1
            for title in titles:
                logger.debug('Searching for movie results: %r', title)
                film = self.do_search(title,
                                      year=video.year,
                                      session=self.session,
                                      limit_to=None,
                                      release=False,
                                      throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)

                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
def from_api(cls, s): return cls(Language.fromsubscene(s.language.strip()), s.title, hearing_impaired=s.hearing_impaired, page_link=s.url)
class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
    """Provider for the opensubtitles.com REST API (api/v1)."""

    server_url = 'https://api.opensubtitles.com/api/v1/'

    languages = {
        Language.fromopensubtitles(lang)
        for lang in language_converters['szopensubtitles'].codes
    }
    languages.update(
        set(Language.rebuild(lang, forced=True) for lang in languages))

    video_types = (Episode, Movie)

    def __init__(self, username=None, password=None, use_hash=True,
                 api_key=None):
        """
        :param username: opensubtitles.com account name (required)
        :param password: account password (required)
        :param use_hash: also search by the video's moviehash
        :param api_key: API key issued by opensubtitles.com (required)
        :raises ConfigurationError: when credentials or api_key are missing
        """
        # fix: the credentials check was duplicated in the original
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        if not api_key:
            raise ConfigurationError('Api_key must be specified')

        self.session = Session()
        self.session.headers = {
            'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
            'Api-Key': api_key,
            'Content-Type': 'application/json'
        }
        self.token = None
        self.username = username
        self.password = password
        self.video = None
        self.use_hash = use_hash

    def initialize(self):
        # reuse a cached token when available; tokens expire after
        # TOKEN_EXPIRATION_TIME
        self.token = region.get("oscom_token",
                                expiration_time=TOKEN_EXPIRATION_TIME)
        if self.token is NO_VALUE:
            self.login()

    def terminate(self):
        self.session.close()

    def login(self):
        """Obtain an auth token and cache it; raise on every failure mode."""
        try:
            r = self.session.post(self.server_url + 'login',
                                  json={"username": self.username,
                                        "password": self.password},
                                  allow_redirects=False,
                                  timeout=30)
        except (ConnectionError, Timeout, ReadTimeout) as e:
            # fix: the original formatted this message with `r`, which is
            # unbound when the request itself raised
            raise ServiceUnavailable('Unknown Error, empty response: %r' % e)
        else:
            if r.status_code == 200:
                try:
                    self.token = r.json()['token']
                except ValueError:
                    raise ProviderError('Invalid JSON returned by provider')
                else:
                    region.set("oscom_token", self.token)
                    return
            elif r.status_code == 401:
                raise AuthenticationError('Login failed: {}'.format(r.reason))
            elif r.status_code == 429:
                raise TooManyRequests()
            elif r.status_code == 503:
                raise ProviderError(r.reason)
            else:
                raise ProviderError('Bad status code: {}'.format(r.status_code))

    @staticmethod
    def sanitize_external_ids(external_id):
        """Normalize an IMDB-style id (e.g. "tt0133093") to a plain int."""
        if isinstance(external_id, str):
            external_id = external_id.lower().lstrip('tt').lstrip('0')
        sanitized_id = external_id[:-1].lstrip('0') + external_id[-1]
        return int(sanitized_id)

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_titles(self, title):
        """Look up the provider's feature id for *title*; None when no match."""
        title_id = None

        parameters = {'query': title.lower()}
        logger.debug('Searching using this title: {}'.format(title))

        results = self.session.get(self.server_url + 'features',
                                   params=parameters, timeout=30)

        if results.status_code == 401:
            # token expired server-side: drop the cache, log in and retry once
            logger.debug('Authentification failed: clearing cache and attempting to login.')
            region.delete("oscom_token")
            self.login()

            results = self.session.get(self.server_url + 'features',
                                       params=parameters, timeout=30)

        # fix: these two checks were duplicated verbatim in the original
        if results.status_code == 429:
            raise TooManyRequests()
        elif results.status_code == 503:
            raise ProviderError(results.reason)

        # deserialize results
        try:
            results_dict = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            # pick the first result whose (normalized) title and year match
            for result in results_dict:
                if 'title' in result['attributes']:
                    if isinstance(self.video, Episode):
                        if fix_tv_naming(title).lower() == result['attributes']['title'].lower() and \
                                (not self.video.year or self.video.year == int(result['attributes']['year'])):
                            title_id = result['id']
                            break
                    else:
                        if fix_movie_naming(title).lower() == result['attributes']['title'].lower() and \
                                (not self.video.year or self.video.year == int(result['attributes']['year'])):
                            title_id = result['id']
                            break
                else:
                    continue

            if title_id:
                logger.debug('Found this title ID: {}'.format(title_id))
                return self.sanitize_external_ids(title_id)
        finally:
            if not title_id:
                logger.debug('No match found for {}'.format(title))

    def query(self, languages, video):
        """Search for subtitles matching *video* in *languages*."""
        self.video = video
        if self.use_hash:
            file_hash = self.video.hashes.get('opensubtitlescom')
            # fix: the original logged the builtin `hash`, not the value
            logger.debug('Searching using this hash: {}'.format(file_hash))
        else:
            file_hash = None

        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        # prefer an IMDB id when the video carries one; otherwise resolve a
        # provider feature id from the title
        imdb_id = None
        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.series_imdb_id)
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.sanitize_external_ids(self.video.imdb_id)

        title_id = None
        if not imdb_id:
            title_id = self.search_titles(title)
            if not title_id:
                return []

        lang_strings = [str(lang.basename) for lang in languages]
        only_foreign = all([lang.forced for lang in languages])
        also_foreign = any([lang.forced for lang in languages])
        if only_foreign:
            forced = 'only'
        elif also_foreign:
            forced = 'include'
        else:
            forced = 'exclude'

        langs = ','.join(lang_strings)
        logger.debug('Searching for this languages: {}'.format(lang_strings))

        # query the server
        if isinstance(self.video, Episode):
            res = self.session.get(
                self.server_url + 'subtitles',
                params=(('episode_number', self.video.episode),
                        ('foreign_parts_only', forced),
                        ('languages', langs.lower()),
                        ('moviehash', file_hash),
                        ('parent_feature_id', title_id) if title_id else ('imdb_id', imdb_id),
                        ('season_number', self.video.season),
                        ('query', os.path.basename(self.video.name))),
                timeout=30)
        else:
            res = self.session.get(
                self.server_url + 'subtitles',
                params=(('foreign_parts_only', forced),
                        ('id', title_id) if title_id else ('imdb_id', imdb_id),
                        ('languages', langs.lower()),
                        ('moviehash', file_hash),
                        ('query', os.path.basename(self.video.name))),
                timeout=30)

        if res.status_code == 429:
            raise TooManyRequests()
        elif res.status_code == 503:
            raise ProviderError(res.reason)

        subtitles = []

        try:
            result = res.json()
            if 'data' not in result:
                raise ValueError
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            logger.debug('Query returned {} subtitles'.format(len(result['data'])))

            if len(result['data']):
                for item in result['data']:
                    if 'season_number' in item['attributes']['feature_details']:
                        season_number = item['attributes']['feature_details']['season_number']
                    else:
                        season_number = None

                    if 'episode_number' in item['attributes']['feature_details']:
                        episode_number = item['attributes']['feature_details']['episode_number']
                    else:
                        episode_number = None

                    if 'moviehash_match' in item['attributes']:
                        moviehash_match = item['attributes']['moviehash_match']
                    else:
                        moviehash_match = False

                    # entries without files can't be downloaded; skip them
                    if len(item['attributes']['files']):
                        subtitle = OpenSubtitlesComSubtitle(
                            language=Language.fromietf(item['attributes']['language']),
                            forced=item['attributes']['foreign_parts_only'],
                            hearing_impaired=item['attributes']['hearing_impaired'],
                            page_link=item['attributes']['url'],
                            file_id=item['attributes']['files'][0]['file_id'],
                            releases=item['attributes']['release'],
                            uploader=item['attributes']['uploader']['name'],
                            title=item['attributes']['feature_details']['movie_name'],
                            year=item['attributes']['feature_details']['year'],
                            season=season_number,
                            episode=episode_number,
                            hash_matched=moviehash_match)
                        subtitle.get_matches(self.video)
                        subtitles.append(subtitle)

            return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def download_subtitle(self, subtitle):
        """Resolve the download link for *subtitle* and fetch its content."""
        if self.token is NO_VALUE:
            logger.debug("No cached token, we'll try to login again.")
            self.login()
            if self.token is NO_VALUE:
                logger.debug("Unable to obtain an authentication token right now, we'll try again later.")
                raise ProviderError("Unable to obtain an authentication token")

        logger.info('Downloading subtitle %r', subtitle)

        headers = {
            'Accept': 'application/json',
            'Content-Type': 'application/json',
            # fix: the HTTP auth scheme is "Bearer", not "Beaker"
            'Authorization': 'Bearer ' + self.token
        }
        res = self.session.post(self.server_url + 'download',
                                json={'file_id': subtitle.file_id,
                                      'sub_format': 'srt'},
                                headers=headers,
                                timeout=30)
        if res.status_code == 429:
            raise TooManyRequests()
        elif res.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        elif res.status_code == 503:
            raise ProviderError(res.reason)
        else:
            try:
                subtitle.download_link = res.json()['link']
            except ValueError:
                raise ProviderError('Invalid JSON returned by provider')
            else:
                r = self.session.get(subtitle.download_link, timeout=30)

                # fix: the original checked `res` (the previous response)
                # instead of `r` here
                if r.status_code == 429:
                    raise TooManyRequests()
                elif r.status_code == 406:
                    raise DownloadLimitExceeded("Daily download limit reached")
                elif r.status_code == 503:
                    raise ProviderError(r.reason)

                subtitle_content = r.content

                if subtitle_content:
                    subtitle.content = fix_line_ending(subtitle_content)
                else:
                    logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
def test_get_matches_movie_name(movies):
    """The Man of Steel release name should produce name-derived matches."""
    release = 'Man.Of.Steel.2013.BluRay.720p.x264.AC3.2Audios-CMCT'
    subtitle = AssrtSubtitle(Language('zho'), 618185, release, None, None)
    expected = {'title', 'format', 'year', 'video_codec', 'resolution'}
    assert subtitle.get_matches(movies['man_of_steel']) == expected
def test_query_movie_zh(movies):
    """Querying Man of Steel in Chinese should yield 16 subtitles."""
    wanted = [Language('zho')]
    with AssrtProvider(TOKEN) as provider:
        found = provider.query(wanted, movies['man_of_steel'])
    assert len(found) == 16
def query(self, languages, video):
    """Search opensubtitles.com for subtitles matching *video* in *languages*.

    :param languages: iterable of babelfish Languages (forced flags honored)
    :param video: Episode or Movie being searched for
    :return: list of OpenSubtitlesComSubtitle candidates
    :raises TooManyRequests, ProviderError: on API errors / invalid JSON
    """
    self.video = video
    if self.use_hash:
        file_hash = self.video.hashes.get('opensubtitlescom')
        # fix: the original logged the builtin `hash` instead of the value
        logging.debug('Searching using this hash: {}'.format(file_hash))
    else:
        file_hash = None

    if isinstance(self.video, Episode):
        title = self.video.series
    else:
        title = self.video.title

    # prefer an IMDB id when available; otherwise resolve a feature id
    imdb_id = None
    if isinstance(self.video, Episode) and self.video.series_imdb_id:
        imdb_id = self.sanitize_external_ids(self.video.series_imdb_id)
    elif isinstance(self.video, Movie) and self.video.imdb_id:
        imdb_id = self.sanitize_external_ids(self.video.imdb_id)

    title_id = None
    if not imdb_id:
        title_id = self.search_titles(title)
        if not title_id:
            return []

    lang_strings = [str(lang.basename) for lang in languages]
    only_foreign = all([lang.forced for lang in languages])
    also_foreign = any([lang.forced for lang in languages])
    if only_foreign:
        forced = 'only'
    elif also_foreign:
        forced = 'include'
    else:
        forced = 'exclude'

    langs = ','.join(lang_strings)
    logging.debug('Searching for this languages: {}'.format(lang_strings))

    # query the server
    if isinstance(self.video, Episode):
        res = self.session.get(
            self.server_url + 'subtitles',
            params=(('episode_number', self.video.episode),
                    ('foreign_parts_only', forced),
                    ('languages', langs.lower()),
                    ('moviehash', file_hash),
                    ('parent_feature_id', title_id) if title_id else ('imdb_id', imdb_id),
                    ('season_number', self.video.season),
                    ('query', os.path.basename(self.video.name))),
            timeout=30)
    else:
        res = self.session.get(
            self.server_url + 'subtitles',
            params=(('foreign_parts_only', forced),
                    ('id', title_id) if title_id else ('imdb_id', imdb_id),
                    ('languages', langs.lower()),
                    ('moviehash', file_hash),
                    ('query', os.path.basename(self.video.name))),
            timeout=30)

    if res.status_code == 429:
        raise TooManyRequests()
    elif res.status_code == 503:
        raise ProviderError(res.reason)

    subtitles = []

    try:
        result = res.json()
        if 'data' not in result:
            raise ValueError
    except ValueError:
        raise ProviderError('Invalid JSON returned by provider')
    else:
        logging.debug('Query returned {} subtitles'.format(len(result['data'])))

        if len(result['data']):
            for item in result['data']:
                if 'season_number' in item['attributes']['feature_details']:
                    season_number = item['attributes']['feature_details']['season_number']
                else:
                    season_number = None

                if 'episode_number' in item['attributes']['feature_details']:
                    episode_number = item['attributes']['feature_details']['episode_number']
                else:
                    episode_number = None

                if 'moviehash_match' in item['attributes']:
                    moviehash_match = item['attributes']['moviehash_match']
                else:
                    moviehash_match = False

                # entries without files can't be downloaded; skip them
                if len(item['attributes']['files']):
                    subtitle = OpenSubtitlesComSubtitle(
                        language=Language.fromietf(item['attributes']['language']),
                        forced=item['attributes']['foreign_parts_only'],
                        hearing_impaired=item['attributes']['hearing_impaired'],
                        page_link=item['attributes']['url'],
                        file_id=item['attributes']['files'][0]['file_id'],
                        releases=item['attributes']['release'],
                        uploader=item['attributes']['uploader']['name'],
                        title=item['attributes']['feature_details']['movie_name'],
                        year=item['attributes']['feature_details']['year'],
                        season=season_number,
                        episode=episode_number,
                        hash_matched=moviehash_match)
                    subtitle.get_matches(self.video)
                    subtitles.append(subtitle)

        return subtitles
def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
          also_foreign=False):
    """Search podnapisi.net's legacy XML endpoint, paging through results.

    Returns all subtitles matching *language* exactly (after optional
    forced-flag rebuilding); duplicates are filtered by pid.
    """
    search_language = str(language).lower()

    # sr-Cyrl specialcase
    if search_language == "sr-cyrl":
        search_language = "sr"

    # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
    params = {'sXML': 1, 'sL': search_language, 'sK': keyword}
    is_episode = False
    if season and episode:
        is_episode = True
        params['sTS'] = season
        params['sTE'] = episode
    if year:
        params['sY'] = year

    # loop over paginated results
    logger.info('Searching subtitles %r', params)
    subtitles = []
    pids = set()
    while True:
        # query the server
        content = None
        try:
            content = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
            xml = etree.fromstring(content)
        except etree.ParseError:
            logger.error("Wrong data returned: %r", content)
            break

        # exit if no results
        if not int(xml.find('pagination/results').text):
            logger.debug('No subtitles found')
            break

        # loop over subtitles
        for subtitle_xml in xml.findall('subtitle'):
            # read xml elements
            pid = subtitle_xml.find('pid').text
            # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
            if pid in pids:
                continue

            _language = Language.fromietf(subtitle_xml.find('language').text)
            # NOTE(review): 'n' is treated as the hearing-impaired flag here;
            # that looks unusual (one would expect 'h') — confirm against the
            # podnapisi flags documentation before changing.
            hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
            foreign = 'f' in (subtitle_xml.find('flags').text or '')
            if only_foreign and not foreign:
                continue
            elif not only_foreign and not also_foreign and foreign:
                continue
            elif also_foreign and foreign:
                _language = Language.rebuild(_language, forced=True)

            # strict language match (includes the forced flag)
            if language != _language:
                continue

            page_link = subtitle_xml.find('url').text
            releases = []
            if subtitle_xml.find('release').text:
                for release in subtitle_xml.find('release').text.split():
                    releases.append(re.sub(r'\.+$', '', release))  # remove trailing dots
            title = subtitle_xml.find('title').text
            r_season = int(subtitle_xml.find('tvSeason').text)
            r_episode = int(subtitle_xml.find('tvEpisode').text)
            r_year = int(subtitle_xml.find('year').text)
            if is_episode:
                subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                               season=r_season, episode=r_episode, year=r_year,
                                               asked_for_release_group=video.release_group,
                                               asked_for_episode=episode)
            else:
                subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                               year=r_year,
                                               asked_for_release_group=video.release_group)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)
            pids.add(pid)

        # stop on last page
        if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
            break

        # increment current page
        params['page'] = int(xml.find('pagination/current').text) + 1
        logger.debug('Getting page %d', params['page'])
        xml = None

    return subtitles
class SubtitriIdProvider(Provider, ProviderSubtitleArchiveMixin):
    """subtitri.id.lv Provider."""
    subtitle_class = SubtitriIdSubtitle
    languages = {Language('lva', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
    video_types = (Movie, )
    server_url = 'http://subtitri.id.lv'
    search_url = server_url + '/search/'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        # random user agent per session; the site expects a Referer header
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

    def query(self, title):
        """Scrape the search results for *title* and the detail page of each hit."""
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over subtitle cells
        # note: `rows` is taken from the search-page soup before the loop;
        # `soup` is then rebound to each detail page inside the loop
        rows = soup.select('.eBlock')
        for row in rows:
            result_anchor_el = row.select_one('.eTitle > a')

            # page link
            page_link = result_anchor_el.get('href')

            # fetch/parse additional info
            r = self.session.get(page_link, timeout=10)
            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

            # title (last of the "orig / translated" title variants)
            movie_titles_string = soup.select_one('.main-header').text.strip()
            movie_titles_list = movie_titles_string.split(' / ')
            title = movie_titles_list[-1]

            # year
            year = soup.select_one('#film-page-year').text.strip()

            # imdb id
            imdb_link = soup.select_one('#actors-page > a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # download link
            href = soup.select_one('.hvr').get('href')
            download_link = self.server_url + href

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link, download_link, title, year, imdb_id)
            logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles
        for title in titles:
            if isinstance(video, Movie):
                subtitles += [s for s in self.query(title) if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubtitriIdSubtitle):
            # download the subtitle
            r = self.session.get(subtitle.download_link, timeout=10)
            r.raise_for_status()

            # open the archive
            archive_stream = io.BytesIO(r.content)
            if is_rarfile(archive_stream):
                archive = RarFile(archive_stream)
            elif is_zipfile(archive_stream):
                archive = ZipFile(archive_stream)
            else:
                # not an archive: accept the raw payload if it validates as a
                # subtitle, otherwise fail
                subtitle.content = r.content
                if subtitle.is_valid():
                    return
                subtitle.content = None

                raise ProviderError('Unidentified archive type')

            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None,
                    message=None):
    """
    displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode
    :param rating_key: Plex rating key of the item
    :param title: display title (prefixed with base_title when given)
    :param base_title: optional title prefix ("base > title")
    :param item_title: plain item title used for sub-menu callbacks
    :param randomize: cache-busting timestamp for Plex menu callbacks
    :return: the populated object container
    """
    from interface.main import InclExclMenu

    title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
    item = plex_item = get_item(rating_key)
    current_kind = get_item_kind_from_rating_key(rating_key)

    timeout = 30  # seconds; passed to RefreshItem as milliseconds

    oc = SubFolderObjectContainer(title2=title, replace_parent=True, header=header, message=message)

    # bail out with a self-refreshing placeholder when Plex knows nothing
    # about the item
    if not item:
        oc.add(DirectoryObject(
            key=Callback(ItemDetailsMenu, rating_key=rating_key, title=title, base_title=base_title,
                         item_title=item_title, randomize=timestamp()),
            title=_(u"Item not found: %s!", item_title),
            summary=_("Plex didn't return any information about the item, please refresh it and come back later"),
            thumb=default_thumb
        ))
        return oc

    # add back to season for episode
    if current_kind == "episode":
        from interface.menu import MetadataMenu
        show = get_item(item.show.rating_key)
        season = get_item(item.season.rating_key)

        oc.add(DirectoryObject(
            key=Callback(MetadataMenu, rating_key=season.rating_key, title=season.title, base_title=show.title,
                         previous_item_type="show", previous_rating_key=show.rating_key, display_items=True,
                         randomize=timestamp()),
            title=_(u"< Back to %s", season.title),
            summary=_("Back to %s > %s", show.title, season.title),
            thumb=season.thumb or default_thumb
        ))

    # plain and forced refresh entries
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
                     timeout=timeout * 1000),
        title=_(u"Refresh: %s", item_title),
        summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
                  "new subtitles on disk",
                  the_movie_series_season_episode=_(u"the %s" % current_kind)),
        thumb=item.thumb or default_thumb
    ))
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
                     timeout=timeout * 1000),
        title=_(u"Force-find subtitles: %(item_title)s", item_title=item_title),
        summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones"),
        thumb=item.thumb or default_thumb
    ))

    # get stored subtitle info for item id
    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)

    # look for subtitles for all available media parts and all of their languages
    has_multiple_parts = len(plex_item.media) > 1
    part_index = 0
    for media in plex_item.media:
        for part in media.parts:
            filename = os.path.basename(part.file)
            if not os.path.exists(part.file):
                continue

            part_id = str(part.id)
            part_index += 1

            part_index_addon = u""
            part_summary_addon = u""
            if has_multiple_parts:
                part_index_addon = _(u"File %(file_part_index)s: ", file_part_index=part_index)
                part_summary_addon = u"%s " % filename

            # iterate through all configured languages
            for lang in config.lang_list:
                # get corresponding stored subtitle data for that media part (physical media item), for language
                current_sub = stored_subs.get_any(part_id, lang)
                current_sub_id = None
                current_sub_provider_name = None

                summary = _(u"%(part_summary)sNo current subtitle in storage", part_summary=part_summary_addon)
                current_score = None
                if current_sub:
                    current_sub_id = current_sub.id
                    current_sub_provider_name = current_sub.provider_name
                    current_score = current_sub.score

                    summary = _(u"%(part_summary)sCurrent subtitle: %(provider_name)s (added: %(date_added)s, "
                                u"%(mode)s), Language: %(language)s, Score: %(score)i, Storage: %(storage_type)s",
                                part_summary=part_summary_addon,
                                provider_name=_(current_sub.provider_name),
                                date_added=df(current_sub.date_added),
                                mode=_(current_sub.mode_verbose),
                                language=display_language(lang),
                                score=current_sub.score,
                                storage_type=current_sub.storage_type)

                    # a stored subtitle exists: offer the management menu
                    oc.add(DirectoryObject(
                        key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
                                     item_title=item_title, language=lang, language_name=display_language(lang),
                                     current_id=current_sub_id,
                                     item_type=plex_item.type, filename=filename, current_data=summary,
                                     randomize=timestamp(), current_provider=current_sub_provider_name,
                                     current_score=current_score),
                        title=_(u"%(part_summary)sManage %(language)s subtitle", part_summary=part_index_addon,
                                language=display_language(lang)),
                        summary=summary
                    ))
                else:
                    # nothing stored yet: offer the list/search menu instead
                    oc.add(DirectoryObject(
                        key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id,
                                     title=title, item_title=item_title, language=lang,
                                     language_name=display_language(lang), current_id=current_sub_id,
                                     item_type=plex_item.type, filename=filename, current_data=summary,
                                     randomize=timestamp(), current_provider=current_sub_provider_name,
                                     current_score=current_score),
                        title=_(u"%(part_summary)sList %(language)s subtitles", part_summary=part_index_addon,
                                language=display_language(lang)),
                        summary=summary
                    ))

            if config.plex_transcoder:
                # embedded subtitles (only extractable with a transcoder)
                embedded_count = 0
                embedded_langs = []
                for stream in part.streams:
                    # subtitle stream
                    if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
                        lang = get_language_from_stream(stream.language_code)
                        is_forced = is_stream_forced(stream)
                        if not lang and config.treat_und_as_first:
                            lang = list(config.lang_list)[0]

                        if lang:
                            lang = Language.rebuild(lang, forced=is_forced)
                            embedded_langs.append(lang)
                            embedded_count += 1

                if embedded_count:
                    oc.add(DirectoryObject(
                        key=Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id, title=title,
                                     item_type=plex_item.type, item_title=item_title, base_title=base_title,
                                     randomize=timestamp()),
                        title=_(u"%(part_summary)sEmbedded subtitles (%(languages)s)", part_summary=part_index_addon,
                                languages=", ".join(display_language(l) for l in
                                                    list(OrderedDict.fromkeys(embedded_langs)))),
                        summary=_(u"Extract embedded subtitle streams")
                    ))

    ignore_title = item_title
    if current_kind == "episode":
        ignore_title = get_item_title(item)
    add_incl_excl_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=InclExclMenu)

    subtitle_storage.destroy()

    return oc
def test_query_list_subtitles(movies):
    """list_subtitles should return 16 hits for both Chinese scripts."""
    wanted = [Language('zho', None, 'Hant'), Language('zho', None, 'Hans')]
    with AssrtProvider(TOKEN) as provider:
        found = provider.list_subtitles(movies['man_of_steel'], wanted)
    assert len(found) == 16
class SubdivxSubtitlesProvider(Provider):
    """Scraping provider for subdivx.com (Spanish subtitles)."""
    provider_name = 'subdivx'
    hash_verifiable = False
    languages = {Language.fromalpha2(l) for l in ['es']}
    subtitle_class = SubdivxSubtitle

    server_url = 'https://www.subdivx.com/'
    multi_result_throttle = 2  # seconds between result pages
    language_list = list(languages)

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

    def terminate(self):
        self.session.close()

    def query(self, keyword, season=None, episode=None, year=None):
        """Search subdivx for *keyword*, walking result pages until a short page."""
        query = keyword
        if season and episode:
            query += ' S{season:02d}E{episode:02d}'.format(season=season, episode=episode)
        elif year:
            query += ' {:4d}'.format(year)

        params = {
            'buscar': query,  # search string
            'accion': 5,  # action search
            'oxdown': 1,  # order by downloads descending
            'pg': 1  # page 1
        }

        logger.debug('Searching subtitles %r', query)
        subtitles = []
        language = self.language_list[0]
        search_link = self.server_url + 'index.php'
        while True:
            response = self.session.get(search_link, params=params, timeout=10)
            self._check_response(response)

            try:
                page_subtitles = self._parse_subtitles_page(response, language)
            except Exception as e:
                raise ParseResponseError('Error parsing subtitles list: ' + str(e))

            subtitles += page_subtitles

            # a full page (>= 20 hits) implies there may be more pages
            if len(page_subtitles) >= 20:
                params['pg'] += 1  # search next page
                time.sleep(self.multi_result_throttle)
            else:
                break

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        elif isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        for title in titles:
            if isinstance(video, Episode):
                subtitles += [s for s in self.query(title, season=video.season, episode=video.episode,
                                                    year=video.year) if s.language in languages]
            elif isinstance(video, Movie):
                subtitles += [s for s in self.query(title, year=video.year) if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubdivxSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)

            # get download link
            download_link = self._get_download_link(subtitle)

            # download zip / rar file with the subtitle
            response = self.session.get(download_link, headers={'Referer': subtitle.page_link}, timeout=30)
            self._check_response(response)

            # open the compressed archive
            archive = self._get_archive(response.content)

            # extract the subtitle
            subtitle_content = self._get_subtitle_from_archive(archive)
            subtitle.content = fix_line_ending(subtitle_content)

    def _check_response(self, response):
        # the site answers errors with non-200 status codes only
        if response.status_code != 200:
            raise ServiceUnavailable('Bad status code: ' + str(response.status_code))

    def _parse_subtitles_page(self, response, language):
        """Extract (title, page link, description) triples from a result page."""
        subtitles = []

        # the site serves iso-8859-1, not utf-8
        page_soup = ParserBeautifulSoup(response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
        title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
        body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})

        # title and body divs come in parallel lists, paired by index
        for subtitle in range(0, len(title_soups)):
            title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]

            # title
            title = title_soup.find("a").text.replace("Subtitulo de ", "")
            page_link = title_soup.find("a")["href"].replace('http://', 'https://')

            # body
            description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text

            subtitle = self.subtitle_class(language, page_link, description, title)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _get_download_link(self, subtitle):
        """Scrape the actual download URL from the subtitle's detail page."""
        response = self.session.get(subtitle.page_link, timeout=10)
        self._check_response(response)
        try:
            page_soup = ParserBeautifulSoup(response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
            links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
            for link_soup in links_soup:
                if link_soup['href'].startswith('bajar'):
                    return self.server_url + link_soup['href']
        except Exception as e:
            raise ParseResponseError('Error parsing download link: ' + str(e))

        raise ParseResponseError('Download link not found')

    def _get_archive(self, content):
        # open the archive
        archive_stream = io.BytesIO(content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            raise ParseResponseError('Unsupported compressed format')

        return archive

    def _get_subtitle_from_archive(self, archive):
        """Return the first usable subtitle file found inside *archive*."""
        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            return archive.read(name)

        raise ParseResponseError('Can not find the subtitle in the compressed file')
def extract_embedded_sub(**kwargs):
    """
    Extract a single embedded subtitle stream from a media part using the
    Plex transcoder binary (ffmpeg) and save it like a downloaded subtitle.

    Expected kwargs: rating_key, part_id, stream_index, language (IETF code);
    optional: with_mods, refresh, set_current, plex_item, part,
    scanned_videos, extract_mode.
    :return: True when at least one stream was extracted and saved
    """
    rating_key = kwargs["rating_key"]
    part_id = kwargs.pop("part_id")
    stream_index = kwargs.pop("stream_index")
    with_mods = kwargs.pop("with_mods", False)
    language = Language.fromietf(kwargs.pop("language"))
    refresh = kwargs.pop("refresh", True)
    set_current = kwargs.pop("set_current", True)
    plex_item = kwargs.pop("plex_item", get_item(rating_key))
    item_type = get_item_kind_from_item(plex_item)
    part = kwargs.pop("part", get_part(plex_item, part_id))
    scanned_videos = kwargs.pop("scanned_videos", None)
    extract_mode = kwargs.pop("extract_mode", "a")

    any_successful = False

    if part:
        if not scanned_videos:
            metadata = get_plex_metadata(rating_key, part_id, item_type, plex_item=plex_item)
            scanned_videos = scan_videos([metadata], ignore_all=True, skip_hashing=True)

        for stream in part.streams:
            # subtitle stream
            if str(stream.index) == stream_index:
                is_forced = is_stream_forced(stream)
                bn = os.path.basename(part.file)

                set_refresh_menu_state(_(u"Extracting subtitle %(stream_index)s of %(filename)s",
                                         stream_index=stream_index, filename=bn))
                Log.Info(u"Extracting stream %s (%s) of %s", stream_index, str(language), bn)

                # mov_text can't be emitted as-is; convert it to srt
                out_codec = stream.codec if stream.codec != "mov_text" else "srt"

                args = [
                    config.plex_transcoder, "-i", part.file, "-map", "0:%s" % stream_index, "-f", out_codec, "-"
                ]

                cmdline = quote_args(args)
                Log.Debug(u"Calling: %s", cmdline)
                if mswindows:
                    Log.Debug("MSWindows: Fixing encoding")
                    cmdline = cmdline.encode("mbcs")

                output = None
                try:
                    # note: shell=True with pre-quoted args; inputs come from
                    # Plex metadata, not direct user input
                    output = subprocess.check_output(cmdline, stderr=subprocess.PIPE, shell=True)
                except Exception:
                    # fix: was a bare `except:`, which also swallowed
                    # SystemExit/KeyboardInterrupt
                    Log.Error("Extraction failed: %s", traceback.format_exc())

                if output:
                    subtitle = ModifiedSubtitle(language, mods=config.default_mods if with_mods else None)
                    subtitle.content = output
                    subtitle.provider_name = "embedded"
                    subtitle.id = "stream_%s" % stream_index
                    subtitle.score = 0
                    subtitle.set_encoding("utf-8")

                    # fixme: speedup video; only video.name is needed
                    video = scanned_videos.keys()[0]
                    save_successful = save_subtitles(scanned_videos, {video: [subtitle]}, mode="m",
                                                     set_current=set_current)
                    set_refresh_menu_state(None)

                    if save_successful and refresh:
                        refresh_item(rating_key)

                    # add item to history
                    item_title = get_title_for_video_metadata(video.plexapi_metadata, add_section_title=False,
                                                              add_episode_title=True)
                    history = get_history()
                    history.add(item_title, video.id, section_title=video.plexapi_metadata["section"],
                                thumb=video.plexapi_metadata["super_thumb"],
                                subtitle=subtitle, mode=extract_mode)
                    history.destroy()

                    any_successful = True

    return any_successful
def test_query_episode(episodes):
    """Querying TBBT S07E05 should yield 11 subtitles across both scripts."""
    wanted = [Language('zho', None, 'Hant'), Language('zho', None, 'Hans')]
    with AssrtProvider(TOKEN) as provider:
        found = provider.query(wanted, episodes['bbt_s07e05'])
    assert len(found) == 11
class SubsynchroProvider(Provider):
    """Subsynchro Provider"""
    # subsynchro.com serves French subtitles only
    languages = {Language.fromalpha2(l) for l in ["fr"]}
    language_list = list(languages)
    video_types = (Movie, )

    def initialize(self):
        # the site requires a Referer header; the UA identifies us as Bazarr
        self.session = Session()
        self.session.headers = {"User-Agent": "Bazarr", "Referer": PAGE_URL}

    def terminate(self):
        self.session.close()

    def query(self, languages, video):
        """Search the provider API by title/year and build subtitle candidates.

        NOTE(review): the requested `languages` argument is not consulted —
        every result is tagged with the provider's single French language.
        """
        language = self.language_list[0]

        params = {"title": video.title, "year": video.year}

        logger.debug("Searching subtitles from params: %s", params)

        result = self.session.get(SERVER_URL, params=params, timeout=10)
        result.raise_for_status()

        subtitles = []

        results = result.json() or {}

        # the API embeds its own status code inside the JSON payload
        status_ = results.get("status")

        if status_ != 200:
            logger.debug(f"No subtitles found (status {status_})")
            return subtitles

        for i in results.get("data", []):
            matches = set()
            # match against both the French and the original title
            if any(video.title.lower() in title.lower()
                   for title in (i.get("titre", "n/a"), i.get("titre_original", "n/a"))):
                # Year is already set on query
                matches.update(["title", "year"])

            subtitles.append(
                SubsynchroSubtitle(
                    language,
                    i.get("release", "n/a"),
                    i.get("filename", "n/a"),
                    i.get("telechargement"),
                    i.get("fichier"),
                    matches,
                ))
        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def get_file(self, archive):
        """Return the raw bytes of the first usable subtitle file in a zip.

        Skips dot-prefixed (hidden) members and non-subtitle extensions.

        :raises APIThrottled: when no subtitle file is found in the archive.
        """
        for name in archive.namelist():
            if os.path.split(name)[-1].startswith("."):
                continue

            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            logger.debug(f"Returning from archive: {name}")
            return archive.read(name)

        raise APIThrottled("Can not find the subtitle in the zip file")

    def download_subtitle(self, subtitle):
        """Download the subtitle; only zip payloads are supported."""
        logger.debug(f"Downloading subtitle {subtitle.download_url}")

        response = self.session.get(subtitle.download_url, allow_redirects=True, timeout=10)
        response.raise_for_status()

        stream = io.BytesIO(response.content)

        if is_zipfile(stream):
            logger.debug("Zip file found")
            subtitle_ = self.get_file(ZipFile(stream))
            subtitle.content = fix_line_ending(subtitle_)
        else:
            raise APIThrottled(f"Unknown file type: {subtitle.download_url}")
def test_language_contains():
    """All zho variants (with country and/or script) contain bare zho;
    a script-qualified zho also contains itself."""
    bare = Language('zho')
    variants = (
        Language('zho'),
        Language('zho', 'TW', None),
        Language('zho', 'CN', None),
        Language('zho', None, 'Hant'),
        Language('zho', None, 'Hans'),
        Language('zho', 'TW', 'Hant'),
        Language('zho', 'CN', 'Hans'),
    )
    for variant in variants:
        assert language_contains(variant, bare)
    for script in ('Hant', 'Hans'):
        scripted = Language('zho', None, script)
        assert language_contains(scripted, scripted)
def get_lang_list(self, provider=None, ordered=False):
    """Assemble the configured language set (or ordered list).

    Precedence: per-provider advanced-settings override, then the three
    language preference slots plus the free-form custom string, then the
    forced/forced-only/normal-subs post-processing.

    :param provider: provider name to look up in the advanced settings.
    :param ordered: return an ordered list instead of a set.
    """
    # advanced settings: a provider-specific language list overrides everything
    if provider and self.advanced.providers and provider in self.advanced.providers:
        adv_languages = self.advanced.providers[provider].get("languages", None)
        if adv_languages:
            adv_out = set()
            for adv_lang in adv_languages:
                adv_lang = adv_lang.strip()
                # try IETF code first, then a plain language name
                try:
                    real_lang = Language.fromietf(adv_lang)
                except:
                    try:
                        real_lang = Language.fromname(adv_lang)
                    except:
                        continue
                adv_out.update({real_lang})

            # fallback to default languages if no valid language was found in advanced settings
            # NOTE(review): this path always returns a set, even when ordered=True — confirm intended
            if adv_out:
                return adv_out

    # slot 1 is mandatory; custom languages come from a comma-separated pref
    l = [Language.fromietf(Prefs["langPref1a"])]
    lang_custom = Prefs["langPrefCustom"].strip()

    if Prefs['subtitles.only_one']:
        return set(l) if not ordered else l

    if Prefs["langPref2a"] != "None":
        try:
            l.append(Language.fromietf(Prefs["langPref2a"]))
        except:
            pass

    if Prefs["langPref3a"] != "None":
        try:
            l.append(Language.fromietf(Prefs["langPref3a"]))
        except:
            pass

    # free-form custom languages: IETF code or plain English name, comma-separated
    if len(lang_custom) and lang_custom != "None":
        for lang in lang_custom.split(u","):
            lang = lang.strip()
            try:
                real_lang = Language.fromietf(lang)
            except:
                try:
                    real_lang = Language.fromname(lang)
                except:
                    continue
            l.append(real_lang)

    if self.forced_also:
        # additionally request forced variants of (some of) the languages
        if Prefs["subtitles.when_forced"] == "Always":
            for lang in list(l):
                l.append(Language.rebuild(lang, forced=True))
        else:
            for (setting, index) in (("Only for Subtitle Language (1)", 0),
                                     ("Only for Subtitle Language (2)", 1),
                                     ("Only for Subtitle Language (3)", 2)):
                if Prefs["subtitles.when_forced"] == setting:
                    try:
                        l.append(Language.rebuild(list(l)[index], forced=True))
                        break
                    except:
                        pass

    elif self.forced_only:
        # forced-only mode: mark every configured language forced in place
        for lang in l:
            lang.forced = True

    if not self.normal_subs:
        # drop all non-forced languages (iterate over a copy while removing)
        for lang in l[:]:
            if not lang.forced:
                l.remove(lang)

    return set(l) if not ordered else l
def get_language(lang_short):
    """Resolve a short IETF language tag (e.g. "en", "pt-BR") into a Language."""
    parsed = Language.fromietf(lang_short)
    return parsed
def query(self, languages, title, season=None, episode=None, year=None, video=None):
    """Search titlovi.com for subtitles, walking all result pages.

    :param languages: requested languages (titlovi codes are derived per language).
    :param title: movie/series title to search for.
    :param season: season number (episodes only).
    :param episode: episode number (episodes only).
    :param year: release year filter.
    :param video: the video being searched for (used for match priming).
    :return: list of found subtitles.
    """
    items_per_page = 10
    current_page = 1

    used_languages = languages
    lang_strings = [str(lang) for lang in used_languages]

    # handle possible duplicate use of Serbian Latin
    if "sr" in lang_strings and "sr-Latn" in lang_strings:
        logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
        used_languages = filter(lambda l: l != Language.fromietf('sr-Latn'), used_languages)
        logger.info('Filtered language list %r', used_languages)

    # convert list of languages into search string
    langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

    # set query params
    params = {'prijevod': title, 'jezik': langs}
    is_episode = False
    if season and episode:
        is_episode = True
        params['s'] = season
        params['e'] = episode
    if year:
        params['g'] = year

    # loop through paginated results
    logger.info('Searching subtitles %r', params)
    subtitles = []

    while True:
        # query the server
        try:
            r = self.session.get(self.search_url, params=params, timeout=10)
            r.raise_for_status()
        except RequestException as e:
            logger.exception('RequestException %s', e)
            break
        else:
            try:
                soup = BeautifulSoup(r.content, 'lxml')

                # number of results
                result_count = int(soup.select_one('.results_count b').string)
            except Exception:
                result_count = None

        # exit if no results
        if not result_count:
            if not subtitles:
                logger.debug('No subtitles found')
            else:
                logger.debug("No more subtitles found")
            break

        # number of pages with results
        pages = int(math.ceil(result_count / float(items_per_page)))

        # get current page
        if 'pg' in params:
            current_page = int(params['pg'])

        try:
            sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
            for sub in sublist:
                # subtitle id
                sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                # get download link
                download_link = self.download_url + sid
                # title and alternate title
                match = title_re.search(sub.a.string)
                if match:
                    _title = match.group('title')
                    alt_title = match.group('altitle')
                else:
                    continue

                # page link
                page_link = self.server_url + sub.a.attrs['href']
                # subtitle language
                match = lang_re.search(sub.select_one('.lang').attrs['src'])
                if match:
                    try:
                        # decode language
                        lang = Language.fromtitlovi(match.group('lang') + match.group('script'))
                    except ValueError:
                        continue
                else:
                    # fix: previously `lang` stayed unbound (NameError) or stale
                    # from the previous row when the flag image didn't match
                    continue

                # fix: initialize per row — these used to leak across loop
                # iterations (stale value) or raise NameError on the first row
                # when the year/fps markers were absent
                r_year = None
                fps = None

                # relase year or series start year
                match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                if match:
                    r_year = int(match.group('year'))
                # fps
                match = fps_re.search(sub.select_one('.fps').string)
                if match:
                    fps = match.group('fps')

                # releases
                releases = str(sub.select_one('.fps').parent.contents[0].string)

                # handle movies and series separately
                if is_episode:
                    # season and episode info
                    sxe = sub.select_one('.s0xe0y').string
                    r_season = None
                    r_episode = None
                    if sxe:
                        match = season_re.search(sxe)
                        if match:
                            r_season = int(match.group('season'))
                        match = episode_re.search(sxe)
                        if match:
                            r_episode = int(match.group('episode'))

                    subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                   alt_title=alt_title, season=r_season, episode=r_episode,
                                                   year=r_year, fps=fps,
                                                   asked_for_release_group=video.release_group,
                                                   asked_for_episode=episode)
                else:
                    subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                   alt_title=alt_title, year=r_year, fps=fps,
                                                   asked_for_release_group=video.release_group)
                logger.debug('Found subtitle %r', subtitle)

                # prime our matches so we can use the values later
                subtitle.get_matches(video)

                # add found subtitles
                subtitles.append(subtitle)
        finally:
            soup.decompose()

        # stop on last page
        if current_page >= pages:
            break

        # increment current page
        params['pg'] = current_page + 1
        logger.debug('Getting page %d', params['pg'])

    return subtitles
# Standalone debugging script: applies subtitle mods to a file given on the
# command line and prints/produces the modified content.
# NOTE(review): relies on `logging` and `sys` being imported earlier in the
# file (not visible in this chunk) — confirm before running standalone.
from subzero.language import Language
from subliminal_patch import Subtitle
from subliminal_patch.subtitle import ftfy_defaults

logger = logging.getLogger(__name__)

from subzero.modification import SubMod

# usage: script.py <subtitle file> [--debug]
fn = sys.argv[1]
debug = "--debug" in sys.argv

if debug:
    logging.basicConfig(level=logging.DEBUG)

# earlier experiment kept for reference:
#sub = Subtitle(Language.fromietf("eng:forced"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=-500)", "shift_offset(ms=500)", "shift_offset(s=2,ms=800)"])
sub = Subtitle(Language.fromietf("eng:forced"), mods=["common", "remove_HI", "OCR_fixes", "fix_uppercase", "shift_offset(ms=0,s=1)"])
sub.content = open(fn).read()
sub.normalize()

# run the configured mod chain; debug=True prints the processing steps
content = sub.get_modified_content(debug=True)

# alternative, lower-level SubMod-based experiments kept for reference:
#submod = SubMod(debug=debug)
#submod.load(fn, language=Language.fromietf("pol"), encoding="utf-8")
#submod.modify("OCR_fixes", "common", "remove_tags", "OCR_fixes", "OCR_fixes")
#submod.modify("shift_offset(s=20)", "OCR_fixes")
#submod.modify("remove_HI", "OCR_fixes", "common", "OCR_fixes", "shift_offset(s=20)", "OCR_fixes", "color(name=white)", "shift_offset(s=-5, ms=-350)")
#srt = Subtitle.pysubs2_to_unicode(submod.f)
#content = fix_text(Subtitle.pysubs2_to_unicode(submod.f, format=format), **ftfy_defaults)\
#    .encode(encoding="utf-8")
#print submod.f.to_string("srt", encoding="utf-8")
#print repr(content)
def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False, match_strictness="strict"):
    """Scan the video's directory for external subtitle files.

    Matches subtitle files against the video's base filename, extracting an
    optional forced/hi tag and a language code from the filename suffix, with
    special handling for simplified/traditional Chinese markers.

    :param path: path of the video file.
    :param languages: wanted languages (first one is used as fallback with only_one).
    :param only_one: single-subtitle mode; untagged files inherit the first wanted language.
    :param scandir_generic: use the generic (fallback) scandir implementation.
    :param match_strictness: "strict" requires the exact video basename;
        "loose" also accepts filenames contained in the video basename.
    :return: dict mapping subtitle filename -> Language (or None when undetermined).
    """
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fn_no_ext, fileext = os.path.splitext(filename)
    fn_no_ext_lower = fn_no_ext.lower()
    subtitles = {}

    _scandir = _scandir_generic if scandir_generic else scandir
    for entry in _scandir(dirpath):
        # some scandir variants return garbage names; retry with the generic fallback
        # NOTE(review): the retry drops match_strictness (reverts to default) — confirm intended
        if (not entry.name or entry.name in ('\x0c', '$', ',', '\x7f')) and not scandir_generic:
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
            return _search_external_subtitles(path, languages, only_one, True)

        if not entry.is_file(follow_symlinks=False):
            continue

        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        # not p.lower().startswith(fileroot.lower()) or not
        p_root, p_ext = os.path.splitext(p)
        if not INCLUDE_EXOTIC_SUBS and p_ext not in (".srt", ".ass", ".ssa", ".vtt"):
            continue

        if p_root.lower() == fn_no_ext_lower:
            # skip check for language code if the subtitle file name is the same as the video name
            subtitles[p] = None
            continue

        # extract potential forced/normal/default/hi tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom', 'hi', 'cc',
                           'sdh']:
                p_root = split_tag[0]

        forced = False
        if adv_tag:
            forced = "forced" in adv_tag

        hi = False
        if adv_tag:
            hi_tag = ["hi", "cc", "sdh"]
            hi = any(i for i in hi_tag if i in adv_tag)

        #add simplified/traditional chinese detection
        simplified_chinese = ["chs", "sc", "zhs", "hans", "zh-hans", "gb", "简", "简中", "简体", "简体中文", "中英双语",
                              "中日双语", "中法双语", "简体&英文"]
        traditional_chinese = ["cht", "tc", "zht", "hant", "zh-hant", "big5", "繁", "繁中", "繁体", "繁體", "繁体中文",
                               "繁體中文", "正體中文", "中英雙語", "中日雙語", "中法雙語", "繁体&英文"]
        # normalize zh-TW so the language-code stripper below recognizes it
        p_root = p_root.replace('zh-TW', 'zht')

        # remove possible language code for matching
        p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub(
            lambda m: "" if str(m.group(1)).lower() in FULL_LANGUAGE_LIST else m.group(0), p_root)

        p_root_lower = p_root_bare.lower()

        filename_matches = p_root_lower == fn_no_ext_lower
        filename_contains = p_root_lower in fn_no_ext_lower

        if not filename_matches:
            if match_strictness == "strict" or (match_strictness == "loose" and not filename_contains):
                continue

        language = None

        # extract the potential language code
        try:
            language_code = p_root.rsplit(".", 1)[1].replace('_', '-')
            try:
                language = Language.fromietf(language_code)
                language.forced = forced
                language.hi = hi
            except (ValueError, LanguageReverseError):
                #add simplified/traditional chinese detection
                if any(ext in str(language_code) for ext in simplified_chinese):
                    language = Language.fromietf('zh')
                    language.forced = forced
                    language.hi = hi
                elif any(ext in str(language_code) for ext in traditional_chinese):
                    language = Language.fromietf('zh')
                    language.forced = forced
                    language.hi = hi
                else:
                    logger.error('Cannot parse language code %r', language_code)
                    language_code = None
        except IndexError:
            # no dot-suffix at all — no language code present
            language_code = None

        if not language and not language_code and only_one:
            # untagged subtitle in single-language mode: assume the first wanted language
            language = Language.rebuild(list(languages)[0], forced=forced, hi=hi)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
class Subs4SeriesProvider(Provider):
    """Subs4Series Provider."""
    # Greek and English episode subtitles from subs4series.com
    languages = {Language(l) for l in ['ell', 'eng']}
    video_types = (Episode, )
    server_url = 'https://www.subs4series.com'
    search_url = '/search_report.php?search={}&searchType=1'
    episode_link = '/tv-series/{show_id}/season-{season:d}/episode-{episode:d}'
    subtitle_class = Subs4SeriesSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

    def terminate(self):
        self.session.close()

    def get_show_ids(self, title, year=None):
        """Get the best matching show id for `series` and `year`.

        First search in the result of :meth:`_get_show_suggestions`.

        :param title: show title.
        :param year: year of the show, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: str
        """
        title_sanitized = sanitize(title).lower()
        show_ids = self._get_suggestions(title)

        matched_show_ids = []
        for show in show_ids:
            show_id = None
            show_title = sanitize(show['title'])
            # attempt with year
            if not show_id and year:
                logger.debug('Getting show id with year')
                show_id = '/'.join(show['link'].rsplit('/', 2)[1:]) if show_title == '{title} {year:d}'.format(
                    title=title_sanitized, year=year) else None

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = '/'.join(show['link'].rsplit('/', 2)[1:]) if show_title == title_sanitized else None

            if show_id:
                matched_show_ids.append(show_id)

        return matched_show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, to_str=text_type,
                               should_cache_fn=lambda value: value)
    def _get_suggestions(self, title):
        """Search the show or movie id from the `title` and `year`.

        :param str title: title of the show.
        :return: the show suggestions found.
        :rtype: dict
        """
        # make the search
        logger.info('Searching show ids with %r', title)
        r = self.session.get(self.server_url + text_type(self.search_url).format(title),
                             headers={'Referer': self.server_url}, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return {}

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        # suggestions are rendered as an HTML <select> of options
        series = [{'link': l.attrs['value'], 'title': l.text}
                  for l in soup.select('select[name="Mov_sel"] > option[value]')]
        logger.debug('Found suggestions: %r', series)

        return series

    def query(self, show_id, series, season, episode, title):
        """Fetch the episode page and scrape its subtitle entries."""
        # get the season list of the show
        logger.info('Getting the subtitle list of show id %s', show_id)
        if all((show_id, season, episode)):
            page_link = self.server_url + self.episode_link.format(show_id=show_id, season=season, episode=episode)
        else:
            return []

        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # show year and display title come from the page header
        year_num = None
        matches = year_re.match(str(soup.select_one('#dates_header_br > table div').contents[2]).strip())
        if matches:
            year_num = int(matches.group(1))
        show_title = str(soup.select_one('#dates_header_br > table u').contents[0]).strip()

        subtitles = []
        # loop over episode rows
        for subtitle in soup.select('table.table_border div[align="center"] > div'):
            # read common info
            version = subtitle.find('b').text
            download_link = self.server_url + subtitle.find('a')['href']
            # language is inferred from the flag image filename
            language = Language.fromalpha2(subtitle.find('img')['src'].split('/')[-1].split('.')[0])

            subtitle = self.subtitle_class(language, page_link, show_title, year_num, version, download_link)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        # lookup show_id
        # NOTE(review): for non-Episode videos `titles` is empty, leaving
        # show_ids = None, and iterating None below raises TypeError —
        # presumably unreachable since video_types is (Episode,); confirm.
        titles = [video.series] + video.alternative_series if isinstance(video, Episode) else []

        show_ids = None
        for title in titles:
            show_ids = self.get_show_ids(title, video.year)
            if show_ids and len(show_ids) > 0:
                break

        subtitles = []
        # query for subtitles with the show_id
        for show_id in show_ids:
            subtitles += [s for s in self.query(show_id, video.series, video.season, video.episode, video.title)
                          if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        """Resolve the real download target from the subtitle page, then fetch it."""
        if isinstance(subtitle, Subs4SeriesSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link},
                                 timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug('Unable to download subtitle. No data returned from provider')
                return

            soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
            # direct link first; otherwise fall back to the POST form action
            download_element = soup.select_one('a.style55ws')
            if not download_element:
                download_element = soup.select_one('form[method="post"]')
                target = download_element['action'] if download_element else None
            else:
                target = download_element['href']

            if not target:
                logger.debug('Unable to download subtitle. No download link found')
                return

            download_url = self.server_url + target
            r = self.session.get(download_url, headers={'Referer': subtitle.download_link},
                                 timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug('Unable to download subtitle. No data returned from provider')
                return

            # payload may be an archive or a bare subtitle file
            archive = _get_archive(r.content)
            subtitle_content = _get_subtitle_from_archive(archive) if archive else r.content

            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
            else:
                logger.debug('Could not extract subtitle from %r', archive)
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False,
                               external=True, languages=()):
    """Determine which wanted languages are still missing for a Plex item.

    Inspects, per media part: previously downloaded own external subtitles
    (checked on disk), internal (embedded) streams and external streams, then
    compares against the wanted `languages`.

    :param rating_key: Plex rating key of the item.
    :param kind: "show" (episode) or movie-like.
    :param added_at: passed through unchanged into the returned tuple.
    :param section_title: library section title for display purposes.
    :param internal: count embedded subtitle streams as existing.
    :param external: count external subtitle streams as existing.
    :param languages: iterable of wanted Language objects.
    :return: tuple (added_at, item_id, item_title, item, missing_languages_set).
    """
    item_id = int(rating_key)
    item = get_item(rating_key)

    if kind == "show":
        item_title = get_plex_item_display_title(item, kind, parent=item.season, section_title=section_title,
                                                 parent_title=item.show.title)
    else:
        item_title = get_plex_item_display_title(item, kind, section_title=section_title)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir

    missing = set()
    languages_set = set([Language.rebuild(l) for l in languages])
    for media in item.media:
        existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
        for part in media.parts:
            # did we already download an external subtitle before?
            if subtitle_target_dir and stored_subs:
                for language in languages_set:
                    if has_external_subtitle(part.id, stored_subs, language):
                        # check the existence of the actual subtitle file

                        # get media filename without extension
                        part_basename = os.path.splitext(os.path.basename(part.file))[0]

                        # compute target directory for subtitle
                        # fixme: move to central location
                        if tdir_is_absolute:
                            possible_subtitle_path_base = subtitle_target_dir
                        else:
                            possible_subtitle_path_base = os.path.join(os.path.dirname(part.file),
                                                                       subtitle_target_dir)

                        possible_subtitle_path_base = os.path.realpath(possible_subtitle_path_base)

                        # folder actually exists?
                        if not os.path.isdir(possible_subtitle_path_base):
                            continue

                        found_any = False
                        for ext in config.subtitle_formats:
                            # with only_one the filename carries no language suffix
                            if cast_bool(Prefs['subtitles.only_one']):
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s" % (part_basename, ext))
                            else:
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s.%s" % (part_basename, language, ext))

                            # check for subtitle existence
                            if os.path.isfile(possible_subtitle_path):
                                found_any = True
                                Log.Debug(u"Found: %s", possible_subtitle_path)
                                break

                        if found_any:
                            existing_subs["own_external"].append(language)
                            existing_subs["count"] = existing_subs["count"] + 1

            for stream in part.streams:
                # stream_type 3 == subtitle stream
                if stream.stream_type == 3:
                    is_forced = is_stream_forced(stream)
                    # a stream index marks an embedded (internal) stream
                    if stream.index:
                        key = "internal"
                    else:
                        key = "external"

                    if not config.exotic_ext and stream.codec.lower() not in TEXT_SUBTITLE_EXTS:
                        continue

                    # treat unknown language as lang1?
                    if not stream.language_code and config.treat_und_as_first:
                        lang = Language.rebuild(list(config.lang_list)[0])

                    # we can't parse empty language codes
                    elif not stream.language_code or not stream.codec:
                        continue

                    else:
                        # parse with internal language parser first
                        try:
                            lang = get_language_from_stream(stream.language_code)
                            if not lang:
                                if config.treat_und_as_first:
                                    lang = Language.rebuild(list(config.lang_list)[0])
                                else:
                                    continue
                        except (ValueError, LanguageReverseError):
                            continue

                    if lang:
                        # Log.Debug("Found babelfish language: %r", lang)
                        lang.forced = is_forced
                        existing_subs[key].append(lang)
                        existing_subs["count"] = existing_subs["count"] + 1

        missing_from_part = set([Language.rebuild(l) for l in languages])
        if existing_subs["count"]:
            # fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
            # (pt instead of pt-BR) inside the database. So it might actually download pt-BR if there's a local pt-BR
            # subtitle but not our own.
            existing_flat = set((existing_subs["internal"] if internal else []) +
                                (existing_subs["external"] if external else []) +
                                existing_subs["own_external"])
            check_languages = set([Language.rebuild(l) for l in languages])
            alpha3_map = {}
            if config.ietf_as_alpha3:
                # strip countries for comparison, remembering them for re-application
                for language in existing_flat:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

                for language in check_languages:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

            # compare sets of strings, not sets of different Language instances
            check_languages_str = set(str(l) for l in check_languages)
            existing_flat_str = set(str(l) for l in existing_flat)

            if check_languages_str.issubset(existing_flat_str) or \
                    (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
                # all subs found
                #Log.Info(u"All subtitles exist for '%s'", item_title)
                continue

            missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
            if config.ietf_as_alpha3:
                # restore the remembered country portions
                for language in missing_from_part:
                    language.country = alpha3_map.get(language.alpha3, None)

        if missing_from_part:
            Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
                     missing_from_part)
            missing.update(missing_from_part)

    if missing:
        # deduplicate
        missing = set(Language.fromietf(la) for la in set(str(l) for l in missing))

    return added_at, item_id, item_title, item, missing
class SubsSabBzProvider(Provider):
    """SubsSabBz Provider."""
    # Bulgarian and English subtitles from subs.sab.bz
    languages = {Language(l) for l in [
        'bul', 'eng'
    ]}
    video_types = (Episode, Movie)

    def initialize(self):
        # browser-like headers; a random UA from AGENT_LIST avoids blocking
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
        self.session.headers["Accept-Encoding"] = "gzip, deflate, br"
        self.session.headers["DNT"] = "1"
        self.session.headers["Connection"] = "keep-alive"
        self.session.headers["Upgrade-Insecure-Requests"] = "1"
        self.session.headers["Cache-Control"] = "max-age=0"

    def terminate(self):
        self.session.close()

    def query(self, language, video):
        """Search the site and scrape the first 25 result rows into subtitles."""
        subtitles = []
        isEpisode = isinstance(video, Episode)

        params = {
            'act': 'search',
            'movie': '',
            'select-language': '2',
            'upldr': '',
            'yr': '',
            'release': ''
        }

        if isEpisode:
            params['movie'] = "%s %02d %02d" % (
                sanitize(fix_tv_naming(video.series), {'\''}), video.season, video.episode)
        else:
            params['yr'] = video.year
            params['movie'] = sanitize(fix_movie_naming(video.title), {'\''})

        # site language filter: 2 = Bulgarian (default), 1 = English
        if language == 'en' or language == 'eng':
            params['select-language'] = 1

        logger.info('Searching subtitle %r', params)
        response = self.session.post('http://subs.sab.bz/index.php?', params=params, allow_redirects=False,
                                     timeout=10, headers={
                                         'Referer': 'http://subs.sab.bz/',
                                     })

        response.raise_for_status()

        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        rows = soup.findAll('tr', {'class': 'subs-row'})

        # Search on first 25 rows only
        for row in rows[:25]:
            a_element_wrapper = row.find('td', {'class': 'c2field'})
            if a_element_wrapper:
                element = a_element_wrapper.find('a')
                if element:
                    link = element.get('href')
                    # release notes are embedded in an onmouseover tooltip
                    notes = re.sub(r'ddrivetip\(\'<div.*/></div>(.*)\',\'#[0-9]+\'\)', r'\1',
                                   element.get('onmouseover'))
                    title = element.get_text()

                    # optional per-row metadata; each falls back to None when absent
                    try:
                        year = int(str(element.next_sibling).strip(' ()'))
                    except:
                        year = None

                    td = row.findAll('td')

                    try:
                        num_cds = int(td[6].get_text())
                    except:
                        num_cds = None

                    try:
                        fps = float(td[7].get_text())
                    except:
                        fps = None

                    try:
                        uploader = td[8].get_text()
                    except:
                        uploader = None

                    try:
                        imdb_id = re.findall(r'imdb.com/title/(tt\d+)/?$', td[9].find('a').get('href'))[0]
                    except:
                        imdb_id = None

                    logger.info('Found subtitle link %r', link)
                    # archives can hold multiple subtitle files; tag each with row metadata
                    sub = self.download_archive_and_add_subtitle_files(link, language, video, fps, num_cds)
                    for s in sub:
                        s.title = title
                        s.notes = notes
                        s.year = year
                        s.uploader = uploader
                        s.imdb_id = imdb_id
                        s.single_file = True if len(sub) == 1 and num_cds == 1 else False
                    subtitles = subtitles + sub
        return subtitles

    def list_subtitles(self, video, languages):
        return [s for l in languages for s in self.query(l, video)]

    def download_subtitle(self, subtitle):
        # content may already be populated from the query-time archive download
        if subtitle.content:
            pass
        else:
            # re-download the archive and pick out the matching file
            seeking_subtitle_file = subtitle.filename
            arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language,
                                                                subtitle.video, subtitle.fps, subtitle.num_cds)
            for s in arch:
                if s.filename == seeking_subtitle_file:
                    subtitle.content = s.content

    def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds):
        """Build a subtitle object for every .srt/.sub file inside the archive."""
        subtitles = []
        type = 'episode' if isinstance(video, Episode) else 'movie'
        for file_name in sorted(archiveStream.namelist()):
            if file_name.lower().endswith(('.srt', '.sub')):
                logger.info('Found subtitle file %r', file_name)
                subtitle = SubsSabBzSubtitle(language, file_name, type, video, link, fps, num_cds)
                subtitle.content = fix_line_ending(archiveStream.read(file_name))
                subtitles.append(subtitle)
        return subtitles

    def download_archive_and_add_subtitle_files(self, link, language, video, fps, num_cds):
        """Download (or fetch from cache) the archive at `link` and extract subtitles."""
        logger.info('Downloading subtitle %r', link)
        # responses are cached keyed on the sha1 of the link
        cache_key = sha1(link.encode("utf-8")).digest()
        request = region.get(cache_key)
        if request is NO_VALUE:
            request = self.session.get(link, headers={
                'Referer': 'http://subs.sab.bz/index.php?'
            })
            request.raise_for_status()
            region.set(cache_key, request)
        else:
            logger.info('Cache file: %s', codecs.encode(cache_key, 'hex_codec').decode('utf-8'))

        try:
            archive_stream = io.BytesIO(request.content)
            if is_rarfile(archive_stream):
                return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps,
                                                           num_cds)
            elif is_zipfile(archive_stream):
                return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps,
                                                           num_cds)
        except:
            pass

        # neither rar nor zip (or extraction failed): drop the cached response
        logger.error('Ignore unsupported archive %r', request.headers)
        region.delete(cache_key)
        return []
class LegendasTVProvider(_LegendasTVProvider):
    """Patched LegendasTV provider with IMDB/alt-title matching and release caching."""
    languages = {Language(*l) for l in language_converters['legendastv'].to_legendastv.keys()}
    subtitle_class = LegendasTVSubtitle

    def __init__(self, username=None, password=None):

        # Provider needs UNRAR installed. If not available raise ConfigurationError
        try:
            rarfile.custom_check([rarfile.UNRAR_TOOL], True)
        except rarfile.RarExecError:
            raise ConfigurationError('UNRAR tool not available')

        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.session = None

    @staticmethod
    def is_valid_title(title, title_id, sanitized_title, season, year, imdb_id):
        """Check if is a valid title."""
        # an exact IMDB ID match always wins
        if title["imdb_id"] and title["imdb_id"] == imdb_id:
            logger.debug(u'Matched title "%s" as IMDB ID %s', sanitized_title, title["imdb_id"])
            return True

        # then the localized/alternative title
        if title["title2"] and sanitize(title['title2']) == sanitized_title:
            logger.debug(u'Matched title "%s" as "%s"', sanitized_title, title["title2"])
            return True

        # finally fall back to the upstream matching rules
        return _LegendasTVProvider.is_valid_title(title, title_id, sanitized_title, season, year)

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
    def search_titles(self, title, season, title_year, imdb_id):
        """Search for titles matching the `title`.

        For episodes, each season has it own title

        :param str title: the title to search for.
        :param int season: season of the title
        :param int title_year: year of the title
        :return: found titles.
        :rtype: dict
        """
        titles = {}
        sanitized_titles = [sanitize(title)]
        ignore_characters = {'\'', '.'}
        # also try a variant with apostrophes/dots stripped
        if any(c in title for c in ignore_characters):
            sanitized_titles.append(sanitize(title, ignore_characters=ignore_characters))

        for sanitized_title in sanitized_titles:
            # make the query
            if season:
                logger.info('Searching episode title %r for season %r', sanitized_title, season)
            else:
                logger.info('Searching movie title %r', sanitized_title)

            r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10)
            raise_for_status(r)
            results = json.loads(r.text)

            # loop over results
            for result in results:
                source = result['_source']

                # extract id
                title_id = int(source['id_filme'])

                # extract type
                title = {'type': type_map[source['tipo']], 'title2': None, 'imdb_id': None}

                # extract title, year and country
                name, year, country = title_re.match(source['dsc_nome']).groups()
                title['title'] = name

                # Brazilian/localized title, when present
                if "dsc_nome_br" in source:
                    name2, ommit1, ommit2 = title_re.match(source['dsc_nome_br']).groups()
                    title['title2'] = name2

                # extract imdb_id
                if source['id_imdb'] != '0':
                    if not source['id_imdb'].startswith('tt'):
                        # zero-pad to the canonical ttNNNNNNN form
                        title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
                    else:
                        title['imdb_id'] = source['id_imdb']

                # extract season
                if title['type'] == 'episode':
                    if source['temporada'] and source['temporada'].isdigit():
                        title['season'] = int(source['temporada'])
                    else:
                        match = season_re.search(source['dsc_nome_br'])
                        if match:
                            title['season'] = int(match.group('season'))
                        else:
                            logger.debug('No season detected for title %d (%s)', title_id, name)

                # extract year
                if year:
                    title['year'] = int(year)
                elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
                    # year is based on season air date hence the adjustment
                    title['year'] = int(source['dsc_data_lancamento']) - title.get('season', 1) + 1

                # add title only if is valid
                # Check against title without ignored chars
                if self.is_valid_title(title, title_id, sanitized_titles[0], season, title_year, imdb_id):
                    logger.debug(u'Found title: %s', title)
                    titles[title_id] = title

        logger.debug('Found %d titles', len(titles))

        return titles

    def query(self, language, title, season=None, episode=None, year=None, imdb_id=None):
        """Search titles, download/cache their archives and emit subtitle candidates."""
        # search for titles
        titles = self.search_titles(title, season, year, imdb_id)

        subtitles = []
        # iterate over titles
        for title_id, t in titles.items():

            logger.info('Getting archives for title %d and language %d', title_id, language.legendastv)
            archives = self.get_archives(title_id, language.legendastv, t['type'], season, episode)
            if not archives:
                logger.info('No archives found for title %d and language %d', title_id, language.legendastv)

            # iterate over title's archives
            for a in archives:

                # compute an expiration time based on the archive timestamp
                expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds()

                # attempt to get the releases from the cache
                cache_key = str(a.id + "|" + a.name)
                releases = region.get(cache_key, expiration_time=expiration_time)

                # the releases are not in cache or cache is expired
                if releases == NO_VALUE:
                    logger.info('Releases not found in cache')

                    # download archive
                    self.download_archive(a)

                    # extract the releases
                    releases = []
                    for name in a.content.namelist():
                        # discard the legendastv file
                        if name.startswith('Legendas.tv'):
                            continue

                        # discard hidden files
                        if os.path.split(name)[-1].startswith('.'):
                            continue

                        # discard non-subtitle files
                        if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                            continue

                        releases.append(name)

                    # cache the releases
                    region.set(cache_key, releases)

                # iterate over releases
                for r in releases:
                    subtitle = self.subtitle_class(language, t['type'], t['title'], t.get('year'),
                                                   t.get('imdb_id'), t.get('season'), a, r)
                    logger.debug('Found subtitle %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
            season = video.season
            episode = video.episode
        else:
            titles = [video.title] + video.alternative_titles

        # stop at the first title that yields any subtitles
        for title in titles:
            subtitles = [s for l in languages for s in
                         self.query(l, title, season=season, episode=episode, year=video.year,
                                    imdb_id=video.imdb_id)]
            if subtitles:
                return subtitles

        return []

    def download_subtitle(self, subtitle):
        super(LegendasTVProvider, self).download_subtitle(subtitle)
        # free the archive payload once the subtitle content has been extracted
        subtitle.archive.content = None

    def get_archives(self, title_id, language_code, title_type, season, episode):
        # call the undecorated (uncached) upstream implementation
        return super(LegendasTVProvider, self).get_archives.original(self, title_id, language_code, title_type,
                                                                     season, episode)
'tel': 63, 'tha': 40, 'tur': 41, 'ukr': 56, 'urd': 42, 'yor': 71
}

# TODO: specify codes for unspecified_languages
# language labels the site uses which have no dedicated alpha3 code yet
unspecified_languages = [
    'Big 5 code',
    'Brazillian Portuguese',
    'Bulgarian/ English',
    'Chinese BG code',
    'Dutch/ English',
    'English/ German',
    'Hungarian/ English',
    'Rohingya'
]

supported_languages = {Language(l) for l in exact_languages_alpha3}

# map of language display name -> alpha3 for the exactly-supported languages
alpha3_of_code = {l.name: l.alpha3 for l in supported_languages}

supported_languages.update({Language(l) for l in to_subscene})


class SubsceneConverter(LanguageReverseConverter):
    # the site-facing codes are the languages' display names
    codes = {l.name for l in supported_languages}

    def convert(self, alpha3, country=None, script=None):
        """Convert an alpha3 code to the site's language name/code.

        NOTE(review): falls through (returning None implicitly) for codes in
        neither mapping — confirm whether an error branch exists beyond this chunk.
        """
        if alpha3 in exact_languages_alpha3:
            return Language(alpha3).name

        if alpha3 in to_subscene:
            return to_subscene[alpha3]
def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None,
                  skip_hashing=False):
    """Return a subliminal/guessit-refined parsed video for a Plex part.

    Scans the part's embedded audio/subtitle streams and metadata subtitles,
    then parses the file into a video object and attaches the detected
    languages and fps.

    :param pms_video_info: dict with at least a "plex_part" entry
    :param ignore_all: force refresh — skip external/embedded detection
    :param hints: guessit hints forwarded to parse_video
    :param rating_key: Plex rating key of the item
    :param providers: providers forwarded to parse_video
    :param skip_hashing: skip file hashing (also skipped in low-impact mode)
    :return: the parsed video, or None if the file could not be guessed
    """
    embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
    external_subtitles = not ignore_all and Prefs['subtitles.scan.external']
    plex_part = pms_video_info["plex_part"]

    if ignore_all:
        Log.Debug("Force refresh intended.")

    Log.Debug("Detecting streams: %s, external_subtitles=%s, embedded_subtitles=%s" % (
        plex_part.file, external_subtitles, embedded_subtitles))

    # collect every part of the item, then locate the one matching plex_part
    known_embedded = []
    parts = []
    for media in list(Plex["library"].metadata(rating_key))[0].media:
        parts += media.parts

    plexpy_part = None
    for part in parts:
        if int(part.id) == int(plex_part.id):
            plexpy_part = part

    # embedded subtitles
    # fixme: skip the whole scanning process if known_embedded == wanted languages?
    audio_languages = []
    if plexpy_part:
        for stream in plexpy_part.streams:
            # stream_type 2 == audio stream
            if stream.stream_type == 2:
                lang = None
                try:
                    lang = language_from_stream(stream.language_code)
                except LanguageError:
                    Log.Debug("Couldn't detect embedded audio stream language: %s", stream.language_code)

                # treat unknown language as lang1?
                if not lang and config.treat_und_as_first:
                    lang = Language.rebuild(list(config.lang_list)[0])

                audio_languages.append(lang)

            # stream_type 3 == subtitle stream
            elif stream.stream_type == 3 and embedded_subtitles:
                is_forced = helpers.is_stream_forced(stream)
                if ((config.forced_only or config.forced_also) and is_forced) or not is_forced:
                    # embedded subtitle
                    # fixme: tap into external subtitles here instead of scanning for ourselves later?
                    # NOTE(review): a stream index of 0 is falsy and would be
                    # skipped here — confirm whether that is intended
                    if stream.codec and getattr(stream, "index", None):
                        if config.exotic_ext or stream.codec.lower() in config.text_based_formats:
                            lang = None
                            try:
                                lang = language_from_stream(stream.language_code)
                            except LanguageError:
                                Log.Debug("Couldn't detect embedded subtitle stream language: %s",
                                          stream.language_code)

                            # treat unknown language as lang1?
                            if not lang and config.treat_und_as_first:
                                lang = Language.rebuild(list(config.lang_list)[0])

                            if lang:
                                if is_forced:
                                    lang.forced = True
                                known_embedded.append(lang)
    else:
        Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)

    # metadata subtitles (sidecar subs stored by the agent)
    known_metadata_subs = set()
    meta_subs = get_subtitles_from_metadata(plex_part)
    for language, sub_list in meta_subs.iteritems():
        try:
            lang = Language.fromietf(Locale.Language.Match(language))
        except LanguageError:
            if config.treat_und_as_first:
                lang = Language.rebuild(list(config.lang_list)[0])
            else:
                continue

        if sub_list:
            for key in sub_list:
                # keys written by Sub-Zero for forced subs carry this prefix
                if key.startswith("subzero_md_forced"):
                    lang = Language.rebuild(lang, forced=True)

                known_metadata_subs.add(lang)
                Log.Debug("Found metadata subtitle %r:%s for %s", lang, key, plex_part.file)

    Log.Debug("Known metadata subtitles: %r", known_metadata_subs)
    Log.Debug("Known embedded subtitles: %r", known_embedded)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    try:
        # get basic video info scan (filename)
        video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
                            providers=providers)

        # set stream languages
        if audio_languages:
            video.audio_languages = audio_languages
            Log.Info("Found audio streams: %s" % ", ".join([str(l) for l in audio_languages]))

        if not ignore_all:
            set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
                                   embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
                                   stored_subs=stored_subs, languages=config.lang_list,
                                   only_one=config.only_one, known_metadata_subs=known_metadata_subs)

        # add video fps info
        video.fps = plex_part.fps
        return video
    except ValueError:
        Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())
class GreekSubtitlesProvider(Provider):
    """GreekSubtitles Provider."""
    languages = {Language(l) for l in ['ell', 'eng']}
    server_url = 'http://gr.greek-subtitles.com/'
    search_url = 'search.php?name={}'
    download_url = 'http://www.greeksubtitles.info/getp.php?id={:d}'
    subtitle_class = GreekSubtitlesSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

    def terminate(self):
        self.session.close()

    def query(self, keyword, season=None, episode=None, year=None):
        """Search the provider and return all subtitles across result pages.

        :param keyword: search keyword (title)
        :param season: season number, combined with episode into SxxEyy
        :param episode: episode number
        :param year: release year, used only for movie searches
        :return: list of GreekSubtitlesSubtitle candidates
        """
        params = keyword
        if season and episode:
            params += ' S{season:02d}E{episode:02d}'.format(season=season, episode=episode)
        elif year:
            params += ' {:4d}'.format(year)

        logger.debug('Searching subtitles %r', params)
        subtitles = []
        search_link = self.server_url + self.search_url.format(params)
        while True:
            r = self.session.get(search_link, timeout=30)
            r.raise_for_status()

            if not r.content:
                # bugfix: this used to `return []`, which silently discarded
                # subtitles already collected from earlier result pages; on
                # the first page the behavior is unchanged (subtitles is [])
                logger.debug('No data returned from provider')
                break

            soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                       ['lxml', 'html.parser'])

            # loop over subtitles cells
            for cell in soup.select('td.latest_name > a:nth-of-type(1)'):
                # read the item
                subtitle_id = int(cell['href'].rsplit('/', 2)[1])
                page_link = cell['href']
                # language flag image filename carries the alpha2 code
                language = Language.fromalpha2(
                    cell.parent.find('img')['src'].split('/')[-1].split('.')[0])
                # simplified from `strip() or None` + None->"" normalization:
                # the net result was always the stripped text
                version = cell.text.strip()

                subtitle = self.subtitle_class(language, page_link, version,
                                               self.download_url.format(subtitle_id))
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

            # follow pagination until there is no 'Next' search link
            anchors = soup.select('td a')
            next_page_available = False
            for anchor in anchors:
                if 'Next' in anchor.text and 'search.php' in anchor['href']:
                    search_link = self.server_url + anchor['href']
                    next_page_available = True
                    break

            if not next_page_available:
                break

        return subtitles

    def list_subtitles(self, video, languages):
        """List subtitles for the video, querying every alternative title
        and keeping only the requested languages."""
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        elif isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles with the show_id
        for title in titles:
            if isinstance(video, Episode):
                subtitles += [s for s in self.query(title, season=video.season,
                                                    episode=video.episode, year=video.year)
                              if s.language in languages]
            elif isinstance(video, Movie):
                subtitles += [s for s in self.query(title, year=video.year)
                              if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        """Download and unpack the subtitle archive, fixing line endings."""
        if isinstance(subtitle, GreekSubtitlesSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link,
                                 headers={'Referer': subtitle.page_link}, timeout=30)
            r.raise_for_status()

            if not r.content:
                logger.debug('Unable to download subtitle. No data returned from provider')
                return

            archive = _get_archive(r.content)
            subtitle_content = _get_subtitle_from_archive(archive)
            if subtitle_content:
                subtitle.content = fix_line_ending(subtitle_content)
            else:
                logger.debug('Could not extract subtitle from %r', archive)