def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False,
                               match_strictness="strict"):
    """Collect external subtitle files located in the same directory as ``path``.

    :param path: path of the video file; only its directory and its file name
        without extension are used.
    :param languages: iterable of languages; its first item is used as the
        fallback language when ``only_one`` is set and the subtitle file name
        carries no language code.
    :param only_one: when true, subtitles without a language code are assigned
        the first entry of ``languages``.
    :param scandir_generic: use ``_scandir_generic`` instead of ``scandir`` to
        list the directory.
    :param match_strictness: ``"strict"`` keeps only subtitles whose name
        (minus tag and language code) equals the video name; ``"loose"`` also
        keeps subtitles whose name is contained in the video name.
    :return: dict mapping subtitle file name to the detected language, or to
        ``None`` when no language could be determined.
    """
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fn_no_ext = os.path.splitext(filename)[0]
    fn_no_ext_lower = fn_no_ext.lower()
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir

    secondary_tags = ('forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom')
    common_extensions = (".srt", ".ass", ".ssa", ".vtt")

    def _strip_known_language_code(match):
        # only strip the trailing token if it really is a known language code
        if str(match.group(1)).lower() in FULL_LANGUAGE_LIST:
            return ""
        return match.group(0)

    for entry in _scandir(dirpath):
        if (not entry.name or entry.name in ('\x0c', '$', ',', '\x7f')) and not scandir_generic:
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
            # forward match_strictness as well, otherwise the retry silently
            # falls back to the default "strict" matching
            return _search_external_subtitles(path, languages, only_one, True, match_strictness)

        if not entry.is_file(follow_symlinks=False):
            continue

        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        p_root, p_ext = os.path.splitext(p)

        # compare case-insensitively, consistent with the extension check above
        if not INCLUDE_EXOTIC_SUBS and p_ext.lower() not in common_extensions:
            continue

        # extract potential forced/normal/default tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in secondary_tags:
                p_root = split_tag[0]

        forced = bool(adv_tag) and "forced" in adv_tag

        # remove possible language code for matching
        p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub(_strip_known_language_code, p_root)
        p_root_lower = p_root_bare.lower()

        filename_matches = p_root_lower == fn_no_ext_lower
        filename_contains = p_root_lower in fn_no_ext_lower

        if not filename_matches:
            if match_strictness == "strict" or (match_strictness == "loose" and not filename_contains):
                continue

        language = None

        # extract the potential language code
        try:
            language_code = p_root.rsplit(".", 1)[1].replace('_', '-')
            try:
                language = Language.fromietf(language_code)
                language.forced = forced
            except (ValueError, LanguageReverseError):
                logger.error('Cannot parse language code %r', language_code)
                language_code = None
        except IndexError:
            language_code = None

        # fall back to the first requested language; guarded so that a missing
        # or empty ``languages`` leaves the language undetermined instead of
        # raising
        if not language and not language_code and only_one and languages:
            language = Language.rebuild(list(languages)[0], forced=forced)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False,
                               match_strictness="strict"):
    """Collect external subtitle files located in the same directory as ``path``.

    Besides the language, the forced and hearing-impaired flags are derived
    from the secondary tag of the subtitle file name (the last dot-separated
    token before the extension).

    :param path: path of the video file; only its directory and its file name
        without extension are used.
    :param languages: iterable of languages; its first item is used as the
        fallback language when ``only_one`` is set and the subtitle file name
        carries no language code.
    :param only_one: when true, subtitles without a language code are assigned
        the first entry of ``languages``.
    :param scandir_generic: use ``_scandir_generic`` instead of ``scandir`` to
        list the directory.
    :param match_strictness: ``"strict"`` keeps only subtitles whose name
        (minus tag and language code) equals the video name; ``"loose"`` also
        keeps subtitles whose name is contained in the video name.
    :return: dict mapping subtitle file name to the detected language, or to
        ``None`` when no language could be determined (including subtitles
        named exactly like the video).
    """
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fn_no_ext = os.path.splitext(filename)[0]
    fn_no_ext_lower = fn_no_ext.lower()
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir

    hi_tags = ('hi', 'cc', 'sdh')
    secondary_tags = ('forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom') + hi_tags
    common_extensions = (".srt", ".ass", ".ssa", ".vtt")

    # add simplified/traditional chinese detection
    simplified_chinese = [
        "chs", "sc", "zhs", "hans", "zh-hans", "gb", "简", "简中", "简体", "简体中文",
        "中英双语", "中日双语", "中法双语", "简体&英文"
    ]
    traditional_chinese = [
        "cht", "tc", "zht", "hant", "zh-hant", "big5", "繁", "繁中", "繁体", "繁體",
        "繁体中文", "繁體中文", "正體中文", "中英雙語", "中日雙語", "中法雙語", "繁体&英文"
    ]
    chinese_markers = simplified_chinese + traditional_chinese

    # Make the chinese markers known to the shared language list exactly once.
    # Extending the list for every scanned file made it grow without bound.
    for marker in chinese_markers:
        if marker not in FULL_LANGUAGE_LIST:
            FULL_LANGUAGE_LIST.append(marker)

    def _strip_known_language_code(match):
        # only strip the trailing token if it really is a known language code
        if str(match.group(1)).lower() in FULL_LANGUAGE_LIST:
            return ""
        return match.group(0)

    for entry in _scandir(dirpath):
        if (not entry.name or entry.name in ('\x0c', '$', ',', '\x7f')) and not scandir_generic:
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
            # forward match_strictness as well, otherwise the retry silently
            # falls back to the default "strict" matching
            return _search_external_subtitles(path, languages, only_one, True, match_strictness)

        if not entry.is_file(follow_symlinks=False):
            continue

        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        p_root, p_ext = os.path.splitext(p)

        # compare case-insensitively, consistent with the extension check above
        if not INCLUDE_EXOTIC_SUBS and p_ext.lower() not in common_extensions:
            continue

        if p_root.lower() == fn_no_ext_lower:
            # skip check for language code if the subtitle file name is the same as the video name
            subtitles[p] = None
            continue

        # extract potential forced/normal/default/hi tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in secondary_tags:
                p_root = split_tag[0]

        forced = bool(adv_tag) and "forced" in adv_tag

        # The tag has to *be* a hearing-impaired tag. A substring test would
        # flag language codes that merely contain "hi" (e.g. "chi", "hin").
        hi = adv_tag in hi_tags

        p_root = p_root.replace('zh-TW', 'zht')

        # remove possible language code for matching
        p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub(_strip_known_language_code, p_root)
        p_root_lower = p_root_bare.lower()

        filename_matches = p_root_lower == fn_no_ext_lower
        filename_contains = p_root_lower in fn_no_ext_lower

        if not filename_matches:
            if match_strictness == "strict" or (match_strictness == "loose" and not filename_contains):
                continue

        language = None

        # extract the potential language code
        try:
            language_code = p_root.rsplit(".", 1)[1].replace('_', '-')
            try:
                language = Language.fromietf(language_code)
                language.forced = forced
                language.hi = hi
            except (ValueError, LanguageReverseError):
                # add simplified/traditional chinese detection; both scripts
                # are mapped to plain "zh"
                # NOTE(review): this is a substring match, so an unparseable
                # code that merely contains a marker (e.g. "tc") is treated
                # as chinese - confirm this is intended
                code_text = str(language_code)
                if any(marker in code_text for marker in chinese_markers):
                    language = Language.fromietf('zh')
                    language.forced = forced
                    language.hi = hi
                else:
                    logger.error('Cannot parse language code %r', language_code)
                    language_code = None
        except IndexError:
            language_code = None

        # fall back to the first requested language; guarded so that a missing
        # or empty ``languages`` leaves the language undetermined instead of
        # raising
        if not language and not language_code and only_one and languages:
            language = Language.rebuild(list(languages)[0], forced=forced, hi=hi)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
def _is_within_folder(path, folder):
    """Return True if ``path`` is ``folder`` itself or is located below it."""
    folder = folder.rstrip(os.sep)
    # require a separator after the folder so that "/x/Subtitles2" is not
    # treated as being inside "/x/Subtitles"
    return path == folder or path.startswith(folder + os.sep)


def find_subtitles(part, ignore_parts_cleanup=None):
    """Find subtitles for ``part`` and synchronise ``part.subtitles`` with them.

    Depending on the preferences, subtitles are looked up on the filesystem
    (the media folder, a configured sub folder and the global "Subtitles"
    folder) or taken from the metadata storage. When autoclean is enabled,
    subtitle files without an associated media file are removed from the
    non-global folders first.

    :param part: media part; ``part.file`` is the media file path and
        ``part.subtitles`` is validated against the subtitles found.
    :param ignore_parts_cleanup: when truthy, the housekeeping of leftover
        subtitles is skipped.
    """
    lang_sub_map = {}
    ignore_parts_cleanup = ignore_parts_cleanup or []
    part_filename = helpers.unicodize(part.file)
    part_basename = os.path.splitext(os.path.basename(part_filename))[0]
    use_filesystem = helpers.cast_bool(Prefs["subtitles.save.filesystem"])
    sub_dir_custom = Prefs["subtitles.save.subFolder.Custom"].strip() \
        if Prefs["subtitles.save.subFolder.Custom"] else None

    use_sub_subfolder = Prefs["subtitles.save.subFolder"] != "current folder" and not sub_dir_custom
    autoclean = helpers.cast_bool(Prefs["subtitles.autoclean"])
    sub_subfolder = None

    paths = [os.path.dirname(part_filename)] if use_filesystem else []

    global_folders = []

    if use_filesystem:
        # Check for local subtitles subdirectory
        sub_dir_base = paths[0]

        sub_dir_list = []

        if use_sub_subfolder:
            # got selected subfolder
            sub_subfolder = os.path.join(sub_dir_base, Prefs["subtitles.save.subFolder"])
            sub_dir_list.append(sub_subfolder)
            sub_subfolder = os.path.normpath(helpers.unicodize(sub_subfolder))

        if sub_dir_custom:
            # got custom subfolder
            sub_dir_custom = os.path.normpath(sub_dir_custom)
            if os.path.isdir(sub_dir_custom) and os.path.isabs(sub_dir_custom):
                # absolute folder
                sub_dir_list.append(sub_dir_custom)
                global_folders.append(sub_dir_custom)
            else:
                # relative folder
                sub_dir_list.append(os.path.join(sub_dir_base, sub_dir_custom))

        for sub_dir in sub_dir_list:
            if os.path.isdir(sub_dir):
                paths.append(sub_dir)

    # Check for a global subtitle location
    global_subtitle_folder = os.path.join(Core.app_support_path, 'Subtitles')
    if os.path.exists(global_subtitle_folder):
        paths.append(global_subtitle_folder)
        global_folders.append(global_subtitle_folder)

    # normalize all paths; the global folders are normalized the same way
    # because they are compared against these paths further down
    paths = [os.path.normpath(helpers.unicodize(path)) for path in paths]
    global_folders = [os.path.normpath(helpers.unicodize(fld)) for fld in global_folders]

    # We start by building a dictionary of files to their absolute paths. We also need to know
    # the number of media files that are actually present, in case the found local media asset
    # is limited to a single instance per media file.
    #
    file_paths = {}
    total_media_files = 0
    media_files = set()
    for path in paths:
        for file_path_listing in os.listdir(path.encode(sz_config.fs_encoding)):

            # When using os.listdir with a unicode path, it will always return a string using the
            # NFD form. However, we internally are using the form NFC and therefore need to convert
            # it to allow correct regex / comparisons to be performed.
            #
            file_path_listing = helpers.unicodize(file_path_listing)
            if os.path.isfile(os.path.join(path, file_path_listing).encode(sz_config.fs_encoding)):
                file_paths[file_path_listing.lower()] = os.path.join(path, file_path_listing)

            # If we've found an actual media file, we should record it.
            (root, ext) = os.path.splitext(file_path_listing)
            if ext.lower()[1:] in config.VIDEO_EXTS:
                total_media_files += 1

                # collect found media files
                media_files.add(root)

    # cleanup any leftover subtitle if no associated media file was found
    if autoclean and ignore_parts_cleanup:
        Log.Info("Skipping housekeeping of: %s", paths)

    if use_filesystem and autoclean and not ignore_parts_cleanup:
        for path in paths:
            # only housekeep in sub_subfolder if sub_subfolder is used
            if use_sub_subfolder and path != sub_subfolder and not sz_config.advanced.thorough_cleaning:
                continue

            # we can't housekeep the global subtitle folders as we don't know about *all* media files
            # in a library; skip them
            if any(_is_within_folder(path, fld) for fld in global_folders):
                Log.Info("Skipping housekeeping of folder: %s", path)
                continue

            for file_path_listing in os.listdir(path.encode(sz_config.fs_encoding)):
                file_path_listing = helpers.unicodize(file_path_listing)
                enc_fn = os.path.join(path, file_path_listing).encode(sz_config.fs_encoding)

                if not os.path.isfile(enc_fn):
                    continue

                (root, ext) = os.path.splitext(file_path_listing)

                # only subtitle files are candidates for removal
                if ext.lower()[1:] not in config.SUBTITLE_EXTS_BASE:
                    continue

                # get fn without forced/default/normal tag
                split_tag = root.rsplit(".", 1)
                if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
                    root = split_tag[0]

                # get associated media file name without language
                sub_fn = ENDSWITH_LANGUAGECODE_RE.sub("", root)

                # subtitle basename and basename without possible language tag not found in collected
                # media files? kill.
                if root not in media_files and sub_fn not in media_files:
                    Log.Info("Removing leftover subtitle: %s", os.path.join(path, file_path_listing))
                    try:
                        os.remove(enc_fn)
                    except (OSError, IOError):
                        Log.Error("Removing failed")

    Log('Looking for subtitle media in %d paths with %d media files.', len(paths), total_media_files)
    Log('Paths: %s', ", ".join([helpers.unicodize(p) for p in paths]))

    for file_path in file_paths.values():
        local_filename = os.path.basename(file_path)
        bn, ext = os.path.splitext(local_filename)

        # only subtitle files are of interest here
        if ext.lower()[1:] not in config.SUBTITLE_EXTS:
            continue

        local_basename = helpers.unicodize(bn)

        # get fn without forced/default/normal tag
        split_tag = local_basename.rsplit(".", 1)
        if len(split_tag) > 1 and split_tag[1].lower() in SECONDARY_TAGS:
            local_basename = split_tag[0]

        # split off possible language tag
        local_basename2 = local_basename.rsplit('.', 1)[0]
        filename_matches_part = local_basename == part_basename or local_basename2 == part_basename
        filename_contains_part = part_basename in local_basename

        if not filename_matches_part:
            # if the file is located within the global subtitle folders and its name doesn't match
            # exactly, ignore it
            if any(_is_within_folder(file_path, fld) for fld in global_folders):
                continue

            # determine whether to pick up the subtitle based on our match strictness
            if sz_config.ext_match_strictness == "strict" or (
                    sz_config.ext_match_strictness == "loose" and not filename_contains_part):
                continue

        subtitle_helper = subtitlehelpers.subtitle_helpers(file_path)
        if subtitle_helper is not None:
            local_lang_map = subtitle_helper.process_subtitles(part)
            for new_language, subtitles in local_lang_map.items():
                # Add the possible new language along with the located subtitles so that we can
                # validate them at the end...
                #
                lang_sub_map[new_language] = lang_sub_map.get(new_language, []) + subtitles

    # add known metadata subs to our sub list
    if not use_filesystem:
        for language, sub_list in subtitlehelpers.get_subtitles_from_metadata(part).items():
            if sub_list:
                lang_sub_map[language] = lang_sub_map.get(language, []) + sub_list

    # Now whack subtitles that don't exist anymore.
    for language in lang_sub_map.keys():
        part.subtitles[language].validate_keys(lang_sub_map[language])

    # Now whack the languages that don't exist anymore.
    for language in list(set(part.subtitles.keys()) - set(lang_sub_map.keys())):
        part.subtitles[language].validate_keys({})