def guess_release_group(string):
    """Look for a ``.<codec>-<group>`` pattern in *string*.

    Two passes are made over every entry of ``CODECS + FORMATS``:

    1. accept a match only if the canonical form of the captured group is
       listed in ``properties['releaseGroup']``;
    2. otherwise accept whatever follows the codec as the release group.

    Returns a ``(metadata, span)`` tuple, where ``metadata`` is the match's
    named groups passed through ``adjust_metadata`` and ``span`` runs from
    the start of the codec to the end of the release group. Returns
    ``(None, None)`` when neither pass matches.
    """
    known_tags = CODECS + FORMATS

    # first try to see whether we have both a known codec and a known
    # release group
    for codec in known_tags:
        rexp = r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)[ \.]'
        match = re.search(rexp, string, re.IGNORECASE)
        if match:
            metadata = match.groupdict()
            if canonical_form(metadata['releaseGroup']) in properties['releaseGroup']:
                return adjust_metadata(metadata), (match.start(1), match.end(2))

    # pick anything as releaseGroup as long as we have a codec in front
    # this doesn't include a potential dash ('-') ending the release group
    # eg: [...].X264-HiS@SiLUHD-English.[...]
    for codec in known_tags:
        rexp = (r'\.(?P<videoCodec>' + codec +
                r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]')
        match = re.search(rexp, string, re.IGNORECASE)
        if match:
            return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))

    return None, None
def format_guess(guess):
    """Convert the values found in *guess* to their natural type.

    Numeric properties (season, year, ...) become ints; text values are put
    in canonical form, with 'edition' being cleaned first.

    The dictionary is updated in place and also returned.
    """
    int_props = ("season", "episodeNumber", "year", "cdNumber", "cdNumberTotal")
    cleaned_props = ("edition",)

    for prop, value in guess.items():
        if prop in int_props:
            guess[prop] = int(value)
            continue
        if not isinstance(value, basestring):
            continue
        text = clean_string(value) if prop in cleaned_props else value
        guess[prop] = canonical_form(text)

    return guess
def guess_filetype(filename, filetype='autodetect'):
    """Guess the type of *filename* from its extension and episode hints.

    Returns a ``(filetype, other)`` tuple. ``other`` maps ``'container'``
    (subtitle and video extensions) or ``'extension'`` (anything else) to
    the lower-cased extension without its leading dot.
    """
    def as_episode(kind):
        # generic video/subtitle -> episode flavour, anything else untouched
        if kind == 'video':
            return 'episode'
        if kind == 'subtitle':
            return 'episodesubtitle'
        return kind

    def as_movie(kind):
        # generic video/subtitle -> movie flavour, anything else untouched
        if kind == 'video':
            return 'movie'
        if kind == 'subtitle':
            return 'moviesubtitle'
        return kind

    # the extension gives the first indication
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        if 'movie' in filetype:
            filetype = 'moviesubtitle'
        elif 'episode' in filetype:
            filetype = 'episodesubtitle'
        else:
            filetype = 'subtitle'
        other = {'container': fileext}
    elif fileext in video_exts:
        if filetype == 'autodetect':
            filetype = 'video'
        other = {'container': fileext}
    else:
        if filetype == 'autodetect':
            filetype = 'unknown'
        other = {'extension': fileext}

    # then look for specific hints telling episodes and movies apart
    if filetype in ('video', 'subtitle'):
        if any(re.search(rexp, filename, re.IGNORECASE)
               for rexp, _, _ in episode_rexps):
            filetype = as_episode(filetype)

        if any(canonical_form(value) == 'DVB'
               for _, value, _, _ in find_properties(filename)):
            filetype = as_episode(filetype)

        # without any episode hint, it is assumed to be a movie
        filetype = as_movie(filetype)

    return filetype, other
def format_guess(guess):
    """Convert the values found in *guess* to their natural type.

    Numeric properties (season, year, ...) become ints; text values are put
    in canonical form with backslashes removed, 'edition' being cleaned
    first.

    The dictionary is updated in place and also returned.
    """
    int_props = ('season', 'episodeNumber', 'year', 'cdNumber',
                 'cdNumberTotal', 'bonusNumber', 'filmNumber')
    cleaned_props = ('edition', )

    # iterate over a snapshot of the items while the dict is being updated
    for prop, value in list(guess.items()):
        if prop in int_props:
            guess[prop] = int(value)
            continue
        if not isinstance(value, base_text_type):
            continue
        text = clean_string(value) if prop in cleaned_props else value
        guess[prop] = canonical_form(text).replace('\\', '')

    return guess
def format_guess(guess):
    """Convert the values found in *guess* to their natural type.

    Numeric properties (season, year, ...) become ints; text values are put
    in canonical form with backslashes removed, 'edition' being cleaned
    first.

    The dictionary is updated in place and also returned.
    """
    int_props = ('season', 'episodeNumber', 'year', 'cdNumber',
                 'cdNumberTotal', 'bonusNumber', 'filmNumber')
    cleaned_props = ('edition',)

    for prop, value in guess.items():
        if prop in int_props:
            guess[prop] = int(value)
            continue
        if not isinstance(value, base_text_type):
            continue
        text = clean_string(value) if prop in cleaned_props else value
        guess[prop] = canonical_form(text).replace('\\', '')

    return guess
def guess_filetype(mtree, filetype):
    """Guess the file type of the path held in *mtree*.

    The extension of ``mtree.string`` gives a first classification, which is
    then refined using the values of ``mtree.children``, the ``MOVIES`` and
    ``SERIES`` lists, and episode hints found in the filename.

    Returns a ``(filetype, other)`` tuple. ``other`` maps ``'container'``
    (subtitle and video extensions) or ``'extension'`` (anything else) to
    the lower-cased extension without its leading dot.
    """
    # python 2 has no 'nonlocal' keyword, so the value being refined is kept
    # in a mutable dict that the nested helpers can write to
    state = {'filetype': filetype}
    filename = mtree.string

    def retag(video_tag, subtitle_tag):
        # only the generic 'video' / 'subtitle' types get specialized
        current = state['filetype']
        if current == 'video':
            state['filetype'] = video_tag
        elif current == 'subtitle':
            state['filetype'] = subtitle_tag

    def upgrade_episode():
        retag('episode', 'episodesubtitle')

    def upgrade_movie():
        retag('movie', 'moviesubtitle')

    # the extension gives the first indication
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        current = state['filetype']
        if 'movie' in current:
            state['filetype'] = 'moviesubtitle'
        elif 'episode' in current:
            state['filetype'] = 'episodesubtitle'
        else:
            state['filetype'] = 'subtitle'
        other = {'container': fileext}
    elif fileext in video_exts:
        if state['filetype'] == 'autodetect':
            state['filetype'] = 'video'
        other = {'container': fileext}
    else:
        if state['filetype'] == 'autodetect':
            state['filetype'] = 'unknown'
        other = {'extension': fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    folder_rexps = [(r'Movies?', upgrade_movie),
                    (r'Tv ?Shows?', upgrade_episode),
                    (r'Series', upgrade_episode)]
    for pattern, upgrade_func in folder_rexps:
        folder_re = re.compile(pattern, re.IGNORECASE)
        for pathgroup in mtree.children:
            if folder_re.match(pathgroup.value):
                upgrade_func()

    # a few specific titles would confuse the heuristics below (eg: OSS 117
    # looks like season 1, episode 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for title in MOVIES:
        if title in fname:
            upgrade_movie()
    for title in SERIES:
        if title in fname:
            upgrade_episode()

    # then look for specific hints telling episodes and movies apart
    if state['filetype'] in ('video', 'subtitle'):
        # an episode_rexp (eg: s02e13) means it is an episode
        for rexp, _, _ in episode_rexps:
            if re.search(rexp, filename, re.IGNORECASE):
                upgrade_episode()
                break

        # a 3-4 digit number that's not a year may be season + episode
        number_match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if number_match:
            fullnumber = int(number_match.group(1))
            epnumber = fullnumber % 100
            looks_like_year = valid_year(fullnumber)
            if epnumber <= 40 and not looks_like_year:
                upgrade_episode()

        # some properties are characteristic of episodes
        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if prop == 'episodeFormat' or canonical_form(value) == 'DVB':
                upgrade_episode()
                break

        # origin-specific type
        if 'tvu.org.ru' in filename:
            upgrade_episode()

        # without any episode hint, it is assumed to be a movie
        upgrade_movie()

    return state['filetype'], other
def adjust_metadata(md):
    """Store a matched codec under 'format' when it is listed in FORMATS.

    The canonical form of ``md['videoCodec']`` is looked up in ``FORMATS``;
    on a hit it is written to ``md['format']`` and the 'videoCodec' entry is
    removed. *md* is updated in place and also returned.
    """
    codec = canonical_form(md['videoCodec'])
    if codec not in FORMATS:
        return md

    md['format'] = codec
    del md['videoCodec']
    return md
def guess_filetype(mtree, filetype):
    """Guess the file type of the path held in *mtree*.

    The extension of ``mtree.string`` gives a first classification, which is
    then refined using the values of ``mtree.children``, the ``MOVIES`` and
    ``SERIES`` lists, and episode hints found in the filename.

    Returns a ``(filetype, other)`` tuple. ``other`` maps ``'container'``
    (subtitle and video extensions) or ``'extension'`` (anything else) to
    the lower-cased extension without its leading dot.
    """
    # python 2 has no 'nonlocal' keyword, so the value being refined is kept
    # in a mutable dict that the nested helpers can write to
    state = {'filetype': filetype}
    filename = mtree.string

    def specialize(video_tag, subtitle_tag):
        # only the generic 'video' / 'subtitle' types get specialized
        kind = state['filetype']
        if kind == 'video':
            state['filetype'] = video_tag
        elif kind == 'subtitle':
            state['filetype'] = subtitle_tag

    def upgrade_episode():
        specialize('episode', 'episodesubtitle')

    def upgrade_movie():
        specialize('movie', 'moviesubtitle')

    def default_to(fallback):
        # only an undecided type is replaced
        if state['filetype'] == 'autodetect':
            state['filetype'] = fallback

    # the extension gives the first indication
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        kind = state['filetype']
        if 'movie' in kind:
            state['filetype'] = 'moviesubtitle'
        elif 'episode' in kind:
            state['filetype'] = 'episodesubtitle'
        else:
            state['filetype'] = 'subtitle'
        other = {'container': fileext}
    elif fileext in video_exts:
        default_to('video')
        other = {'container': fileext}
    else:
        default_to('unknown')
        other = {'extension': fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    folder_rexps = [(r'Movies?', upgrade_movie),
                    (r'Tv ?Shows?', upgrade_episode),
                    (r'Series', upgrade_episode)]
    for pattern, upgrade_func in folder_rexps:
        folder_re = re.compile(pattern, re.IGNORECASE)
        for pathgroup in mtree.children:
            if folder_re.match(pathgroup.value):
                upgrade_func()

    # a few specific titles would confuse the heuristics below (eg: OSS 117
    # looks like season 1, episode 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for known_movie in MOVIES:
        if known_movie in fname:
            upgrade_movie()
    for known_series in SERIES:
        if known_series in fname:
            upgrade_episode()

    # then look for specific hints telling episodes and movies apart
    if state['filetype'] in ('video', 'subtitle'):
        # an episode_rexp (eg: s02e13) means it is an episode
        for rexp, _, _ in episode_rexps:
            if re.search(rexp, filename, re.IGNORECASE):
                upgrade_episode()
                break

        # a 3-4 digit number that's not a year may be season + episode
        digits = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if digits:
            fullnumber = int(digits.group(1))
            epnumber = fullnumber % 100
            is_year = valid_year(fullnumber)
            if not (epnumber > 40 or is_year):
                upgrade_episode()

        # some properties are characteristic of episodes
        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if prop == 'episodeFormat' or canonical_form(value) == 'DVB':
                upgrade_episode()
                break

        # origin-specific type
        if 'tvu.org.ru' in filename:
            upgrade_episode()

        # without any episode hint, it is assumed to be a movie
        upgrade_movie()

    return state['filetype'], other
def guess_filetype(filename, filetype):
    """Guess the type of *filename* from its extension and episode hints.

    Returns a ``(filetype, other)`` tuple. ``other`` maps ``'container'``
    (subtitle and video extensions) or ``'extension'`` (anything else) to
    the lower-cased extension without its leading dot.
    """
    def as_episode(kind):
        # generic video/subtitle -> episode flavour, anything else untouched
        if kind == 'video':
            return 'episode'
        if kind == 'subtitle':
            return 'episodesubtitle'
        return kind

    def as_movie(kind):
        # generic video/subtitle -> movie flavour, anything else untouched
        if kind == 'video':
            return 'movie'
        if kind == 'subtitle':
            return 'moviesubtitle'
        return kind

    # the extension gives the first indication
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        if 'movie' in filetype:
            filetype = 'moviesubtitle'
        elif 'episode' in filetype:
            filetype = 'episodesubtitle'
        else:
            filetype = 'subtitle'
        other = {'container': fileext}
    elif fileext in video_exts:
        if filetype == 'autodetect':
            filetype = 'video'
        other = {'container': fileext}
    else:
        if filetype == 'autodetect':
            filetype = 'unknown'
        other = {'extension': fileext}

    # then look for specific hints telling episodes and movies apart
    if filetype in ('video', 'subtitle'):
        if any(re.search(rexp, filename, re.IGNORECASE)
               for rexp, _, _ in episode_rexps):
            filetype = as_episode(filetype)

        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if prop == 'episodeFormat' or canonical_form(value) == 'DVB':
                filetype = as_episode(filetype)
                break

        # without any episode hint, it is assumed to be a movie
        filetype = as_movie(filetype)

    return filetype, other
def guess_filetype(filename, filetype):
    """Guess the type of *filename* from its extension and episode hints.

    Returns a ``(filetype, other)`` tuple. ``other`` maps ``'container'``
    (subtitle and video extensions) or ``'extension'`` (anything else) to
    the lower-cased extension without its leading dot.
    """
    def as_episode(kind):
        # generic video/subtitle -> episode flavour, anything else untouched
        if kind == 'video':
            return 'episode'
        if kind == 'subtitle':
            return 'episodesubtitle'
        return kind

    def as_movie(kind):
        # generic video/subtitle -> movie flavour, anything else untouched
        if kind == 'video':
            return 'movie'
        if kind == 'subtitle':
            return 'moviesubtitle'
        return kind

    # the extension gives the first indication
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        if 'movie' in filetype:
            filetype = 'moviesubtitle'
        elif 'episode' in filetype:
            filetype = 'episodesubtitle'
        else:
            filetype = 'subtitle'
        other = {'container': fileext}
    elif fileext in video_exts:
        if filetype == 'autodetect':
            filetype = 'video'
        other = {'container': fileext}
    else:
        if filetype == 'autodetect':
            filetype = 'unknown'
        other = {'extension': fileext}

    # then look for specific hints telling episodes and movies apart
    if filetype in ('video', 'subtitle'):
        if any(re.search(rexp, filename, re.IGNORECASE)
               for rexp, _, _ in episode_rexps):
            filetype = as_episode(filetype)

        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if prop == 'episodeFormat' or canonical_form(value) == 'DVB':
                filetype = as_episode(filetype)
                break

        if 'tvu.org.ru' in filename:
            filetype = as_episode(filetype)

        # without any episode hint, it is assumed to be a movie
        filetype = as_movie(filetype)

    return filetype, other