def guess_release_group(string):
    """Look for a release group in ``string``.

    Returns a ``(metadata, (start, end))`` tuple, where ``metadata`` is the
    match's named groups passed through ``adjust_metadata`` and the span runs
    from the start of group 1 to the end of group 2. Returns ``(None, None)``
    when no pattern matches.
    """
    # strict pass: accept a match only if the captured text is a release
    # group known to compute_canonical_form (regular or weak)
    for pattern in GROUP_NAMES:
        found = pattern.search(string)
        while found is not None:
            captured = found.groupdict()
            candidate = captured['releaseGroup']
            if (compute_canonical_form('releaseGroup', candidate)
                    or compute_canonical_form('weakReleaseGroup', candidate)):
                return (adjust_metadata(captured),
                        (found.start(1), found.end(2)))

            # nothing conclusive here: search again, starting one character
            # after the beginning of the rejected match
            found = pattern.search(string, found.start() + 1)

    # loose pass: pick anything as releaseGroup as long as we have a codec in
    # front; this doesn't include a potential dash ('-') ending the release
    # group, eg: [...].X264-HiS@SiLUHD-English.[...]
    for pattern in GROUP_NAMES2:
        found = pattern.search(string)
        if found is not None:
            return (adjust_metadata(found.groupdict()),
                    (found.start(1), found.end(2)))

    return None, None
def guess_release_group(string):
    """Find the release group mentioned in ``string``, if any.

    The result is ``(metadata, (start, end))``: the named groups of the
    winning match after ``adjust_metadata``, and the span from the start of
    group 1 to the end of group 2. ``(None, None)`` means nothing matched.
    """

    def successive_matches(rexp):
        # Yield the first match of rexp, then each match found by searching
        # again one character after the previous match's start.
        hit = rexp.search(string)
        while hit:
            yield hit
            hit = rexp.search(string, hit.span()[0] + 1)

    # first try to see whether we have both a known codec and a known
    # release group
    for rexp in GROUP_NAMES:
        for hit in successive_matches(rexp):
            metadata = hit.groupdict()
            group_text = metadata["releaseGroup"]

            # make sure this is an actual release group we caught
            known = compute_canonical_form("releaseGroup", group_text)
            if not known:
                known = compute_canonical_form("weakReleaseGroup", group_text)
            if known:
                return adjust_metadata(metadata), (hit.start(1), hit.end(2))

    # pick anything as releaseGroup as long as we have a codec in front
    # this doesn't include a potential dash ('-') ending the release group
    # eg: [...].X264-HiS@SiLUHD-English.[...]
    for rexp in GROUP_NAMES2:
        hit = rexp.search(string)
        if hit:
            return adjust_metadata(hit.groupdict()), (hit.start(1), hit.end(2))

    return None, None
def guess_release_group(string):
    """Look for a release group in ``string``.

    Two passes are made over patterns built from CODECS and FORMATS:

    1. a strict pass, where the text captured as ``releaseGroup`` must be
       recognised by ``compute_canonical_form('releaseGroup', ...)``;
    2. a loose pass, where anything following ``.<codec>-`` or ``.<format>-``
       is accepted.

    Returns ``(metadata, (start, end))`` where ``metadata`` is the match's
    named groups passed through ``adjust_metadata`` and the span runs from
    the start of group 1 to the end of group 2, or ``(None, None)`` when
    nothing matched.
    """
    # first try to see whether we have both a known codec and a known
    # release group
    group_names = [r"(?P<videoCodec>" + codec + r")-?(?P<releaseGroup>.*?)[ \.]" for codec in CODECS]
    group_names += [r"(?P<format>" + fmt + r")-?(?P<releaseGroup>.*?)[ \.]" for fmt in FORMATS]

    for rexp in group_names:
        # compiled so that the search can be restarted from a given offset
        compiled = re.compile(rexp, re.IGNORECASE)
        match = compiled.search(string)
        while match:
            metadata = match.groupdict()
            release_group = compute_canonical_form("releaseGroup", metadata["releaseGroup"])
            if release_group:
                return adjust_metadata(metadata), (match.start(1), match.end(2))

            # The first occurrence of a codec/format is not necessarily the
            # one followed by the release group: keep scanning, starting one
            # character after the rejected match, instead of giving up on
            # this pattern.
            match = compiled.search(string, match.start() + 1)

    # pick anything as releaseGroup as long as we have a codec in front
    # this doesn't include a potential dash ('-') ending the release group
    # eg: [...].X264-HiS@SiLUHD-English.[...]
    group_names = [r"\.(?P<videoCodec>" + codec + r")-(?P<releaseGroup>.*?)(-(.*?))?[ \.]" for codec in CODECS]
    group_names += [r"\.(?P<format>" + fmt + r")-(?P<releaseGroup>.*?)(-(.*?))?[ \.]" for fmt in FORMATS]

    for rexp in group_names:
        match = re.search(rexp, string, re.IGNORECASE)
        if match:
            return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))

    return None, None
def guess_release_group(string):
    """Look for a release group in ``string``.

    The patterns in GROUP_NAMES are tried first and a match is only accepted
    when ``compute_canonical_form('releaseGroup', ...)`` recognises the
    captured text. If none qualifies, the first match of any GROUP_NAMES2
    pattern is accepted as is.

    Returns ``(metadata, (start, end))`` where ``metadata`` is the match's
    named groups passed through ``adjust_metadata`` and the span runs from
    the start of group 1 to the end of group 2, or ``(None, None)`` when
    nothing matched.
    """
    # first try to see whether we have both a known codec and a known
    # release group
    for rexp in GROUP_NAMES:
        match = rexp.search(string)
        while match:
            metadata = match.groupdict()
            release_group = compute_canonical_form('releaseGroup', metadata['releaseGroup'])
            if release_group:
                return adjust_metadata(metadata), (match.start(1), match.end(2))

            # A rejected first match must not hide a valid one further along
            # the string: search again from one character past the start of
            # the match we just discarded.
            match = rexp.search(string, match.start() + 1)

    # pick anything as releaseGroup as long as we have a codec in front
    # this doesn't include a potential dash ('-') ending the release group
    # eg: [...].X264-HiS@SiLUHD-English.[...]
    for rexp in GROUP_NAMES2:
        match = rexp.search(string)
        if match:
            return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))

    return None, None
def guess_release_group(string):
    """Look for a release group in ``string``.

    A strict pass requires the text captured after a codec (CODECS) or a
    format (FORMATS) to be recognised by
    ``compute_canonical_form('releaseGroup', ...)``. Every occurrence of each
    pattern is examined, not only the first one. A loose pass then accepts
    whatever follows ``.<codec>-`` or ``.<format>-``.

    Returns ``(metadata, (start, end))`` where ``metadata`` is the match's
    named groups passed through ``adjust_metadata`` and the span runs from
    the start of group 1 to the end of group 2, or ``(None, None)`` when
    nothing matched.
    """
    # first try to see whether we have both a known codec and a known
    # release group
    group_names = [
        r'(?P<videoCodec>' + codec + r')-?(?P<releaseGroup>.*?)[ \.]'
        for codec in CODECS
    ]
    group_names += [
        r'(?P<format>' + fmt + r')-?(?P<releaseGroup>.*?)[ \.]'
        for fmt in FORMATS
    ]

    for rexp in group_names:
        # a compiled pattern is needed to resume the search at an offset
        pattern = re.compile(rexp, re.IGNORECASE)
        match = pattern.search(string)
        while match:
            metadata = match.groupdict()
            if compute_canonical_form('releaseGroup', metadata['releaseGroup']):
                return adjust_metadata(metadata), (match.start(1), match.end(2))

            # not a known release group: retry from the next character so a
            # later occurrence of the same codec/format still gets a chance
            match = pattern.search(string, match.start() + 1)

    # pick anything as releaseGroup as long as we have a codec in front
    # this doesn't include a potential dash ('-') ending the release group
    # eg: [...].X264-HiS@SiLUHD-English.[...]
    group_names = [
        r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
        for codec in CODECS
    ]
    group_names += [
        r'\.(?P<format>' + fmt + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
        for fmt in FORMATS
    ]

    for rexp in group_names:
        match = re.search(rexp, string, re.IGNORECASE)
        if match:
            return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))

    return None, None
def adjust_metadata(md):
    """Return a new dict with each value of ``md`` in canonical form.

    A value is replaced by ``compute_canonical_form(key, value)`` when that
    call returns something truthy; otherwise the original value is kept.
    """
    adjusted = {}
    for property_name, value in md.items():
        canonical = compute_canonical_form(property_name, value)
        adjusted[property_name] = canonical or value
    return adjusted
def guess_filetype(mtree, filetype):
    """Refine ``filetype`` for the file described by ``mtree``.

    The extension of ``mtree.string`` is looked at first, then the values of
    ``mtree.children``, the MOVIES / SERIES special cases and finally a few
    episode-vs-movie heuristics.

    Returns a ``(filetype, other)`` tuple. ``other`` is a one-entry dict
    holding the lowercased extension, under the key ``'container'`` for a
    subtitle or video extension and ``'extension'`` for anything else.
    """
    # A one-element list serves as a mutable cell that the closures below
    # can write to: python 2 doesn't have the 'nonlocal' keyword.
    state = [filetype]
    filename = mtree.string

    episode_targets = {'video': 'episode', 'subtitle': 'episodesubtitle'}
    movie_targets = {'video': 'movie', 'subtitle': 'moviesubtitle'}

    def upgrade_episode():
        # only the generic 'video' / 'subtitle' types get specialised
        state[0] = episode_targets.get(state[0], state[0])

    def upgrade_movie():
        state[0] = movie_targets.get(state[0], state[0])

    def upgrade_subtitle():
        if 'movie' in state[0]:
            state[0] = 'moviesubtitle'
        elif 'episode' in state[0]:
            state[0] = 'episodesubtitle'
        else:
            state[0] = 'subtitle'

    def upgrade(kind='unknown'):
        # an explicitly requested filetype is never overridden
        if state[0] == 'autodetect':
            state[0] = kind

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_subtitle()
        other = {'container': fileext}
    elif fileext in video_exts:
        upgrade('video')
        other = {'container': fileext}
    else:
        upgrade('unknown')
        other = {'extension': fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    folder_hints = (
        (r'Movies?', upgrade_movie),
        (r'Tv[ _-]?Shows?', upgrade_episode),
        (r'Series', upgrade_episode),
    )
    for pattern, promote in folder_hints:
        folder_re = re.compile(pattern, re.IGNORECASE)
        for pathgroup in mtree.children:
            if folder_re.match(pathgroup.value):
                promote()

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for title in MOVIES:
        if title in fname:
            upgrade_movie()
    for title in SERIES:
        if title in fname:
            upgrade_episode()

    # now look whether there are some specific hints for episode vs movie
    if filetype_is_generic(state):
        # if we have an episode_rexp (eg: s02e13), it is an episode
        for rexp, _, _ in episode_rexps:
            if re.search(rexp, filename, re.IGNORECASE):
                upgrade_episode()
                break

        # if we have a 3-4 digit number that's not a year, maybe an episode
        number = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if number:
            fullnumber = int(number.group(1))
            epnumber = fullnumber % 100
            # valid_year() is consulted whatever the episode number is
            looks_like_year = valid_year(fullnumber)
            if epnumber <= 40 and not looks_like_year:
                upgrade_episode()

        # if we have certain properties characteristic of episodes, it is
        # an ep
        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if (prop == 'episodeFormat'
                    or compute_canonical_form('format', value) == 'DVB'):
                upgrade_episode()
                break

        # origin-specific type
        if 'tvu.org.ru' in filename:
            upgrade_episode()

        # if no episode info found, assume it's a movie
        upgrade_movie()

    return state[0], other


def filetype_is_generic(state):
    """Tell whether the filetype cell still holds 'video' or 'subtitle'."""
    return state[0] in ('video', 'subtitle')
def adjust_metadata(md):
    """Canonicalise the values of the ``md`` mapping.

    Each ``(name, value)`` entry is mapped to ``(name, canonical)`` where
    ``canonical`` is ``compute_canonical_form(name, value)``, falling back to
    ``value`` itself when that result is falsy. A new dict is returned.
    """

    def canonical_entry(entry):
        name, raw = entry
        return name, compute_canonical_form(name, raw) or raw

    return dict(map(canonical_entry, md.items()))
def guess_filetype(mtree, filetype):
    """Work out a more specific filetype for the file behind ``mtree``.

    Starting from ``filetype``, the guess is refined using the extension of
    ``mtree.string``, the values of ``mtree.children``, the MOVIES / SERIES
    special cases and some episode-vs-movie heuristics.

    Returns ``(filetype, other)``; ``other`` maps ``'container'`` (subtitle
    or video extension) or ``'extension'`` (anything else) to the lowercased
    file extension.
    """
    # put the filetype inside a dummy container so the nested functions can
    # rebind it; this is a workaround for python 2 which doesn't have the
    # 'nonlocal' keyword (python 3 does have it)
    current = [filetype]
    path = mtree.string

    def _specialise(when_video, when_subtitle):
        # Replace a generic type by its specialised counterpart; any other
        # type is left untouched.
        if current[0] == 'video':
            current[0] = when_video
        elif current[0] == 'subtitle':
            current[0] = when_subtitle

    def upgrade_episode():
        _specialise('episode', 'episodesubtitle')

    def upgrade_movie():
        _specialise('movie', 'moviesubtitle')

    def upgrade_subtitle():
        for marker, subtitle_type in (('movie', 'moviesubtitle'),
                                      ('episode', 'episodesubtitle')):
            if marker in current[0]:
                current[0] = subtitle_type
                return
        current[0] = 'subtitle'

    def upgrade(new_type='unknown'):
        if current[0] == 'autodetect':
            current[0] = new_type

    # look at the extension first
    extension = os.path.splitext(path)[1][1:].lower()
    if extension in subtitle_exts:
        upgrade_subtitle()
        info_key = 'container'
    elif extension in video_exts:
        upgrade(new_type='video')
        info_key = 'container'
    else:
        upgrade(new_type='unknown')
        info_key = 'extension'
    other = {info_key: extension}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    for folder_pattern, upgrade_func in [(r'Movies?', upgrade_movie),
                                         (r'Tv[ _-]?Shows?', upgrade_episode),
                                         (r'Series', upgrade_episode)]:
        folder_rexp = re.compile(folder_pattern, re.IGNORECASE)
        for child in mtree.children:
            if folder_rexp.match(child.value) is not None:
                upgrade_func()

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    cleaned = clean_string(path).lower()
    for known_names, upgrade_func in ((MOVIES, upgrade_movie),
                                      (SERIES, upgrade_episode)):
        for known in known_names:
            if known in cleaned:
                upgrade_func()

    # now look whether there are some specific hints for episode vs movie
    if current[0] not in ('video', 'subtitle'):
        return current[0], other

    # if we have an episode_rexp (eg: s02e13), it is an episode
    for rexp, _, _ in episode_rexps:
        if re.search(rexp, path, re.IGNORECASE):
            upgrade_episode()
            break

    # if we have a 3-4 digit number that's not a year, maybe an episode
    digits = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', path)
    if digits:
        fullnumber = int(digits.group(1))
        # check for validity: the last two digits are taken as the episode
        # number, and the whole number must not look like a year
        possible = fullnumber % 100 <= 40
        if valid_year(fullnumber):
            possible = False
        if possible:
            upgrade_episode()

    # if we have certain properties characteristic of episodes, it is an ep
    for prop, value, _, _ in find_properties(path):
        log.debug('prop: %s = %s' % (prop, value))
        if prop == 'episodeFormat':
            upgrade_episode()
            break
        if compute_canonical_form('format', value) == 'DVB':
            upgrade_episode()
            break

    # origin-specific type
    if 'tvu.org.ru' in path:
        upgrade_episode()

    # if no episode info found, assume it's a movie
    upgrade_movie()

    return current[0], other