def process(self, mtree, options=None):
    """Look for years in unidentified leaves, then mirror year-like seasons.

    A ``GuessFinder`` built from ``self.guess_year``, the constant ``1.0``,
    ``self.log`` and ``options`` is run over ``mtree.unidentified_leaves()``.
    Afterwards, every leaf returned by ``mtree.leaves_containing("season")``
    whose season value passes ``valid_year`` gets a "year" entry holding
    that same value.
    """
    year_finder = GuessFinder(self.guess_year, 1.0, self.log, options)
    year_finder.process_nodes(mtree.unidentified_leaves())

    # A season number that is also a valid year can usually be reused as
    # the year property.
    for leaf in mtree.leaves_containing("season"):
        guess = leaf.guess
        season_value = guess["season"]
        if not valid_year(season_value):
            continue
        guess["year"] = season_value
def process(self, mtree, options=None):
    """Run the year finder, then copy year-like season numbers to 'year'.

    The unidentified leaves of ``mtree`` are handed to a ``GuessFinder``
    configured with ``self.guess_year``, ``1.0``, ``self.log`` and
    ``options``.  Then each leaf containing a 'season' value accepted by
    ``valid_year`` has that value stored under 'year' as well.
    """
    finder = GuessFinder(self.guess_year, 1.0, self.log, options)
    pending_leaves = mtree.unidentified_leaves()
    finder.process_nodes(pending_leaves)

    # When the detected season number is a valid year, it is usually safe
    # to assume the year property has the same value.
    for node in mtree.leaves_containing('season'):
        node_guess = node.guess
        candidate = node_guess['season']
        if valid_year(candidate):
            node_guess['year'] = candidate
def _formater(episodeNumber):
    """Turn a numeral into an episode number, splitting off a season if any.

    ``episodeNumber`` is converted with ``parse_numeral``.  The result is:

    * ``None`` when the number is accepted by ``valid_year``;
    * the number itself when it is not greater than 100;
    * ``{'season': n // 100, 'episodeNumber': n % 100}`` when it is greater
      than 100, unless the season part exceeds 50, in which case ``None``.
    """
    number = parse_numeral(episodeNumber)

    if valid_year(number):
        # Years are not episode numbers: nothing to report.
        return None

    if number > 100:
        season_part = number // 100
        episode_part = number % 100
        # Episodes which have a season > 50 are most likely errors
        # (Simpson is at 25!)
        if season_part > 50:
            return None
        return {'season': season_part, 'episodeNumber': episode_part}

    return number
def season_episode_parser(episode_number):
    """Parse a numeral into an episode number or a season/episode pair.

    The value produced by ``parse_numeral(episode_number)`` is handled as
    follows:

    * accepted by ``valid_year``: ``None`` is returned;
    * greater than 100: the hundreds become the season and the remainder
      the episode, returned as ``{"season": ..., "episodeNumber": ...}``;
      a season above 50 yields ``None`` instead;
    * otherwise the number is returned unchanged.
    """
    parsed = parse_numeral(episode_number)

    if valid_year(parsed):
        return None

    if parsed > 100:
        season_number = parsed // 100
        episode_only = parsed % 100
        # Episodes which have a season > 50 are most likely errors
        # (Simpson is at 25!)
        if season_number > 50:
            return None
        return {"season": season_number, "episodeNumber": episode_only}

    return parsed
def guess_filetype(mtree, filetype):
    """Refine ``filetype`` for the file described by ``mtree``.

    The file extension, the path groups in ``mtree.children``, the
    ``MOVIES`` / ``SERIES`` exception lists and a few episode heuristics
    are consulted in turn.  Each step may promote a generic type
    ('autodetect', 'video', 'subtitle') to a more specific one ('episode',
    'movie', 'episodesubtitle', 'moviesubtitle').

    Returns a ``(filetype, other)`` tuple.  ``other`` is
    ``{'container': ext}`` when the extension is in ``subtitle_exts`` or
    ``video_exts`` and ``{'extension': ext}`` otherwise.
    """
    # The current type is kept in a one-element list so that the nested
    # helpers can rebind it: python 2 has no 'nonlocal' keyword.
    current = [filetype]
    filename = mtree.string

    def upgrade_episode():
        promoted = {'video': 'episode', 'subtitle': 'episodesubtitle'}
        current[0] = promoted.get(current[0], current[0])

    def upgrade_movie():
        promoted = {'video': 'movie', 'subtitle': 'moviesubtitle'}
        current[0] = promoted.get(current[0], current[0])

    def upgrade_subtitle():
        families = (('movie', 'moviesubtitle'),
                    ('episode', 'episodesubtitle'))
        for family, subtitle_type in families:
            if family in current[0]:
                current[0] = subtitle_type
                return
        current[0] = 'subtitle'

    def upgrade(new_type='unknown'):
        # Only an undecided type may be replaced here.
        if current[0] == 'autodetect':
            current[0] = new_type

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_subtitle()
        other_key = 'container'
    elif fileext in video_exts:
        upgrade('video')
        other_key = 'container'
    else:
        upgrade('unknown')
        other_key = 'extension'
    other = {other_key: fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    folder_upgrades = (
        (r'Movies?', upgrade_movie),
        (r'Tv[ _-]?Shows?', upgrade_episode),
        (r'Series', upgrade_episode),
    )
    for pattern, apply_upgrade in folder_upgrades:
        folder_re = re.compile(pattern, re.IGNORECASE)
        for pathgroup in mtree.children:
            if folder_re.match(pathgroup.value):
                apply_upgrade()

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    if any(title in fname for title in MOVIES):
        upgrade_movie()
    if any(title in fname for title in SERIES):
        upgrade_episode()

    # now look whether there are some specific hints for episode vs movie
    if current[0] in ('video', 'subtitle'):
        # if we have an episode_rexp (eg: s02e13), it is an episode
        if any(re.search(rexp, filename, re.IGNORECASE)
               for rexp, _, _ in episode_rexps):
            upgrade_episode()

        # if we have a 3-4 digit number that's not a year, maybe an episode
        digits = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if digits:
            fullnumber = int(digits.group(1))
            looks_like_year = valid_year(fullnumber)
            # the last two digits are the candidate episode number; more
            # than 40 is considered implausible
            if fullnumber % 100 <= 40 and not looks_like_year:
                upgrade_episode()

        # if we have certain properties characteristic of episodes, it is
        # an ep
        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if (prop == 'episodeFormat'
                    or compute_canonical_form('format', value) == 'DVB'):
                upgrade_episode()
                break

        # origin-specific type
        if 'tvu.org.ru' in filename:
            upgrade_episode()

        # if no episode info found, assume it's a movie
        upgrade_movie()

    return current[0], other
def guess_filetype(mtree, filetype):
    """Refine ``filetype`` for the file described by ``mtree``.

    The following hints are examined in order, each possibly promoting a
    generic type ('autodetect', 'video', 'subtitle', 'info') to a more
    specific one ('episode', 'movie', 'episodesubtitle', 'moviesubtitle',
    'episodeinfo', 'movieinfo'):

    1. the file extension (``subtitle_exts``, ``info_exts``, ``video_exts``);
    2. path groups of ``mtree.children`` that look like a 'Movies',
       'Tv Shows' or 'Series' folder;
    3. the ``MOVIES`` and ``SERIES`` exception lists;
    4. episode-specific heuristics (episode regexps, a 3-4 digit number
       that is not a year, 'episodeFormat' / DVB 'format' properties, the
       'tvu.org.ru' origin), falling back to 'movie' when none applies.

    Returns a ``(filetype, other)`` tuple.  ``other`` is
    ``{'container': ext}`` for subtitle, info and video extensions and
    ``{'extension': ext}`` otherwise.
    """
    # put the filetype inside a dummy container to be able to have the
    # following functions work correctly as closures
    # this is a workaround for python 2 which doesn't have the
    # 'nonlocal' keyword (python 3 does have it)
    filetype_container = [filetype]
    filename = mtree.string

    def upgrade_episode():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'episode'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'episodesubtitle'
        elif filetype_container[0] == 'info':
            filetype_container[0] = 'episodeinfo'

    def upgrade_movie():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'movie'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'moviesubtitle'
        elif filetype_container[0] == 'info':
            filetype_container[0] = 'movieinfo'

    def upgrade_subtitle():
        if 'movie' in filetype_container[0]:
            filetype_container[0] = 'moviesubtitle'
        elif 'episode' in filetype_container[0]:
            filetype_container[0] = 'episodesubtitle'
        else:
            filetype_container[0] = 'subtitle'

    def upgrade_info():
        if 'movie' in filetype_container[0]:
            filetype_container[0] = 'movieinfo'
        elif 'episode' in filetype_container[0]:
            filetype_container[0] = 'episodeinfo'
        else:
            filetype_container[0] = 'info'

    def upgrade(new_type='unknown'):
        # only an undecided type may be replaced here
        if filetype_container[0] == 'autodetect':
            filetype_container[0] = new_type

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_subtitle()
        other = {'container': fileext}
    elif fileext in info_exts:
        upgrade_info()
        other = {'container': fileext}
    elif fileext in video_exts:
        upgrade('video')
        other = {'container': fileext}
    else:
        upgrade('unknown')
        other = {'extension': fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    folder_rexps = [
        (r'Movies?', upgrade_movie),
        (r'Tv[ _-]?Shows?', upgrade_episode),
        (r'Series', upgrade_episode),
    ]
    for frexp, upgrade_func in folder_rexps:
        frexp = re.compile(frexp, re.IGNORECASE)
        for pathgroup in mtree.children:
            if frexp.match(pathgroup.value):
                upgrade_func()

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for m in MOVIES:
        if m in fname:
            log.debug('Found in exception list of movies -> type = movie')
            upgrade_movie()
    for s in SERIES:
        if s in fname:
            log.debug('Found in exception list of series -> type = episode')
            upgrade_episode()

    # now look whether there are some specific hints for episode vs movie
    if filetype_container[0] in ('video', 'subtitle', 'info'):
        # if we have an episode_rexp (eg: s02e13), it is an episode
        for rexp, _, _ in episode_rexps:
            match = re.search(rexp, filename, re.IGNORECASE)
            if match:
                log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode',
                          rexp, match.group())
                upgrade_episode()
                break

        # if we have a 3-4 digit number that's not a year, maybe an episode
        match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if match:
            fullnumber = int(match.group(1))
            # the last two digits are the candidate episode number
            epnumber = fullnumber % 100
            possible = True

            # check for validity
            if epnumber > 40:
                possible = False
            if valid_year(fullnumber):
                possible = False

            if possible:
                log.debug('Found possible episode number: %s (from string "%s") -> type = episode',
                          epnumber, match.group())
                upgrade_episode()

        # if we have certain properties characteristic of episodes, it is an ep
        for prop, _ in container.find_properties(filename, 'episodeFormat'):
            # lazy %-args: formatting only happens when debug logging is
            # enabled, and a tuple-like prop cannot break the formatting
            log.debug('prop: %s', prop)
            log.debug('Found characteristic property of episodes: %s', prop)
            upgrade_episode()

        for prop, _ in container.find_properties(filename, 'format'):
            if container.compute_canonical_form('format', prop.canonical_form) == 'DVB':
                log.debug('Found characteristic property of episodes: %s', prop)
                upgrade_episode()
                break

        # origin-specific type
        if 'tvu.org.ru' in filename:
            log.debug('Found characteristic property of episodes: %s', 'tvu.org.ru')
            upgrade_episode()

        # if no episode info found, assume it's a movie
        log.debug('Nothing characteristic found, assuming type = movie')
        upgrade_movie()

    filetype = filetype_container[0]
    return filetype, other