def guess_properties(string):
    """Run guessit's ``guess_properties`` transformer over `string`.

    :param str string: text that may contain properties.
    :return: the guessed properties, as reported by the match tree's
        ``matched()`` once the transformer has processed it.
    :rtype: dict

    """
    tree = MatchTree(string)
    transformer = get_transformer('guess_properties')
    transformer.process(tree)
    return tree.matched()
def guess_properties(string, propertytype):
    """Collect every value guessed for `propertytype` in `string`.

    :param string: text to analyse; a falsy value yields an empty set.
    :param propertytype: key looked up in each node's ``guess``.
    :return: the distinct values found under `propertytype`.
    :rtype: set

    """
    if not string:
        return set()

    match_tree = guessit.matchtree.MatchTree(string)
    get_transformer('guess_properties').process(match_tree)
    return {
        node.guess[propertytype]
        for node in match_tree.nodes()
        if propertytype in node.guess
    }
def _filter_candidates(self, candidates):
    """Drop candidates that match a ``guess_episode_details`` property.

    :param candidates: iterable of nodes; each node's ``value`` is tested.
    :return: a new list with the nodes for which the transformer's
        container finds no property, or a plain copy of `candidates` when
        the transformer is not available.
    :rtype: list

    """
    details = get_transformer('guess_episode_details')
    if not details:
        return list(candidates)

    kept = []
    for node in candidates:
        matches = details.container.find_properties(
            node.value, node, re_match=True)
        if not matches:
            kept.append(node)
    return kept
def _filter_candidates(candidates, options):
    """Filter out candidates that cannot be used.

    Two filters are applied in order: nodes matching a
    ``guess_episode_details`` property are removed first (only when that
    transformer is available), then nodes flagged by
    ``GuessEpisodeInfoFromPosition.excluded_word``.

    :param candidates: iterable of nodes to filter.
    :param options: passed through to ``find_properties``.
    :return: the remaining nodes.
    :rtype: list

    """
    details = get_transformer('guess_episode_details')
    if details:
        without_details = []
        for node in candidates:
            if not details.container.find_properties(
                    node.value, node, options, re_match=True):
                without_details.append(node)
        candidates = without_details

    return [
        node for node in candidates
        if not GuessEpisodeInfoFromPosition.excluded_word(node)
    ]
def guess_filetype(self, mtree, options=None):
    """Guess the type of the file described by `mtree`.

    The extension is looked at first and may pre-set the type.  After that
    the heuristics below are tried in order and the first one that matches
    returns: path components, the ``self.MOVIES`` / ``self.SERIES``
    exception lists, episode regexps, episode-specific properties, weak
    episode regexps (TV/WEB sources only) and the website name.  If none
    matches, the type falls back to a movie variant or ``"unknown"``.

    :param mtree: match tree; its ``guess``, ``string`` and ``children``
        attributes are read.
    :param options: optional mapping; only ``name_only`` is consulted here.
    :return: ``(filetype, other)`` where `other` is a dict that is empty or
        holds a single ``container`` or ``extension`` entry.
    :rtype: tuple

    """
    options = options or {}

    # One-element list so that the nested helpers can rebind the current
    # type without ``nonlocal``, which Python 2 does not have.
    filetype_container = [mtree.guess.get("type")]
    other = {}
    filename = mtree.string

    def upgrade(combinations, fallback):
        # Refine the current type: merge it with an already known type
        # listed in `combinations`, or use `fallback` if no type is set yet.
        # Any other current type is left untouched.
        current = filetype_container[0]
        for known, refined in combinations:
            if current == known:
                filetype_container[0] = refined
                return
        if not current:
            filetype_container[0] = fallback

    def upgrade_episode():
        upgrade((("subtitle", "episodesubtitle"),
                 ("info", "episodeinfo")), "episode")

    def upgrade_movie():
        upgrade((("subtitle", "moviesubtitle"),
                 ("info", "movieinfo")), "movie")

    def upgrade_subtitle():
        upgrade((("movie", "moviesubtitle"),
                 ("episode", "episodesubtitle")), "subtitle")

    def upgrade_info():
        upgrade((("movie", "movieinfo"),
                 ("episode", "episodeinfo")), "info")

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_subtitle()
        other = {"container": fileext}
    elif fileext in info_exts:
        upgrade_info()
        other = {"container": fileext}
    elif fileext in video_exts:
        other = {"container": fileext}
    elif fileext and not options.get("name_only"):
        other = {"extension": fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    # The patterns are tried in list order against every path component, so
    # an earlier pattern wins even if it matches a later component.
    folder_rexps = [
        (r"Movies?", upgrade_movie),
        (r"Films?", upgrade_movie),
        (r"Tv[ _-]?Shows?", upgrade_episode),
        (r"Series?", upgrade_episode),
        (r"Episodes?", upgrade_episode),
    ]
    for frexp, upgrade_func in folder_rexps:
        frexp = re.compile(frexp, re.IGNORECASE)
        for pathgroup in mtree.children:
            if frexp.match(pathgroup.value):
                upgrade_func()
                return filetype_container[0], other

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for m in self.MOVIES:
        if m in fname:
            self.log.debug("Found in exception list of movies -> type = movie")
            upgrade_movie()
            return filetype_container[0], other
    for s in self.SERIES:
        if s in fname:
            self.log.debug("Found in exception list of series -> type = episode")
            upgrade_episode()
            return filetype_container[0], other

    # now look whether there are some specific hints for episode vs movie
    # if we have an episode_rexp (eg: s02e13), it is an episode
    episode_transformer = get_transformer("guess_episodes_rexps")
    if episode_transformer:
        guess = episode_transformer.guess_episodes_rexps(filename)
        if guess:
            self.log.debug("Found guess_episodes_rexps: %s -> type = episode", guess)
            upgrade_episode()
            return filetype_container[0], other

    properties_transformer = get_transformer("guess_properties")
    if properties_transformer:
        # if we have certain properties characteristic of episodes, it is an ep
        found = properties_transformer.container.find_properties(
            filename, mtree, "episodeFormat")
        guess = properties_transformer.container.as_guess(found, filename)
        if guess:
            self.log.debug("Found characteristic property of episodes: %s", guess)
            upgrade_episode()
            return filetype_container[0], other

        found = properties_transformer.container.find_properties(
            filename, mtree, "format")
        guess = properties_transformer.container.as_guess(found, filename)
        if guess and guess["format"] in ("HDTV", "WEBRip", "WEB-DL", "DVB"):
            # Use weak episodes only if TV or WEB source
            weak_episode_transformer = get_transformer("guess_weak_episodes_rexps")
            if weak_episode_transformer:
                guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
                if guess:
                    self.log.debug(
                        "Found guess_weak_episodes_rexps: %s -> type = episode", guess)
                    upgrade_episode()
                    return filetype_container[0], other

    website_transformer = get_transformer("guess_website")
    if website_transformer:
        found = website_transformer.container.find_properties(
            filename, mtree, "website")
        guess = website_transformer.container.as_guess(found, filename)
        if guess:
            for namepart in ("tv", "serie", "episode"):
                if namepart in guess["website"]:
                    # origin-specific type
                    self.log.debug("Found characteristic property of episodes: %s", guess)
                    upgrade_episode()
                    return filetype_container[0], other

    if filetype_container[0] in ("subtitle", "info") or (
            not filetype_container[0] and fileext in video_exts):
        # if no episode info found, assume it's a movie
        self.log.debug("Nothing characteristic found, assuming type = movie")
        upgrade_movie()

    if not filetype_container[0]:
        self.log.debug("Nothing characteristic found, assuming type = unknown")
        filetype_container[0] = "unknown"

    return filetype_container[0], other
def _filter_candidates(self, candidates):
    """Drop candidates that match a ``guess_episode_special`` property.

    :param candidates: iterable of nodes; each node's ``value`` is tested.
    :return: a new list with the nodes for which the transformer's
        container finds no property.  When the transformer is not
        available every candidate is kept, also in a new list.
    :rtype: list

    """
    episode_special_transformer = get_transformer('guess_episode_special')
    if not episode_special_transformer:
        # Copy instead of handing back the caller's own object, so that both
        # paths return a list that is independent of the input.
        return list(candidates)

    return [
        n for n in candidates
        if not episode_special_transformer.container.find_properties(
            n.value, n, re_match=True)
    ]
def guess_filetype(self, mtree, options=None):
    """Guess the type of the file described by `mtree`.

    The extension is looked at first and may pre-set the type.  After that
    the heuristics below are tried in order and the first one that matches
    returns: path components, the ``self.MOVIES`` / ``self.SERIES``
    exception lists, episode regexps, episode-specific properties, weak
    episode regexps (TV/WEB sources only) and the website name.  If none
    matches, the type falls back to a movie variant or ``'unknown'``.

    :param mtree: match tree; its ``guess``, ``string`` and ``children``
        attributes are read.
    :param options: optional mapping; only ``name_only`` is consulted here.
    :return: ``(filetype, other)`` where `other` is a dict that is empty or
        holds a single ``container`` or ``extension`` entry.
    :rtype: tuple

    """
    options = options or {}

    # One-element list so that the nested helpers can rebind the current
    # type without ``nonlocal``, which Python 2 does not have.
    filetype_container = [mtree.guess.get('type')]
    other = {}
    filename = mtree.string

    def upgrade(combinations, fallback):
        # Refine the current type: merge it with an already known type
        # listed in `combinations`, or use `fallback` if no type is set yet.
        # Any other current type is left untouched.
        current = filetype_container[0]
        for known, refined in combinations:
            if current == known:
                filetype_container[0] = refined
                return
        if not current:
            filetype_container[0] = fallback

    def upgrade_episode():
        upgrade((('subtitle', 'episodesubtitle'),
                 ('info', 'episodeinfo')), 'episode')

    def upgrade_movie():
        upgrade((('subtitle', 'moviesubtitle'),
                 ('info', 'movieinfo')), 'movie')

    def upgrade_subtitle():
        upgrade((('movie', 'moviesubtitle'),
                 ('episode', 'episodesubtitle')), 'subtitle')

    def upgrade_info():
        upgrade((('movie', 'movieinfo'),
                 ('episode', 'episodeinfo')), 'info')

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_subtitle()
        other = {'container': fileext}
    elif fileext in info_exts:
        upgrade_info()
        other = {'container': fileext}
    elif fileext in video_exts:
        other = {'container': fileext}
    elif fileext and not options.get('name_only'):
        other = {'extension': fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    # The patterns are tried in list order against every path component, so
    # an earlier pattern wins even if it matches a later component.
    folder_rexps = [
        (r'Movies?', upgrade_movie),
        (r'Films?', upgrade_movie),
        (r'Tv[ _-]?Shows?', upgrade_episode),
        (r'Series?', upgrade_episode),
        (r'Episodes?', upgrade_episode),
    ]
    for frexp, upgrade_func in folder_rexps:
        frexp = re.compile(frexp, re.IGNORECASE)
        for pathgroup in mtree.children:
            if frexp.match(pathgroup.value):
                upgrade_func()
                return filetype_container[0], other

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for m in self.MOVIES:
        if m in fname:
            self.log.debug(
                'Found in exception list of movies -> type = movie')
            upgrade_movie()
            return filetype_container[0], other
    for s in self.SERIES:
        if s in fname:
            self.log.debug(
                'Found in exception list of series -> type = episode')
            upgrade_episode()
            return filetype_container[0], other

    # now look whether there are some specific hints for episode vs movie
    # if we have an episode_rexp (eg: s02e13), it is an episode
    episode_transformer = get_transformer('guess_episodes_rexps')
    if episode_transformer:
        guess = episode_transformer.guess_episodes_rexps(filename)
        if guess:
            self.log.debug(
                'Found guess_episodes_rexps: %s -> type = episode', guess)
            upgrade_episode()
            return filetype_container[0], other

    properties_transformer = get_transformer('guess_properties')
    if properties_transformer:
        # if we have certain properties characteristic of episodes, it is an ep
        found = properties_transformer.container.find_properties(
            filename, mtree, 'episodeFormat')
        guess = properties_transformer.container.as_guess(found, filename)
        if guess:
            self.log.debug(
                'Found characteristic property of episodes: %s', guess)
            upgrade_episode()
            return filetype_container[0], other

        found = properties_transformer.container.find_properties(
            filename, mtree, 'format')
        guess = properties_transformer.container.as_guess(found, filename)
        if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
            # Use weak episodes only if TV or WEB source
            weak_episode_transformer = get_transformer(
                'guess_weak_episodes_rexps')
            if weak_episode_transformer:
                guess = weak_episode_transformer.guess_weak_episodes_rexps(
                    filename)
                if guess:
                    self.log.debug(
                        'Found guess_weak_episodes_rexps: %s -> type = episode',
                        guess)
                    upgrade_episode()
                    return filetype_container[0], other

    website_transformer = get_transformer('guess_website')
    if website_transformer:
        found = website_transformer.container.find_properties(
            filename, mtree, 'website')
        guess = website_transformer.container.as_guess(found, filename)
        if guess:
            for namepart in ('tv', 'serie', 'episode'):
                if namepart in guess['website']:
                    # origin-specific type
                    self.log.debug(
                        'Found characteristic property of episodes: %s', guess)
                    upgrade_episode()
                    return filetype_container[0], other

    if filetype_container[0] in ('subtitle', 'info') or (
            not filetype_container[0] and fileext in video_exts):
        # if no episode info found, assume it's a movie
        self.log.debug(
            'Nothing characteristic found, assuming type = movie')
        upgrade_movie()

    if not filetype_container[0]:
        self.log.debug(
            'Nothing characteristic found, assuming type = unknown')
        filetype_container[0] = 'unknown'

    return filetype_container[0], other
def _filter_candidates(candidates, options):
    """Drop candidates that match a ``guess_episode_details`` property.

    :param candidates: iterable of nodes; each node's ``value`` is tested.
    :param options: passed through to ``find_properties``.
    :return: a new list with the nodes for which the transformer's
        container finds no property.  When the transformer is not
        available every candidate is kept, also in a new list.
    :rtype: list

    """
    episode_details_transformer = get_transformer('guess_episode_details')
    if not episode_details_transformer:
        # Copy instead of handing back the caller's own object, so that both
        # paths return a list that is independent of the input.
        return list(candidates)

    return [
        n for n in candidates
        if not episode_details_transformer.container.find_properties(
            n.value, n, options, re_match=True)
    ]
def guess_filetype(self, mtree, options=None):
    """Guess the type of the file described by `mtree`.

    The extension is looked at first and may pre-set the type.  After that
    the heuristics below are tried in order and the first one that matches
    returns: path components, the ``self.MOVIES`` / ``self.SERIES``
    exception lists, episode regexps, episode-specific properties, weak
    episode properties (when a crc32 is present), weak episode regexps
    (TV/WEB sources only) and the website name.  If none matches, the type
    falls back to a movie variant or ``'unknown'``.

    :param mtree: match tree; its ``guess``, ``string`` and ``children``
        attributes and its ``unidentified_leaves()`` / ``clean_string()``
        methods are used.  For an unrecognised extension the guess of the
        last unidentified leaf is overwritten.
    :param options: optional mapping; ``name_only`` is consulted here and
        the mapping is passed on to ``find_properties``.
    :return: ``(filetype, other)`` where `other` is a dict that is empty or
        holds a single ``container`` or ``extension`` entry.
    :rtype: tuple

    """
    options = options or {}

    # One-element list so that the nested helpers can rebind the current
    # type without ``nonlocal``, which Python 2 does not have.
    filetype_container = [mtree.guess.get('type')]
    other = {}
    filename = mtree.string

    def upgrade(combinations, fallback):
        # Refine the current type: merge it with an already known type
        # listed in `combinations`, or use `fallback` if no type is set yet.
        # Any other current type is left untouched.
        current = filetype_container[0]
        for known, refined in combinations:
            if current == known:
                filetype_container[0] = refined
                return
        if not current:
            filetype_container[0] = fallback

    def upgrade_episode():
        # a generic 'video' type is specialised just like an unset one
        upgrade((('subtitle', 'episodesubtitle'),
                 ('info', 'episodeinfo'),
                 ('video', 'episode')), 'episode')

    def upgrade_movie():
        # a generic 'video' type is specialised just like an unset one
        upgrade((('subtitle', 'moviesubtitle'),
                 ('info', 'movieinfo'),
                 ('video', 'movie')), 'movie')

    def upgrade_subtitle():
        upgrade((('movie', 'moviesubtitle'),
                 ('episode', 'episodesubtitle')), 'subtitle')

    def upgrade_info():
        upgrade((('movie', 'movieinfo'),
                 ('episode', 'episodeinfo')), 'info')

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_subtitle()
        other = {'container': fileext}
    elif fileext in info_exts:
        upgrade_info()
        other = {'container': fileext}
    elif fileext in video_exts:
        other = {'container': fileext}
    elif fileext and not options.get('name_only'):
        other = {'extension': fileext}
        # record the unknown extension on the last unidentified leaf
        list(mtree.unidentified_leaves())[-1].guess = Guess(other)

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    # The patterns are tried in list order against every path component, so
    # an earlier pattern wins even if it matches a later component.
    folder_rexps = [(r'Movies?', upgrade_movie),
                    (r'Films?', upgrade_movie),
                    (r'Tv[ _-]?Shows?', upgrade_episode),
                    (r'Series?', upgrade_episode),
                    (r'Episodes?', upgrade_episode)]
    for frexp, upgrade_func in folder_rexps:
        frexp = re.compile(frexp, re.IGNORECASE)
        for pathgroup in mtree.children:
            if frexp.match(pathgroup.value):
                upgrade_func()
                return filetype_container[0], other

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = mtree.clean_string(filename).lower()
    for m in self.MOVIES:
        if m in fname:
            self.log.debug('Found in exception list of movies -> type = movie')
            upgrade_movie()
            return filetype_container[0], other
    for s in self.SERIES:
        if s in fname:
            self.log.debug('Found in exception list of series -> type = episode')
            upgrade_episode()
            return filetype_container[0], other

    # if we have an episode_rexp (eg: s02e13), it is an episode
    episode_transformer = get_transformer('guess_episodes_rexps')
    if episode_transformer:
        # try every unidentified leaf first, then the whole file name
        filename_parts = [x.value for x in mtree.unidentified_leaves()]
        filename_parts.append(filename)
        for filename_part in filename_parts:
            guess = episode_transformer.guess_episodes_rexps(filename_part)
            if guess:
                self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess)
                upgrade_episode()
                return filetype_container[0], other

    properties_transformer = get_transformer('guess_properties')
    if properties_transformer:
        # if we have certain properties characteristic of episodes, it is an ep
        found = properties_transformer.container.find_properties(
            filename, mtree, options, 'episodeFormat')
        guess = properties_transformer.container.as_guess(found, filename)
        if guess:
            self.log.debug('Found characteristic property of episodes: %s', guess)
            upgrade_episode()
            return filetype_container[0], other

        # Looked up once and shared by the two weak-episode checks below.
        weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
        if weak_episode_transformer:
            # a crc32 together with a weak episode property means episode
            found = properties_transformer.container.find_properties(
                filename, mtree, options, 'crc32')
            guess = properties_transformer.container.as_guess(found, filename)
            if guess:
                found = weak_episode_transformer.container.find_properties(
                    filename, mtree, options)
                guess = weak_episode_transformer.container.as_guess(found, filename)
                if guess:
                    self.log.debug('Found characteristic property of episodes: %s', guess)
                    upgrade_episode()
                    return filetype_container[0], other

        found = properties_transformer.container.find_properties(
            filename, mtree, options, 'format')
        guess = properties_transformer.container.as_guess(found, filename)
        if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
            # Use weak episodes only if TV or WEB source
            if weak_episode_transformer:
                guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
                if guess:
                    self.log.debug(
                        'Found guess_weak_episodes_rexps: %s -> type = episode', guess)
                    upgrade_episode()
                    return filetype_container[0], other

    website_transformer = get_transformer('guess_website')
    if website_transformer:
        found = website_transformer.container.find_properties(
            filename, mtree, options, 'website')
        guess = website_transformer.container.as_guess(found, filename)
        if guess:
            for namepart in ('tv', 'serie', 'episode'):
                if namepart in guess['website']:
                    # origin-specific type
                    self.log.debug('Found characteristic property of episodes: %s', guess)
                    upgrade_episode()
                    return filetype_container[0], other

    if filetype_container[0] in ('subtitle', 'info') or (
            not filetype_container[0] and fileext in video_exts):
        # if no episode info found, assume it's a movie
        self.log.debug('Nothing characteristic found, assuming type = movie')
        upgrade_movie()

    if not filetype_container[0]:
        self.log.debug('Nothing characteristic found, assuming type = unknown')
        filetype_container[0] = 'unknown'

    return filetype_container[0], other
def _filter_candidates(candidates, options):
    """Filter out candidates that cannot be used.

    Nodes matching a ``guess_episode_details`` property are removed first
    (only when that transformer is available); nodes flagged by
    ``GuessEpisodeInfoFromPosition.excluded_word`` are removed afterwards.

    :param candidates: iterable of nodes to filter.
    :param options: passed through to ``find_properties``.
    :return: the remaining nodes.
    :rtype: list

    """
    details_transformer = get_transformer('guess_episode_details')
    if details_transformer:
        candidates = [
            node for node in candidates
            if not details_transformer.container.find_properties(
                node.value, node, options, re_match=True)
        ]

    remaining = []
    for node in candidates:
        if GuessEpisodeInfoFromPosition.excluded_word(node):
            continue
        remaining.append(node)
    return remaining