def process(self, mtree, options=None):
    """Tag still-unidentified leaves as 'series' / 'title' from their position.

    If a leaf already holds an 'episodeNumber' guess, the first such leaf is
    handed to ``match_from_epnum_position``. Otherwise the unidentified
    leaves of the node at index ``(-2,)`` are used as candidates. The node at
    index ``(-3,)`` and any node holding only a 'season' guess are then
    inspected for a series name, and finally a lone 'title' is re-labelled
    as 'series'.
    """
    episode_leaves = [leaf for leaf in mtree.leaves()
                      if 'episodeNumber' in leaf.guess]

    if not episode_leaves:
        # no episode number: two or more groups in the basename are probably
        # "series - episode title", a single group is probably the series
        basename = mtree.node_at((-2,))
        candidates = self._filter_candidates(basename.unidentified_leaves())
        if len(candidates) >= 2:
            found_property(candidates[0], 'series', confidence=0.4)
            found_property(candidates[1], 'title', confidence=0.4)
        elif len(candidates) == 1:
            found_property(candidates[0], 'series', confidence=0.4)
    else:
        self.match_from_epnum_position(mtree, episode_leaves[0])

    # a single remaining group in the folder containing the file is likely
    # the series name
    try:
        folder_leftover = list(mtree.node_at((-3,)).unidentified_leaves())
    except ValueError:
        folder_leftover = []
    if len(folder_leftover) == 1:
        found_property(folder_leftover[0], 'series', confidence=0.3)

    # a path group holding only the season info is most likely preceded by
    # the series title (ie: ../series/season X/..)
    season_only = [node for node in mtree.nodes()
                   if 'season' in node.guess
                   and 'episodeNumber' not in node.guess]
    if season_only:
        preceding = [leaf for leaf in mtree.unidentified_leaves()
                     if leaf.node_idx[0] == season_only[0].node_idx[0] - 1]
        if len(preceding) == 1:
            found_property(preceding[0], 'series', confidence=0.5)

    # a title found without any series name is re-labelled as the series
    if 'series' in mtree.info or 'title' not in mtree.info:
        return
    leaf = mtree.first_leaf_containing('title')
    metadata = leaf.guess.metadata('title')
    value = leaf.guess['title']
    del leaf.guess['title']
    leaf.guess.set('series', value, metadata=metadata)
def process(self, mtree, options=None):
    """Run the property finder on unidentified leaves, then record 'properCount'.

    'properCount' is the number of leaves returned by
    ``mtree.leaves_containing('other')`` whose ``info['other']`` contains
    'Proper'; it is set on ``mtree`` only when that number is non-zero.
    """
    finder = GuessFinder(self.guess_properties, 1.0, self.log, options)
    finder.process_nodes(mtree.unidentified_leaves())

    proper_count = sum(
        1 for leaf in mtree.leaves_containing('other')
        if 'other' in leaf.info and 'Proper' in leaf.info['other'])

    if proper_count:
        found_property(mtree, 'properCount', proper_count)
def process(self, mtree, options=None):
    """Apply ``self.guess_properties`` to unidentified leaves and count 'Proper' tags.

    When at least one leaf containing "other" lists "Proper" in its
    ``info["other"]``, the total is stored on ``mtree`` as "properCount".
    """
    GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(
        mtree.unidentified_leaves())

    proper_leaves = [
        leaf for leaf in mtree.leaves_containing("other")
        if "other" in leaf.info and "Proper" in leaf.info["other"]
    ]
    if not proper_leaves:
        return
    found_property(mtree, "properCount", len(proper_leaves))
def process(self, mtree, options=None):
    """Tag still-unidentified leaves as 'series' / 'title' from their position.

    The anchor is the highest-confidence leaf holding an 'episodeNumber'
    guess, or a 'date' guess when no episode number exists. With an anchor,
    ``match_from_epnum_position`` does the work; without one, the
    second-to-last path node supplies the candidates. The third-to-last path
    node and any season-only node are then inspected for a series name, and
    a lone 'title' is finally re-labelled as 'series'.
    """
    def path_nodes():
        # nodes of category 'path', in the order mtree.nodes() yields them
        return [node for node in mtree.nodes() if node.category == 'path']

    anchors = [leaf for leaf in mtree.leaves() if 'episodeNumber' in leaf.guess]
    if not anchors:
        anchors = [leaf for leaf in mtree.leaves() if 'date' in leaf.guess]
    # highest confidence first; ties keep their original order
    anchors.sort(key=lambda leaf: -leaf.guess.confidence())

    if anchors:
        self.match_from_epnum_position(mtree, anchors[0], options)
    else:
        # no episode number: two or more groups in the basename are probably
        # "series - episode title"
        basename = path_nodes()[-2]
        candidates = self._filter_candidates(basename.unidentified_leaves(),
                                             options)
        if len(candidates) >= 2 and 'series' not in mtree.info:
            found_property(candidates[0], 'series', confidence=0.4)
            found_property(candidates[1], 'title', confidence=0.4)
        elif len(candidates) == 1:
            # a single candidate is probably the series name, unless a
            # series is already known, in which case it becomes the title
            prop = 'series' if 'series' not in mtree.info else 'title'
            found_property(candidates[0], prop, confidence=0.4)

    # a single remaining group in the folder containing the file is likely
    # the series name
    nodes = path_nodes()
    try:
        folder_leftover = list(nodes[-3].unidentified_leaves())
    except IndexError:
        folder_leftover = []
    if len(folder_leftover) == 1:
        found_property(folder_leftover[0], 'series', confidence=0.3)

    # a path group holding only the season info is most likely preceded by
    # the series title (ie: ../series/season X/..)
    season_only = [node for node in mtree.nodes()
                   if 'season' in node.guess
                   and 'episodeNumber' not in node.guess]
    if season_only:
        preceding = [leaf for leaf in mtree.unidentified_leaves()
                     if leaf.node_idx[0] == season_only[0].node_idx[0] - 1]
        if len(preceding) == 1:
            found_property(preceding[0], 'series', confidence=0.5)

    # a title found without any series name is re-labelled as the series
    if 'series' in mtree.info or 'title' not in mtree.info:
        return
    leaf = mtree.first_leaf_containing('title')
    metadata = leaf.guess.metadata('title')
    value = leaf.guess['title']
    del leaf.guess['title']
    leaf.guess.set('series', value, metadata=metadata)
def process(self, mtree, options=None):
    """Tag unidentified leaves next to bonus / film-number / season leaves.

    * The unidentified leaf following a 'bonusNumber' leaf, if it shares the
      first two ``node_idx`` components with it, becomes 'bonusTitle'.
    * The unidentified leaves before and after a 'filmNumber' leaf become
      'filmSeries' and 'title' respectively.
    * When ``mtree.info`` contains 'bonusNumber', the unidentified leaf
      before a 'season' leaf, if in the same group, becomes 'series'.

    A missing neighbour is skipped instead of being dereferenced.
    """
    def previous_group(g):
        # last unidentified leaf located before g, or None when there is none
        for leaf in reversed(list(mtree.unidentified_leaves())):
            if leaf.node_idx < g.node_idx:
                return leaf
        return None

    def next_group(g):
        # first unidentified leaf located after g, or None when there is none
        for leaf in mtree.unidentified_leaves():
            if leaf.node_idx > g.node_idx:
                return leaf
        return None

    def same_group(g1, g2):
        return g1.node_idx[:2] == g2.node_idx[:2]

    bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
    if bonus:
        bonus_title = next_group(bonus[0])
        if bonus_title and same_group(bonus_title, bonus[0]):
            found_property(bonus_title, 'bonusTitle', confidence=0.8)

    film_number = [node for node in mtree.leaves()
                   if 'filmNumber' in node.guess]
    if film_number:
        film_series = previous_group(film_number[0])
        if film_series is not None:
            found_property(film_series, 'filmSeries', confidence=0.9)

        # looked up only after the series has been tagged, so the leaf just
        # identified can no longer be returned here
        title = next_group(film_number[0])
        if title is not None:
            found_property(title, 'title', confidence=0.9)

    season = [node for node in mtree.leaves() if 'season' in node.guess]
    if season and 'bonusNumber' in mtree.info:
        series = previous_group(season[0])
        # previous_group yields None when nothing precedes the season leaf
        if series is not None and same_group(series, season[0]):
            found_property(series, 'series', confidence=0.9)
def process(self, mtree, options=None):
    """Tag still-unidentified leaves as 'series' / 'title' from their position.

    If a leaf already holds an 'episodeNumber' guess, the first such leaf is
    handed to ``match_from_epnum_position``. Otherwise the unidentified
    leaves of the node at index ``(-2,)`` are used as candidates. The node at
    index ``(-3,)`` and any node holding only a 'season' guess are then
    inspected for a series name.
    """
    eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]

    if eps:
        self.match_from_epnum_position(mtree, eps[0])
    else:
        # if we don't have the episode number, but at least 2 groups in the
        # basename, then it's probably series - eptitle
        basename = mtree.node_at((-2,))
        title_candidates = self._filter_candidates(
            basename.unidentified_leaves())

        if len(title_candidates) >= 2:
            found_property(title_candidates[0], 'series', confidence=0.4)
            found_property(title_candidates[1], 'title', confidence=0.4)
        elif len(title_candidates) == 1:
            # but if there's only one candidate, it's probably the series name
            found_property(title_candidates[0], 'series', confidence=0.4)

    # if we only have 1 remaining valid group in the folder containing the
    # file, then it's likely that it is the series name
    try:
        # materialised so that len() and indexing also work when
        # unidentified_leaves() hands back a lazy iterator
        series_candidates = list(mtree.node_at((-3,)).unidentified_leaves())
    except ValueError:
        series_candidates = []

    if len(series_candidates) == 1:
        found_property(series_candidates[0], 'series', confidence=0.3)

    # if there's a path group that only contains the season info, then the
    # previous one is most likely the series title (ie: ../series/season X/..)
    eps = [node for node in mtree.nodes()
           if 'season' in node.guess and 'episodeNumber' not in node.guess]

    if eps:
        previous = [node for node in mtree.unidentified_leaves()
                    if node.node_idx[0] == eps[0].node_idx[0] - 1]
        if len(previous) == 1:
            found_property(previous[0], 'series', confidence=0.5)
def match_from_epnum_position(self, mtree, node):
    """Tag 'series' and 'title' leaves relative to the episode-number leaf ``node``.

    Only unidentified leaves of ``mtree`` in the same path group as ``node``
    (same first ``node_idx`` component) are considered. Candidates located
    after ``node`` go through ``self._filter_candidates`` first.
    """
    epnum_idx = node.node_idx

    def leaves_before():
        # unidentified leaves of the same path group located before node
        return [leaf for leaf in mtree.unidentified_leaves()
                if (leaf.node_idx[0] == epnum_idx[0] and
                    leaf.node_idx[1:] < epnum_idx[1:])]

    def leaves_after():
        # unidentified leaves of the same path group located after node
        return [leaf for leaf in mtree.unidentified_leaves()
                if (leaf.node_idx[0] == epnum_idx[0] and
                    leaf.node_idx[1:] > epnum_idx[1:])]

    def leaves_after_in_explicit_group():
        # unidentified leaves sharing the first two index components with
        # node and located after it
        return [leaf for leaf in mtree.unidentified_leaves()
                if (leaf.node_idx[:2] == epnum_idx[:2] and
                    leaf.node_idx[2:] > epnum_idx[2:])]

    # the episode number comes first and exactly 2 groups follow it in the
    # same path group -> series title - episode title
    candidates = self._filter_candidates(leaves_after())
    if ('title' not in mtree.info
            and not leaves_before()
            and len(candidates) == 2):
        found_property(candidates[0], 'series', confidence=0.4)
        found_property(candidates[1], 'title', confidence=0.4)
        return

    # at least one group before the episode number: probably the series name
    preceding = leaves_before()
    if preceding:
        found_property(preceding[0], 'series', confidence=0.7)

    # exactly one group after it in the same path group: probably the
    # episode title
    candidates = self._filter_candidates(leaves_after())
    if len(candidates) == 1:
        found_property(candidates[0], 'title', confidence=0.5)
        return

    # otherwise try the same explicit group, with lower confidence
    candidates = self._filter_candidates(leaves_after_in_explicit_group())
    if candidates:
        confidence = 0.4 if len(candidates) == 1 else 0.3
        found_property(candidates[0], 'title', confidence=confidence)
        return

    # last resort: the candidate with the longest value (first one on ties)
    candidates = self._filter_candidates(leaves_after())
    if candidates:
        longest = max(candidates, key=lambda leaf: len(leaf.clean_value))
        found_property(longest, 'title', confidence=0.3)
def process(self, mtree, options=None):
    """Pick the movie 'title' among unidentified leaves, based on position.

    Nothing is done when ``mtree.info`` already has a 'title'. Otherwise a
    sequence of heuristics is tried over the second-to-last path node
    (basename) and the third-to-last one (folder); the first heuristic whose
    candidate is not rejected by
    ``GuessMovieTitleFromPosition.excluded_word`` tags it and ends the search.
    """
    if 'title' in mtree.info:
        return

    is_excluded = GuessMovieTitleFromPosition.excluded_word

    path_nodes = [node for node in mtree.nodes() if node.category == 'path']
    basename = path_nodes[-2]
    basename_leftover = list(basename.unidentified_leaves(
        valid=lambda leaf: len(leaf.clean_value) > 0))

    try:
        folder = path_nodes[-3]
        folder_leftover = list(folder.unidentified_leaves())
    except IndexError:
        folder = None
        folder_leftover = []

    self.log.debug('folder: %s' % u(folder_leftover))
    self.log.debug('basename: %s' % u(basename_leftover))

    # the same group found in both the folder name and the filename is a
    # good candidate for the title
    if (folder_leftover and basename_leftover
            and (folder_leftover[0].clean_value ==
                 basename_leftover[0].clean_value)
            and not is_excluded(folder_leftover[0])):
        found_property(folder_leftover[0], 'title', confidence=0.8)
        return

    # the basename starts with a number followed by an unidentified group
    # and the folder has an unidentified group: film series
    # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
    if len(folder_leftover) > 0 and len(basename_leftover) > 1:
        series = folder_leftover[0]
        film_number = basename_leftover[0]
        title = basename_leftover[1]
        basename_leaves = list(basename.leaves())

        try:
            num = int(film_number.clean_value)
        except ValueError:
            num = None

        if num:
            self.log.debug('series: %s' % series.clean_value)
            self.log.debug('title: %s' % title.clean_value)
            if (series.clean_value != title.clean_value
                    and series.clean_value != film_number.clean_value
                    and basename_leaves.index(film_number) == 0
                    and basename_leaves.index(title) == 1
                    and not is_excluded(title, series)):
                found_property(title, 'title', confidence=0.6)
                found_property(series, 'filmSeries', confidence=0.6)
                found_property(film_number, 'filmNumber', num,
                               confidence=0.6)
                return

    # the unidentified group right before the year in the folder name
    if folder:
        year_group = folder.first_leaf_containing('year')
        if year_group:
            groups_before = folder.previous_unidentified_leaves(year_group)
            if groups_before:
                node = next(groups_before, None)
                if node is not None and not is_excluded(node):
                    found_property(node, 'title', confidence=0.8)
                    return

    # with format / videoCodec / language found in the folder containing the
    # file or one of its parents, look for the title there rather than in
    # the basename
    try:
        props = list(mtree.previous_leaves_containing(
            mtree.children[-2], ['videoCodec', 'format', 'language']))
    except IndexError:
        props = []

    if props:
        group_idx = props[0].node_idx[0]
        if all(g.node_idx[0] == group_idx for g in props):
            # all in the same group: take leftover info from there
            leftover = mtree.node_at((group_idx,)).unidentified_leaves()
            node = next(leftover, None)
            if node is not None and not is_excluded(node):
                found_property(node, 'title', confidence=0.7)
                return

    # look for the title among the remaining groups of the basename
    if basename_leftover:
        # a one-word basename with a folder group of at least 3 words:
        # take the title from the folder name
        # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
        # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
        if (basename_leftover[0].clean_value.count(' ') == 0
                and folder_leftover
                and folder_leftover[0].clean_value.count(' ') >= 2
                and not is_excluded(folder_leftover[0])):
            found_property(folder_leftover[0], 'title', confidence=0.7)
            return

        # when the first group is explicit, take the first group that is
        # not inside brackets or parentheses
        # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
        if basename_leftover[0].is_explicit():
            for candidate in basename_leftover:
                if not candidate.is_explicit() and not is_excluded(candidate):
                    found_property(candidate, 'title', confidence=0.8)
                    return

        # if all else fails, take the first remaining group of the basename
        if not is_excluded(basename_leftover[0]):
            found_property(basename_leftover[0], 'title', confidence=0.6)
            return

    # then the first remaining group of the folder name
    if folder_leftover and not is_excluded(folder_leftover[0]):
        found_property(folder_leftover[0], 'title', confidence=0.5)
        return

    # finally accept any unidentified group at the beginning of the
    # basename, however small
    any_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
    node = next(any_leftover, None)
    if node is not None and not is_excluded(node):
        found_property(node, 'title', confidence=0.4)
        return
def match_from_epnum_position(self, mtree, node):
    """Assign 'series' / 'title' to unidentified leaves around ``node``.

    ``node`` is the leaf holding the episode number. Leaves are compared to
    it through ``node_idx``; candidates located after it are narrowed down
    with ``self._filter_candidates`` before being tagged.
    """
    epnum_idx = node.node_idx

    def in_path_group(leaf):
        return leaf.node_idx[0] == epnum_idx[0]

    def groups_before():
        found = []
        for leaf in mtree.unidentified_leaves():
            if in_path_group(leaf) and leaf.node_idx[1:] < epnum_idx[1:]:
                found.append(leaf)
        return found

    def groups_after():
        found = []
        for leaf in mtree.unidentified_leaves():
            if in_path_group(leaf) and leaf.node_idx[1:] > epnum_idx[1:]:
                found.append(leaf)
        return found

    def groups_after_in_explicit_group():
        found = []
        for leaf in mtree.unidentified_leaves():
            if (leaf.node_idx[:2] == epnum_idx[:2]
                    and leaf.node_idx[2:] > epnum_idx[2:]):
                found.append(leaf)
        return found

    # episode number first, then exactly two groups in the same path group:
    # series title - episode title
    following = self._filter_candidates(groups_after())
    no_title_yet = 'title' not in mtree.info
    if no_title_yet and groups_before() == [] and len(following) == 2:
        series_leaf, title_leaf = following[0], following[1]
        found_property(series_leaf, 'series', confidence=0.4)
        found_property(title_leaf, 'title', confidence=0.4)
        return

    # a group before the episode number is probably the series name
    leading = groups_before()
    if len(leading) >= 1:
        found_property(leading[0], 'series', confidence=0.7)

    # a single group after it (same path group) is probably the episode title
    following = self._filter_candidates(groups_after())
    if len(following) == 1:
        found_property(following[0], 'title', confidence=0.5)
        return

    # otherwise look in the same explicit group, with lower confidence
    following = self._filter_candidates(groups_after_in_explicit_group())
    if len(following) == 1:
        found_property(following[0], 'title', confidence=0.4)
        return
    if len(following) > 1:
        found_property(following[0], 'title', confidence=0.3)
        return

    # fall back on the group with the longest value
    following = self._filter_candidates(groups_after())
    if following:
        longest = None
        longest_len = -1
        for leaf in following:
            value_len = len(leaf.clean_value)
            # strict comparison: the earliest leaf wins on equal lengths
            if value_len > longest_len:
                longest = leaf
                longest_len = value_len
        found_property(longest, 'title', confidence=0.3)
def process(self, mtree, options=None):
    """Tag still-unidentified leaves as 'series' / 'title' from their position.

    Anchors are the leaves holding an 'episodeNumber' guess (or a 'date'
    guess when there is none), highest confidence first. Each anchor is
    matched through ``match_from_epnum_position`` against its first 'path'
    ancestor (its root when it has none), at most once per such node.
    Without any anchor the second-to-last path node supplies the candidates.
    The third-to-last path node and any season-only node are then inspected
    for a series name, and a lone 'title' is finally re-labelled as 'series'.
    """
    anchors = [leaf for leaf in mtree.leaves() if 'episodeNumber' in leaf.guess]
    if not anchors:
        anchors = [leaf for leaf in mtree.leaves() if 'date' in leaf.guess]
    # highest confidence first; ties keep their original order
    anchors = sorted(anchors, key=lambda leaf: leaf.guess.confidence(),
                     reverse=True)

    if anchors:
        handled = []
        for anchor in anchors:
            # only the first episode node of each path node is processed
            path_ancestors = [ancestor for ancestor in anchor.ancestors
                              if ancestor.category == 'path']
            path_node = path_ancestors[0] if path_ancestors else anchor.root
            if path_node in handled:
                continue
            self.match_from_epnum_position(path_node, anchor, options)
            handled.append(path_node)
    else:
        # no episode number: two or more groups in the basename are probably
        # "series - episode title"
        basename = [node for node in mtree.nodes()
                    if node.category == 'path'][-2]
        candidates = GuessEpisodeInfoFromPosition._filter_candidates(
            basename.unidentified_leaves(), options)

        if len(candidates) >= 2 and 'series' not in mtree.info:
            found_property(candidates[0], 'series', confidence=0.4)
            found_property(candidates[1], 'title', confidence=0.4)
        elif len(candidates) == 1:
            # a single candidate is probably the series name, unless a
            # series is already known, in which case it becomes the title
            prop = 'series' if 'series' not in mtree.info else 'title'
            found_property(candidates[0], prop, confidence=0.4)

    # a single remaining group in the folder containing the file is likely
    # the series name
    path_nodes = [node for node in mtree.nodes() if node.category == 'path']
    try:
        folder_leftover = list(path_nodes[-3].unidentified_leaves())
    except IndexError:
        folder_leftover = []

    if (len(folder_leftover) == 1
            and not GuessEpisodeInfoFromPosition.excluded_word(
                folder_leftover[0])):
        found_property(folder_leftover[0], 'series', confidence=0.3)

    # a path group holding only the season info is most likely preceded by
    # the series title (ie: ../series/season X/..)
    season_only = [node for node in mtree.nodes()
                   if 'season' in node.guess
                   and 'episodeNumber' not in node.guess]
    if season_only:
        preceding = [leaf for leaf in mtree.unidentified_leaves()
                     if leaf.node_idx[0] == season_only[0].node_idx[0] - 1]
        if (len(preceding) == 1
                and not GuessEpisodeInfoFromPosition.excluded_word(
                    preceding[0])):
            found_property(preceding[0], 'series', confidence=0.5)

    # a title found without any series name is re-labelled as the series
    if 'series' in mtree.info or 'title' not in mtree.info:
        return
    leaf = mtree.first_leaf_containing('title')
    metadata = leaf.guess.metadata('title')
    value = leaf.guess['title']
    del leaf.guess['title']
    leaf.guess.set('series', value, metadata=metadata)
def process(self, mtree, options=None):
    """Pick the movie 'title' among unidentified leaves, based on position.

    A sequence of heuristics is tried over the node at index ``(-2,)``
    (basename) and the one at index ``(-3,)`` (folder); the first one that
    applies tags a leaf through ``found_property`` and ends the search.

    Leaf collections are materialised into lists before being indexed or
    truth-tested, so the method also works when the tree hands back lazy
    iterators.
    """
    basename = mtree.node_at((-2,))
    basename_leftover = list(basename.unidentified_leaves(
        valid=lambda leaf: len(leaf.clean_value) > 0))

    try:
        folder_leftover = list(mtree.node_at((-3,)).unidentified_leaves())
    except ValueError:
        folder_leftover = []

    self.log.debug('folder: %s' % u(folder_leftover))
    self.log.debug('basename: %s' % u(basename_leftover))

    # specific cases:
    # if we find the same group both in the folder name and the filename,
    # it's a good candidate for title
    if (folder_leftover and basename_leftover and
            folder_leftover[0].clean_value == basename_leftover[0].clean_value):
        found_property(folder_leftover[0], 'title', confidence=0.8)
        return

    # specific cases:
    # if the basename contains a number first followed by an unidentified
    # group, and the folder contains an unidentified one, then we have a
    # series
    # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
    if folder_leftover and len(basename_leftover) > 1:
        series = folder_leftover[0]
        film_number = basename_leftover[0]
        title = basename_leftover[1]
        basename_leaves = list(basename.leaves())

        try:
            num = int(film_number.clean_value)
        except ValueError:
            # the first group is not a number: not a numbered film series
            num = None

        if num is not None:
            self.log.debug('series: %s' % series.clean_value)
            self.log.debug('title: %s' % title.clean_value)

            try:
                leads_basename = (basename_leaves.index(film_number) == 0 and
                                  basename_leaves.index(title) == 1)
            except ValueError:
                # one of the groups is not among the basename leaves
                leads_basename = False

            if (series.clean_value != title.clean_value and
                    series.clean_value != film_number.clean_value and
                    leads_basename):
                found_property(title, 'title', confidence=0.6)
                found_property(series, 'filmSeries', confidence=0.6)
                found_property(film_number, 'filmNumber', num, confidence=0.6)
                return

    # specific cases:
    #  - movies/tttttt (yyyy)/tttttt.ccc
    # NOTE: too generic, might solve all the unittests as they all contain
    # 'movies' in their path
    try:
        if mtree.node_at((-4, 0)).value.lower() == 'movies':
            movie_folder = mtree.node_at((-3,))
            year_group = movie_folder.first_leaf_containing('year')
            groups_before = list(
                movie_folder.previous_unidentified_leaves(year_group))
            found_property(groups_before[0], 'title', confidence=0.8)
            return
    except Exception as exc:
        # deliberately best-effort (the tree may be too shallow, or hold no
        # year / no group before it); the failure is logged instead of being
        # dropped silently
        self.log.debug('no title found from the movies folder layout: %r', exc)

    # if we have either format or videoCodec in the folder containing the file
    # or one of its parents, then we should probably look for the title in
    # there rather than in the basename
    try:
        props = list(mtree.previous_leaves_containing(
            mtree.children[-2], ['videoCodec', 'format', 'language']))
    except IndexError:
        props = []

    if props:
        group_idx = props[0].node_idx[0]
        if all(g.node_idx[0] == group_idx for g in props):
            # if they're all in the same group, take leftover info from there
            leftover = list(mtree.node_at((group_idx,)).unidentified_leaves())
            if leftover:
                found_property(leftover[0], 'title', confidence=0.7)
                return

    # look for title in basename if there are some remaining unidentified
    # groups there
    if basename_leftover:
        # if basename is only one word and the containing folder has at least
        # 3 words in it, we should take the title from the folder name
        # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
        # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
        if (basename_leftover[0].clean_value.count(' ') == 0 and
                folder_leftover and
                folder_leftover[0].clean_value.count(' ') >= 2):
            found_property(folder_leftover[0], 'title', confidence=0.7)
            return

        # if the first group is explicit, take the first group which is not
        # inside brackets or parentheses.
        # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
        if basename_leftover[0].is_explicit():
            for basename_leftover_elt in basename_leftover:
                if not basename_leftover_elt.is_explicit():
                    found_property(basename_leftover_elt, 'title',
                                   confidence=0.8)
                    return

        # if all else fails, take the first remaining unidentified group in
        # the basename as title
        found_property(basename_leftover[0], 'title', confidence=0.6)
        return

    # if there are no leftover groups in the basename, look in the folder name
    if folder_leftover:
        found_property(folder_leftover[0], 'title', confidence=0.5)
        return

    # if nothing worked, look if we have a very small group at the beginning
    # of the basename
    basename_leftover = list(
        basename.unidentified_leaves(valid=lambda leaf: True))
    if basename_leftover:
        found_property(basename_leftover[0], 'title', confidence=0.4)
        return
def process(self, mtree, options=None):
    """Find which unidentified leaf is the movie 'title', by position.

    Returns immediately when ``mtree.info`` already contains 'title'.
    Otherwise heuristics are tried in turn over the basename (second-to-last
    path node) and its folder (third-to-last path node). A candidate
    rejected by ``GuessMovieTitleFromPosition.excluded_word`` is skipped and
    the next heuristic is tried.
    """
    if 'title' in mtree.info:
        return

    def claim_title(leaf, confidence):
        # tag leaf as the title unless it is an excluded word; tell the
        # caller whether the tag was applied
        if GuessMovieTitleFromPosition.excluded_word(leaf):
            return False
        found_property(leaf, 'title', confidence=confidence)
        return True

    def has_text(leaf):
        return len(leaf.clean_value) > 0

    path_nodes = list(filter(lambda x: x.category == 'path', mtree.nodes()))
    basename = path_nodes[-2]
    basename_leftover = list(basename.unidentified_leaves(valid=has_text))

    try:
        folder = path_nodes[-3]
        folder_leftover = list(folder.unidentified_leaves())
    except IndexError:
        folder, folder_leftover = None, []

    self.log.debug('folder: %s' % u(folder_leftover))
    self.log.debug('basename: %s' % u(basename_leftover))

    # 1. same leading group in the folder name and in the filename
    if folder_leftover and basename_leftover:
        folder_first = folder_leftover[0]
        if (folder_first.clean_value == basename_leftover[0].clean_value
                and claim_title(folder_first, 0.8)):
            return

    # 2. film series: the basename starts with a number followed by an
    #    unidentified group, and the folder holds an unidentified group
    # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
    if len(folder_leftover) > 0 and len(basename_leftover) > 1:
        series = folder_leftover[0]
        film_number = basename_leftover[0]
        title = basename_leftover[1]
        basename_leaves = list(basename.leaves())

        num = None
        try:
            num = int(film_number.clean_value)
        except ValueError:
            pass

        if num:
            self.log.debug('series: %s' % series.clean_value)
            self.log.debug('title: %s' % title.clean_value)

            distinct = (series.clean_value != title.clean_value
                        and series.clean_value != film_number.clean_value)
            if (distinct
                    and basename_leaves.index(film_number) == 0
                    and basename_leaves.index(title) == 1
                    and not GuessMovieTitleFromPosition.excluded_word(
                        title, series)):
                found_property(title, 'title', confidence=0.6)
                found_property(series, 'filmSeries', confidence=0.6)
                found_property(film_number, 'filmNumber', num,
                               confidence=0.6)
                return

    # 3. the unidentified group preceding the year in the folder name
    if folder:
        year_group = folder.first_leaf_containing('year')
        if year_group:
            groups_before = folder.previous_unidentified_leaves(year_group)
            if groups_before:
                try:
                    node = next(groups_before)
                except StopIteration:
                    pass
                else:
                    if claim_title(node, 0.8):
                        return

    # 4. format / videoCodec / language located in the folder containing
    #    the file or one of its parents: the title is probably there too
    try:
        props = list(
            mtree.previous_leaves_containing(
                mtree.children[-2], ['videoCodec', 'format', 'language']))
    except IndexError:
        props = []

    if props:
        group_idx = props[0].node_idx[0]
        same_group = all(g.node_idx[0] == group_idx for g in props)
        if same_group:
            # take leftover info from that group
            leftover = mtree.node_at((group_idx, )).unidentified_leaves()
            try:
                node = next(leftover)
            except StopIteration:
                pass
            else:
                if claim_title(node, 0.7):
                    return

    # 5. remaining unidentified groups of the basename
    if basename_leftover:
        first = basename_leftover[0]

        # one-word basename and a folder group of at least 3 words: the
        # title comes from the folder name
        # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
        # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
        if (first.clean_value.count(' ') == 0
                and folder_leftover
                and folder_leftover[0].clean_value.count(' ') >= 2
                and claim_title(folder_leftover[0], 0.7)):
            return

        # explicit first group: prefer the first group that is not inside
        # brackets or parentheses
        # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
        if first.is_explicit():
            for candidate in basename_leftover:
                if not candidate.is_explicit() and claim_title(candidate, 0.8):
                    return

        # otherwise the first remaining group of the basename
        if claim_title(first, 0.6):
            return

    # 6. the first remaining group of the folder name
    if folder_leftover and claim_title(folder_leftover[0], 0.5):
        return

    # 7. any group at the beginning of the basename, even a very small one
    remaining = basename.unidentified_leaves(valid=lambda leaf: True)
    try:
        node = next(remaining)
    except StopIteration:
        pass
    else:
        if claim_title(node, 0.4):
            return
def process(self, mtree, options=None):
    """Find which unidentified leaf is the movie 'title', by position.

    Heuristics are tried in turn over the node at index ``(-2,)`` (basename)
    and the one at index ``(-3,)`` (folder). The first one that applies tags
    a leaf through ``found_property`` and returns.

    Every leaf collection is turned into a list before it is indexed or
    truth-tested, so lazy iterators returned by the tree are supported too.
    """
    basename = mtree.node_at((-2, ))
    basename_leftover = list(basename.unidentified_leaves(
        valid=lambda leaf: len(leaf.clean_value) > 0))

    try:
        folder_leftover = list(mtree.node_at((-3, )).unidentified_leaves())
    except ValueError:
        folder_leftover = []

    self.log.debug('folder: %s' % u(folder_leftover))
    self.log.debug('basename: %s' % u(basename_leftover))

    # specific cases:
    # if we find the same group both in the folder name and the filename,
    # it's a good candidate for title
    if (folder_leftover and basename_leftover
            and folder_leftover[0].clean_value ==
            basename_leftover[0].clean_value):
        found_property(folder_leftover[0], 'title', confidence=0.8)
        return

    # specific cases:
    # if the basename contains a number first followed by an unidentified
    # group, and the folder contains an unidentified one, then we have a
    # series
    # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
    if folder_leftover and len(basename_leftover) > 1:
        series = folder_leftover[0]
        film_number = basename_leftover[0]
        title = basename_leftover[1]
        basename_leaves = list(basename.leaves())

        try:
            num = int(film_number.clean_value)
        except ValueError:
            # the first group is not a number: not a numbered film series
            num = None

        if num is not None:
            self.log.debug('series: %s' % series.clean_value)
            self.log.debug('title: %s' % title.clean_value)

            try:
                leads_basename = (
                    basename_leaves.index(film_number) == 0
                    and basename_leaves.index(title) == 1)
            except ValueError:
                # one of the groups is not among the basename leaves
                leads_basename = False

            if (series.clean_value != title.clean_value
                    and series.clean_value != film_number.clean_value
                    and leads_basename):
                found_property(title, 'title', confidence=0.6)
                found_property(series, 'filmSeries', confidence=0.6)
                found_property(film_number, 'filmNumber', num,
                               confidence=0.6)
                return

    # specific cases:
    #  - movies/tttttt (yyyy)/tttttt.ccc
    # NOTE: too generic, might solve all the unittests as they all contain
    # 'movies' in their path
    try:
        if mtree.node_at((-4, 0)).value.lower() == 'movies':
            movie_folder = mtree.node_at((-3, ))
            year_group = movie_folder.first_leaf_containing('year')
            groups_before = list(
                movie_folder.previous_unidentified_leaves(year_group))
            found_property(groups_before[0], 'title', confidence=0.8)
            return
    except Exception as exc:
        # kept best-effort on purpose (shallow tree, no year, nothing before
        # the year, ...), but the reason is now logged rather than discarded
        self.log.debug('no title found from the movies folder layout: %r', exc)

    # if we have either format or videoCodec in the folder containing the file
    # or one of its parents, then we should probably look for the title in
    # there rather than in the basename
    try:
        props = list(
            mtree.previous_leaves_containing(
                mtree.children[-2], ['videoCodec', 'format', 'language']))
    except IndexError:
        props = []

    if props:
        group_idx = props[0].node_idx[0]
        if all(g.node_idx[0] == group_idx for g in props):
            # if they're all in the same group, take leftover info from there
            leftover = list(
                mtree.node_at((group_idx, )).unidentified_leaves())
            if leftover:
                found_property(leftover[0], 'title', confidence=0.7)
                return

    # look for title in basename if there are some remaining unidentified
    # groups there
    if basename_leftover:
        # if basename is only one word and the containing folder has at least
        # 3 words in it, we should take the title from the folder name
        # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
        # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
        if (basename_leftover[0].clean_value.count(' ') == 0
                and folder_leftover
                and folder_leftover[0].clean_value.count(' ') >= 2):
            found_property(folder_leftover[0], 'title', confidence=0.7)
            return

        # if the first group is explicit, take the first group which is not
        # inside brackets or parentheses.
        # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
        if basename_leftover[0].is_explicit():
            for basename_leftover_elt in basename_leftover:
                if not basename_leftover_elt.is_explicit():
                    found_property(basename_leftover_elt, 'title',
                                   confidence=0.8)
                    return

        # if all else fails, take the first remaining unidentified group in
        # the basename as title
        found_property(basename_leftover[0], 'title', confidence=0.6)
        return

    # if there are no leftover groups in the basename, look in the folder name
    if folder_leftover:
        found_property(folder_leftover[0], 'title', confidence=0.5)
        return

    # if nothing worked, look if we have a very small group at the beginning
    # of the basename
    basename_leftover = list(
        basename.unidentified_leaves(valid=lambda leaf: True))
    if basename_leftover:
        found_property(basename_leftover[0], 'title', confidence=0.4)
        return