def process(mtree):
    """Label still-unidentified groups as series/title from their position.

    When a leaf carries an ``episodeNumber`` guess, the work is delegated to
    ``match_from_epnum_position`` using the first such leaf.  Otherwise the
    unidentified leaves of ``mtree.node_at((-2, ))`` (the basename) are used:
    the first acceptable one is tagged ``series`` and, if there is a second,
    that one is tagged ``title`` (confidence 0.4 each).

    Three more passes always run afterwards:

    * a single unidentified leaf in ``mtree.node_at((-3, ))`` (the folder
      containing the file) is tagged ``series`` (0.3);
    * if a node holds a ``season`` guess but no ``episodeNumber``, a single
      unidentified leaf in the path group just before it is tagged
      ``series`` (0.5);
    * any ``series`` guess whose lowercased value is in ``unlikely_series``
      gets its confidence halved.

    Args:
        mtree: match tree exposing ``leaves()``, ``nodes()``, ``node_at()``,
            ``unidentified_leaves()``.
    """
    episode_leaves = [leaf for leaf in mtree.leaves()
                      if 'episodeNumber' in leaf.guess]

    if not episode_leaves:
        # no episode number: with at least 2 groups in the basename it is
        # probably "series - eptitle"; with only one, probably the series
        basename = mtree.node_at((-2, ))
        candidates = []
        for leaf in basename.unidentified_leaves():
            if leaf.clean_value.lower() not in non_episode_title:
                candidates.append(leaf)

        if candidates:
            found_property(candidates[0], 'series', 0.4)
            if len(candidates) >= 2:
                found_property(candidates[1], 'title', 0.4)
    else:
        match_from_epnum_position(mtree, episode_leaves[0])

    # a lone remaining group in the folder containing the file is likely
    # the series name
    try:
        folder_leaves = mtree.node_at((-3, )).unidentified_leaves()
    except ValueError:
        folder_leaves = []

    if len(folder_leaves) == 1:
        found_property(folder_leaves[0], 'series', 0.3)

    # a path group holding only the season info is most likely preceded by
    # the series title (ie: ../series/season X/..)
    season_only = [candidate for candidate in mtree.nodes()
                   if 'season' in candidate.guess
                   and 'episodeNumber' not in candidate.guess]
    if season_only:
        season_node = season_only[0]
        before_season = [
            leaf for leaf in mtree.unidentified_leaves()
            if leaf.node_idx[0] == season_node.node_idx[0] - 1
        ]
        if len(before_season) == 1:
            found_property(before_season[0], 'series', 0.5)

    # reduce the confidence of unlikely series
    for candidate in mtree.nodes():
        if 'series' not in candidate.guess:
            continue
        if candidate.guess['series'].lower() in unlikely_series:
            halved = candidate.guess.confidence('series') * 0.5
            candidate.guess.set_confidence('series', halved)
def process(mtree):
    """Tag bonus titles, film series/titles and series names by position.

    Inspects the unidentified leaves next to a leaf already guessed as
    ``bonusNumber``, ``filmNumber`` or ``season`` and labels them through
    ``found_property``:

    * the unidentified leaf following the first ``bonusNumber`` leaf becomes
      ``bonusTitle`` (0.8) when both share the same ``node_idx[:2]`` prefix;
    * around the first ``filmNumber`` leaf, the preceding unidentified leaf
      becomes ``filmSeries`` and the following one ``title`` (0.9 each);
    * when ``'bonusNumber'`` is in ``mtree.info``, the unidentified leaf
      preceding the first ``season`` leaf becomes ``series`` (0.9) if it
      shares the same ``node_idx[:2]`` prefix.

    A neighbour that does not exist is skipped instead of being
    dereferenced, so a tree without a matching unidentified leaf no longer
    fails on ``None``.

    Args:
        mtree: match tree exposing ``leaves()``, ``unidentified_leaves()``
            (must be sliceable) and ``info``.
    """
    def previous_group(g):
        # closest unidentified leaf located before g, or None if there is none
        for leaf in mtree.unidentified_leaves()[::-1]:
            if leaf.node_idx < g.node_idx:
                return leaf
        return None

    def next_group(g):
        # first unidentified leaf located after g, or None if there is none
        for leaf in mtree.unidentified_leaves():
            if leaf.node_idx > g.node_idx:
                return leaf
        return None

    def same_group(g1, g2):
        return g1.node_idx[:2] == g2.node_idx[:2]

    bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
    if bonus:
        bonus_title = next_group(bonus[0])
        if bonus_title is not None and same_group(bonus_title, bonus[0]):
            found_property(bonus_title, 'bonusTitle', 0.8)

    film_number = [node for node in mtree.leaves()
                   if 'filmNumber' in node.guess]
    if film_number:
        # found_property is defined elsewhere; handing it None is presumed
        # invalid, so each neighbour is only tagged when it actually exists
        film_series = previous_group(film_number[0])
        if film_series is not None:
            found_property(film_series, 'filmSeries', 0.9)

        title = next_group(film_number[0])
        if title is not None:
            found_property(title, 'title', 0.9)

    season = [node for node in mtree.leaves() if 'season' in node.guess]
    if season and 'bonusNumber' in mtree.info:
        series = previous_group(season[0])
        if series is not None and same_group(series, season[0]):
            found_property(series, 'series', 0.9)
def process(self, mtree):
    """Identify remaining unknown groups from their position.

    Groups are labelled by looking at where they sit relative to elements
    that are already known:

    * if some leaf has an ``episodeNumber`` guess, the first one is handed
      to ``self.match_from_epnum_position``;
    * otherwise the acceptable unidentified leaves of
      ``mtree.node_at((-2,))`` (the basename) are used: the first is tagged
      ``series`` and, when present, the second ``title`` (0.4 each);
    * a single unidentified leaf in ``mtree.node_at((-3,))`` (the folder
      containing the file) is tagged ``series`` (0.3); a ``ValueError``
      from that lookup is treated as "no candidates";
    * a single unidentified leaf in the path group right before a
      season-only node is tagged ``series`` (0.5);
    * ``series`` guesses whose lowercased value is in ``unlikely_series``
      have their confidence multiplied by 0.5.
    """
    with_epnum = [leaf for leaf in mtree.leaves()
                  if 'episodeNumber' in leaf.guess]
    if with_epnum:
        self.match_from_epnum_position(mtree, with_epnum[0])
    else:
        # without an episode number, 2 or more groups in the basename are
        # probably "series - eptitle"
        basename_leaves = mtree.node_at((-2,)).unidentified_leaves()
        usable = [leaf for leaf in basename_leaves
                  if leaf.clean_value.lower() not in non_episode_title]

        if len(usable) == 1:
            # a lone candidate is probably the series name
            found_property(usable[0], 'series', 0.4)
        elif len(usable) >= 2:
            series_leaf, title_leaf = usable[0], usable[1]
            found_property(series_leaf, 'series', 0.4)
            found_property(title_leaf, 'title', 0.4)

    # only 1 remaining valid group in the folder containing the file:
    # likely the series name
    try:
        parent_candidates = mtree.node_at((-3,)).unidentified_leaves()
    except ValueError:
        parent_candidates = []
    if len(parent_candidates) == 1:
        found_property(parent_candidates[0], 'series', 0.3)

    # a path group that only contains the season info is most likely
    # preceded by the series title (ie: ../series/season X/..)
    season_nodes = []
    for item in mtree.nodes():
        if 'season' in item.guess and 'episodeNumber' not in item.guess:
            season_nodes.append(item)

    if season_nodes:
        first_season = season_nodes[0]
        neighbours = [leaf for leaf in mtree.unidentified_leaves()
                      if leaf.node_idx[0] == first_season.node_idx[0] - 1]
        if len(neighbours) == 1:
            found_property(neighbours[0], 'series', 0.5)

    # reduce the confidence of unlikely series
    for item in mtree.nodes():
        guess = item.guess
        if 'series' in guess and guess['series'].lower() in unlikely_series:
            lowered = item.guess.confidence('series') * 0.5
            item.guess.set_confidence('series', lowered)
def process(mtree):
    """Guess bonus titles, film series/titles and series names by position.

    Uses the unidentified leaves surrounding the first leaf guessed as
    ``bonusNumber``, ``filmNumber`` or ``season``:

    * ``bonusNumber``: the next unidentified leaf is tagged ``bonusTitle``
      (0.8) if it has the same ``node_idx[:2]`` prefix;
    * ``filmNumber``: the previous unidentified leaf is tagged
      ``filmSeries`` and the next one ``title`` (0.9 each);
    * ``season`` (only when ``'bonusNumber'`` is in ``mtree.info``): the
      previous unidentified leaf is tagged ``series`` (0.9) if it has the
      same ``node_idx[:2]`` prefix.

    The neighbour lookups yield ``None`` when no unidentified leaf lies on
    the requested side; such missing neighbours are ignored rather than
    passed on, which previously failed on ``None.node_idx``.

    Args:
        mtree: match tree exposing ``leaves()``, ``unidentified_leaves()``
            (must be sliceable) and ``info``.
    """
    def previous_group(g):
        # scan backwards so the nearest preceding leaf is found first
        for leaf in mtree.unidentified_leaves()[::-1]:
            if leaf.node_idx < g.node_idx:
                return leaf
        return None

    def next_group(g):
        for leaf in mtree.unidentified_leaves():
            if leaf.node_idx > g.node_idx:
                return leaf
        return None

    def same_group(g1, g2):
        return g1.node_idx[:2] == g2.node_idx[:2]

    def first_leaf_with(prop):
        # first leaf whose guess contains `prop`, or None
        matching = [node for node in mtree.leaves() if prop in node.guess]
        return matching[0] if matching else None

    bonus = first_leaf_with('bonusNumber')
    if bonus is not None:
        following = next_group(bonus)
        if following is not None and same_group(following, bonus):
            found_property(following, 'bonusTitle', 0.8)

    film = first_leaf_with('filmNumber')
    if film is not None:
        # found_property lives elsewhere; calling it with None is presumed
        # invalid, so absent neighbours are skipped
        preceding = previous_group(film)
        if preceding is not None:
            found_property(preceding, 'filmSeries', 0.9)

        following = next_group(film)
        if following is not None:
            found_property(following, 'title', 0.9)

    season = first_leaf_with('season')
    if season is not None and 'bonusNumber' in mtree.info:
        preceding = previous_group(season)
        if preceding is not None and same_group(preceding, season):
            found_property(preceding, 'series', 0.9)
def match_from_epnum_position(self, mtree, node):
    """Tag series and episode title from groups around the episode number.

    ``node`` is the leaf holding the episode number; its ``node_idx`` is the
    reference position.  Unidentified leaves are compared with it inside the
    same path group (same ``node_idx[0]``) or the same explicit group (same
    ``node_idx[:2]``).  Leaves whose lowercased ``clean_value`` is in
    ``non_episode_title`` are never used as title candidates.

    Rules, in order:

    1. no ``title`` in ``mtree.info``, nothing before the episode number and
       exactly 2 candidates after it: they are ``series`` then ``title``
       (0.4 each), and the function returns;
    2. the first unidentified leaf before the episode number, if any, is
       tagged ``series`` (0.7);
    3. exactly 1 candidate after it in the path group: ``title`` (0.5);
    4. otherwise, candidates after it in the same explicit group: the first
       one is ``title`` (0.4 if it is the only one, else 0.3);
    5. otherwise, the candidate after it in the path group with the longest
       ``clean_value`` (first one on ties) is ``title`` (0.3).

    Args:
        mtree: match tree exposing ``unidentified_leaves()`` and ``info``.
        node: leaf carrying the episode number.
    """
    anchor = node.node_idx

    # helpers expressing the position filters with high-level semantics
    def in_path_group(leaf):
        return leaf.node_idx[0] == anchor[0]

    def leaves_before():
        return [leaf for leaf in mtree.unidentified_leaves()
                if in_path_group(leaf) and leaf.node_idx[1:] < anchor[1:]]

    def leaves_after():
        return [leaf for leaf in mtree.unidentified_leaves()
                if in_path_group(leaf) and leaf.node_idx[1:] > anchor[1:]]

    def leaves_after_in_explicit_group():
        return [leaf for leaf in mtree.unidentified_leaves()
                if (leaf.node_idx[:2] == anchor[:2]
                    and leaf.node_idx[2:] > anchor[2:])]

    def usable_titles(leaves):
        return [leaf for leaf in leaves
                if leaf.clean_value.lower() not in non_episode_title]

    # epnumber is the first group and there are only 2 after it in the same
    # path group -> series title - episode title
    following = usable_titles(leaves_after())
    if ('title' not in mtree.info
            and not leaves_before()
            and len(following) == 2):
        series_leaf, title_leaf = following
        found_property(series_leaf, 'series', confidence=0.4)
        found_property(title_leaf, 'title', confidence=0.4)
        return

    # at least 1 valid group before the episodeNumber: probably the series
    preceding = leaves_before()
    if preceding:
        found_property(preceding[0], 'series', confidence=0.7)

    # only 1 group after (in the same path group): probably the episode title
    following = usable_titles(leaves_after())
    if len(following) == 1:
        found_property(following[0], 'title', confidence=0.5)
        return

    # try in the same explicit group, with lower confidence
    grouped = usable_titles(leaves_after_in_explicit_group())
    if grouped:
        found_property(grouped[0], 'title',
                       confidence=0.4 if len(grouped) == 1 else 0.3)
        return

    # fall back on the candidate with the longest value
    remaining = usable_titles(leaves_after())
    if remaining:
        longest = max(remaining, key=lambda leaf: len(leaf.clean_value))
        found_property(longest, 'title', confidence=0.3)
def match_from_epnum_position(mtree, node):
    """Guess series and episode title relative to the episode number leaf.

    The position of ``node`` (``node.node_idx``) splits the unidentified
    leaves of its path group (same ``node_idx[0]``) into "before" and
    "after"; a narrower "after" set is limited to the same explicit group
    (same ``node_idx[:2]``).  Title candidates exclude leaves whose
    lowercased ``clean_value`` is in ``non_episode_title``.

    Decisions, first match wins unless noted:

    * nothing before, exactly 2 candidates after and no ``title`` in
      ``mtree.info``: ``series`` then ``title`` at 0.4, then return;
    * any leaf before: the first one is ``series`` at 0.7 (processing
      continues);
    * exactly 1 candidate after in the path group: ``title`` at 0.5;
    * candidates after in the explicit group: first one is ``title``, at 0.4
      when alone and 0.3 otherwise;
    * else the longest candidate after in the path group (earliest on ties)
      is ``title`` at 0.3.

    Args:
        mtree: match tree exposing ``unidentified_leaves()`` and ``info``.
        node: leaf carrying the episode number.
    """
    ep_idx = node.node_idx

    def unidentified(matches):
        # unidentified leaves whose index satisfies the given predicate
        return [leaf for leaf in mtree.unidentified_leaves()
                if matches(leaf.node_idx)]

    def before_in_path_group():
        return unidentified(
            lambda idx: idx[0] == ep_idx[0] and idx[1:] < ep_idx[1:])

    def after_in_path_group():
        return unidentified(
            lambda idx: idx[0] == ep_idx[0] and idx[1:] > ep_idx[1:])

    def after_in_explicit_group():
        return unidentified(
            lambda idx: idx[:2] == ep_idx[:2] and idx[2:] > ep_idx[2:])

    def title_candidates_from(leaves):
        kept = []
        for leaf in leaves:
            if leaf.clean_value.lower() not in non_episode_title:
                kept.append(leaf)
        return kept

    # epnumber is the first group with only 2 groups after it in the same
    # path group -> series title - episode title
    candidates = title_candidates_from(after_in_path_group())
    no_title_yet = 'title' not in mtree.info
    if no_title_yet and not before_in_path_group() and len(candidates) == 2:
        found_property(candidates[0], 'series', confidence=0.4)
        found_property(candidates[1], 'title', confidence=0.4)
        return

    # a valid group before the episodeNumber is probably the series name
    series_candidates = before_in_path_group()
    if series_candidates:
        found_property(series_candidates[0], 'series', confidence=0.7)

    # a single group after, in the same path group, is probably the title
    candidates = title_candidates_from(after_in_path_group())
    if len(candidates) == 1:
        found_property(candidates[0], 'title', confidence=0.5)
        return

    # same explicit group, with lower confidence
    candidates = title_candidates_from(after_in_explicit_group())
    if len(candidates) == 1:
        found_property(candidates[0], 'title', confidence=0.4)
        return
    if len(candidates) > 1:
        found_property(candidates[0], 'title', confidence=0.3)
        return

    # last resort: the candidate with the longest value
    candidates = title_candidates_from(after_in_path_group())
    if not candidates:
        return
    best = max(candidates, key=lambda leaf: len(leaf.clean_value))
    found_property(best, 'title', confidence=0.3)