Esempio n. 1
0
class GuessEpisodeSpecial(Transformer):
    """Transformer that looks for "special episode" keywords.

    The keywords (Special, Bonus, Omake, Ova, Oav, Pilot, Unaired, Extra(s))
    are registered under the ``special`` property of a PropertiesContainer.
    """

    def __init__(self):
        Transformer.__init__(self, -205)
        self.container = PropertiesContainer()
        register = self.container.register_property
        register('special', 'Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired')
        # Both "Extra" and "Extras" are reported as the canonical "Extras".
        register('special', 'Extras?', canonical_form='Extras')

    def guess_special(self, string, node=None, options=None):
        """Return the guesses built from every ``special`` match in *string*."""
        matches = self.container.find_properties(string, node, 'special', multiple=True)
        return self.container.as_guess(matches, multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Return ``{'type': 'episode'}`` when a non-episode tree holds a special keyword.

        Returns None when the tree is already typed as an episode or when no
        unidentified leaf yields a ``special`` guess.
        """
        if mtree.guess.get('type', '').startswith('episode'):
            return None
        for leaf in mtree.unidentified_leaves():
            matches = self.container.find_properties(leaf.value, leaf, 'special')
            if self.container.as_guess(matches):
                return {'type': 'episode'}
        return None

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Report ``special`` guesses on title leaves, then on unidentified leaves.

        Nothing is done unless the tree is typed as an episode and either has
        no episode number or has season 0. Always returns None.
        """
        if not mtree.guess.get('type', '').startswith('episode'):
            return None
        if mtree.info.get('episodeNumber') and mtree.info.get('season') != 0:
            return None

        def tag(leaves):
            # Each guess is attached to its leaf without updating the guess itself.
            for leaf in leaves:
                for guess in self.guess_special(leaf.value, leaf, options):
                    found_guess(leaf, guess, update_guess=False)

        # Title leaves are fully handled before unidentified leaves are fetched.
        tag(mtree.leaves_containing('title'))
        tag(mtree.unidentified_leaves())
        return None
Esempio n. 2
0
class GuessVideoRexps(Transformer):
    """Transformer matching regexp-based video properties.

    Registers patterns for cdNumber / cdNumberTotal, bonusNumber, filmNumber
    and edition, and applies them to the unidentified leaves of the tree.
    """

    def __init__(self):
        Transformer.__init__(self, 25)

        self.container = PropertiesContainer(canonical_from_pattern=False)

        # "cd<N>", optionally followed by "of <M>"; `_psep` is the separator
        # fragment defined elsewhere in the module.
        self.container.register_property(None, 'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep + '(?P<cdNumberTotal>[0-9]))?', confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
        # "<M> cd" / "<M> cds"
        self.container.register_property('cdNumberTotal', '([1-9])' + _psep + 'cds?', confidence=0.9, enhance=False, formatter=parse_numeral)

        # "x<NN>"
        self.container.register_property('bonusNumber', 'x([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)

        # "f<NN>"
        self.container.register_property('filmNumber', 'f([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)

        # Each edition accepts "<word>", "<word>-edition" and "edition-<word>".
        self.container.register_property('edition', 'collector', 'collector-edition', 'edition-collector', canonical_form='Collector Edition')
        self.container.register_property('edition', 'special-edition', 'edition-special', canonical_form='Special Edition')
        self.container.register_property('edition', 'criterion', 'criterion-edition', 'edition-criterion', canonical_form='Criterion Edition')
        # Fixed: the pattern used to read 'cdeluxe-edition', so the full
        # "deluxe-edition" spelling was never matched as a single span.
        self.container.register_property('edition', 'deluxe', 'deluxe-edition', 'edition-deluxe', canonical_form='Deluxe Edition')
        self.container.register_property('edition', 'director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', canonical_form='Director\'s cut')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_video_rexps(self, string, node=None, options=None):
        """Return the guess built from the registered patterns found in *string*."""
        found = self.container.find_properties(string, node, options)
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run ``guess_video_rexps`` over the unidentified leaves of *mtree*."""
        GuessFinder(self.guess_video_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
class GuessEpisodeSpecial(Transformer):
    """Detect special-episode keywords and expose them as ``special``."""

    def __init__(self):
        Transformer.__init__(self, -205)
        self.container = PropertiesContainer()
        add_property = self.container.register_property
        add_property('special', 'Special', 'Bonus', 'Omake', 'Ova', 'Oav',
                     'Pilot', 'Unaired')
        # "Extra" and "Extras" share the canonical form "Extras".
        add_property('special', 'Extras?', canonical_form='Extras')

    def guess_special(self, string, node=None, options=None):
        """Build guesses from all ``special`` matches found in *string*."""
        return self.container.as_guess(
            self.container.find_properties(string, node, 'special',
                                           multiple=True),
            multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Ask for an ``episode`` second pass when a special keyword is seen.

        Only trees not yet typed as an episode are inspected; the first
        unidentified leaf producing a guess yields ``{'type': 'episode'}``.
        Otherwise None is returned.
        """
        already_episode = mtree.guess.get('type', '').startswith('episode')
        if already_episode:
            return None
        for candidate in mtree.unidentified_leaves():
            hits = self.container.find_properties(candidate.value, candidate,
                                                  'special')
            if self.container.as_guess(hits):
                return {'type': 'episode'}
        return None

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Attach ``special`` guesses to title and unidentified leaves.

        Applies only to episode trees that lack an episode number or whose
        season is 0. Always returns None.
        """
        if not mtree.guess.get('type', '').startswith('episode'):
            return None
        info = mtree.info
        if info.get('episodeNumber') and mtree.info.get('season') != 0:
            return None

        # Leaf sources are called lazily so that unidentified leaves are only
        # fetched once every title leaf has been handled.
        leaf_sources = (lambda: mtree.leaves_containing('title'),
                        mtree.unidentified_leaves)
        for fetch_leaves in leaf_sources:
            for leaf in fetch_leaves():
                for guess in self.guess_special(leaf.value, leaf, options):
                    found_guess(leaf, guess, update_guess=False)
        return None
class GuessEpisodesRexps(Transformer):
    """Transformer extracting season and episode numbers with regexps.

    Patterns are applied to the unidentified leaves of the tree;
    ``should_process`` reports True only for trees typed as an episode.
    """

    def __init__(self):
        Transformer.__init__(self, 20)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        def episode_parser(value):
            """Parse an episode marker into a number or an episode list.

            *value* is split on letters; each remaining chunk is either a
            single number or a dash-separated chain such as "01-03", which is
            expanded to every number between consecutive bounds.

            Returns None when nothing was parsed, the single number when only
            one was found, otherwise a dict holding the first number under
            the ``None`` key and all of them under ``'episodeList'``.
            """
            ret = []
            for letters_elt in re_split('[a-zA-Z]', value):
                if not letters_elt:
                    continue
                dashed_values = [x for x in letters_elt.split('-') if x]
                if len(dashed_values) > 1:
                    # Walk consecutive pairs of bounds. The previous code
                    # always read indexes 0 and 1, so "01-02-03" produced
                    # [1, 2, 1, 2] instead of [1, 2, 3].
                    for index in range(len(dashed_values) - 1):
                        start_dash_ep = parse_numeral(dashed_values[index])
                        end_dash_ep = parse_numeral(dashed_values[index + 1])
                        if index:
                            # The lower bound was already added as the upper
                            # bound of the previous pair.
                            start_dash_ep += 1
                        ret.extend(range(start_dash_ep, end_dash_ep + 1))
                else:
                    ret.append(parse_numeral(letters_elt))
            if len(ret) > 1:
                return {None: ret[0], 'episodeList': ret}  # TODO: Should support seasonList also
            if ret:
                return ret[0]
            return None

        class ResolutionCollisionValidator(object):
            """Accept a match only when its second group is under 3 characters.

            NOTE(review): judging by the name, this presumably rejects video
            resolutions such as 1920x1080 -- confirm.
            """

            def validate(self, prop, string, node, match, entry_start, entry_end):
                return len(match.group(2)) < 3

        # `sep`, `numeral` and `digital_numeral` are regexp fragments defined
        # elsewhere in the module.
        # "season <N>" / "saison <N>"
        self.container.register_property(None, r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))', confidence=1.0, formatter=parse_numeral)
        # "s<N>e<M>", optionally chained with further "e<M>" / "-<M>"
        self.container.register_property(None, r'(s(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser}, validator=NoValidator())
        # "<N>x<M>", optionally chained with further "x<M>" / "-<M>"
        self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        # "s<N>" alone
        self.container.register_property(None, r'(s(?P<season>' + digital_numeral + '))[^0-9]', confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        # "<M>v2" / "<M>v3"
        self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')v[23])', confidence=0.6, formatter=parse_numeral)
        # "ep <M>"
        self.container.register_property(None, r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.7, formatter=parse_numeral)
        # "e<M>"
        self.container.register_property(None, r'(e(?P<episodeNumber>' + digital_numeral + '))', confidence=0.6, formatter=parse_numeral)
        # Leading two-digit number, then leading zero-prefixed number. The
        # \d fragments are raw strings now; the resulting patterns are unchanged.
        self.container.register_property(None, r'\A ?((?P<episodeNumber>\d{2}))' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property(None, r'\A ?(0(?P<episodeNumber>\d+))' + sep, confidence=0.4, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

    def supported_properties(self):
        """Return the property names reported by this transformer."""
        return ['episodeNumber', 'season']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Return the guess built from the registered patterns found in *string*."""
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with ``episode``."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run ``guess_episodes_rexps`` over the unidentified leaves of *mtree*."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
Esempio n. 5
0
 def guess_regexps_id(self, string, node=None, options=None):
     """Guess ``regexpId`` from the user-supplied ``id_regexps`` option.

     A fresh PropertiesContainer is built on every call, with one
     ``regexpId`` property per configured regexp.

     :param string: text to search
     :param node: node passed through to the container, may be None
     :param options: mapping that may hold an ``id_regexps`` iterable
     :return: whatever ``PropertiesContainer.as_guess`` builds from the matches
     """
     # `options` defaults to None and the key may be absent: treat both as
     # "no regexp configured" instead of raising.
     id_regexps = (options or {}).get("id_regexps") or ()
     container = PropertiesContainer(enhance=False,
                                     canonical_from_pattern=False)
     for regexp in id_regexps:
         container.register_property('regexpId',
                                     regexp,
                                     confidence=1.0,
                                     validator=NoValidator())
     found = container.find_properties(string, node, options)
     return container.as_guess(found, string)
Esempio n. 6
0
 def guess_regexps_id(self, string, node=None, options=None):
     """Search *string* for the regexps listed in ``options['id_regexps']``.

     Every regexp is registered as a ``regexpId`` property (confidence 1.0,
     no validation) in a container created for this call only.

     :param string: text to search
     :param node: node passed through to the container, may be None
     :param options: mapping that may hold an ``id_regexps`` iterable
     :return: whatever ``PropertiesContainer.as_guess`` builds from the matches
     """
     # Tolerate options=None (the default) and a missing or empty key; the
     # container then simply has no pattern registered.
     id_regexps = (options or {}).get("id_regexps") or ()
     container = PropertiesContainer(
         enhance=False,
         canonical_from_pattern=False)
     for regexp in id_regexps:
         container.register_property(
             'regexpId',
             regexp,
             confidence=1.0,
             validator=NoValidator())
     found = container.find_properties(string, node, options)
     return container.as_guess(found, string)
class GuessEpisodeDetails(Transformer):
    """Detect episode-detail keywords and expose them as ``episodeDetails``."""

    def __init__(self):
        Transformer.__init__(self, -205)
        self.container = PropertiesContainer()
        register = self.container.register_property
        register("episodeDetails", "Special", "Bonus", "Omake", "Ova", "Oav", "Pilot", "Unaired")
        # "Extra" and "Extras" are both reported as "Extras".
        register("episodeDetails", "Extras?", canonical_form="Extras")

    def guess_details(self, string, node=None, options=None):
        """Return the guesses built from every ``episodeDetails`` match in *string*."""
        matches = self.container.find_properties(string, node, options, "episodeDetails", multiple=True)
        return self.container.as_guess(matches, multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Return ``{"type": "episode"}`` when a non-episode tree holds a detail keyword.

        Returns None when the tree is already typed as an episode or when no
        unidentified leaf yields a guess.
        """
        if mtree.guess.get("type", "").startswith("episode"):
            return None
        for leaf in mtree.unidentified_leaves():
            matches = self.container.find_properties(leaf.value, leaf, options, "episodeDetails")
            if self.container.as_guess(matches):
                return {"type": "episode"}
        return None

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Attach ``episodeDetails`` guesses to title and unidentified leaves.

        Applies only to episode trees that lack an episode number or whose
        season is 0. Always returns None.
        """
        if not mtree.guess.get("type", "").startswith("episode"):
            return None
        if mtree.info.get("episodeNumber") and mtree.info.get("season") != 0:
            return None

        # Both leaf collections are requested before iteration starts.
        title_leaves = mtree.leaves_containing("title")
        unidentified_leaves = mtree.unidentified_leaves()
        for leaf in itertools.chain(title_leaves, unidentified_leaves):
            for guess in self.guess_details(leaf.value, leaf, options):
                found_guess(leaf, guess, update_guess=False)
        return None
Esempio n. 8
0
    def expected_series(string, node=None, options=None):
        """Guess ``series`` from the titles listed in ``options['expected_series']``.

        Entries starting with ``re:`` are used as regular expressions, with
        the prefix removed and spaces replaced by ``-``; all other entries
        are matched literally.

        :param string: text to search
        :param node: node passed through to the container, may be None
        :param options: mapping that may hold an ``expected_series`` iterable
        :return: whatever ``PropertiesContainer.as_guess`` builds from the matches
        """
        # `options` defaults to None and the key may be absent: treat both as
        # "no expected series" instead of raising.
        expected = (options or {}).get('expected_series') or ()

        container = PropertiesContainer(enhance=True, canonical_from_pattern=False)

        for expected_serie in expected:
            if expected_serie.startswith('re:'):
                pattern = expected_serie[3:].replace(' ', '-')
                container.register_property('series', pattern, enhance=True)
            else:
                # Literal title: escape it so regexp metacharacters match verbatim.
                container.register_property('series', re.escape(expected_serie), enhance=False)

        found = container.find_properties(string, node, options)
        return container.as_guess(found, string)
Esempio n. 9
0
    def expected_series(self, string, node=None, options=None):
        """Search *string* for the series named in ``options['expected_series']``.

        A container is built for this call only. ``re:``-prefixed entries are
        registered as regular expressions (prefix stripped, spaces turned
        into ``-``); other entries are escaped and matched literally.

        :param string: text to search
        :param node: node passed through to the container, may be None
        :param options: mapping that may hold an ``expected_series`` iterable
        :return: whatever ``PropertiesContainer.as_guess`` builds from the matches
        """
        # Tolerate options=None (the default) and a missing or empty key; the
        # container then simply has no pattern registered.
        expected = (options or {}).get('expected_series') or ()

        container = PropertiesContainer(enhance=True, canonical_from_pattern=False)

        for expected_serie in expected:
            if expected_serie.startswith('re:'):
                pattern = expected_serie[3:].replace(' ', '-')
                container.register_property('series', pattern, enhance=True)
            else:
                container.register_property('series', re.escape(expected_serie), enhance=False)

        found = container.find_properties(string, node, options)
        return container.as_guess(found, string)
Esempio n. 10
0
class GuessEpisodesRexps(Transformer):
    """Transformer extracting season and episode numbers with regexps.

    Patterns are applied to the unidentified leaves of the tree;
    ``should_process`` reports True only for trees typed as an episode.
    """

    def __init__(self):
        Transformer.__init__(self, 20)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        def episode_parser(value):
            """Parse an episode marker into a number or an episode list.

            *value* is split on letters; each remaining chunk is either a
            single number or a dash-separated chain such as "01-03", which is
            expanded to every number between consecutive bounds.

            Returns None when nothing was parsed, the single number when only
            one was found, otherwise a dict holding the first number under
            the ``None`` key and all of them under ``'episodeList'``.
            """
            ret = []
            for letters_elt in re_split('[a-zA-Z]', value):
                if not letters_elt:
                    continue
                dashed_values = [x for x in letters_elt.split('-') if x]
                if len(dashed_values) > 1:
                    # Walk consecutive pairs of bounds. The previous code
                    # always read indexes 0 and 1, so "01-02-03" produced
                    # [1, 2, 1, 2] instead of [1, 2, 3].
                    for index in range(len(dashed_values) - 1):
                        start_dash_ep = parse_numeral(dashed_values[index])
                        end_dash_ep = parse_numeral(dashed_values[index + 1])
                        if index:
                            # The lower bound was already added as the upper
                            # bound of the previous pair.
                            start_dash_ep += 1
                        ret.extend(range(start_dash_ep, end_dash_ep + 1))
                else:
                    ret.append(parse_numeral(letters_elt))
            if len(ret) > 1:
                return {None: ret[0], 'episodeList': ret}  # TODO: Should support seasonList also
            if ret:
                return ret[0]
            return None

        # `sep`, `numeral` and `digital_numeral` are regexp fragments defined
        # elsewhere in the module.
        # "season <N>" / "saison <N>"
        self.container.register_property(None, r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))', confidence=1.0, formatter=parse_numeral)
        # "s<N>e<M>", optionally chained with further "e<M>" / "-<M>"
        self.container.register_property(None, r'(s(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser}, validator=NoValidator())
        # "<N>x<M>", optionally chained with further "x<M>" / "-<M>"
        self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        # "s<N>" alone
        self.container.register_property(None, r'(s(?P<season>' + digital_numeral + '))[^0-9]', confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        # "<M>v2" / "<M>v3"
        self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')v[23])', confidence=0.6, formatter=parse_numeral)
        # "ep <M>"
        self.container.register_property(None, r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.7, formatter=parse_numeral)
        # "e<M>"
        self.container.register_property(None, r'(e(?P<episodeNumber>' + digital_numeral + '))', confidence=0.6, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

    def supported_properties(self):
        """Return the property names reported by this transformer."""
        return ['episodeNumber', 'season']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Return the guess built from the registered patterns found in *string*."""
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with ``episode``."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run ``guess_episodes_rexps`` over the unidentified leaves of *mtree*."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
class GuessWeakEpisodesRexps(Transformer):
    """Transformer guessing episode numbers from weak (bare number) patterns.

    ``should_process`` reports True only for trees typed as an episode, and
    a leaf is skipped when the tree root already holds an ``episodeNumber``.
    """

    def __init__(self):
        Transformer.__init__(self, 15)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        episode_words = ['episodes?']

        def _formater(episode_number):
            """Turn a bare number into an episode number or a season/episode pair.

            Returns None for values accepted by ``valid_year`` and for split
            results whose season exceeds 50. Numbers above 100 are split into
            ``season = n // 100`` and ``episodeNumber = n % 100``; other
            numbers are returned unchanged.
            """
            epnum = parse_numeral(episode_number)
            if valid_year(epnum):
                return None
            if epnum > 100:
                season, epnum = epnum // 100, epnum % 100
                # episodes which have a season > 50 are most likely errors
                # (Simpson is at 25!)
                if season > 50:
                    return None
                return {'season': season, 'episodeNumber': epnum}
            return epnum

        def _episode_number_preferred(options):
            # Disables a pattern when the 'episode_prefer_number' option is set.
            return options.get('episode_prefer_number') if options else False

        def _episode_number_not_preferred(options):
            # Disables a pattern unless the 'episode_prefer_number' option is set.
            return not options.get('episode_prefer_number') if options else True

        # Regexp fragments containing \d are raw strings so that the
        # backslash is not treated as an (invalid) string escape.
        self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater, disabler=_episode_number_preferred)
        self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6, formatter=_formater)
        self.container.register_property('episodeNumber', r'[^0-9](\d{1,3})', confidence=0.6, formatter=parse_numeral, disabler=_episode_number_not_preferred)
        # "episode <N>" / "episodes <N>"
        self.container.register_property(None, '(' + build_or_pattern(episode_words) + sep + '?(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.4, formatter=parse_numeral)
        # "<N> of <M>" with any of the `of_separators`
        self.container.register_property(None, r'(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')', confidence=0.6, formatter=parse_numeral)
        # Number at the very start, then at the very end, of the string
        self.container.register_property('episodeNumber', r'^' + sep + r'?(\d{1,3})' + sep, confidence=0.4, formatter=parse_numeral, disabler=_episode_number_not_preferred)
        self.container.register_property('episodeNumber', sep + r'(\d{1,3})' + sep + '?$', confidence=0.4, formatter=parse_numeral, disabler=_episode_number_not_preferred)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
        """Return the guess found in *string*, or None if an episode number is already known."""
        if node and 'episodeNumber' in node.root.info:
            return None

        properties = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(properties, string)

        return guess

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with ``episode``."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run ``guess_weak_episodes_rexps`` over the unidentified leaves of *mtree*."""
        GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options).process_nodes(mtree.unidentified_leaves())
Esempio n. 12
0
class GuessWebsite(Transformer):
    """Transformer detecting website names in unidentified leaves.

    The list of top-level domains is read from the packaged
    ``tlds-alpha-by-domain.txt`` resource when the transformer is created.
    """

    def __init__(self):
        Transformer.__init__(self, 45)

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        tlds = []

        f = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
        try:
            # The first two lines of the resource are skipped.
            # NOTE(review): presumably header lines -- confirm against the file.
            f.readline()
            next(f)
            for tld in f:
                tld = tld.strip()
                if b'--' in tld:
                    continue
                tlds.append(tld.decode("utf-8"))
        finally:
            # Close the stream even when reading or decoding fails.
            f.close()

        tlds_pattern = build_or_pattern(
            tlds)  # All registered domain extension
        safe_tlds_pattern = build_or_pattern(
            ['com', 'org', 'net'])  # For sure a website extension
        safe_subdomains_pattern = build_or_pattern(
            ['www'])  # For sure a website subdomain
        safe_prefix_tlds_pattern = build_or_pattern(
            ['co', 'com', 'org', 'net'])  # Those words before a tlds are sure

        # Fragments containing "\." are raw strings so that the backslash is
        # not treated as an (invalid) string escape; the patterns are unchanged.
        # Safe subdomain + any names + any registered extension
        self.container.register_property(
            'website', '(?:' + safe_subdomains_pattern + r'\.)+' +
            r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
        # Optional safe subdomain + one name + safe extension
        self.container.register_property(
            'website', '(?:' + safe_subdomains_pattern + r'\.)*' +
            r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
        # Optional safe subdomain + one name + safe prefix + any extension
        self.container.register_property(
            'website',
            '(?:' + safe_subdomains_pattern + r'\.)*' + r'[a-z-]+\.' + r'(?:' +
            safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_website(self, string, node=None, options=None):
        """Return the guess built from the ``website`` matches found in *string*."""
        found = self.container.find_properties(string, node, 'website')
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run ``guess_website`` over the unidentified leaves of *mtree*."""
        GuessFinder(self.guess_website, 1.0, self.log,
                    options).process_nodes(mtree.unidentified_leaves())
class GuessWeakEpisodesRexps(Transformer):
    """Guess episode numbers from bare numbers and "episode <N>" markers."""

    def __init__(self):
        Transformer.__init__(self, 15)

        self.properties = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        def _formater(raw_number):
            """Map a bare number to an episode number or a season/episode pair.

            Returns None for values accepted by ``valid_year`` and for split
            results whose season exceeds 50.
            """
            number = parse_numeral(raw_number)
            if valid_year(number):
                return None
            if not number > 100:
                return number
            season, episode = number // 100, number % 100
            # A season above 50 is most likely a mis-parse
            # (even the Simpsons are only at 25).
            if season > 50:
                return None
            return {'season': season, 'episodeNumber': episode}

        register = self.properties.register_property
        register(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater)
        register('episodeNumber', '(?:episode)' + sep + '(' + numeral + ')[^0-9]', confidence=0.3)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.properties.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
        """Return the guess found in *string*, or None if an episode number is already known."""
        episode_known = node and 'episodeNumber' in node.root.info
        if episode_known:
            return None
        matches = self.properties.find_properties(string, node)
        return self.properties.as_guess(matches, string)

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with ``episode``."""
        guessed_type = mtree.guess.get('type', '')
        return guessed_type.startswith('episode')

    def process(self, mtree, options=None):
        """Run ``guess_weak_episodes_rexps`` over the unidentified leaves of *mtree*."""
        finder = GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
Esempio n. 14
0
class GuessWebsite(Transformer):
    """Transformer detecting website names in unidentified leaves.

    Top-level domains come from the module-level ``TLDS`` collection.
    """

    def __init__(self):
        Transformer.__init__(self, 45)

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        tlds_pattern = build_or_pattern(
            TLDS)  # All registered domain extension
        safe_tlds_pattern = build_or_pattern(
            ['com', 'org', 'net'])  # For sure a website extension
        safe_subdomains_pattern = build_or_pattern(
            ['www'])  # For sure a website subdomain
        safe_prefix_tlds_pattern = build_or_pattern(
            ['co', 'com', 'org', 'net'])  # Those words before a tlds are sure

        # Fragments containing "\." are raw strings so that the backslash is
        # not treated as an (invalid) string escape; the patterns are unchanged.
        # Safe subdomain + any names + any registered extension
        self.container.register_property(
            'website', '(?:' + safe_subdomains_pattern + r'\.)+' +
            r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
        # Optional safe subdomain + one name + safe extension
        self.container.register_property(
            'website', '(?:' + safe_subdomains_pattern + r'\.)*' +
            r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
        # Optional safe subdomain + one name + safe prefix + any extension
        self.container.register_property(
            'website',
            '(?:' + safe_subdomains_pattern + r'\.)*' + r'[a-z-]+\.' + r'(?:' +
            safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_website(self, string, node=None, options=None):
        """Return the guess built from the ``website`` matches found in *string*."""
        found = self.container.find_properties(string, node, options,
                                               'website')
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run ``guess_website`` over the unidentified leaves of *mtree*."""
        GuessFinder(self.guess_website, 1.0, self.log,
                    options).process_nodes(mtree.unidentified_leaves())
Esempio n. 15
0
class GuessWebsite(Transformer):
    """Transformer detecting website names in unidentified leaves.

    Top-level domains come from the module-level ``TLDS`` collection.
    """

    def __init__(self):
        Transformer.__init__(self, 45)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        tlds_pattern = build_or_pattern(TLDS)  # All registered domain extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])  # For sure a website extension
        safe_subdomains_pattern = build_or_pattern(['www'])  # For sure a website subdomain
        safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])  # Those words before a tlds are sure

        # Fragments containing "\." are raw strings so that the backslash is
        # not treated as an (invalid) string escape; the patterns are unchanged.
        # Safe subdomain + any names + any registered extension
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + r'\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
        # Optional safe subdomain + one name + safe extension
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + r'\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
        # Optional safe subdomain + one name + safe prefix + any extension
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + r'\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_website(self, string, node=None, options=None):
        """Return the guess built from the ``website`` matches found in *string*."""
        found = self.container.find_properties(string, node, 'website')
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run ``guess_website`` over the unidentified leaves of *mtree*."""
        GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
Esempio n. 16
0
class GuessWebsite(Transformer):
    """Transformer detecting website names in unidentified leaves.

    The list of top-level domains is read from the packaged
    ``tlds-alpha-by-domain.txt`` resource when the transformer is created.
    """

    def __init__(self):
        Transformer.__init__(self, 45)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        tlds = []

        f = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
        try:
            # The first two lines of the resource are skipped.
            # NOTE(review): presumably header lines -- confirm against the file.
            f.readline()
            next(f)
            for tld in f:
                tld = tld.strip()
                if b'--' in tld:
                    continue
                tlds.append(tld.decode("utf-8"))
        finally:
            # Close the stream even when reading or decoding fails.
            f.close()

        tlds_pattern = build_or_pattern(tlds)  # All registered domain extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])  # For sure a website extension
        safe_subdomains_pattern = build_or_pattern(['www'])  # For sure a website subdomain
        safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])  # Those words before a tlds are sure

        # Fragments containing "\." are raw strings so that the backslash is
        # not treated as an (invalid) string escape; the patterns are unchanged.
        # Safe subdomain + any names + any registered extension
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + r'\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
        # Optional safe subdomain + one name + safe extension
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + r'\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
        # Optional safe subdomain + one name + safe prefix + any extension
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + r'\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_website(self, string, node=None, options=None):
        """Return the guess built from the ``website`` matches found in *string*."""
        found = self.container.find_properties(string, node, 'website')
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run ``guess_website`` over the unidentified leaves of *mtree*."""
        GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
Esempio n. 17
0
class GuessEpisodesRexps(Transformer):
    """Transformer that detects season / episode numbering with regular expressions.

    ``__init__`` registers every pattern on ``self.container``;
    ``guess_episodes_rexps`` applies them to a string and reconciles the result
    with guesses already attached to the same group node.
    """

    def __init__(self):
        """Build the separator/word/marker regexps and register all patterns."""
        Transformer.__init__(self, 20)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

        # Formatters: each delegates to list_parser with the list property name
        # and, where needed, a specific discrete separator.
        def episode_parser_x(value):
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))

        def episode_parser_e(value):
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile('e', re.IGNORECASE), fill_gaps=True)

        def episode_parser(value):
            return list_parser(value, 'episodeList')

        def season_parser(value):
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Validator rejecting matches whose season AND episode are both >= 100.

            NOTE(review): going by the class name this is presumably meant to
            keep things like video resolutions from being read as
            <season>x<episode> -- confirm.
            """

            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                # The match is kept as soon as one of the two values is below
                # 100; it is rejected only when both are 100 or more.
                try:
                    season_value = season_parser(match.group(2))
                    episode_value = episode_parser_x(match.group(3))
                    return season_value < 100 or episode_value < 100
                except Exception:
                    # Parsing/comparison failed (this may occur for 1xAll or
                    # patterns like this): accept the match. Only Exception is
                    # caught so KeyboardInterrupt/SystemExit still propagate.
                    return True

        # Season introduced (or followed) by a season word: single numeral, then list of numerals.
        self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)
        self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep, confidence=1.0, formatter={None: parse_numeral, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

        # Season marker followed by 'e'-prefixed episode number(s).
        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser}, validator=NoValidator())
        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser, 'season': season_parser}, validator=NoValidator())

        # <season>x<episode> forms (with and without separators), then a season marker alone.
        self.container.register_property(None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'season': season_parser}, validator=NoValidator())

        # Episode number with a 'v<version>' suffix, standalone version, and 'ep'-prefixed episodes.
        self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)
        self.container.register_property('version', sep + r'(V\d+)' + sep, confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)

        # Episode marker / episode word followed by a number or a list of numbers.
        self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        # Same, with a 'v<version>' suffix.
        self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        # Bare numbers anchored at the start or at the end of the string (low confidence).
        self.container.register_property('episodeNumber', r'^' + sep + r'+(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
        self.container.register_property('episodeNumber', r'^' + sep + r'+0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
        self.container.register_property('episodeNumber', sep + r'(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)
        self.container.register_property('episodeNumber', sep + r'0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)

        # "<number> of <count>" forms for episodes and seasons.
        self.container.register_property(None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))', confidence=0.7, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

        # "<season>xAll": the 'other' group is always formatted as 'Complete'.
        self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))', confidence=1.0, formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        """Add the ``-E/--episode-prefer-number`` flag to ``naming_opts``."""
        naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number', default=False,
                               help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
                                    'it will be guessed as season 2, episodeNumber 13')

    def supported_properties(self):
        """Return the fixed list of property names this transformer can produce."""
        return ['episodeNumber', 'season', 'episodeList', 'seasonList', 'episodeCount', 'seasonCount', 'version', 'other']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Apply the registered patterns to ``string`` and reconcile with ``node``'s group.

        When both a guess and a ``node`` are available, the new guess is
        compared with the guesses already attached to ``node.group_node()``:

        * new guess has ``season`` and ``episodeNumber``: for every existing
          guess that also has both, an ``episodeList`` is built/extended on the
          existing guess, the larger of the two ``episodeNumber`` values gets
          confidence 0 (the new guess loses ties), and the list is copied onto
          the new guess;
        * new guess has only ``episodeNumber``: every key it shares with an
          existing guess that contains ``episodeNumber`` is removed from the
          new guess.

        Returns the (possibly modified) guess produced by the container.
        """
        found = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(found, string)
        if guess and node:
            if 'season' in guess and 'episodeNumber' in guess:
                # If two guesses contains both season and episodeNumber in same group, create an episodeList
                for existing_guess in node.group_node().guesses:
                    if 'season' in existing_guess and 'episodeNumber' in existing_guess:
                        if 'episodeList' not in existing_guess:
                            existing_guess['episodeList'] = [existing_guess['episodeNumber']]
                        existing_guess['episodeList'].append(guess['episodeNumber'])
                        existing_guess['episodeList'].sort()
                        # Zero the confidence of whichever episodeNumber is larger.
                        if existing_guess['episodeNumber'] > guess['episodeNumber']:
                            existing_guess.set_confidence('episodeNumber', 0)
                        else:
                            guess.set_confidence('episodeNumber', 0)
                        guess['episodeList'] = list(existing_guess['episodeList'])
            elif 'episodeNumber' in guess:
                # An existing guess in the same group already carries an
                # episodeNumber: drop from the NEW guess every key it shares
                # with that existing guess.
                for existing_guess in node.group_node().guesses:
                    if 'episodeNumber' in existing_guess:
                        for k, v in existing_guess.items():
                            if k in guess:
                                del guess[k]
        return guess

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed ``type`` starts with ``'episode'``."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run ``guess_episodes_rexps`` over every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
    def guess_release_group(self, string, node=None, options=None):
        """Guess the ``releaseGroup`` property for ``string``.

        NOTE(review): in this file the method sits at method indentation right
        after ``GuessEpisodesRexps.process``, yet it reads
        ``self.validate_group_name``, ``self.validate_node`` and
        ``self.previous_safe_properties``, which the visible code only defines
        on ``GuessReleaseGroup``. It looks like a stray copy of
        ``GuessReleaseGroup.guess_release_group`` -- confirm before relying on it.

        Steps, as written:

        1. If ``options['expected_group']`` is set, build a temporary container
           from those names (``re:``-prefixed entries are used as patterns, the
           others are escaped) and return its guess when there is one.
        2. Otherwise search with ``self.container`` and validate the result
           against neighbouring leaves of ``node``.
        3. Strip brackets from the validated value.

        Returns the validated guess, or ``None`` when nothing was validated.
        """
        if options and options.get('expected_group'):
            expected_container = PropertiesContainer(
                enhance=True, canonical_from_pattern=False)
            for expected_group in options.get('expected_group'):
                if expected_group.startswith('re:'):
                    # Explicit regular expression: drop the 're:' prefix and
                    # replace spaces with '-'.
                    expected_group = expected_group[3:]
                    expected_group = expected_group.replace(' ', '-')
                    expected_container.register_property('releaseGroup',
                                                         expected_group,
                                                         enhance=True)
                else:
                    # Literal name: escape it so it is matched verbatim.
                    expected_group = re.escape(expected_group)
                    expected_container.register_property('releaseGroup',
                                                         expected_group,
                                                         enhance=False)

            found = expected_container.find_properties(string, node, options,
                                                       'releaseGroup')
            guess = expected_container.as_guess(found, string,
                                                self.validate_group_name)
            if guess:
                return guess

        found = self.container.find_properties(string, node, options,
                                               'releaseGroup')
        guess = self.container.as_guess(found, string,
                                        self.validate_group_name)
        validated_guess = None
        if guess:
            group_node = node.group_node()
            if group_node:
                # Accept the guess when a leaf of the same group holding one of
                # the "safe" properties validates against this node.
                for leaf in group_node.leaves_containing(
                        self.previous_safe_properties):
                    if self.validate_node(leaf, node, True):
                        # Confidence depends on the character found right
                        # after that leaf: '-' gives 1, anything else 0.7.
                        if leaf.root.value[leaf.span[1]] == '-':
                            guess.metadata().confidence = 1
                        else:
                            guess.metadata().confidence = 0.7
                        validated_guess = guess

            if not validated_guess:
                # If previous group last leaf is identified as a safe property,
                # consider the raw value as a releaseGroup
                previous_group_node = node.previous_group_node()
                if previous_group_node:
                    for leaf in previous_group_node.leaves_containing(
                            self.previous_safe_properties):
                        if self.validate_node(leaf, node, False):
                            guess = Guess({'releaseGroup': node.value},
                                          confidence=1,
                                          input=node.value,
                                          span=(0, len(node.value)))
                            if self.validate_group_name(guess):
                                node.guess = guess
                                validated_guess = guess

            if validated_guess:
                # If following group nodes have only one unidentified leaf, it belongs to the release group
                next_group_node = node

                while True:
                    next_group_node = next_group_node.next_group_node()
                    if next_group_node:
                        leaves = list(next_group_node.leaves())
                        if len(leaves) == 1 and not leaves[0].guess:
                            # Append that leaf's text and attach the guess to it.
                            validated_guess['releaseGroup'] = validated_guess[
                                'releaseGroup'] + leaves[0].value
                            leaves[0].guess = validated_guess
                        else:
                            break
                    else:
                        break

            # Fallback: an explicit node that is first in its group is taken
            # as the release group, minus its first and last characters, with
            # a low confidence of 0.4.
            if not validated_guess and node.is_explicit(
            ) and node.node_last_idx == 0:  # first node from group
                validated_guess = build_guess(
                    node,
                    'releaseGroup',
                    value=node.value[1:len(node.value) - 1])
                validated_guess.metadata().confidence = 0.4
                validated_guess.metadata().span = 1, len(node.value)
                node.guess = validated_guess

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(
                validated_guess['releaseGroup'])

        return validated_guess
class GuessReleaseGroup(Transformer):
    """Transformer that guesses the ``releaseGroup`` property.

    Candidates are matched with ``self.container`` and then validated against
    the properties already found on neighbouring leaves.
    """

    def __init__(self):
        """Configure the container, the forbidden-name checks and the safe-property lists."""
        Transformer.__init__(self, -190)

        self.container = PropertiesContainer(canonical_from_pattern=False)
        # Raw string: '\w' and '\?' are regex escapes, not Python string escapes.
        self._allowed_groupname_pattern = r'[\w@#€£$&!\?]'
        # An element is rejected as soon as one of these predicates is true.
        self._forbidden_groupname_lambda = [
            lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
            lambda elt: self._is_number(elt)
        ]
        # If the previous property in this list, the match will be considered as safe
        # and group name can contain a separator.
        self.previous_safe_properties = [
            'videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile',
            'videoProfile', 'audioChannels', 'other'
        ]
        self.previous_safe_values = {'other': ['Complete']}
        self.next_safe_properties = ['extension', 'website']
        self.next_safe_values = {'format': ['Telesync']}
        self.container.sep_replace_char = '-'
        self.container.canonical_from_pattern = False
        self.container.enhance = True
        # A group name is either one run of allowed characters, or two runs
        # joined by a single '-'.
        self.container.register_property('releaseGroup',
                                         self._allowed_groupname_pattern + '+')
        self.container.register_property(
            'releaseGroup', self._allowed_groupname_pattern + '+-' +
            self._allowed_groupname_pattern + '+')
        # Capturing group so that split() keeps the separators in its result.
        self.re_sep = re.compile('(' + sep + ')')

    def register_arguments(self, opts, naming_opts, output_opts,
                           information_opts, webservice_opts, other_options):
        """Add the repeatable ``-G/--expected-group`` option to ``naming_opts``."""
        naming_opts.add_argument(
            '-G',
            '--expected-group',
            action='append',
            dest='expected_group',
            help='Expected release group (can be used multiple times)')

    def supported_properties(self):
        """Return the properties supported by this transformer's container."""
        return self.container.get_supported_properties()

    def _is_number(self, s):
        """Return True when ``int(s)`` succeeds, False when it raises ``ValueError``."""
        try:
            int(s)
            return True
        except ValueError:
            return False

    def validate_group_name(self, guess):
        """Clean ``guess['releaseGroup']`` in place and tell whether it is acceptable.

        The value is split on separators; forbidden elements (see
        ``_forbidden_groupname_lambda``) are dropped together with one adjacent
        separator, and a leading/trailing separator is stripped. The cleaned
        value is written back to ``guess`` only when something is left.

        Returns False when the value has fewer than two characters, when
        nothing is left after cleaning, or when the cleaned value as a whole is
        forbidden; True otherwise.
        """
        val = guess['releaseGroup']
        if len(val) <= 1:
            return False

        def is_forbidden(text):
            return any(check(text) for check in self._forbidden_groupname_lambda)

        checked_val = ""
        forbidden = False
        # separators are in the list because of capturing group
        for elt in self.re_sep.split(val):
            if forbidden:
                # Previous element was forbidden: don't add the separator that follows it.
                forbidden = False
                continue
            forbidden = is_forbidden(elt.lower())
            if forbidden:
                # Removing previous separator (no-op when nothing was kept yet).
                checked_val = checked_val[:-1]
            else:
                checked_val += elt

        val = checked_val
        if not val:
            return False
        if self.re_sep.match(val[-1]):
            val = val[:-1]
        # ``val`` may have become empty above (value reduced to one separator):
        # guard the index access instead of raising IndexError.
        if val and self.re_sep.match(val[0]):
            val = val[1:]
        if not val:
            # Only separators were left: there is no usable group name.
            return False
        guess['releaseGroup'] = val
        return not is_forbidden(val.lower())

    def is_leaf_previous(self, leaf, node):
        """Return True when ``leaf`` ends before ``node`` starts with only separators in between."""
        if leaf.span[1] <= node.span[0]:
            return all(leaf.root.value[idx] in sep
                       for idx in range(leaf.span[1], node.span[0]))
        return False

    def validate_next_leaves(self, node):
        """Return True when taking ``node`` still leaves room for a series/title guess."""
        if 'series' in node.root.info or 'title' in node.root.info:
            # --expected-series or --expected-title is used.
            return True

        # Make sure to avoid collision with 'series' or 'title' guessed later. Should be more precise.
        leaves = node.root.unidentified_leaves()
        return len(list(leaves)) > 1

    def validate_node(self, leaf, node, safe=False):
        """Check that ``leaf`` can vouch for ``node`` being a release group.

        ``leaf`` must directly precede ``node`` and ``validate_next_leaves``
        must pass. With ``safe=True``, any property of ``leaf`` listed in
        ``previous_safe_values`` must also hold one of the allowed values.
        """
        if not self.is_leaf_previous(leaf, node):
            return False
        if not self.validate_next_leaves(node):
            return False
        if safe:
            for k, v in leaf.guess.items():
                if k in self.previous_safe_values and v not in self.previous_safe_values[k]:
                    return False
        return True

    def guess_release_group(self, string, node=None, options=None):
        """Guess the ``releaseGroup`` property for ``string``.

        1. If ``options['expected_group']`` is set, a temporary container is
           built from those names (``re:``-prefixed entries are used as
           patterns, the others are escaped) and its guess, if any, is returned.
        2. Otherwise ``self.container`` is searched and the result is validated
           against neighbouring leaves of ``node``.
        3. Brackets are stripped from the validated value.

        Returns the validated guess, or ``None`` when nothing was validated.
        """
        if options and options.get('expected_group'):
            expected_container = PropertiesContainer(
                enhance=True, canonical_from_pattern=False)
            for expected_group in options.get('expected_group'):
                if expected_group.startswith('re:'):
                    # Explicit regular expression: drop the prefix and
                    # replace spaces with '-'.
                    expected_group = expected_group[3:]
                    expected_group = expected_group.replace(' ', '-')
                    expected_container.register_property('releaseGroup',
                                                         expected_group,
                                                         enhance=True)
                else:
                    # Literal name: escape it so it is matched verbatim.
                    expected_group = re.escape(expected_group)
                    expected_container.register_property('releaseGroup',
                                                         expected_group,
                                                         enhance=False)

            found = expected_container.find_properties(string, node, options,
                                                       'releaseGroup')
            guess = expected_container.as_guess(found, string,
                                                self.validate_group_name)
            if guess:
                return guess

        found = self.container.find_properties(string, node, options,
                                               'releaseGroup')
        guess = self.container.as_guess(found, string,
                                        self.validate_group_name)
        validated_guess = None
        if guess:
            group_node = node.group_node()
            if group_node:
                # Accept the guess when a leaf of the same group holding one of
                # the "safe" properties validates against this node.
                for leaf in group_node.leaves_containing(
                        self.previous_safe_properties):
                    if self.validate_node(leaf, node, True):
                        # Confidence depends on the character right after that
                        # leaf: '-' gives 1, anything else 0.7.
                        if leaf.root.value[leaf.span[1]] == '-':
                            guess.metadata().confidence = 1
                        else:
                            guess.metadata().confidence = 0.7
                        validated_guess = guess

            if not validated_guess:
                # If previous group last leaf is identified as a safe property,
                # consider the raw value as a releaseGroup
                previous_group_node = node.previous_group_node()
                if previous_group_node:
                    for leaf in previous_group_node.leaves_containing(
                            self.previous_safe_properties):
                        if self.validate_node(leaf, node, False):
                            guess = Guess({'releaseGroup': node.value},
                                          confidence=1,
                                          input=node.value,
                                          span=(0, len(node.value)))
                            if self.validate_group_name(guess):
                                node.guess = guess
                                validated_guess = guess

            if validated_guess:
                # If following group nodes have only one unidentified leaf, it belongs to the release group
                next_group_node = node

                while True:
                    next_group_node = next_group_node.next_group_node()
                    if next_group_node:
                        leaves = list(next_group_node.leaves())
                        if len(leaves) == 1 and not leaves[0].guess:
                            # Append that leaf's text and attach the guess to it.
                            validated_guess['releaseGroup'] = validated_guess[
                                'releaseGroup'] + leaves[0].value
                            leaves[0].guess = validated_guess
                        else:
                            break
                    else:
                        break

            # Fallback: an explicit node that is first in its group is taken
            # as the release group, minus its first and last characters, with
            # a low confidence of 0.4.
            if not validated_guess and node.is_explicit(
            ) and node.node_last_idx == 0:  # first node from group
                validated_guess = build_guess(
                    node,
                    'releaseGroup',
                    value=node.value[1:len(node.value) - 1])
                validated_guess.metadata().confidence = 0.4
                validated_guess.metadata().span = 1, len(node.value)
                node.guess = validated_guess

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(
                validated_guess['releaseGroup'])

        return validated_guess

    def process(self, mtree, options=None):
        """Run ``guess_release_group`` over every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_release_group, None, self.log,
                    options).process_nodes(mtree.unidentified_leaves())
Esempio n. 20
0
class GuessEpisodesRexps(Transformer):
    def __init__(self):
        Transformer.__init__(self, 20)

        range_separators = ['-', 'to', 'a']
        discrete_separators = ['&', 'and', 'et']
        of_separators = ['of', 'sur', '/', '\\']

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        discrete_sep = sep
        for range_separator in range_separators:
            discrete_sep = discrete_sep.replace(range_separator, '')
        discrete_separators.append(discrete_sep)
        all_separators = list(range_separators)
        all_separators.extend(discrete_separators)

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        range_separators_re = re.compile(build_or_pattern(range_separators),
                                         re.IGNORECASE)
        discrete_separators_re = re.compile(
            build_or_pattern(discrete_separators), re.IGNORECASE)
        all_separators_re = re.compile(build_or_pattern(all_separators),
                                       re.IGNORECASE)
        of_separators_re = re.compile(
            build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words_re = re.compile(build_or_pattern(season_words),
                                     re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words),
                                      re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers),
                                       re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers),
                                        re.IGNORECASE)

        def list_parser(value,
                        propertyListName,
                        discrete_separators_re=discrete_separators_re,
                        range_separators_re=range_separators_re,
                        allow_discrete=False,
                        fill_gaps=False):
            discrete_elements = filter(lambda x: x != '',
                                       discrete_separators_re.split(value))
            discrete_elements = [x.strip() for x in discrete_elements]

            proper_discrete_elements = []
            i = 0
            while i < len(discrete_elements):
                if i < len(discrete_elements) - 2 and range_separators_re.match(
                        discrete_elements[i + 1]):
                    proper_discrete_elements.append(discrete_elements[i] +
                                                    discrete_elements[i + 1] +
                                                    discrete_elements[i + 2])
                    i += 3
                else:
                    match = range_separators_re.search(discrete_elements[i])
                    if match and match.start() == 0:
                        proper_discrete_elements[i -
                                                 1] = proper_discrete_elements[
                                                     i -
                                                     1] + discrete_elements[i]
                    elif match and match.end() == len(discrete_elements[i]):
                        proper_discrete_elements.append(discrete_elements[i] +
                                                        discrete_elements[i +
                                                                          1])
                    else:
                        proper_discrete_elements.append(discrete_elements[i])
                    i += 1

            discrete_elements = proper_discrete_elements

            ret = []

            for discrete_element in discrete_elements:
                range_values = filter(
                    lambda x: x != '',
                    range_separators_re.split(discrete_element))
                range_values = [x.strip() for x in range_values]
                if len(range_values) > 1:
                    for x in range(0, len(range_values) - 1):
                        start_range_ep = parse_numeral(range_values[x])
                        end_range_ep = parse_numeral(range_values[x + 1])
                        for range_ep in range(start_range_ep,
                                              end_range_ep + 1):
                            if range_ep not in ret:
                                ret.append(range_ep)
                else:
                    discrete_value = parse_numeral(discrete_element)
                    if discrete_value not in ret:
                        ret.append(discrete_value)

            if len(ret) > 1:
                if not allow_discrete:
                    valid_ret = []
                    # replace discrete elements by ranges
                    valid_ret.append(ret[0])
                    for i in range(0, len(ret) - 1):
                        previous = valid_ret[len(valid_ret) - 1]
                        if ret[i + 1] < previous:
                            pass
                        else:
                            valid_ret.append(ret[i + 1])
                    ret = valid_ret
                if fill_gaps:
                    ret = list(range(min(ret), max(ret) + 1))
                if len(ret) > 1:
                    return {None: ret[0], propertyListName: ret}
            if len(ret) > 0:
                return ret[0]
            return None

        def episode_parser_x(value):
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=re.compile(
                                   'x', re.IGNORECASE))

        def episode_parser_e(value):
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=re.compile(
                                   'e', re.IGNORECASE),
                               fill_gaps=True)

        def episode_parser(value):
            return list_parser(value, 'episodeList')

        def season_parser(value):
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            def validate(self, prop, string, node, match, entry_start,
                         entry_end):
                return len(match.group(2)) < 3  #limit

        self.container.register_property(None,
                                         r'(' + season_words_re.pattern + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + season_words_re.pattern + '?)',
                                         confidence=1.0,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            season_words_re.pattern + '?)' + sep,
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=ChainedValidator(
                DefaultValidator(),
                FormatterValidator(
                    'season', lambda x: len(x) > 1
                    if hasattr(x, '__len__') else False)))

        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' +
            digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral +
            ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_e,
                'season': season_parser
            },
            validator=NoValidator())
        #self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            sep + r'((?P<season>' + digital_numeral + ')' + sep + '' +
            '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep +
            '[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'((?P<season>' + digital_numeral + ')' +
            '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' +
            digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=NoValidator())

        self.container.register_property(None,
                                         r'((?P<episodeNumber>' +
                                         digital_numeral + ')' + sep +
                                         '?v(?P<version>\d+))',
                                         confidence=0.6,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep + '?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep +
            '?v(?P<version>\d+))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            episode_words_re.pattern + '?)',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + ')' + sep + '?v(?P<version>\d+))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + ')' + sep + '?v(?P<version>\d+))',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        self.container.register_property('episodeNumber',
                                         r'^ ?(\d{2})' + sep,
                                         confidence=0.4,
                                         formatter=parse_numeral)
        self.container.register_property('episodeNumber',
                                         r'^ ?(\d{2})' + sep,
                                         confidence=0.4,
                                         formatter=parse_numeral)
        self.container.register_property('episodeNumber',
                                         r'^ ?0(\d{1,2})' + sep,
                                         confidence=0.4,
                                         formatter=parse_numeral)
        self.container.register_property('episodeNumber',
                                         sep + r'(\d{2}) ?$',
                                         confidence=0.4,
                                         formatter=parse_numeral)
        self.container.register_property('episodeNumber',
                                         sep + r'0(\d{1,2}) ?$',
                                         confidence=0.4,
                                         formatter=parse_numeral)

        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral +
            ')(?:' + sep + '?(?:episodes?|eps?))?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral +
            ')' + sep + '?' + of_separators_re.pattern + sep +
            '?(?P<episodeCount>' + numeral + '))',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(None,
                                         r'((?:seasons?|saisons?|s)' + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + of_separators_re.pattern + sep +
                                         '?(?P<seasonCount>' + numeral + '))',
                                         confidence=0.7,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?P<season>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral +
            ')' + sep + '?(?:seasons?|saisons?|s))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_canonical_properties('other',
                                                     'FiNAL',
                                                     'Complete',
                                                     validator=WeakValidator())

        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral +
            ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'other': lambda x: 'Complete',
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))

    def register_options(self, opts, naming_opts, output_opts,
                         information_opts, webservice_opts, other_options):
        """Add this transformer's command-line switch to the naming options.

        Only ``naming_opts`` is used: it receives the boolean
        ``-E`` / ``--episode-prefer-number`` flag, stored under
        ``episode_prefer_number`` and defaulting to ``False``.  The other
        option containers are accepted but left untouched.
        """
        help_text = (
            'Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
            'it will be guessed as season 2, episodeNumber 13')
        naming_opts.add_option('-E', '--episode-prefer-number',
                               dest='episode_prefer_number',
                               action='store_true',
                               default=False,
                               help=help_text)

    def supported_properties(self):
        """Return a new list naming every property this transformer can emit."""
        single_values = ['episodeNumber', 'season']
        list_values = ['episodeList', 'seasonList']
        counts = ['episodeCount', 'seasonCount']
        extras = ['version', 'other']
        return single_values + list_values + counts + extras

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Look up the registered patterns in ``string`` and return them as a guess.

        ``string``, ``node`` and ``options`` are forwarded to
        ``self.container.find_properties``; whatever it finds is handed,
        together with ``string``, to ``self.container.as_guess`` and that
        result is returned unchanged.
        """
        container = self.container
        matches = container.find_properties(string, node, options)
        return container.as_guess(matches, string)

    def should_process(self, mtree, options=None):
        """Tell whether ``mtree`` has a guessed ``type`` beginning with 'episode'.

        A missing ``type`` entry is treated as the empty string, which yields
        ``False``.
        """
        guessed_type = mtree.guess.get('type', '')
        return guessed_type.startswith('episode')

    def process(self, mtree, options=None):
        """Run ``guess_episodes_rexps`` over every unidentified leaf of ``mtree``.

        The work is delegated to a ``GuessFinder`` built with this
        transformer's guess function, ``None`` as its second argument, the
        transformer's logger and ``options``.
        """
        finder = GuessFinder(self.guess_episodes_rexps, None, self.log,
                             options)
        finder.process_nodes(mtree.unidentified_leaves())
Esempio n. 21
0
class GuessReleaseGroup(Transformer):
    """Transformer guessing the ``releaseGroup`` property of a release name.

    A candidate is only accepted when it directly follows (with nothing but
    separator characters in between) a leaf already identified as one of
    ``previous_safe_properties``.
    """

    def __init__(self):
        # NOTE(review): -190 is presumably this transformer's priority --
        # confirm against Transformer.__init__.
        Transformer.__init__(self, -190)
        self.container = PropertiesContainer(canonical_from_pattern=False)
        # Raw string so that ``\w`` reaches the regex engine untouched instead
        # of relying on Python keeping an invalid string escape as-is.
        self._allowed_groupname_pattern = r'[\w@#€£$&]'
        # Predicates applied to a lower-cased name (or name part); any truthy
        # result rejects that name.
        self._forbidden_groupname_lambda = [
            lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
            lambda elt: self._is_number(elt),
        ]
        # If the previous property is in this list, the match is considered
        # safe and the group name can contain a separator.
        self.previous_safe_properties = [
            'videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile',
            'videoProfile', 'audioChannels'
        ]

        self.container.sep_replace_char = '-'
        self.container.canonical_from_pattern = False
        self.container.enhance = True
        # Two shapes are registered: a single run of allowed characters, and
        # two such runs joined by one '-'.
        self.container.register_property('releaseGroup',
                                         self._allowed_groupname_pattern + '+')
        self.container.register_property(
            'releaseGroup', self._allowed_groupname_pattern + '+-' +
            self._allowed_groupname_pattern + '+')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def _is_number(self, s):
        """Return True when ``int(s)`` succeeds, False on ``ValueError``."""
        try:
            int(s)
            return True
        except ValueError:
            return False

    def _is_forbidden(self, name):
        """Return True when any forbidden-name predicate matches ``name``.

        The predicates receive the lower-cased name.
        """
        lowered = name.lower()
        return any(check(lowered)
                   for check in self._forbidden_groupname_lambda)

    def validate_group_name(self, guess):
        """Check, and possibly trim, the ``releaseGroup`` value of ``guess``.

        Names shorter than two characters are rejected.  A name containing
        '-' is cut at its first forbidden part and the shortened name is
        written back into ``guess``; if nothing remains the guess is rejected.
        The (possibly shortened) name as a whole must not be forbidden either.

        Returns True when the guess is acceptable, False otherwise.
        """
        val = guess['releaseGroup']
        if len(val) < 2:
            return False

        if '-' in val:
            checked_val = ""
            for elt in val.split('-'):
                if self._is_forbidden(elt):
                    # Everything from the first forbidden part on is dropped.
                    break
                if checked_val:
                    checked_val += '-'
                checked_val += elt
            val = checked_val
            if not val:
                return False
            guess['releaseGroup'] = val

        return not self._is_forbidden(val)

    def is_leaf_previous(self, leaf, node):
        """Tell whether ``leaf`` ends before ``node`` with only separators between.

        Returns False when ``leaf`` ends after the start of ``node``; otherwise
        True exactly when every character of the root value between the two
        spans belongs to ``sep`` (an empty gap counts as True).
        """
        gap_start, gap_end = leaf.span[1], node.span[0]
        if gap_start > gap_end:
            return False
        return all(leaf.root.value[idx] in sep
                   for idx in range(gap_start, gap_end))

    def guess_release_group(self, string, node=None, options=None):
        """Guess the release group found in ``string`` at tree position ``node``.

        Returns the validated guess (with brackets stripped from its
        ``releaseGroup`` value), or None when nothing could be validated.
        Validation relies on the position of ``node`` in the match tree, so
        without a node the result is always None.
        """
        found = self.container.find_properties(string, node, 'releaseGroup')
        guess = self.container.as_guess(found,
                                        string,
                                        self.validate_group_name,
                                        sep_replacement='-')
        validated_guess = None
        # ``node`` defaults to None, but every check below needs the tree
        # around it; guard so a missing node yields None instead of raising
        # AttributeError.
        if guess and node is not None:
            explicit_group_node = node.group_node()
            if explicit_group_node:
                for leaf in explicit_group_node.leaves_containing(
                        self.previous_safe_properties):
                    if self.is_leaf_previous(leaf, node):
                        # A '-' right after the safe property gives full
                        # confidence; any other character gives 0.7.
                        if leaf.root.value[leaf.span[1]] == '-':
                            guess.metadata().confidence = 1
                        else:
                            guess.metadata().confidence = 0.7
                        validated_guess = guess

            if not validated_guess:
                # If previous group last leaf is identified as a safe property,
                # consider the raw value as a releaseGroup
                previous_group_node = node.previous_group_node()
                if previous_group_node:
                    for leaf in previous_group_node.leaves_containing(
                            self.previous_safe_properties):
                        if self.is_leaf_previous(leaf, node):
                            guess = Guess({'releaseGroup': node.value},
                                          confidence=1,
                                          input=node.value,
                                          span=(0, len(node.value)))
                            if self.validate_group_name(guess):
                                node.guess = guess
                                validated_guess = guess

            if validated_guess:
                # If following group nodes have only one unidentified leaf, it
                # belongs to the release group
                next_group_node = node.next_group_node()
                while next_group_node:
                    leaves = list(next_group_node.leaves())
                    if len(leaves) != 1 or leaves[0].guess:
                        break
                    validated_guess['releaseGroup'] = validated_guess[
                        'releaseGroup'] + leaves[0].value
                    leaves[0].guess = validated_guess
                    next_group_node = next_group_node.next_group_node()

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(
                validated_guess['releaseGroup'])

        return validated_guess

    def process(self, mtree, options=None):
        """Run ``guess_release_group`` over every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_release_group, None, self.log,
                    options).process_nodes(mtree.unidentified_leaves())
Esempio n. 22
0
class GuessProperties(Transformer):
    """Transformer guessing release properties from a file name.

    The constructor registers the patterns for ``container``, ``format``,
    ``screenSize``, ``videoCodec``, ``videoProfile``, ``videoApi``,
    ``audioCodec``, ``audioProfile``, ``audioChannels``, ``episodeFormat``,
    ``crc32``, ``part`` and ``other`` in ``self.container``, and the quality
    ratings of their canonical values in ``self.qualities``.
    """

    def __init__(self):
        # NOTE(review): 35 is presumably this transformer's priority --
        # confirm against Transformer.__init__.
        Transformer.__init__(self, 35)

        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()

        def register_property(propname, props, **kwargs):
            """Register every entry of ``props`` under ``propname``.

            ``props`` is a dict of ``{canonical_form: patterns}`` where
            ``patterns`` is either a list of patterns or a
            ``(patterns, extra_kwargs)`` tuple.  ``kwargs`` apply to every
            entry; per-entry ``extra_kwargs`` override them.
            """
            for canonical_form, patterns in props.items():
                current_kwargs = dict(kwargs)
                if isinstance(patterns, tuple):
                    patterns, pattern_kwargs = patterns
                    current_kwargs.update(pattern_kwargs)
                current_kwargs['canonical_form'] = canonical_form
                self.container.register_property(propname, *patterns,
                                                 **current_kwargs)

        def register_quality(propname, quality_dict):
            """Register a dict of ``{canonical_form: quality}`` for ``propname``."""
            for canonical_form, quality in quality_dict.items():
                self.qualities.register_quality(propname, canonical_form, quality)

        register_property('container', {'mp4': ['MP4']})

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
                                     'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
                                     #'Telesync': ['TELESYNC', 'PDVD'],
                                     'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
                                     'Workprint': ['WORKPRINT', 'WP'],
                                     'Telecine': ['TELECINE', 'TC'],
                                     'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                                     'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                     'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                     'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'],
                                     'VOD': ['VOD', 'VOD-Rip'],
                                     'WEBRip': ['WEB-Rip'],
                                     'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
                                     'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'],
                                     'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                     })

        register_quality('format', {'VHS': -100,
                                    'Cam': -90,
                                    'Telesync': -80,
                                    'Workprint': -70,
                                    'Telecine': -60,
                                    'PPV': -50,
                                    'TV': -30,
                                    'DVB': -20,
                                    'DVD': 0,
                                    'HDTV': 20,
                                    'VOD': 40,
                                    'WEBRip': 50,
                                    'WEB-DL': 60,
                                    'HD-DVD': 80,
                                    'BluRay': 100
                                    })

        # Optional "<width><delimiter>" prefix in front of the height, where
        # the delimiter is one of: backslash, slash, 'x' or '*'.
        # This must be a raw string: written as a normal literal, '\\|\/'
        # collapsed to the regex '\|\/' and only matched the two literal
        # characters '|/' instead of "backslash OR slash".
        width_prefix = r'(?:\d{3,}(?:\\|\/|x|\*))?'

        register_property('screenSize', {'360p': [width_prefix + r'360(?:i|p?x?)'],
                                         '368p': [width_prefix + r'368(?:i|p?x?)'],
                                         '480p': [width_prefix + r'480(?:i|p?x?)'],
                                         #'480p': (['hr'], {'confidence': 0.2}), # duplicate dict key
                                         '576p': [width_prefix + r'576(?:i|p?x?)'],
                                         '720p': [width_prefix + r'720(?:i|p?x?)'],
                                         '900p': [width_prefix + r'900(?:i|p?x?)'],
                                         '1080i': [width_prefix + r'1080i'],
                                         '1080p': [width_prefix + r'1080p?x?'],
                                         '4K': [width_prefix + r'2160(?:i|p?x?)']
                                         },
                          validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))

        class ResolutionValidator(object):
            """Validator for free-form ``<width>x<height>`` matches.

            It currently accepts every match.  A stricter rule (the match must
            be surrounded by separators, or by another entry) was written for
            it at some point but is not active.
            """
            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                """Accept the match unconditionally; all arguments are ignored."""
                return True

        _digits_re = re.compile(r'\d+')

        def resolution_formatter(value):
            """Join every run of digits found in ``value`` with 'x'."""
            digits = _digits_re.findall(value)
            return 'x'.join(digits)

        self.container.register_property('screenSize', r'\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))

        register_quality('screenSize', {'360p': -300,
                                        '368p': -200,
                                        '480p': -100,
                                        '576p': 0,
                                        '720p': 100,
                                        '900p': 130,
                                        '1080i': 180,
                                        '1080p': 200,
                                        '4K': 400
                                        })

        _videoCodecProperty = {'Real': [r'Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
                               'Mpeg2': ['Mpeg2'],
                               'DivX': ['DVDivX', 'DivX'],
                               'XviD': ['XviD'],
                               'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
                               'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
                               }

        register_property('videoCodec', _videoCodecProperty)

        register_quality('videoCodec', {'Real': -50,
                                        'Mpeg2': -30,
                                        'DivX': -10,
                                        'XviD': 0,
                                        'h264': 100,
                                        'h265': 150
                                        })

        def has_video_codec(node):
            """True when ``node`` already carries a ``videoCodec`` guess."""
            return 'videoCodec' in node.guess

        def audio_codec_is(codec):
            """Build a predicate: the node's guessed ``audioCodec`` equals ``codec``."""
            return lambda node: node.guess.get('audioCodec') == codec

        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        # Each registration gets its own LeavesValidator instance.
        self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[has_video_codec]))
        self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[has_video_codec]))
        self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[has_video_codec]))
        self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[has_video_codec]))
        self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
        self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
        self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[has_video_codec]))
        self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[has_video_codec]))

        register_quality('videoProfile', {'BP': -20,
                                          'XP': -10,
                                          'MP': 0,
                                          'HP': 10,
                                          '10bit': 15,
                                          'Hi422P': 25,
                                          'Hi444PP': 35
                                          })

        # has nothing to do here (or on filenames for that matter), but some
        # releases use it and it helps to identify release groups, so we adapt
        register_property('videoApi', {'DXVA': ['DXVA']})

        register_property('audioCodec', {'MP3': ['MP3', 'LAME', r'LAME(?:\d)+-(?:\d)+'],
                                         'DolbyDigital': ['DD'],
                                         'AAC': ['AAC'],
                                         'AC3': ['AC3'],
                                         'Flac': ['FLAC'],
                                         'DTS': (['DTS'], {'validator': LeftValidator()}),
                                         'TrueHD': ['True-HD']
                                         })

        register_quality('audioCodec', {'MP3': 10,
                                        'DolbyDigital': 30,
                                        'AAC': 35,
                                        'AC3': 40,
                                        'Flac': 45,
                                        'DTS': 60,
                                        'TrueHD': 70
                                        })

        self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[audio_codec_is('DTS')]))
        self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[audio_codec_is('DTS')]))
        self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[audio_codec_is('AAC')]))
        self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[audio_codec_is('AAC')]))
        self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[audio_codec_is('AC3')]))

        register_quality('audioProfile', {'HD': 20,
                                          'HDMA': 50,
                                          'LC': 0,
                                          'HQ': 0,
                                          'HE': 20
                                          })

        register_property('audioChannels', {'7.1': [r'7[\W_]1', '7ch', '8ch'],
                                            '5.1': [r'5[\W_]1', '5ch', '6ch'],
                                            '2.0': [r'2[\W_]0', '2ch', 'stereo'],
                                            '1.0': [r'1[\W_]0', '1ch', 'mono']
                                            })

        register_quality('audioChannels', {'7.1': 200,
                                           '5.1': 100,
                                           '2.0': 0,
                                           '1.0': -100
                                           })

        self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

        self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)

        weak_episode_words = ['pt', 'part']
        self.container.register_property(None, '(' + build_or_pattern(weak_episode_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)

        register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                    'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
                                    'DualAudio': ['Dual-Audio'],
                                    'WideScreen': ['ws', 'wide-screen'],
                                    'Netflix': ['Netflix', 'NF']
                                    })

        self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=NeighborValidator())
        self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
        self.container.register_property('other', 'Fansub', canonical_form='Fansub')
        self.container.register_property('other', 'Fastsub', canonical_form='Fastsub')
        self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
        self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
        self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')

        self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
                                                     'DDC',
                                                     'HR', 'PAL', 'SECAM', 'NTSC')
        self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())

        # Every registered format pattern followed by "Scr"/"Screener" also
        # yields other=Screener.
        for prop in self.container.get_properties('format'):
            self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')

        # Known file extensions are registered as low-confidence containers.
        for exts in (subtitle_exts, info_exts, video_exts):
            for ext in exts:
                self.container.register_property('container', ext, confidence=0.3)

    def guess_properties(self, string, node=None, options=None):
        """Find the registered properties in ``string`` and return them as a guess."""
        found = self.container.find_properties(string, node, options)
        return self.container.as_guess(found, string)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Guess properties on the unidentified leaves, then count 'Proper' tags.

        When at least one leaf has 'Proper' in its ``other`` info, the number
        of such leaves is recorded on ``mtree`` as ``properCount``.
        """
        # NOTE(review): the 1.0 passed to GuessFinder is presumably a
        # confidence value -- confirm against GuessFinder.
        GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
        proper_count = sum(
            1 for other_leaf in mtree.leaves_containing('other')
            if 'other' in other_leaf.info and 'Proper' in other_leaf.info['other'])
        if proper_count:
            found_property(mtree, 'properCount', proper_count)

    def rate_quality(self, guess, *props):
        """Delegate to ``self.qualities.rate_quality`` and return its result."""
        return self.qualities.rate_quality(guess, *props)
Esempio n. 23
0
class GuessWeakEpisodesRexps(Transformer):
    """Find episode numbers with loose, low-confidence regular expressions.

    The patterns are registered once in ``__init__``. ``process`` applies them
    to the unidentified leaves of a match tree, and ``should_process`` limits
    that to trees whose guessed type starts with ``'episode'``.
    """

    def __init__(self):
        """Build the properties container and register every weak episode pattern."""
        Transformer.__init__(self, 15)

        of_words = ['of', 'sur', '/', '\\']
        of_pattern = re.compile(build_or_pattern(of_words, escape=True), re.IGNORECASE).pattern

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False, remove_duplicates=True)
        register = self.container.register_property

        episode_words = ['episodes?']

        def episode_list_parser(value):
            """Parse ``value`` with ``list_parser`` using the 'episodeList' key."""
            return list_parser(value, 'episodeList')

        def season_episode_parser(episode_number):
            """Turn a bare number into an episode number or a season/episode pair.

            Returns ``None`` for values accepted by ``valid_year`` and for
            numbers whose leading digits would give a season above 50.
            """
            number = parse_numeral(episode_number)
            if valid_year(number):
                return None
            if number > 100:
                season, episode = divmod(number, 100)
                # episodes which have a season > 50 are most likely errors
                # (Simpson is at 25!)
                if season > 50:
                    return None
                return {'season': season, 'episodeNumber': episode}
            return number

        def number_preferred(options):
            """Disable a pattern when the 'episode_prefer_number' option is set."""
            return options.get('episode_prefer_number') if options else False

        def number_not_preferred(options):
            """Disable a pattern unless the 'episode_prefer_number' option is set."""
            return not options.get('episode_prefer_number') if options else True

        # Bare 2-4 digit numbers, interpreted by season_episode_parser.
        register(['episodeNumber', 'season'], '[0-9]{2,4}',
                 confidence=0.6, formatter=season_episode_parser, disabler=number_preferred)
        register(['episodeNumber', 'season'], '[0-9]{4}',
                 confidence=0.6, formatter=season_episode_parser)

        # "episode <n>" followed by a non-digit.
        register(None,
                 ''.join(['(', build_or_pattern(episode_words), sep, '?(?P<episodeNumber>', numeral, '))[^0-9]']),
                 confidence=0.4, formatter=parse_numeral)

        # "<n> of <count>" using any of the "of" separators.
        register(None,
                 ''.join(['(?P<episodeNumber>', numeral, ')', sep, '?', of_pattern, sep,
                          '?(?P<episodeCount>', numeral, ')']),
                 confidence=0.6, formatter=parse_numeral)

        # One or more 2-3 digit numbers joined by separators; shared by the
        # three position-specific patterns below.
        number_list = r'(\d{2,3}(?:' + sep + '?' + all_separators_re.pattern + sep + r'?\d{2,3})*)'

        register('episodeNumber', '[^0-9]' + number_list,
                 confidence=0.4, formatter=episode_list_parser, disabler=number_not_preferred)
        register('episodeNumber', '^' + sep + '?' + number_list + sep,
                 confidence=0.4, formatter=episode_list_parser, disabler=number_not_preferred)
        register('episodeNumber', sep + number_list + sep + '?$',
                 confidence=0.4, formatter=episode_list_parser, disabler=number_not_preferred)

    def supported_properties(self):
        """Return whatever the container reports as its supported properties."""
        return self.container.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
        """Guess episode information in ``string`` and reconcile it with sibling guesses.

        Returns ``None`` when the node's group already holds a guess carrying
        both 'season' and 'episodeNumber'. When the new guess has only an
        'episodeNumber', it is merged with sibling episode-only guesses into a
        shared 'episodeList'; the guess holding the larger episode number gets
        its 'episodeNumber' confidence set to 0 (the new guess on a tie).
        """
        found = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(found, string)

        if not (node and guess) or 'episodeNumber' not in guess:
            return guess

        def sibling_guesses(keep):
            return [candidate for candidate in node.group_node().guesses if keep(candidate)]

        # A sibling with both season and episodeNumber always wins.
        if sibling_guesses(lambda g: 'season' in g and 'episodeNumber' in g):
            return None
        if 'season' in guess:
            return guess

        # If we have other nodes containing episodeNumber, create an episodeList.
        for sibling in sibling_guesses(lambda g: 'season' not in g and 'episodeNumber' in g):
            if 'episodeList' not in sibling:
                sibling['episodeList'] = [sibling['episodeNumber']]
            sibling['episodeList'].append(guess['episodeNumber'])
            sibling['episodeList'].sort()
            if sibling['episodeNumber'] > guess['episodeNumber']:
                demoted = sibling
            else:
                demoted = guess
            demoted.set_confidence('episodeNumber', 0)
            guess['episodeList'] = list(sibling['episodeList'])

        return guess

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed 'type' starts with 'episode'."""
        guessed_type = mtree.guess.get('type', '')
        return guessed_type.startswith('episode')

    def process(self, mtree, options=None):
        """Apply ``guess_weak_episodes_rexps`` to the tree's unidentified leaves."""
        finder = GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
Esempio n. 24
0
class GuessProperties(Transformer):
    """Detect release properties (format, screen size, codecs, ...) in unidentified leaves.

    ``__init__`` registers the recognised patterns in ``self.container`` and
    the relative quality scores in ``self.qualities``. ``process`` runs the
    resulting matcher over the unidentified leaves of a match tree.
    """

    def __init__(self):
        """Register every property pattern and quality rating."""
        Transformer.__init__(self, 35)

        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()

        def register_property(propname, props):
            """Register each entry of ``props`` under ``propname``.

            ``props`` maps a canonical form to either a list of patterns or a
            ``(patterns, kwargs)`` tuple; the kwargs are forwarded to the
            container together with ``canonical_form``.
            """
            for canonical_form, patterns in props.items():
                if isinstance(patterns, tuple):
                    patterns2, kwargs = patterns
                    kwargs = dict(kwargs)
                    kwargs['canonical_form'] = canonical_form
                    self.container.register_property(propname, *patterns2, **kwargs)

                else:
                    self.container.register_property(propname, *patterns, canonical_form=canonical_form)

        def register_quality(propname, quality_dict):
            """Register each ``{canonical_form: quality}`` pair of ``quality_dict``."""
            for canonical_form, quality in quality_dict.items():
                self.qualities.register_quality(propname, canonical_form, quality)

        register_property('container', {'mp4': ['MP4']})

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property('format', {'VHS': ['VHS'],
                                     'Cam': ['CAM', 'CAMRip'],
                                     'Telesync': ['TELESYNC', 'PDVD'],
                                     'Workprint': ['WORKPRINT', 'WP'],
                                     'Telecine': ['TELECINE', 'TC'],
                                     'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                                     'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                     'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                     'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS'],
                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
                                     'VOD': ['VOD', 'VOD-Rip'],
                                     'WEBRip': ['WEB-Rip'],
                                     'WEB-DL': ['WEB-DL'],
                                     'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
                                     'BluRay': ['Blu-ray', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                     })
        # The low-confidence 'TS' alias is registered in its own call: as a
        # second 'Telesync' key in the dict above it replaced the
        # 'TELESYNC'/'PDVD' entry, because a dict literal keeps only the last
        # value given for a repeated key.
        register_property('format', {'Telesync': (['TS'], {'confidence': 0.2})})

        register_quality('format', {'VHS': -100,
                                    'Cam': -90,
                                    'Telesync': -80,
                                    'Workprint': -70,
                                    'Telecine': -60,
                                    'PPV': -50,
                                    'TV': -30,
                                    'DVB': -20,
                                    'DVD': 0,
                                    'HDTV': 20,
                                    'VOD': 40,
                                    'WEBRip': 50,
                                    'WEB-DL': 60,
                                    'HD-DVD': 80,
                                    'BluRay': 100
                                    })

        # Optional "<width><separator>" prefix shared by all screen-size patterns.
        # NOTE(review): the first alternative, \|\/, matches the two literal
        # characters "|/". The original literal was written '\\|\/' without an
        # r-prefix, so "backslash or slash" may have been intended - confirm
        # before changing; the runtime pattern is kept as it was.
        size_prefix = r'(?:\d{3,}(?:\|\/|x|\*))?'

        register_property('screenSize', {'360p': [size_prefix + '360(?:i|p?x?)'],
                                         '368p': [size_prefix + '368(?:i|p?x?)'],
                                         '480p': [size_prefix + '480(?:i|p?x?)'],
                                         '576p': [size_prefix + '576(?:i|p?x?)'],
                                         '720p': [size_prefix + '720(?:i|p?x?)'],
                                         '900p': [size_prefix + '900(?:i|p?x?)'],
                                         '1080i': [size_prefix + '1080i'],
                                         '1080p': [size_prefix + '1080(?:p?x?)'],
                                         '4K': [size_prefix + '2160(?:i|p?x?)']
                                         })
        # Same duplicate-key problem as 'Telesync': the low-confidence 'hr'
        # alias used to overwrite the numeric 480 pattern, so it is registered
        # separately.
        register_property('screenSize', {'480p': (['hr'], {'confidence': 0.2})})

        register_quality('screenSize', {'360p': -300,
                                        '368p': -200,
                                        '480p': -100,
                                        '576p': 0,
                                        '720p': 100,
                                        '900p': 130,
                                        '1080i': 180,
                                        '1080p': 200,
                                        '4K': 400
                                        })

        _videoCodecProperty = {'Real': [r'Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
                               'Mpeg2': ['Mpeg2'],
                               'DivX': ['DVDivX', 'DivX'],
                               'XviD': ['XviD'],
                               'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
                               'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
                               }

        register_property('videoCodec', _videoCodecProperty)

        register_quality('videoCodec', {'Real': -50,
                                        'Mpeg2': -30,
                                        'DivX': -10,
                                        'XviD': 0,
                                        'h264': 100,
                                        'h265': 150
                                        })

        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        # Every video profile is registered with a LeavesValidator whose lambda
        # checks for 'videoCodec' in a node's guess.
        self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))

        register_quality('videoProfile', {'BP': -20,
                                          'XP': -10,
                                          'MP': 0,
                                          'HP': 10,
                                          '10bit': 15,
                                          'Hi422P': 25,
                                          'Hi444PP': 35
                                          })

        # has nothing to do here (or on filenames for that matter), but some
        # releases use it and it helps to identify release groups, so we adapt
        register_property('videoApi', {'DXVA': ['DXVA']})

        register_property('audioCodec', {'MP3': ['MP3'],
                                         'DolbyDigital': ['DD'],
                                         'AAC': ['AAC'],
                                         'AC3': ['AC3'],
                                         'Flac': ['FLAC'],
                                         'DTS': ['DTS'],
                                         'TrueHD': ['True-HD']
                                         })

        register_quality('audioCodec', {'MP3': 10,
                                        'DolbyDigital': 30,
                                        'AAC': 35,
                                        'AC3': 40,
                                        'Flac': 45,
                                        'DTS': 60,
                                        'TrueHD': 70
                                        })

        # Each audio profile's validator lambda compares a node's guessed
        # 'audioCodec' with one specific codec.
        self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
        self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
        self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
        self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
        self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AC3']))

        register_quality('audioProfile', {'HD': 20,
                                          'HDMA': 50,
                                          'LC': 0,
                                          'HQ': 0,
                                          'HE': 20
                                          })

        register_property('audioChannels', {'7.1': [r'7[\W_]1', '7ch'],
                                            '5.1': [r'5[\W_]1', '5ch'],
                                            '2.0': [r'2[\W_]0', '2ch', 'stereo'],
                                            '1.0': [r'1[\W_]0', '1ch', 'mono']
                                            })

        register_quality('audioChannels', {'7.1': 200,
                                           '5.1': 100,
                                           '2.0': 0,
                                           '1.0': -100
                                           })

        self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

        register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                    'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
                                    'DualAudio': ['Dual-Audio'],
                                    'WideScreen': ['ws', 'wide-screen'],
                                    })

        self.container.register_property('other', 'Real', 'Fix', canonical_form="Proper", validator=WeakValidator())
        self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form="Proper")

        self.container.register_canonical_properties('other', 'R5', 'Screener', '3D', 'HD', 'HQ', 'DDC')
        self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())

        # Any registered format pattern followed by "Scr"/"Screener" is a Screener.
        for prop in self.container.get_properties('format'):
            self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')

        # File extensions double as low-confidence container hints.
        for exts in (subtitle_exts, info_exts, video_exts):
            for container in exts:
                self.container.register_property('container', container, confidence=0.3)

    def guess_properties(self, string, node=None, options=None):
        """Return the guess built from the properties found in ``string``.

        ``options`` is accepted but not forwarded to the container.
        """
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)

    def supported_properties(self):
        """Return whatever the container reports as its supported properties."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Apply ``guess_properties`` to the tree's unidentified leaves."""
        GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())

    def rate_quality(self, guess, *props):
        """Delegate the quality rating of ``guess`` for ``props`` to the qualities container."""
        return self.qualities.rate_quality(guess, *props)
Esempio n. 25
0
class GuessProperties(Transformer):
    """Detect release properties (format, screen size, codecs, parts, ...) in unidentified leaves.

    ``__init__`` fills ``self.container`` with the recognised patterns and
    ``self.qualities`` with relative quality scores. ``process`` runs the
    matcher over a match tree and records how many leaves mention 'Proper'.
    """

    def __init__(self):
        """Register every property pattern and quality rating."""
        Transformer.__init__(self, 35)

        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()
        add = self.container.register_property

        def register_property(propname, props, **kwargs):
            """Register each entry of ``props`` under ``propname``.

            ``props`` maps a canonical form to a list of patterns or to a
            ``(patterns, kwargs)`` tuple. Per-entry kwargs override the shared
            ``kwargs``; ``canonical_form`` is always set from the dict key.
            """
            for canonical_form, spec in props.items():
                if isinstance(spec, tuple):
                    patterns, overrides = spec
                else:
                    patterns, overrides = spec, {}
                merged = dict(kwargs)
                merged.update(overrides)
                merged["canonical_form"] = canonical_form
                add(propname, *patterns, **merged)

        def register_quality(propname, quality_dict):
            """Register each ``{canonical_form: quality}`` pair of ``quality_dict``."""
            rate = self.qualities.register_quality
            for canonical_form, quality in quality_dict.items():
                rate(propname, canonical_form, quality)

        # Validator factories: each call builds a fresh validator instance.
        def needs_video_codec():
            return LeavesValidator(lambdas=[lambda node: "videoCodec" in node.guess])

        def needs_audio_codec(codec):
            return LeavesValidator(lambdas=[lambda node: node.guess.get("audioCodec") == codec])

        def full_match_with_neighbor():
            return ChainedValidator(FullMatchValidator(), NeighborValidator())

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        # NOTE: the 'TELESYNC' and 'PDVD' patterns are not registered here;
        # only the 'TS'/'HD-TS' forms map to Telesync.
        format_patterns = {
            "VHS": ["VHS", "VHS-Rip"],
            "Cam": ["CAM", "CAMRip", "HD-CAM"],
            "Telesync": (["TS", "HD-TS"], {"confidence": 0.4}),
            "Workprint": ["WORKPRINT", "WP"],
            "Telecine": ["TELECINE", "TC"],
            "PPV": ["PPV", "PPV-Rip"],  # Pay Per View
            "TV": ["SD-TV", "SD-TV-Rip", "Rip-SD-TV", "TV-Rip", "Rip-TV"],
            "DVB": ["DVB-Rip", "DVB", "PD-TV"],
            "DVD": ["DVD", "DVD-Rip", "VIDEO-TS", "DVD-R", "DVD-9", "DVD-5"],
            "HDTV": ["HD-TV", "TV-RIP-HD", "HD-TV-RIP", "HD-RIP"],
            "VOD": ["VOD", "VOD-Rip"],
            "WEBRip": ["WEB-Rip"],
            "WEB-DL": ["WEB-DL", "WEB-HD", "WEB"],
            "HD-DVD": ["HD-DVD-Rip", "HD-DVD"],
            "BluRay": ["Blu-ray(?:-Rip)?", "B[DR]", "B[DR]-Rip", "BD[59]", "BD25", "BD50"],
        }
        register_property("format", format_patterns)

        format_qualities = {
            "VHS": -100,
            "Cam": -90,
            "Telesync": -80,
            "Workprint": -70,
            "Telecine": -60,
            "PPV": -50,
            "TV": -30,
            "DVB": -20,
            "DVD": 0,
            "HDTV": 20,
            "VOD": 40,
            "WEBRip": 50,
            "WEB-DL": 60,
            "HD-DVD": 80,
            "BluRay": 100,
        }
        register_quality("format", format_qualities)

        # Optional "<width><separator>" prefix shared by the named screen sizes.
        # NOTE: no 'hr' alias is registered for 480p here.
        size_prefix = r"(?:\d{3,}(?:\|\/|x|\*))?"
        screen_sizes = {
            "360p": [size_prefix + "360(?:i|p?x?)"],
            "368p": [size_prefix + "368(?:i|p?x?)"],
            "480p": [size_prefix + "480(?:i|p?x?)"],
            "576p": [size_prefix + "576(?:i|p?x?)"],
            "720p": [size_prefix + "720(?:i|p?x?)"],
            "900p": [size_prefix + "900(?:i|p?x?)"],
            "1080i": [size_prefix + "1080i"],
            "1080p": [size_prefix + "1080p?x?"],
            "4K": [size_prefix + "2160(?:i|p?x?)"],
        }
        register_property(
            "screenSize", screen_sizes, validator=ChainedValidator(DefaultValidator(), OnlyOneValidator())
        )

        _digits_re = re.compile(r"\d+")

        def resolution_formatter(value):
            """Join every run of digits in ``value`` with 'x'."""
            return "x".join(_digits_re.findall(value))

        # Free-form "<width>x<height>" resolutions, normalised by the formatter.
        add("screenSize", r"\d{3,4}-?[x\*]-?\d{3,4}", canonical_from_pattern=False, formatter=resolution_formatter)

        screen_size_qualities = {
            "360p": -300,
            "368p": -200,
            "480p": -100,
            "576p": 0,
            "720p": 100,
            "900p": 130,
            "1080i": 180,
            "1080p": 200,
            "4K": 400,
        }
        register_quality("screenSize", screen_size_qualities)

        _videoCodecProperty = {
            "Real": [r"Rv\d{2}"],  # http://en.wikipedia.org/wiki/RealVideo
            "Mpeg2": ["Mpeg2"],
            "DivX": ["DVDivX", "DivX"],
            "XviD": ["XviD"],
            "h264": ["[hx]-264(?:-AVC)?", "MPEG-4(?:-AVC)"],
            "h265": ["[hx]-265(?:-HEVC)?", "HEVC"],
        }
        register_property("videoCodec", _videoCodecProperty)
        register_quality(
            "videoCodec",
            {"Real": -50, "Mpeg2": -30, "DivX": -10, "XviD": 0, "h264": 100, "h265": 150},
        )

        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        add("videoProfile", "BP", validator=needs_video_codec())
        add("videoProfile", "XP", "EP", canonical_form="XP", validator=needs_video_codec())
        add("videoProfile", "MP", validator=needs_video_codec())
        add("videoProfile", "HP", "HiP", canonical_form="HP", validator=needs_video_codec())
        # The bit-depth profiles are registered without a validator argument.
        add("videoProfile", "10.?bit", "Hi10P", canonical_form="10bit")
        add("videoProfile", "8.?bit", canonical_form="8bit")
        add("videoProfile", "Hi422P", validator=needs_video_codec())
        add("videoProfile", "Hi444PP", validator=needs_video_codec())

        register_quality(
            "videoProfile",
            {"BP": -20, "XP": -10, "MP": 0, "HP": 10, "10bit": 15, "Hi422P": 25, "Hi444PP": 35},
        )

        # has nothing to do here (or on filenames for that matter), but some
        # releases use it and it helps to identify release groups, so we adapt
        register_property("videoApi", {"DXVA": ["DXVA"]})

        audio_codecs = {
            "MP3": ["MP3", "LAME", r"LAME(?:\d)+-(?:\d)+"],
            "DolbyDigital": ["DD"],
            "AAC": ["AAC"],
            "AC3": ["AC3"],
            "Flac": ["FLAC"],
            "DTS": (["DTS"], {"validator": LeftValidator()}),
            "TrueHD": ["True-HD"],
        }
        register_property("audioCodec", audio_codecs)
        register_quality(
            "audioCodec",
            {"MP3": 10, "DolbyDigital": 30, "AAC": 35, "AC3": 40, "Flac": 45, "DTS": 60, "TrueHD": 70},
        )

        # Each audio profile's validator checks for one specific guessed audio codec.
        add("audioProfile", "HD", validator=needs_audio_codec("DTS"))
        add("audioProfile", "HD-MA", canonical_form="HDMA", validator=needs_audio_codec("DTS"))
        add("audioProfile", "HE", validator=needs_audio_codec("AAC"))
        add("audioProfile", "LC", validator=needs_audio_codec("AAC"))
        add("audioProfile", "HQ", validator=needs_audio_codec("AC3"))

        register_quality("audioProfile", {"HD": 20, "HDMA": 50, "LC": 0, "HQ": 0, "HE": 20})

        audio_channels = {
            "7.1": [r"7[\W_]1", "7ch", "8ch"],
            "5.1": [r"5[\W_]1", "5ch", "6ch"],
            "2.0": [r"2[\W_]0", "2ch", "stereo"],
            "1.0": [r"1[\W_]0", "1ch", "mono"],
        }
        register_property("audioChannels", audio_channels)
        register_quality("audioChannels", {"7.1": 200, "5.1": 100, "2.0": 0, "1.0": -100})

        add("episodeFormat", r"Minisodes?", canonical_form="Minisode")

        add("crc32", "(?:[a-fA-F]|[0-9]){8}", enhance=False, canonical_from_pattern=False)

        # "pt"/"part" followed by a numeral and then a non-digit.
        part_words = ["pt", "part"]
        part_pattern = "".join(["(", build_or_pattern(part_words), sep, "?(?P<part>", numeral, "))[^0-9]"])
        add(
            None,
            part_pattern,
            enhance=False,
            canonical_from_pattern=False,
            confidence=0.4,
            formatter=parse_numeral,
        )

        other_patterns = {
            "AudioFix": ["Audio-Fix", "Audio-Fixed"],
            "SyncFix": ["Sync-Fix", "Sync-Fixed"],
            "DualAudio": ["Dual-Audio"],
            "WideScreen": ["ws", "wide-screen"],
            "Netflix": ["Netflix", "NF"],
        }
        register_property("other", other_patterns)

        add("other", "Real", "Fix", canonical_form="Proper", validator=full_match_with_neighbor())
        add("other", "Proper", "Repack", "Rerip", canonical_form="Proper")
        add("other", "Fansub", canonical_form="Fansub", validator=full_match_with_neighbor())
        add("other", "Fastsub", canonical_form="Fastsub", validator=full_match_with_neighbor())
        add("other", "(?:Seasons?" + sep + "?)?Complete", canonical_form="Complete")
        add("other", "R5", "RC", canonical_form="R5")
        add("other", "Pre-Air", "Preair", canonical_form="Preair")
        add("other", "CC")  # Close Caption
        add("other", "LD", "MD")  # Line/Mic Dubbed

        add_canonical = self.container.register_canonical_properties
        add_canonical(
            "other", "Screener", "Remux", "3D", "HD", "mHD", "HDLight", "HQ", "DDC", "HR", "PAL", "SECAM", "NTSC"
        )
        add_canonical(
            "other", "Limited", "Complete", "Classic", "Unrated", "LiNE", "Bonus", "Trailer", validator=WeakValidator()
        )

        # Any registered format pattern followed by "Scr"/"Screener" is a Screener.
        for format_prop in self.container.get_properties("format"):
            add("other", format_prop.pattern + "(-?Scr(?:eener)?)", canonical_form="Screener")

        # File extensions double as low-confidence container hints.
        for extensions in (subtitle_exts, info_exts, video_exts):
            for extension in extensions:
                add("container", extension, confidence=0.3)

    def guess_properties(self, string, node=None, options=None):
        """Guess the properties in ``string`` and merge 'part' values within a group.

        When the new guess carries a 'part' and other guesses in the node's
        group do too, all of them share a sorted 'partList'; the guess holding
        the larger part number has its 'part' confidence set to 0 (the new
        guess on a tie).
        """
        found = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(found, string)

        if not (guess and node) or "part" not in guess:
            return guess

        # If two guesses contains both part in same group, create an partList
        for sibling in node.group_node().guesses:
            if "part" not in sibling:
                continue
            if "partList" not in sibling:
                sibling["partList"] = [sibling["part"]]
            sibling["partList"].append(guess["part"])
            sibling["partList"].sort()
            if sibling["part"] > guess["part"]:
                demoted = sibling
            else:
                demoted = guess
            demoted.set_confidence("part", 0)
            guess["partList"] = list(sibling["partList"])

        return guess

    def supported_properties(self):
        """Return the container's supported properties plus 'partList'."""
        names = list(self.container.get_supported_properties())
        names.append("partList")
        return names

    def process(self, mtree, options=None):
        """Run the property finder on unidentified leaves, then record the 'Proper' count.

        Leaves containing 'other' whose info lists 'Proper' are counted; a
        non-zero count is stored on the tree as ``properCount``.
        """
        finder = GuessFinder(self.guess_properties, 1.0, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())

        proper_count = sum(
            1
            for leaf in mtree.leaves_containing("other")
            if "other" in leaf.info and "Proper" in leaf.info["other"]
        )
        if proper_count:
            found_property(mtree, "properCount", proper_count)

    def rate_quality(self, guess, *props):
        """Delegate the quality rating of ``guess`` for ``props`` to the qualities container."""
        return self.qualities.rate_quality(guess, *props)
Esempio n. 26
0
class GuessWeakEpisodesRexps(Transformer):
    """Guess episode numbers (and sometimes seasons) from weak numeric patterns.

    The patterns registered here are bare numbers or number lists with little
    surrounding context, so they are registered with low confidence
    (0.4 - 0.6).  The transformer only runs when the tree's guessed type
    starts with ``episode`` (see ``should_process``).
    """

    def __init__(self):
        Transformer.__init__(self, 15)

        of_separators = ["of", "sur", "/", "\\"]
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False, remove_duplicates=True)

        episode_words = ["episodes?"]

        def episode_list_parser(value):
            """Parse a list of episode numbers into ``episodeNumber`` / ``episodeList``."""
            return list_parser(value, "episodeList")

        def season_episode_parser(episode_number):
            """Split a bare number into season and episode when it is large enough.

            Returns ``None`` for values that look like a year or whose season
            part exceeds 50, the plain number when it is at most 100, and
            otherwise a ``{"season", "episodeNumber"}`` dict (e.g. 213 ->
            season 2, episode 13).
            """
            epnum = parse_numeral(episode_number)
            if valid_year(epnum):
                return None
            if epnum <= 100:
                return epnum
            season, epnum = divmod(epnum, 100)
            # episodes which have a season > 50 are most likely errors
            # (Simpson is at 25!)
            if season > 50:
                return None
            return {"season": season, "episodeNumber": epnum}

        # Disabler callbacks receive the options mapping (possibly None/empty).
        # NOTE(review): presumably a truthy result disables the property -
        # confirm against PropertiesContainer.
        def prefer_number_set(options):
            return options.get("episode_prefer_number") if options else False

        def prefer_number_unset(options):
            return not options.get("episode_prefer_number") if options else True

        # One 2-3 digit number, optionally followed by more of them joined by
        # any list separator.  Raw strings avoid the invalid "\d" escape that
        # the plain literals triggered.
        episode_list = (
            r"(\d{2,3}(?:" + sep + "?" + all_separators_re.pattern + sep + "?" + r"\d{2,3})*)"
        )

        self.container.register_property(
            ["episodeNumber", "season"],
            "[0-9]{2,4}",
            confidence=0.6,
            formatter=season_episode_parser,
            disabler=prefer_number_set,
        )
        self.container.register_property(
            ["episodeNumber", "season"], "[0-9]{4}", confidence=0.6, formatter=season_episode_parser
        )
        self.container.register_property(
            None,
            "(" + build_or_pattern(episode_words) + sep + "?(?P<episodeNumber>" + numeral + "))[^0-9]",
            confidence=0.4,
            formatter=parse_numeral,
        )
        self.container.register_property(
            None,
            r"(?P<episodeNumber>"
            + numeral
            + ")"
            + sep
            + "?"
            + of_separators_re.pattern
            + sep
            + "?(?P<episodeCount>"
            + numeral
            + ")",
            confidence=0.6,
            formatter=parse_numeral,
        )
        # Episode lists: after a non-digit, at the start of the string, and at
        # the end of the string.
        self.container.register_property(
            "episodeNumber",
            "[^0-9]" + episode_list,
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=prefer_number_unset,
        )
        self.container.register_property(
            "episodeNumber",
            r"^" + sep + "?" + episode_list + sep,
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=prefer_number_unset,
        )
        self.container.register_property(
            "episodeNumber",
            sep + episode_list + sep + "?$",
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=prefer_number_unset,
        )

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
        """Build a guess for ``string`` and reconcile it with the node's group.

        * If the new guess has an ``episodeNumber`` and the group already
          holds a guess with both ``season`` and ``episodeNumber``, the new
          guess is dropped (``None`` is returned).
        * If the new guess has only an ``episodeNumber``, it is merged with
          every existing episode-only guess of the group into a shared sorted
          ``episodeList``; the confidence of ``episodeNumber`` is set to 0 on
          the existing guess when its number is greater, otherwise on the new
          guess.

        Without a node, or when nothing was guessed, the container's result
        is returned unchanged.
        """
        properties = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(properties, string)

        if not (node and guess) or "episodeNumber" not in guess:
            return guess

        group_guesses = list(node.group_node().guesses)

        # A guess with both season and episodeNumber is considered safer than
        # anything this transformer can find: keep only that one.
        if any("season" in g and "episodeNumber" in g for g in group_guesses):
            return None
        if "season" in guess:
            return guess

        # If we have other nodes containing episodeNumber, create an episodeList.
        episode_only = [g for g in group_guesses if "season" not in g and "episodeNumber" in g]
        for existing_guess in episode_only:
            if "episodeList" not in existing_guess:
                existing_guess["episodeList"] = [existing_guess["episodeNumber"]]
            existing_guess["episodeList"].append(guess["episodeNumber"])
            existing_guess["episodeList"].sort()
            if existing_guess["episodeNumber"] > guess["episodeNumber"]:
                existing_guess.set_confidence("episodeNumber", 0)
            else:
                guess.set_confidence("episodeNumber", 0)
            guess["episodeList"] = list(existing_guess["episodeList"])

        return guess

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with ``episode``."""
        return mtree.guess.get("type", "").startswith("episode")

    def process(self, mtree, options=None):
        """Run ``guess_weak_episodes_rexps`` over the tree's unidentified leaves."""
        GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options).process_nodes(mtree.unidentified_leaves())
Esempio n. 27
0
class GuessEpisodesRexps(Transformer):
    """Guess season / episode information from explicit patterns.

    Registers regular expressions for forms such as ``S01E02``, ``1x02``,
    ``Season 1``, ``Episode 2``, ``ep 2 v3`` and ``2 of 10``, producing the
    properties listed in ``supported_properties``.  The transformer only runs
    when the tree's guessed type starts with ``episode``.
    """

    def __init__(self):
        Transformer.__init__(self, 20)

        range_separators = ['-', 'to', 'a']
        discrete_separators = ['&', 'and', 'et']
        of_separators = ['of', 'sur', '/', '\\']

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        # The generic separator pattern, minus every range separator, also
        # acts as a discrete separator.
        discrete_sep = sep
        for range_separator in range_separators:
            discrete_sep = discrete_sep.replace(range_separator, '')
        discrete_separators.append(discrete_sep)
        all_separators = list(range_separators)
        all_separators.extend(discrete_separators)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
        discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
        all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

        # Separators used by the "1x02x03" and "e01e02" notations.
        x_separator_re = re.compile('x', re.IGNORECASE)
        e_separator_re = re.compile('e', re.IGNORECASE)

        def list_parser(value, propertyListName, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
            """Parse a textual list of numerals such as ``"1-3 & 5"``.

            ``value`` is split on ``discrete_separators_re``; range
            separators that ended up detached from their operands are glued
            back, then every range is expanded.

            Returns ``None`` when nothing was parsed, the single number when
            only one value remains, and otherwise a dict
            ``{None: first_value, propertyListName: all_values}``.

            With ``allow_discrete`` false, values lower than the last kept
            value are dropped; with ``fill_gaps`` true, the result becomes
            the full range between the minimum and the maximum.
            """
            discrete_elements = [x.strip() for x in discrete_separators_re.split(value) if x != '']

            proper_discrete_elements = []
            i = 0
            while i < len(discrete_elements):
                if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]):
                    # "1", "-", "3": a range whose separator stands alone.
                    proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2])
                    i += 3
                else:
                    match = range_separators_re.search(discrete_elements[i])
                    if match and match.start() == 0:
                        # "-3": attach to the element kept last.  Indexing
                        # with i-1 went out of range once an earlier merge
                        # had consumed three inputs for one output.
                        proper_discrete_elements[-1] = proper_discrete_elements[-1] + discrete_elements[i]
                    elif match and match.end() == len(discrete_elements[i]):
                        # "1-": attach the following element.
                        proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1])
                    else:
                        proper_discrete_elements.append(discrete_elements[i])
                    i += 1

            discrete_elements = proper_discrete_elements

            ret = []

            for discrete_element in discrete_elements:
                range_values = [x.strip() for x in range_separators_re.split(discrete_element) if x != '']
                if len(range_values) > 1:
                    # Expand every consecutive pair of bounds, inclusively.
                    for first, last in zip(range_values, range_values[1:]):
                        for range_ep in range(parse_numeral(first), parse_numeral(last) + 1):
                            if range_ep not in ret:
                                ret.append(range_ep)
                else:
                    discrete_value = parse_numeral(discrete_element)
                    if discrete_value not in ret:
                        ret.append(discrete_value)

            if len(ret) > 1:
                if not allow_discrete:
                    # Keep the first value, then only values that are not
                    # lower than the last one kept.
                    valid_ret = [ret[0]]
                    for candidate in ret[1:]:
                        if not candidate < valid_ret[-1]:
                            valid_ret.append(candidate)
                    ret = valid_ret
                if fill_gaps:
                    ret = list(range(min(ret), max(ret) + 1))
                if len(ret) > 1:
                    return {None: ret[0], propertyListName: ret}
            if len(ret) > 0:
                return ret[0]
            return None

        def episode_parser_x(value):
            return list_parser(value, 'episodeList', discrete_separators_re=x_separator_re)

        def episode_parser_e(value):
            return list_parser(value, 'episodeList', discrete_separators_re=e_separator_re, fill_gaps=True)

        def episode_parser(value):
            return list_parser(value, 'episodeList')

        def season_parser(value):
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Accept a match only when its second group is shorter than 3 characters.

            NOTE(review): judging by the name this is meant to keep video
            resolutions such as ``1920x1080`` from being read as
            season x episode - confirm.
            """

            def validate(self, prop, string, node, match, entry_start, entry_end):
                return len(match.group(2)) < 3  # limit

        # A digital numeral optionally followed by more of them, joined by
        # any range or discrete separator.
        numeral_list = digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*'

        # --- "Season 1", "Seasons 1-3" -----------------------------------
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)',
            confidence=1.0,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral_list + ')' + sep + '?' + season_words_re.pattern + '?)' + sep,
            confidence=1.0,
            formatter={None: parse_numeral, 'season': season_parser},
            validator=ChainedValidator(
                DefaultValidator(),
                FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

        # --- "S01E02", "S01E02E03", "S01E02-03" --------------------------
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep
            + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser},
            validator=NoValidator())

        # --- "1 x 02" (with separators) and "1x02" -----------------------
        self.container.register_property(
            None,
            sep + r'((?P<season>' + digital_numeral + ')' + sep
            + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'((?P<season>' + digital_numeral + ')'
            + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

        # --- "S01", "S01-03" ---------------------------------------------
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' + numeral_list + '))',
            confidence=0.6,
            formatter={None: parse_numeral, 'season': season_parser},
            validator=NoValidator())

        # --- "02v2", "ep 02", "ep 02 v2" ---------------------------------
        # The version suffix is a raw string: "\d" in a plain literal is an
        # invalid escape sequence.
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.6,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep + '?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.7,
            formatter=parse_numeral)

        # --- "E02", "E02-03", "Episode 2", "Episodes 2-3" ----------------
        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + numeral_list + '))',
            confidence=0.6,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + numeral_list + ')' + sep + '?' + episode_words_re.pattern + '?)',
            confidence=0.8,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        # --- "E02v2", "Episode 2 v2" -------------------------------------
        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.6,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.8,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        # --- bare two-digit numbers at the start / end of the string -----
        # (the first pattern used to be registered twice, verbatim)
        self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)

        # --- "2 of 10 episodes", "episode 2 of 10", season equivalents ---
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
            + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
            + '?(?P<episodeCount>' + numeral + '))',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
            + '?(?P<seasonCount>' + numeral + '))',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
            + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

        # --- "1xAll": a complete season ----------------------------------
        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    def register_options(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        """Add the ``-E`` / ``--episode-prefer-number`` flag to ``naming_opts``.

        The other option groups are accepted but not used here.
        """
        naming_opts.add_option('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number', default=False,
                               help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
                                    'it will be guessed as season 2, episodeNumber 13')

    def supported_properties(self):
        """Return the names of the properties this transformer can produce."""
        return ['episodeNumber', 'season', 'episodeList', 'seasonList', 'episodeCount', 'seasonCount', 'version', 'other']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Return the container's guess for the properties found in ``string``."""
        found = self.container.find_properties(string, node, options)
        return self.container.as_guess(found, string)

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with ``episode``."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run ``guess_episodes_rexps`` over the tree's unidentified leaves."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
Esempio n. 28
0
class GuessReleaseGroup(Transformer):
    """Guess the ``releaseGroup`` property from unidentified leaves.

    A candidate is accepted only when it directly follows (separated by
    separator characters only) a leaf holding one of
    ``previous_safe_properties``, either in the same group or in the
    previous one.
    """

    def __init__(self):
        Transformer.__init__(self, -190)
        self.container = PropertiesContainer(canonical_from_pattern=False)
        # Raw string: "\w" in a plain literal is an invalid escape sequence.
        self._allowed_groupname_pattern = r'[\w@#€£$&]'
        # Each callable receives a lower-cased name element and returns a
        # truthy value when that element cannot be (part of) a group name.
        self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
                               lambda elt: self._is_number(elt),
                               ]
        # If the previous property is in this list, the match will be considered as safe
        # and group name can contain a separator.
        self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels']

        self.container.sep_replace_char = '-'
        self.container.canonical_from_pattern = False
        self.container.enhance = True
        self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
        self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def _is_number(self, s):
        """Return True when ``s`` can be converted with ``int()``."""
        try:
            int(s)
            return True
        except ValueError:
            return False

    def _is_forbidden(self, element):
        """Return True when any forbidden-name rule matches the lower-cased ``element``."""
        lowered = element.lower()
        return any(rule(lowered) for rule in self._forbidden_groupname_lambda)

    def validate_group_name(self, guess):
        """Check, and possibly trim, the ``releaseGroup`` value of ``guess``.

        Names shorter than 2 characters are rejected.  A name containing
        ``-`` is truncated at its first forbidden element and the trimmed
        value is written back to ``guess``; the name is rejected when
        nothing remains.  Finally the (possibly trimmed) name as a whole
        must not be forbidden.

        Returns True when the name is acceptable, False otherwise.
        """
        val = guess['releaseGroup']
        if len(val) < 2:
            return False

        if '-' in val:
            checked_val = ""
            for elt in val.split('-'):
                if self._is_forbidden(elt):
                    break
                if checked_val:
                    checked_val += '-'
                checked_val += elt
            val = checked_val
            if not val:
                return False
            guess['releaseGroup'] = val

        return not self._is_forbidden(val)

    def is_leaf_previous(self, leaf, node):
        """Return True when ``leaf`` ends before ``node`` with only separators in between."""
        if leaf.span[1] > node.span[0]:
            return False
        return all(leaf.root.value[idx] in sep for idx in range(leaf.span[1], node.span[0]))

    def guess_release_group(self, string, node=None, options=None):
        """Return a validated ``releaseGroup`` guess for ``string``, or ``None``.

        The guess needs tree context to be validated, so ``None`` is
        returned when no ``node`` is given (previously this raised
        ``AttributeError``).  ``options`` is accepted but not used.
        """
        found = self.container.find_properties(string, node, 'releaseGroup')
        guess = self.container.as_guess(found, string, self.validate_group_name, sep_replacement='-')
        validated_guess = None
        if guess and node is not None:
            explicit_group_node = node.group_node()
            if explicit_group_node:
                for leaf in explicit_group_node.leaves_containing(self.previous_safe_properties):
                    if self.is_leaf_previous(leaf, node):
                        # A dash right after the safe property gives full confidence.
                        if leaf.root.value[leaf.span[1]] == '-':
                            guess.metadata().confidence = 1
                        else:
                            guess.metadata().confidence = 0.7
                        validated_guess = guess

            if not validated_guess:
                # If previous group last leaf is identified as a safe property,
                # consider the raw value as a releaseGroup
                previous_group_node = node.previous_group_node()
                if previous_group_node:
                    for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
                        if self.is_leaf_previous(leaf, node):
                            guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
                            if self.validate_group_name(guess):
                                node.guess = guess
                                validated_guess = guess

            if validated_guess:
                # If following group nodes have only one unidentified leaf, it belongs to the release group
                next_group_node = node.next_group_node()
                while next_group_node:
                    leaves = list(next_group_node.leaves())
                    if len(leaves) != 1 or leaves[0].guess:
                        break
                    validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
                    leaves[0].guess = validated_guess
                    next_group_node = next_group_node.next_group_node()

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])

        return validated_guess

    def process(self, mtree, options=None):
        """Run ``guess_release_group`` over the tree's unidentified leaves."""
        GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())
Esempio n. 29
0
class GuessEpisodesRexps(Transformer):
    def __init__(self):
        Transformer.__init__(self, 20)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(
            build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        season_words_re = re.compile(build_or_pattern(season_words),
                                     re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words),
                                      re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers),
                                       re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers),
                                        re.IGNORECASE)

        def episode_parser_x(value):
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=re.compile(
                                   'x', re.IGNORECASE))

        def episode_parser_e(value):
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=re.compile(
                                   'e', re.IGNORECASE),
                               fill_gaps=True)

        def episode_parser(value):
            return list_parser(value, 'episodeList')

        def season_parser(value):
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                return len(match.group(2)) < 3  # limit

        self.container.register_property(None,
                                         r'(' + season_words_re.pattern + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + season_words_re.pattern + '?)',
                                         confidence=1.0,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            season_words_re.pattern + '?)' + sep,
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=ChainedValidator(
                DefaultValidator(),
                FormatterValidator(
                    'season', lambda x: len(x) > 1
                    if hasattr(x, '__len__') else False)))

        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' +
            digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral +
            ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_e,
                'season': season_parser
            },
            validator=NoValidator())
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser,
                'season': season_parser
            },
            validator=NoValidator())

        self.container.register_property(
            None,
            sep + r'((?P<season>' + digital_numeral + ')' + sep + '' +
            '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep +
            '[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'((?P<season>' + digital_numeral + ')' +
            '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' +
            digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=NoValidator())

        self.container.register_property(None,
                                         r'((?P<episodeNumber>' +
                                         digital_numeral + ')' + sep +
                                         '?v(?P<version>\d+))',
                                         confidence=0.6,
                                         formatter=parse_numeral)
        self.container.register_property('version',
                                         sep + r'(V\d+)' + sep,
                                         confidence=0.6,
                                         formatter=parse_numeral,
                                         validator=NoValidator())
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' +
            sep + '?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' +
            sep + '?v(?P<version>\d+))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            episode_words_re.pattern + '?)',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + ')' + sep + '?v(?P<version>\d+))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + ')' + sep + '?v(?P<version>\d+))',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        self.container.register_property('episodeNumber',
                                         r'^' + sep + '+(\d{2}' + '(?:' + sep +
                                         '?' + all_separators_re.pattern +
                                         sep + '?' + '\d{2}' + ')*)' + sep,
                                         confidence=0.4,
                                         formatter=episode_parser)
        self.container.register_property(
            'episodeNumber',
            r'^' + sep + '+0(\d{1,2}' + '(?:' + sep + '?' +
            all_separators_re.pattern + sep + '?' + '0\d{1,2}' + ')*)' + sep,
            confidence=0.4,
            formatter=episode_parser)
        self.container.register_property('episodeNumber',
                                         sep + r'(\d{2}' + '(?:' + sep + '?' +
                                         all_separators_re.pattern + sep +
                                         '?' + r'\d{2}' + ')*)' + sep + '+$',
                                         confidence=0.4,
                                         formatter=episode_parser)
        self.container.register_property(
            'episodeNumber',
            sep + r'0(\d{1,2}' + '(?:' + sep + '?' +
            all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep +
            '+$',
            confidence=0.4,
            formatter=episode_parser)

        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral +
            ')(?:' + sep + '?(?:episodes?|eps?))?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral +
            ')' + sep + '?' + of_separators_re.pattern + sep +
            '?(?P<episodeCount>' + numeral + '))',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(None,
                                         r'((?:seasons?|saisons?|s)' + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + of_separators_re.pattern + sep +
                                         '?(?P<seasonCount>' + numeral + '))',
                                         confidence=0.7,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?P<season>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral +
            ')' + sep + '?(?:seasons?|saisons?|s))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_canonical_properties('other',
                                                     'FiNAL',
                                                     'Complete',
                                                     validator=WeakValidator())

        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral +
            ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'other': lambda x: 'Complete',
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))

    def register_arguments(self, opts, naming_opts, output_opts,
                           information_opts, webservice_opts, other_options):
        """Register the ``-E/--episode-prefer-number`` flag on ``naming_opts``.

        Only ``naming_opts`` is used; the other option groups are accepted
        but left untouched. The flag is a boolean stored as
        ``episode_prefer_number`` and defaults to ``False``.
        """
        flag_help = ('Guess "serie.213.avi" as the episodeNumber 213. '
                     'Without this option, '
                     'it will be guessed as season 2, episodeNumber 13')
        flag_settings = dict(action='store_true',
                             dest='episode_prefer_number',
                             default=False,
                             help=flag_help)
        naming_opts.add_argument('-E', '--episode-prefer-number',
                                 **flag_settings)

    def supported_properties(self):
        """Return the names of the properties this transformer can produce.

        A new list is built on every call, so callers may modify the result.
        """
        episode_and_season = ['episodeNumber', 'season',
                              'episodeList', 'seasonList']
        counts_and_misc = ['episodeCount', 'seasonCount', 'version', 'other']
        return episode_and_season + counts_and_misc

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Guess episode/season properties in ``string``.

        The properties are looked up through ``self.container``. When both a
        guess and a ``node`` are available, the new guess is reconciled with
        the guesses already stored on ``node.group_node()``:

        * new guess has ``season`` and ``episodeNumber``: every existing
          guess that also has both gets an ``episodeList`` extended with the
          new episode number, and the new guess receives a copy of it;
        * new guess has ``episodeNumber`` only: every key it shares with an
          existing guess holding an ``episodeNumber`` is deleted from the
          new guess.

        Returns whatever ``self.container.as_guess`` produced, possibly
        modified in place as described above.
        """
        matches = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(matches, string)
        if not (guess and node):
            # Nothing was found, or there is no tree context to reconcile with.
            return guess

        if 'season' in guess and 'episodeNumber' in guess:
            for sibling in node.group_node().guesses:
                if not ('season' in sibling and 'episodeNumber' in sibling):
                    continue
                if 'episodeList' not in sibling:
                    sibling['episodeList'] = [sibling['episodeNumber']]
                sibling['episodeList'].append(guess['episodeNumber'])
                sibling['episodeList'].sort()
                # The guess with the higher episode number loses its
                # episodeNumber confidence; on a tie the new guess loses.
                if sibling['episodeNumber'] > guess['episodeNumber']:
                    demoted = sibling
                else:
                    demoted = guess
                demoted.set_confidence('episodeNumber', 0)
                guess['episodeList'] = list(sibling['episodeList'])
        elif 'episodeNumber' in guess:
            # An episodeNumber already exists in the group: strip from the
            # new guess every key the existing guess already provides.
            for sibling in node.group_node().guesses:
                if 'episodeNumber' not in sibling:
                    continue
                for key, _ in sibling.items():
                    if key in guess:
                        del guess[key]
        return guess

    def should_process(self, mtree, options=None):
        """Return True when the guessed ``type`` of ``mtree`` starts with 'episode'.

        A missing ``type`` is treated as the empty string, hence False.
        ``options`` is not used.
        """
        guessed_type = mtree.guess.get('type', '')
        return guessed_type.startswith('episode')

    def process(self, mtree, options=None):
        """Apply ``guess_episodes_rexps`` to the unidentified leaves of ``mtree``.

        The work is delegated to a ``GuessFinder`` built with this
        transformer's logger and the given ``options``.
        """
        finder = GuessFinder(self.guess_episodes_rexps, None, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
Esempio n. 30
0
class GuessReleaseGroup(Transformer):
    """Transformer that guesses the ``releaseGroup`` property.

    Candidates are matched through ``self.container`` and cleaned by
    ``validate_group_name``. A candidate is then kept only when its context
    supports it: it directly follows a leaf holding one of
    ``previous_safe_properties``, or the node satisfies
    ``node.is_explicit()`` and is the first node of its group.
    """

    def __init__(self):
        Transformer.__init__(self, -190)

        self.container = PropertiesContainer(canonical_from_pattern=False)
        # Raw string: ``\w`` and ``\?`` are regex escapes. In a plain literal
        # they are invalid string escapes and warn on recent Python versions.
        self._allowed_groupname_pattern = r'[\w@#€£$&!\?]'
        # Predicates applied to a lowercased token; a truthy result rejects it.
        self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
                                            lambda elt: self._is_number(elt)]
        # If the previous property is in this list, the match is considered
        # safe and the group name can contain a separator.
        self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels', 'other']
        # For these properties only the listed values count as safe.
        self.previous_safe_values = {'other': ['Complete']}
        self.next_safe_properties = ['extension', 'website']
        self.next_safe_values = {'format': ['Telesync']}
        self.container.sep_replace_char = '-'
        self.container.canonical_from_pattern = False
        self.container.enhance = True
        self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
        self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')
        # Capturing group so that split() keeps the separators in its result.
        self.re_sep = re.compile('(' + sep + ')')

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        """Register the repeatable ``-G/--expected-group`` option on ``naming_opts``."""
        naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group',
                               help='Expected release group (can be used multiple times)')

    def supported_properties(self):
        """Return the properties supported by ``self.container``."""
        return self.container.get_supported_properties()

    def _is_number(self, s):
        """Return True when ``s`` can be parsed by ``int()``."""
        try:
            int(s)
            return True
        except ValueError:
            return False

    def _is_forbidden(self, token):
        """Return True when any forbidden-name predicate rejects ``token`` (lowercased)."""
        lowered = token.lower()
        return any(check(lowered) for check in self._forbidden_groupname_lambda)

    def validate_group_name(self, guess):
        """Clean ``guess['releaseGroup']`` and tell whether it is acceptable.

        Forbidden tokens (see ``_forbidden_groupname_lambda``) are removed
        together with the separators around them, then one leading and one
        trailing separator are stripped. When something is left, it is
        written back to ``guess['releaseGroup']``.

        Returns False for names of one character or less, for names that end
        up empty, and for names that are forbidden as a whole; True otherwise.
        """
        val = guess['releaseGroup']
        if len(val) <= 1:
            return False

        checked_val = ""
        skip_separator = False
        # Separators are in the list because of the capturing group.
        for elt in self.re_sep.split(val):
            if skip_separator:
                # Previous token was forbidden: don't add the separator after it.
                skip_separator = False
                continue
            if self._is_forbidden(elt):
                skip_separator = True
                # Remove the separator before the forbidden token. Only strip
                # when the last character really is a separator: after two
                # forbidden tokens in a row it is part of the name.
                if checked_val and self.re_sep.match(checked_val[-1]):
                    checked_val = checked_val[:-1]
            else:
                checked_val += elt

        val = checked_val
        # Guard each index on non-emptiness: the value may collapse to
        # nothing once a separator has been stripped.
        if val and self.re_sep.match(val[-1]):
            val = val[:-1]
        if val and self.re_sep.match(val[0]):
            val = val[1:]
        if not val:
            return False

        guess['releaseGroup'] = val
        return not self._is_forbidden(val)

    def is_leaf_previous(self, leaf, node):
        """Return True when ``leaf`` ends before ``node`` starts and every
        character of the root value between the two is contained in ``sep``.
        """
        gap_start, gap_end = leaf.span[1], node.span[0]
        if gap_start > gap_end:
            return False
        root_value = leaf.root.value
        return all(root_value[idx] in sep for idx in range(gap_start, gap_end))

    def validate_next_leaves(self, node):
        """Return True when taking ``node`` as release group still leaves
        room for a series/title guess.
        """
        if 'series' in node.root.info or 'title' in node.root.info:
            # --expected-series or --expected-title is used.
            return True

        # Make sure to avoid collision with 'series' or 'title' guessed later. Should be more precise.
        leaves = node.root.unidentified_leaves()
        return len(list(leaves)) > 1

    def validate_node(self, leaf, node, safe=False):
        """Check that ``leaf`` can vouch for ``node`` being a release group.

        ``leaf`` must directly precede ``node`` and ``validate_next_leaves``
        must hold. With ``safe=True``, any property of ``leaf`` listed in
        ``previous_safe_values`` must also carry one of the allowed values.
        """
        if not self.is_leaf_previous(leaf, node):
            return False
        if not self.validate_next_leaves(node):
            return False
        if safe:
            for key, value in leaf.guess.items():
                if key in self.previous_safe_values and value not in self.previous_safe_values[key]:
                    return False
        return True

    def guess_release_group(self, string, node=None, options=None):
        """Guess the release group contained in ``string``.

        When ``options['expected_group']`` is set, those names are tried
        first (entries prefixed with ``re:`` are used as regular expressions,
        the others are escaped) and a match is returned immediately.
        Otherwise a candidate found by ``self.container`` is validated
        against the neighbouring leaves of ``node``.

        ``node`` is dereferenced as soon as a candidate is found, so it must
        be provided on that path. Returns the validated guess with brackets
        stripped, or None.
        """
        expected_groups = options.get('expected_group') if options else None
        if expected_groups:
            expected_container = PropertiesContainer(enhance=True, canonical_from_pattern=False)
            for expected_group in expected_groups:
                if expected_group.startswith('re:'):
                    pattern = expected_group[3:].replace(' ', '-')
                    expected_container.register_property('releaseGroup', pattern, enhance=True)
                else:
                    expected_container.register_property('releaseGroup', re.escape(expected_group), enhance=False)

            found = expected_container.find_properties(string, node, options, 'releaseGroup')
            guess = expected_container.as_guess(found, string, self.validate_group_name)
            if guess:
                return guess

        found = self.container.find_properties(string, node, options, 'releaseGroup')
        guess = self.container.as_guess(found, string, self.validate_group_name)
        if not guess:
            return None

        validated_guess = None
        group_node = node.group_node()
        if group_node:
            for leaf in group_node.leaves_containing(self.previous_safe_properties):
                if self.validate_node(leaf, node, True):
                    # A dash right after the safe property gives full confidence.
                    dash_follows = leaf.root.value[leaf.span[1]] == '-'
                    guess.metadata().confidence = 1 if dash_follows else 0.7
                    validated_guess = guess

        if not validated_guess:
            # If previous group last leaf is identified as a safe property,
            # consider the raw value as a releaseGroup
            previous_group_node = node.previous_group_node()
            if previous_group_node:
                for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
                    if self.validate_node(leaf, node, False):
                        raw_guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
                        if self.validate_group_name(raw_guess):
                            node.guess = raw_guess
                            validated_guess = raw_guess

        if validated_guess:
            # If following group nodes have only one unidentified leaf, it belongs to the release group
            following = node.next_group_node()
            while following:
                leaves = list(following.leaves())
                if len(leaves) != 1 or leaves[0].guess:
                    break
                validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
                leaves[0].guess = validated_guess
                following = following.next_group_node()

        if not validated_guess and node.is_explicit() and node.node_last_idx == 0:  # first node from group
            # Drop the first and last character of the node value
            # (presumably the enclosing brackets -- confirm).
            validated_guess = build_guess(node, 'releaseGroup', value=node.value[1:-1])
            validated_guess.metadata().confidence = 0.4
            validated_guess.metadata().span = 1, len(node.value)
            node.guess = validated_guess

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])

        return validated_guess

    def process(self, mtree, options=None):
        """Apply ``guess_release_group`` to the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())
Esempio n. 31
0
    def guess_release_group(self, string, node=None, options=None):
        """Guess the release group contained in ``string``.

        When ``options['expected_group']`` is set, those names are tried
        first (entries prefixed with ``re:`` are used as regular expressions,
        the others are escaped) and a match is returned immediately.
        Otherwise a candidate found by ``self.container`` is validated
        against the neighbouring leaves of ``node``.

        ``node`` is dereferenced as soon as a candidate is found, so it must
        be provided on that path. Returns the validated guess with brackets
        stripped, or None.
        """
        expected_groups = options.get('expected_group') if options else None
        if expected_groups:
            expected_container = PropertiesContainer(enhance=True, canonical_from_pattern=False)
            for expected_group in expected_groups:
                if expected_group.startswith('re:'):
                    pattern = expected_group[3:].replace(' ', '-')
                    expected_container.register_property('releaseGroup', pattern, enhance=True)
                else:
                    expected_container.register_property('releaseGroup', re.escape(expected_group), enhance=False)

            expected_found = expected_container.find_properties(string, node, options, 'releaseGroup')
            expected_guess = expected_container.as_guess(expected_found, string, self.validate_group_name)
            if expected_guess:
                return expected_guess

        candidates = self.container.find_properties(string, node, options, 'releaseGroup')
        guess = self.container.as_guess(candidates, string, self.validate_group_name)
        if not guess:
            return None

        validated_guess = None
        group_node = node.group_node()
        if group_node:
            for leaf in group_node.leaves_containing(self.previous_safe_properties):
                if self.validate_node(leaf, node, True):
                    # A dash right after the safe property gives full confidence.
                    dash_follows = leaf.root.value[leaf.span[1]] == '-'
                    guess.metadata().confidence = 1 if dash_follows else 0.7
                    validated_guess = guess

        if not validated_guess:
            # A safe property ending the previous group lets the raw node
            # value stand as the release group.
            previous_group_node = node.previous_group_node()
            if previous_group_node:
                for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
                    if self.validate_node(leaf, node, False):
                        raw_guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
                        if self.validate_group_name(raw_guess):
                            node.guess = raw_guess
                            validated_guess = raw_guess

        if validated_guess:
            # Following group nodes made of a single unidentified leaf are
            # appended to the release group.
            following = node.next_group_node()
            while following:
                leaves = list(following.leaves())
                if len(leaves) != 1 or leaves[0].guess:
                    break
                validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
                leaves[0].guess = validated_guess
                following = following.next_group_node()

        if not validated_guess and node.is_explicit() and node.node_last_idx == 0:  # first node from group
            validated_guess = build_guess(node, 'releaseGroup', value=node.value[1:len(node.value) - 1])
            validated_guess.metadata().confidence = 0.4
            validated_guess.metadata().span = 1, len(node.value)
            node.guess = validated_guess

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])

        return validated_guess
Esempio n. 32
0
class GuessVideoRexps(Transformer):
    """Transformer guessing CD numbering, bonus/film numbers and editions.

    All patterns are registered on ``self.container`` in ``__init__``;
    ``process`` then applies them to the unidentified leaves of a match tree.
    """

    def __init__(self):
        Transformer.__init__(self, 25)

        self.container = PropertiesContainer(canonical_from_pattern=False)

        # "cd" + one digit, optionally followed by "of" + one digit.
        self.container.register_property(
            None,
            'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep +
            '(?P<cdNumberTotal>[0-9]))?',
            confidence=1.0,
            enhance=False,
            global_span=True,
            formatter=parse_numeral)
        # One non-zero digit followed by "cd" or "cds".
        self.container.register_property('cdNumberTotal',
                                         '([1-9])' + _psep + 'cds?',
                                         confidence=0.9,
                                         enhance=False,
                                         formatter=parse_numeral)

        self.container.register_property('bonusNumber',
                                         'x([0-9]{1,2})',
                                         enhance=False,
                                         global_span=True,
                                         formatter=parse_numeral)

        self.container.register_property('filmNumber',
                                         'f([0-9]{1,2})',
                                         enhance=False,
                                         global_span=True,
                                         formatter=parse_numeral)

        # (canonical form, patterns) pairs, registered in this order. Every
        # edition accepts the bare name, "<name>-edition" and
        # "edition-<name>"; the Deluxe entry used to read 'cdeluxe-edition',
        # a typo that kept "deluxe-edition" from matching as a whole.
        editions = (
            ('Collector Edition',
             ('collector', 'collector-edition', 'edition-collector')),
            ('Special Edition',
             ('special-edition', 'edition-special')),
            ('Criterion Edition',
             ('criterion', 'criterion-edition', 'edition-criterion')),
            ('Deluxe Edition',
             ('deluxe', 'deluxe-edition', 'edition-deluxe')),
            ("Director's cut",
             ("director'?s?-cut", "director'?s?-cut-edition",
              "edition-director'?s?-cut")),
        )
        for canonical_form, patterns in editions:
            self.container.register_property('edition',
                                             *patterns,
                                             canonical_form=canonical_form)

    def supported_properties(self):
        """Return the properties supported by ``self.container``."""
        return self.container.get_supported_properties()

    def guess_video_rexps(self, string, node=None, options=None):
        """Return the guess built from the registered patterns found in ``string``."""
        found = self.container.find_properties(string, node, options)
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Apply ``guess_video_rexps`` to the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_video_rexps, None, self.log,
                    options).process_nodes(mtree.unidentified_leaves())
Esempio n. 33
0
class GuessProperties(Transformer):
    def __init__(self):
        Transformer.__init__(self, 35)

        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()

        def register_property(propname, props, **kwargs):
            """props a dict of {value: [patterns]}"""
            for canonical_form, patterns in props.items():
                if isinstance(patterns, tuple):
                    patterns2, pattern_kwarg = patterns
                    if kwargs:
                        current_kwarg = dict(kwargs)
                        current_kwarg.update(pattern_kwarg)
                    else:
                        current_kwarg = dict(pattern_kwarg)
                    current_kwarg['canonical_form'] = canonical_form
                    self.container.register_property(propname, *patterns2, **current_kwarg)
                elif kwargs:
                    current_kwarg = dict(kwargs)
                    current_kwarg['canonical_form'] = canonical_form
                    self.container.register_property(propname, *patterns, **current_kwarg)
                else:
                    self.container.register_property(propname, *patterns, canonical_form=canonical_form)

        def register_quality(propname, quality_dict):
            """props a dict of {canonical_form: quality}"""
            for canonical_form, quality in quality_dict.items():
                self.qualities.register_quality(propname, canonical_form, quality)

        register_property('container', {'mp4': ['MP4']})

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
                                     'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
                                     #'Telesync': ['TELESYNC', 'PDVD'],
                                     'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
                                     'Workprint': ['WORKPRINT', 'WP'],
                                     'Telecine': ['TELECINE', 'TC'],
                                     'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                                     'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                     'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                     'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'],
                                     'VOD': ['VOD', 'VOD-Rip'],
                                     'WEBRip': ['WEB-Rip'],
                                     'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
                                     'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'],
                                     'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                     })

        register_quality('format', {'VHS': -100,
                                    'Cam': -90,
                                    'Telesync': -80,
                                    'Workprint': -70,
                                    'Telecine': -60,
                                    'PPV': -50,
                                    'TV': -30,
                                    'DVB': -20,
                                    'DVD': 0,
                                    'HDTV': 20,
                                    'VOD': 40,
                                    'WEBRip': 50,
                                    'WEB-DL': 60,
                                    'HD-DVD': 80,
                                    'BluRay': 100
                                    })

        register_property('screenSize', {'360p': ['(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)'],
                                         '368p': ['(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)'],
                                         '480p': ['(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)'],
                                         #'480p': (['hr'], {'confidence': 0.2}), # duplicate dict key
                                         '576p': ['(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)'],
                                         '720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'],
                                         '900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'],
                                         '1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'],
                                         '1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080p?x?'],
                                         '4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)']
                                         },
                          validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))

        class ResolutionValidator(object):
            """Make sure our match is surrounded by separators, or by another entry"""
            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                """
                span = _get_span(prop, match)
                span = _trim_span(span, string[span[0]:span[1]])
                start, end = span

                sep_start = start <= 0 or string[start - 1] in sep
                sep_end = end >= len(string) or string[end] in sep
                start_by_other = start in entry_end
                end_by_other = end in entry_start
                if (sep_start or start_by_other) and (sep_end or end_by_other):
                    return True
                return False
                """
                return True

        _digits_re = re.compile('\d+')

        def resolution_formatter(value):
            digits = _digits_re.findall(value)
            return 'x'.join(digits)

        self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))

        register_quality('screenSize', {'360p': -300,
                                        '368p': -200,
                                        '480p': -100,
                                        '576p': 0,
                                        '720p': 100,
                                        '900p': 130,
                                        '1080i': 180,
                                        '1080p': 200,
                                        '4K': 400
                                        })

        _videoCodecProperty = {'Real': ['Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
                               'Mpeg2': ['Mpeg2'],
                               'DivX': ['DVDivX', 'DivX'],
                               'XviD': ['XviD'],
                               'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
                               'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
                               }

        register_property('videoCodec', _videoCodecProperty)

        register_quality('videoCodec', {'Real': -50,
                                        'Mpeg2': -30,
                                        'DivX': -10,
                                        'XviD': 0,
                                        'h264': 100,
                                        'h265': 150
                                        })

        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
        self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
        self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))

        register_quality('videoProfile', {'BP': -20,
                                          'XP': -10,
                                          'MP': 0,
                                          'HP': 10,
                                          '10bit': 15,
                                          'Hi422P': 25,
                                          'Hi444PP': 35
                                          })

        # has nothing to do here (or on filenames for that matter), but some
        # releases use it and it helps to identify release groups, so we adapt
        register_property('videoApi', {'DXVA': ['DXVA']})

        register_property('audioCodec', {'MP3': ['MP3', 'LAME', 'LAME(?:\d)+-(?:\d)+'],
                                         'DolbyDigital': ['DD'],
                                         'AAC': ['AAC'],
                                         'AC3': ['AC3'],
                                         'Flac': ['FLAC'],
                                         'DTS': (['DTS'], {'validator': LeftValidator()}),
                                         'TrueHD': ['True-HD']
                                         })

        register_quality('audioCodec', {'MP3': 10,
                                        'DolbyDigital': 30,
                                        'AAC': 35,
                                        'AC3': 40,
                                        'Flac': 45,
                                        'DTS': 60,
                                        'TrueHD': 70
                                        })

        self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
        self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
        self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
        self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
        self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AC3']))

        register_quality('audioProfile', {'HD': 20,
                                          'HDMA': 50,
                                          'LC': 0,
                                          'HQ': 0,
                                          'HE': 20
                                          })

        register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch', '8ch'],
                                            '5.1': ['5[\W_]1', '5ch', '6ch'],
                                            '2.0': ['2[\W_]0', '2ch', 'stereo'],
                                            '1.0': ['1[\W_]0', '1ch', 'mono']
                                            })

        register_quality('audioChannels', {'7.1': 200,
                                           '5.1': 100,
                                           '2.0': 0,
                                           '1.0': -100
                                           })

        self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

        self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)

        part_words = ['pt', 'part']
        self.container.register_property(None, '(' + build_or_pattern(part_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)

        register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                    'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
                                    'DualAudio': ['Dual-Audio'],
                                    'WideScreen': ['ws', 'wide-screen'],
                                    'Netflix': ['Netflix', 'NF']
                                    })

        self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
        self.container.register_property('other', 'Fansub', canonical_form='Fansub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', 'Fastsub', canonical_form='Fastsub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
        self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
        self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')
        self.container.register_property('other', 'CC')  # Close Caption
        self.container.register_property('other', 'LD', 'MD')  # Line/Mic Dubbed

        self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
                                                     'DDC',
                                                     'HR', 'PAL', 'SECAM', 'NTSC')
        self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())

        for prop in self.container.get_properties('format'):
            self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')

        for exts in (subtitle_exts, info_exts, video_exts):
            for container in exts:
                self.container.register_property('container', container, confidence=0.3)

    def guess_properties(self, string, node=None, options=None):
        """Look up registered properties in ``string`` and return them as a guess.

        When the resulting guess carries a ``part`` and ``node`` is given,
        every guess already attached to the node's group that also has a
        ``part`` is merged with it: both end up with the same sorted
        ``partList``, and the confidence of ``part`` is set to 0 on one guess
        of the pair (the existing one when its part is strictly greater,
        otherwise the new one).

        :param string: text to search for properties
        :param node: node forwarded to the container lookup and used to reach
            the guesses of its group; may be ``None``
        :param options: forwarded unchanged to the container lookup
        :return: the value produced by ``self.container.as_guess``
        """
        matches = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(matches, string)

        # Nothing to merge without a guess, a node, or a part number.
        if not (guess and node) or 'part' not in guess:
            return guess

        for sibling in node.group_node().guesses:
            if 'part' not in sibling:
                continue

            # Seed the sibling's list with its own part the first time it is merged.
            if 'partList' not in sibling:
                sibling['partList'] = [sibling['part']]
            merged_parts = sibling['partList']
            merged_parts.append(guess['part'])
            merged_parts.sort()

            # The guess with the higher part is demoted; ties demote the new guess.
            demoted = sibling if sibling['part'] > guess['part'] else guess
            demoted.set_confidence('part', 0)

            # The new guess gets its own copy of the merged list.
            guess['partList'] = list(merged_parts)

        return guess

    def supported_properties(self):
        """Return the property names this transformer can produce.

        The result is a new list holding the container's supported properties
        followed by ``partList``, which ``guess_properties`` sets itself.
        """
        return list(self.container.get_supported_properties()) + ['partList']

    def process(self, mtree, options=None):
        """Guess properties on unidentified leaves, then record the Proper count.

        A ``GuessFinder`` built around ``guess_properties`` is run over the
        tree's unidentified leaves.  Afterwards, the leaves containing
        ``other`` whose ``info['other']`` contains ``'Proper'`` are counted
        and, when there is at least one, the count is reported on the tree as
        ``properCount``.

        :param mtree: match tree to process
        :param options: forwarded unchanged to ``GuessFinder``
        """
        finder = GuessFinder(self.guess_properties, 1.0, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())

        proper_count = sum(
            1
            for leaf in mtree.leaves_containing('other')
            if 'other' in leaf.info and 'Proper' in leaf.info['other']
        )
        # A zero count is not reported at all.
        if proper_count:
            found_property(mtree, 'properCount', proper_count)

    def rate_quality(self, guess, *props):
        """Delegate to ``self.qualities.rate_quality`` and return its result.

        :param guess: forwarded unchanged as the first argument
        :param props: forwarded unchanged as the remaining positional arguments
        """
        rater = self.qualities
        return rater.rate_quality(guess, *props)
Esempio n. 34
0
class GuessEpisodesRexps(Transformer):
    def __init__(self):
        """Register the season/episode patterns on a fresh properties container.

        The transformer is initialised with priority 20.  The container is
        created with ``enhance=False`` and ``canonical_from_pattern=False`` and
        receives, in order: a ``season``/``saison`` word pattern, the
        ``sXXeYY`` and ``XXxYY`` patterns (both of which may list several
        episodes), a lone ``sXX`` pattern, an ``NNv2``/``NNv3`` pattern, an
        ``ep NN`` pattern, a lone ``eNN`` pattern, and the ``FiNAL`` /
        ``Complete`` canonical ``other`` properties.
        """
        Transformer.__init__(self, 20)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        def episode_parser(value):
            """Turn a matched episode string into a number or a list of numbers.

            ``value`` is split on ASCII letters.  Every non-empty chunk is
            either a single numeral or a dash-separated chain of numerals; a
            chain is expanded into the inclusive ranges between consecutive
            bounds, without repeating the bound two ranges share.

            :return: ``None`` when nothing was parsed, the single number when
                exactly one was parsed, otherwise a dict mapping ``None`` to
                the first number and ``"episodeList"`` to all of them
            """
            chunks = [chunk for chunk in re_split("[a-zA-Z]", value) if chunk]
            ret = []
            for chunk in chunks:
                bounds = [bound for bound in chunk.split("-") if bound]
                if len(bounds) > 1:
                    numbers = [parse_numeral(bound) for bound in bounds]
                    # Walk consecutive pairs of bounds.  Every range after the
                    # first starts one past its lower bound, which the previous
                    # range already emitted; with exactly two bounds this is the
                    # plain inclusive range, as before.
                    for index in range(len(numbers) - 1):
                        start = numbers[index] if index == 0 else numbers[index] + 1
                        ret.extend(range(start, numbers[index + 1] + 1))
                else:
                    ret.append(parse_numeral(chunk))
            if len(ret) > 1:
                return {None: ret[0], "episodeList": ret}  # TODO: Should support seasonList also
            elif len(ret) > 0:
                return ret[0]
            else:
                return None

        # "season 2" / "saison 2"
        self.container.register_property(
            None, r"((?:season|saison)" + sep + "?(?P<season>" + numeral + "))", confidence=1.0, formatter=parse_numeral
        )
        # "s02e03", optionally followed by more episodes ("s02e03e04", "s02e03-04")
        self.container.register_property(
            None,
            r"(s(?P<season>"
            + digital_numeral
            + ")[^0-9]?"
            + sep
            + "?(?P<episodeNumber>(?:e"
            + digital_numeral
            + "(?:"
            + sep
            + "?[e-]"
            + digital_numeral
            + ")*)))[^0-9]",
            confidence=1.0,
            formatter={None: parse_numeral, "episodeNumber": episode_parser},
            validator=NoValidator(),
        )
        # "2x03", optionally followed by more episodes ("2x03x04", "2x03-04")
        self.container.register_property(
            None,
            r"[^0-9]((?P<season>"
            + digital_numeral
            + ")[^0-9 .-]?-?(?P<episodeNumber>(?:x"
            + digital_numeral
            + "(?:"
            + sep
            + "?[x-]"
            + digital_numeral
            + ")*)))[^0-9]",
            confidence=1.0,
            formatter={None: parse_numeral, "episodeNumber": episode_parser},
        )
        # Lone "s02"
        self.container.register_property(
            None,
            r"(s(?P<season>" + digital_numeral + "))[^0-9]",
            confidence=0.6,
            formatter=parse_numeral,
            validator=NoValidator(),
        )
        # "03v2" / "03v3"
        self.container.register_property(
            None, r"((?P<episodeNumber>" + digital_numeral + ")v[23])", confidence=0.6, formatter=parse_numeral
        )
        # "ep 03"
        self.container.register_property(
            None,
            r"((?:ep)" + sep + r"(?P<episodeNumber>" + numeral + "))[^0-9]",
            confidence=0.7,
            formatter=parse_numeral,
        )
        # Lone "e03"
        self.container.register_property(
            None, r"(e(?P<episodeNumber>" + digital_numeral + "))", confidence=0.6, formatter=parse_numeral
        )

        self.container.register_canonical_properties("other", "FiNAL", "Complete", validator=WeakValidator())

    def supported_properties(self):
        """Return the property names this transformer can produce.

        Besides ``episodeNumber`` and ``season``, the list includes
        ``episodeList``, which the ``episodeNumber`` formatter registered in
        ``__init__`` emits for multi-episode matches, and ``other``, which is
        registered on the container for ``FiNAL`` / ``Complete``.
        """
        return ["episodeNumber", "season", "episodeList", "other"]

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Search ``string`` with the registered patterns and return a guess.

        :param string: text to search
        :param node: forwarded to the container lookup
        :param options: accepted but not used by this method
        :return: the value produced by ``self.container.as_guess``
        """
        container = self.container
        return container.as_guess(container.find_properties(string, node), string)

    def should_process(self, mtree, options=None):
        """Tell whether the tree's guessed ``type`` starts with ``episode``.

        A missing ``type`` is treated as an empty string, so the answer is
        ``False`` in that case.

        :param mtree: object whose ``guess`` mapping is inspected
        :param options: accepted but not used by this method
        """
        guessed_type = mtree.guess.get("type", "")
        return guessed_type.startswith("episode")

    def process(self, mtree, options=None):
        """Run ``guess_episodes_rexps`` over the tree's unidentified leaves.

        :param mtree: match tree whose unidentified leaves are processed
        :param options: forwarded unchanged to ``GuessFinder``
        """
        finder = GuessFinder(self.guess_episodes_rexps, None, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())