Esempio n. 1
0
def edition():
    """
    Build the Rebulk object that detects the ``edition`` property.

    Every pattern is a case-insensitive regular expression using the ``dash``
    abbreviation, and every match is validated with ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def special_edition_solver(match, other):
        """
        Give way to an ``episode_details`` match valued 'Special'; otherwise
        request the default conflict resolution.
        """
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    # (patterns, keyword options) pairs, registered in declaration order.
    registrations = (
        (('collector', 'collector-edition', 'edition-collector'),
         {'value': 'Collector Edition'}),
        (('special-edition', 'edition-special'),
         {'value': 'Special Edition',
          'conflict_solver': special_edition_solver}),
        (('criterion-edition', 'edition-criterion'),
         {'value': 'Criterion Edition'}),
        (('deluxe', 'deluxe-edition', 'edition-deluxe'),
         {'value': 'Deluxe Edition'}),
        (("director'?s?-cut",
          "director'?s?-cut-edition",
          "edition-director'?s?-cut"),
         {'value': "Director's cut"}),
    )
    for patterns, options in registrations:
        rebulk.regex(*patterns, **options)

    return rebulk
Esempio n. 2
0
def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    ``config`` maps each output value to a single pattern or a list of patterns.
    A pattern is either a string, or a dict whose ``'pattern'`` item holds the
    pattern text and whose remaining items are forwarded as keyword arguments
    to the pattern registration call.

    A pattern is registered as a regular expression when its text starts with
    ``re:`` (the marker is removed before registration) or when its dict has a
    truthy ``'regex'`` item; otherwise it is registered as a plain string.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(
        disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(
        flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    regex_prefix = 're:'

    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if isinstance(pattern, dict):
                # Copy so the caller's configuration is not mutated, and take
                # 'pattern' out so it is not forwarded as a keyword argument.
                kwargs = dict(pattern)
                pattern = kwargs.pop('pattern')
            else:
                kwargs = {}
            regex = kwargs.pop('regex', False)

            # Only strip the marker when it is really there: a pattern flagged
            # with regex=True but without the prefix must be kept whole.
            has_prefix = pattern.startswith(regex_prefix)
            if has_prefix:
                pattern = pattern[len(regex_prefix):]

            if regex or has_prefix:
                rebulk.regex(pattern, value=value, **kwargs)
            else:
                rebulk.string(pattern, value=value, **kwargs)

    rebulk.rules(ValidateStreamingService)

    return rebulk
Esempio n. 3
0
def streaming_service():
    """Streaming service property.

    Registers a fixed table of case-insensitive string and regex patterns under
    the ``streaming_service`` name (tagged ``format-prefix``), then attaches the
    ``ValidateStreamingService`` rule.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['format-prefix'])

    as_string = rebulk.string
    as_regex = rebulk.regex

    # (registration method, patterns, value) -- kept in the original order.
    services = (
        (as_string, ('AE', 'A&E'), 'A&E'),
        (as_string, ('AMBC',), 'ABC'),
        (as_string, ('AMC',), 'AMC'),
        (as_string, ('AMZN', 'AmazonPrime'), 'Amazon Prime'),
        (as_regex, ('Amazon-Prime',), 'Amazon Prime'),
        (as_string, ('AS', 'AdultSwim'), 'Adult Swim'),
        (as_regex, ('Adult-Swim',), 'Adult Swim'),
        (as_string, ('iP', 'BBCiPlayer'), 'BBC iPlayer'),
        (as_regex, ('BBC-iPlayer',), 'BBC iPlayer'),
        (as_string, ('CBS',), 'CBS'),
        (as_string, ('CC', 'ComedyCentral'), 'Comedy Central'),
        (as_regex, ('Comedy-Central',), 'Comedy Central'),
        (as_string, ('CR', 'CrunchyRoll'), 'Crunchy Roll'),
        (as_regex, ('Crunchy-Roll',), 'Crunchy Roll'),
        (as_string, ('CW', 'TheCW'), 'The CW'),
        (as_regex, ('The-CW',), 'The CW'),
        (as_string, ('DISC', 'Discovery'), 'Discovery'),
        (as_string, ('DIY',), 'DIY Network'),
        (as_string, ('DSNY', 'Disney'), 'Disney'),
        (as_string, ('EPIX', 'ePix'), 'ePix'),
        (as_string, ('HBO', 'HBOGo'), 'HBO Go'),
        (as_regex, ('HBO-Go',), 'HBO Go'),
        (as_string, ('HIST', 'History'), 'History'),
        (as_string, ('ID',), 'Investigation Discovery'),
        (as_string, ('IFC', 'IFC'), 'IFC'),
        (as_string, ('PBS', 'PBS'), 'PBS'),
        (as_string, ('NATG', 'NationalGeographic'), 'National Geographic'),
        (as_regex, ('National-Geographic',), 'National Geographic'),
        (as_string, ('NBA', 'NBATV'), 'NBA TV'),
        (as_regex, ('NBA-TV',), 'NBA TV'),
        (as_string, ('NBC',), 'NBC'),
        (as_string, ('NFL',), 'NFL'),
        (as_string, ('NICK', 'Nickelodeon'), 'Nickelodeon'),
        (as_string, ('NF', 'Netflix'), 'Netflix'),
        (as_string, ('iTunes',), 'iTunes'),
        (as_string, ('RTE',), 'RTÉ One'),
        (as_string, ('SESO', 'SeeSo'), 'SeeSo'),
        (as_string, ('SPKE', 'SpikeTV', 'Spike TV'), 'Spike TV'),
        (as_string, ('SYFY', 'Syfy'), 'Syfy'),
        (as_string, ('TFOU', 'TFou'), 'TFou'),
        (as_string, ('TLC',), 'TLC'),
        (as_string, ('TV3',), 'TV3 Ireland'),
        (as_string, ('TV4',), 'TV4 Sweeden'),
        (as_string, ('TVL', 'TVLand', 'TV Land'), 'TV Land'),
        (as_string, ('UFC',), 'UFC'),
        (as_string, ('USAN',), 'USA Network'),
    )
    for register, patterns, value in services:
        register(*patterns, value=value)

    rebulk.rules(ValidateStreamingService)

    return rebulk
Esempio n. 4
0
def video_codec():
    """
    Build the Rebulk object that detects ``video_codec``, ``video_profile``
    and ``video_api``.

    Codec patterns are registered first under the ``video_codec`` name; the
    defaults are then switched to ``video_profile`` (validated with
    ``seps_surround``) for the remaining patterns.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="video_codec")

    codec_table = (
        ((r"Rv\d{2}",), "Real"),
        (("Mpeg2",), "Mpeg2"),
        (("DVDivX", "DivX"), "DivX"),
        (("XviD",), "XviD"),
        (("[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVCHD"), "h264"),
        (("[hx]-?265(?:-?HEVC)?", "HEVC"), "h265"),
    )
    for patterns, codec in codec_table:
        rebulk.regex(*patterns, value=codec)

    # Profile references:
    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    rebulk.defaults(name="video_profile", validator=seps_surround)

    rebulk.regex('10.?bit', 'Hi10P', value='10bit')
    rebulk.regex('8.?bit', value='8bit')

    # Profiles below carry the tag consumed by the video profile rule.
    rule_tag = 'video_profile.rule'
    string_profiles = (
        (('BP',), 'BP'),
        (('XP', 'EP'), 'XP'),
        (('MP',), 'MP'),
        (('HP', 'HiP'), 'HP'),
    )
    for patterns, profile in string_profiles:
        rebulk.string(*patterns, value=profile, tags=rule_tag)
    for profile in ('Hi422P', 'Hi444PP'):
        rebulk.regex(profile, value=profile, tags=rule_tag)

    rebulk.string('DXVA', value='DXVA', name='video_api')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
Esempio n. 5
0
def edition():
    """
    Build the Rebulk object that detects the ``edition`` property.

    Patterns are case-insensitive, regexes use the ``dash`` abbreviation, and
    every match is validated with ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def yield_to_special_episode(match, other):
        """
        Return the other match when it is an ``episode_details`` valued
        'Special'; otherwise ask for the default conflict resolution.
        """
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    def neighbor_prefix_tags():
        """Return a fresh ['has-neighbor', 'release-group-prefix'] list."""
        return ['has-neighbor', 'release-group-prefix']

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', 'collector-edition', 'edition-collector',
                 value='Collector Edition')
    rebulk.regex('special-edition', 'edition-special',
                 value='Special Edition',
                 conflict_solver=yield_to_special_episode)
    rebulk.string('se', value='Special Edition', tags='has-neighbor')
    rebulk.regex('criterion-edition', 'edition-criterion',
                 value='Criterion Edition')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe',
                 value='Deluxe Edition')
    rebulk.regex('limited', 'limited-edition',
                 value='Limited Edition',
                 tags=neighbor_prefix_tags())
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical',
                 value='Theatrical Edition')
    rebulk.regex(r"director'?s?-cut",
                 r"director'?s?-cut-edition",
                 r"edition-director'?s?-cut",
                 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended',
                 tags=neighbor_prefix_tags())
    rebulk.regex('alternat(e|ive)(?:-?Cut)?',
                 value='Alternative Cut',
                 tags=neighbor_prefix_tags())

    # These words are both the pattern and the reported value.
    for word in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(word, value=word, tags=neighbor_prefix_tags())

    rebulk.string('Festival',
                  value='Festival',
                  tags=['has-neighbor-before', 'has-neighbor-after'])

    return rebulk
Esempio n. 6
0
def path():
    """
    Build the Rebulk object that marks path elements.

    Registers a single functional pattern, named ``path`` and flagged as a
    marker, that splits the input on ``/`` and ``\\``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="path", marker=True)

    def mark_path(input_string, context):
        """
        Functional pattern to mark path elements.

        :param input_string: string to split into path elements
        :param context: options mapping; a truthy ``name_only`` entry makes the
            whole string a single element
        :return: list of ``(start, end)`` spans, one per path element
        """
        if context.get('name_only', False):
            return [(0, len(input_string))]

        # Separator positions, framed by virtual separators just before the
        # first character (-1) and just after the last one (len).
        boundaries = list(find_all(input_string, '/'))
        boundaries.extend(find_all(input_string, '\\'))
        boundaries.extend((-1, len(input_string)))
        boundaries.sort()

        # Each element spans from just after one separator up to the next.
        return [(previous + 1, following)
                for previous, following in zip(boundaries, boundaries[1:])]

    rebulk.functional(mark_path)
    return rebulk
Esempio n. 7
0
def video_codec():
    """
    Build the Rebulk object that detects ``video_codec``, ``video_profile``
    and ``video_api``.

    Codec regexes are registered under ``video_codec``; afterwards the defaults
    switch to ``video_profile`` with ``seps_surround`` validation for the bit
    depth and profile patterns.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="video_codec")

    # (value, patterns) in registration order.
    codecs = [
        ("Real", [r"Rv\d{2}"]),
        ("Mpeg2", ["Mpeg2"]),
        ("DivX", ["DVDivX", "DivX"]),
        ("XviD", ["XviD"]),
        ("h264", ["[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVCHD"]),
        ("h265", ["[hx]-?265(?:-?HEVC)?", "HEVC"]),
    ]
    for codec, expressions in codecs:
        rebulk.regex(*expressions, value=codec)

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    rebulk.defaults(name="video_profile", validator=seps_surround)

    rebulk.regex('10.?bits?', 'Hi10P', value='10bit')
    rebulk.regex('8.?bits?', value='8bit')

    # (registration method, value, patterns); all share the rule tag.
    profiles = [
        (rebulk.string, 'BP', ['BP']),
        (rebulk.string, 'XP', ['XP', 'EP']),
        (rebulk.string, 'MP', ['MP']),
        (rebulk.string, 'HP', ['HP', 'HiP']),
        (rebulk.regex, 'Hi422P', ['Hi422P']),
        (rebulk.regex, 'Hi444PP', ['Hi444PP']),
    ]
    for register, profile, expressions in profiles:
        register(*expressions, value=profile, tags='video_profile.rule')

    rebulk.string('DXVA', value='DXVA', name='video_api')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
Esempio n. 8
0
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that detects bit rates.

    Matches are registered under ``audio_bit_rate``; the ``BitRateTypeRule``
    rule is attached afterwards.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def both_bit_rates_disabled(context):
        """Tell whether audio and video bit rates are both disabled."""
        return (is_disabled(context, 'audio_bit_rate')
                and is_disabled(context, 'video_bit_rate'))

    def solve_conflict(match, other):
        """
        Keep the bit rate over an ``audio_channels`` match that is not tagged
        'weak-audio_channels'; in every other case keep the other match.
        """
        if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags:
            return match
        return other

    rebulk = Rebulk(disabled=both_bit_rates_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)

    # Integer form first, then decimal form.
    bit_rate_patterns = (
        r'\d+-?[kmg]b(ps|its?)',
        r'\d+\.\d+-?[kmg]b(ps|its?)',
    )
    rebulk.regex(*bit_rate_patterns,
                 conflict_solver=solve_conflict,
                 formatter=BitRate.fromstring,
                 tags=['release-group-prefix'])

    rebulk.rules(BitRateTypeRule)

    return rebulk
Esempio n. 9
0
def streaming_service(config):
    """Streaming service property.

    ``config`` maps each output value to a single pattern string or a list of
    pattern strings. A pattern starting with ``re:`` is registered as a regular
    expression built from the text after that marker; any other pattern is
    registered as a case-insensitive plain string.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(
        disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(
        flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    regex_prefix = 're:'

    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith(regex_prefix):
                # The marker only selects the pattern type; it must not end up
                # inside the compiled expression.
                rebulk.regex(pattern[len(regex_prefix):], value=value)
            else:
                rebulk.string(pattern, value=value)

    rebulk.rules(ValidateStreamingService)

    return rebulk
Esempio n. 10
0
def format_():
    """
    Build the Rebulk object that detects the ``format`` property.

    All patterns are case-insensitive regexes using the ``dash`` abbreviation;
    the ``ValidateFormat`` rule is attached afterwards.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name="format")

    # (patterns, value) pairs, registered in declaration order.
    formats = (
        (("VHS", "VHS-?Rip"), "VHS"),
        (("CAM", "CAM-?Rip", "HD-?CAM"), "Cam"),
        (("TELESYNC", "TS", "HD-?TS"), "Telesync"),
        (("WORKPRINT", "WP"), "Workprint"),
        (("TELECINE", "TC"), "Telecine"),
        # Pay Per View
        (("PPV", "PPV-?Rip"), "PPV"),
        # A bare "TV" is left out: it is too common to allow matching.
        (("SD-?TV", "SD-?TV-?Rip", "Rip-?SD-?TV", "TV-?Rip", "Rip-?TV"), "TV"),
        (("DVB-?Rip", "DVB", "PD-?TV"), "DVB"),
        # "DVD-?R(?:$|^E)" => DVD-Real ...
        (("DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))",
          "DVD-?9", "DVD-?5"), "DVD"),
        (("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP"), "HDTV"),
        (("VOD", "VOD-?Rip"), "VOD"),
        (("WEB-?Rip",), "WEBRip"),
        (("WEB-?DL", "WEB-?HD", "WEB"), "WEB-DL"),
        (("HD-?DVD-?Rip", "HD-?DVD"), "HD-DVD"),
        (("Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]",
          "BD25", "BD50"), "BluRay"),
    )
    for patterns, value in formats:
        rebulk.regex(*patterns, value=value)

    rebulk.rules(ValidateFormat)

    return rebulk
Esempio n. 11
0
def website():
    """
    Build the Rebulk object that detects the ``website`` property.

    Three case-insensitive patterns are registered, each capturing the host
    name between non-alphanumeric boundaries:

    * one or more safe subdomains, one or more further labels, any listed TLD;
    * optional safe subdomains, a single label, a safe TLD;
    * optional safe subdomains, a single label, one or more safe prefixes,
      any listed TLD.

    When ``REGEX_AVAILABLE`` is true the patterns use the ``\\L<name>``
    named-list syntax fed by keyword arguments; otherwise the alternations are
    built with ``build_or_pattern``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="website")

    # Close the resource stream explicitly instead of leaving it to the
    # garbage collector.
    tld_stream = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
    try:
        tld_lines = tld_stream.readlines()
    finally:
        tld_stream.close()

    # Lines containing b'--' are skipped, then the first remaining line is
    # dropped (presumably the file's header line -- confirm against the data).
    tlds = [
        line.strip().decode('utf-8') for line in tld_lines
        if b'--' not in line
    ][1:]  # All registered domain extension

    safe_tlds = ['com', 'org', 'net']  # For sure a website extension
    safe_subdomains = ['www']  # For sure a website subdomain
    safe_prefix = ['co', 'com', 'org',
                   'net']  # Those words before a tlds are sure

    if REGEX_AVAILABLE:
        rebulk.regex(
            r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)+(?:[a-z-]+\.)+(?:\L<tlds>))(?:[^a-z0-9]|$)',
            safe_subdomains=safe_subdomains,
            tlds=tlds,
            children=True)
        rebulk.regex(
            r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)*[a-z-]+\.(?:\L<safe_tlds>))(?:[^a-z0-9]|$)',
            safe_subdomains=safe_subdomains,
            safe_tlds=safe_tlds,
            children=True)
        rebulk.regex(
            r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)*[a-z-]+\.(?:\L<safe_prefix>\.)+(?:\L<tlds>))(?:[^a-z0-9]|$)',
            safe_subdomains=safe_subdomains,
            safe_prefix=safe_prefix,
            tlds=tlds,
            children=True)
    else:
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:' +
                     build_or_pattern(safe_subdomains) +
                     r'\.)+(?:[a-z-]+\.)+(?:' + build_or_pattern(tlds) +
                     r'))(?:[^a-z0-9]|$)',
                     children=True)
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:' +
                     build_or_pattern(safe_subdomains) + r'\.)*[a-z-]+\.(?:' +
                     build_or_pattern(safe_tlds) + r'))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains,
                     safe_tlds=safe_tlds,
                     children=True)
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:' +
                     build_or_pattern(safe_subdomains) + r'\.)*[a-z-]+\.(?:' +
                     build_or_pattern(safe_prefix) + r'\.)+(?:' +
                     build_or_pattern(tlds) + r'))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains,
                     safe_prefix=safe_prefix,
                     tlds=tlds,
                     children=True)

    return rebulk
Esempio n. 12
0
def format_():
    """
    Build the Rebulk object that detects the ``format`` property.

    All patterns are case-insensitive regexes using the ``dash`` abbreviation;
    the ``ValidateFormat`` rule is attached afterwards.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name="format")

    # (value, patterns) pairs, registered in declaration order.
    format_table = [
        ("VHS", ["VHS", "VHS-?Rip"]),
        ("Cam", ["CAM", "CAM-?Rip", "HD-?CAM"]),
        ("Telesync", ["TELESYNC", "TS", "HD-?TS"]),
        ("Workprint", ["WORKPRINT", "WP"]),
        ("Telecine", ["TELECINE", "TC"]),
        # Pay Per View
        ("PPV", ["PPV", "PPV-?Rip"]),
        # TV is too common to allow matching on its own.
        ("TV", ["SD-?TV", "SD-?TV-?Rip", "Rip-?SD-?TV", "TV-?Rip", "Rip-?TV"]),
        ("DVB", ["DVB-?Rip", "DVB", "PD-?TV"]),
        # "DVD-?R(?:$|^E)" => DVD-Real ...
        ("DVD", ["DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))",
                 "DVD-?9", "DVD-?5"]),
        ("HDTV", ["HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP"]),
        ("VOD", ["VOD", "VOD-?Rip"]),
        ("WEBRip", ["WEB-?Rip", "WEB-?DL-?Rip", "WEB-?Cap"]),
        ("WEB-DL", ["WEB-?DL", "WEB-?HD", "WEB"]),
        ("HD-DVD", ["HD-?DVD-?Rip", "HD-?DVD"]),
        ("BluRay", ["Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]",
                    "BD25", "BD50"]),
        ("AHDTV", ["AHDTV"]),
        ("HDTC", ["HDTC"]),
        ("SATRip", ["DSR", "DSR?-?Rip", "SAT-?Rip", "DTH", "DTH-?Rip"]),
    ]
    for label, expressions in format_table:
        rebulk.regex(*expressions, value=label)

    rebulk.rules(ValidateFormat)

    return rebulk
Esempio n. 13
0
def edition():
    """
    Build the Rebulk object that detects the ``edition`` property.

    Patterns are case-insensitive, regexes use the ``dash`` abbreviation, and
    every match is validated with ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def special_edition_solver(match, other):
        """
        Give way to an ``episode_details`` match valued 'Special'; otherwise
        request the default conflict resolution.
        """
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    add_regex = rebulk.regex
    add_string = rebulk.string
    # Copied with list() for every use so no two patterns share one tag list.
    neighbor_prefix = ('has-neighbor', 'release-group-prefix')

    # (registration method, patterns, keyword options) in original order.
    registrations = [
        (add_regex, ('collector', 'collector-edition', 'edition-collector'),
         {'value': 'Collector Edition'}),
        (add_regex, ('special-edition', 'edition-special'),
         {'value': 'Special Edition',
          'conflict_solver': special_edition_solver}),
        (add_string, ('se',),
         {'value': 'Special Edition', 'tags': 'has-neighbor'}),
        (add_regex, ('criterion-edition', 'edition-criterion'),
         {'value': 'Criterion Edition'}),
        (add_regex, ('deluxe', 'deluxe-edition', 'edition-deluxe'),
         {'value': 'Deluxe Edition'}),
        (add_regex, ('limited', 'limited-edition'),
         {'value': 'Limited Edition', 'tags': list(neighbor_prefix)}),
        (add_regex, (r'theatrical-cut', r'theatrical-edition', r'theatrical'),
         {'value': 'Theatrical Edition'}),
        (add_regex,
         (r"director'?s?-cut", r"director'?s?-cut-edition",
          r"edition-director'?s?-cut", 'DC'),
         {'value': "Director's Cut"}),
        (add_regex, ('extended', 'extended-?cut', 'extended-?version'),
         {'value': 'Extended', 'tags': list(neighbor_prefix)}),
        (add_regex, ('alternat(e|ive)(?:-?Cut)?',),
         {'value': 'Alternative Cut', 'tags': list(neighbor_prefix)}),
    ]
    # Words that are both the pattern and the reported value.
    for word in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        registrations.append(
            (add_string, (word,),
             {'value': word, 'tags': list(neighbor_prefix)}))
    registrations.append(
        (add_string, ('Festival',),
         {'value': 'Festival',
          'tags': ['has-neighbor-before', 'has-neighbor-after']}))

    for register, patterns, options in registrations:
        register(*patterns, **options)

    return rebulk
Esempio n. 14
0
def screen_size():
    """
    Build the Rebulk object that detects the ``screen_size`` property.

    Fixed-height patterns (optionally preceded by a width and ``x`` or ``*``)
    map to values such as ``720p``; a generic ``<width>x<height>`` pattern,
    tagged ``resolution``, is formatted as the digit groups joined by ``x``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    _digits_re = re.compile(r'\d+')

    def conflict_solver(match, other):
        """
        Conflict solver for most screen_size.
        """
        if other.name != 'screen_size':
            return '__default__'
        if 'resolution' in other.tags:
            # Workaround for strings like "720 x 432", which match both a
            # height pattern and the resolution pattern: keep this match when
            # the resolution value starts with this match's last number.
            last_number = _digits_re.findall(match.raw)[-1]
            if other.value.startswith(last_number):
                return match
        return other

    def format_resolution(value):
        """Join the digit groups found in the raw value with 'x'."""
        return 'x'.join(_digits_re.findall(value))

    def resolution_solver(match, other):
        """Use default resolution against screen_size, else keep the other."""
        if other.name == 'screen_size':
            return '__default__'
        return other

    rebulk = Rebulk()
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="screen_size",
                    validator=seps_surround,
                    conflict_solver=conflict_solver)

    # Optional "<width>x" / "<width>*" prefix shared by all height patterns.
    width_prefix = r'(?:\d{3,}(?:x|\*))?'

    # (height part of the pattern, value) in registration order.
    heights = (
        (r'360(?:i)', '360i'),
        (r'360(?:p?x?)', '360p'),
        (r'368(?:p?x?)', '368p'),
        (r'480(?:i)', '480i'),
        (r'480(?:p?x?)', '480p'),
        (r'576(?:i)', '576i'),
        (r'576(?:p?x?)', '576p'),
        (r'720(?:p?(?:50|60)?x?)', '720p'),
        (r'720(?:p(?:50|60)?x?)', '720p'),
        (r'720p?hd', '720p'),
        (r'900(?:i)', '900i'),
        (r'900(?:p?x?)', '900p'),
        (r'1080i', '1080i'),
        (r'1080p?x?', '1080p'),
        (r'1080(?:p(?:50|60)?x?)', '1080p'),
        (r'1080p?hd', '1080p'),
        (r'2160(?:p?x?)', '2160p'),
    )
    for height_part, value in heights:
        rebulk.regex(width_prefix + height_part, value=value)
    rebulk.string('4k', value='2160p')
    rebulk.regex(width_prefix + r'4320(?:p?x?)', value='4320p')

    rebulk.defaults(name="screen_size", validator=seps_surround)
    rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
                 formatter=format_resolution,
                 abbreviations=[dash],
                 tags=['resolution'],
                 conflict_solver=resolution_solver)

    rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)

    return rebulk
Esempio n. 15
0
def container():
    """
    Build the Rebulk object that detects the ``container`` property.

    Extensions are first matched as a trailing ``.ext`` regex (tagged
    ``extension`` plus a category tag), then as bare strings validated with
    ``seps_surround`` and lower-cased.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def extension_solver(match, other):
        """
        Yield to format/video_codec matches and to container matches that are
        not tagged 'extension'; otherwise request default resolution.
        """
        if other.name in ['format', 'video_codec']:
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def bare_name_solver(match, other):
        """
        Win over format/video_codec matches and over container matches tagged
        'extension'; otherwise request default resolution.
        """
        if other.name in ['format', 'video_codec']:
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=extension_solver)

    subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
    info = ['nfo']
    videos = [
        '3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2', 'mka',
        'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm', 'ogv', 'qt',
        'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv', 'iso', 'vob'
    ]
    torrent = ['torrent']
    nzb = ['nzb']

    # File-extension form: ".<ext>" anchored at the end of the input.
    extension_groups = (
        (subtitles, 'subtitle'),
        (info, 'info'),
        (videos, 'video'),
        (torrent, 'torrent'),
        (nzb, 'nzb'),
    )
    for exts, category in extension_groups:
        rebulk.regex(r'\.' + build_or_pattern(exts) + '$',
                     exts=exts,
                     tags=['extension', category])

    rebulk.defaults(name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=bare_name_solver)

    # Bare-word form: 'sub' is excluded and the info group is not registered.
    bare_subtitles = [ext for ext in subtitles if ext != 'sub']
    word_groups = (
        (bare_subtitles, 'subtitle'),
        (videos, 'video'),
        (torrent, 'torrent'),
        (nzb, 'nzb'),
    )
    for words, category in word_groups:
        rebulk.string(*words, tags=[category])

    return rebulk
Esempio n. 16
0
def container(config):
    """
    Build the Rebulk object that detects the ``container`` property.

    Extension lists are read from the ``subtitles``, ``info``, ``videos``,
    ``torrent`` and ``nzb`` entries of ``config``. Each list is matched as a
    trailing ``.ext`` regex; all but ``info`` are also matched as bare strings.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def extension_solver(match, other):
        """
        Yield to source/video_codec matches and to container matches that are
        not tagged 'extension'; otherwise request default resolution.
        """
        if other.name in ('source', 'video_codec'):
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def bare_name_solver(match, other):
        """
        Win over source/video_codec matches and over container matches tagged
        'extension'; otherwise request default resolution.
        """
        if other.name in ('source', 'video_codec'):
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=extension_solver)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    # File-extension form: ".<ext>" anchored at the end of the input.
    extension_groups = (
        (subtitles, 'subtitle'),
        (info, 'info'),
        (videos, 'video'),
        (torrent, 'torrent'),
        (nzb, 'nzb'),
    )
    for exts, category in extension_groups:
        rebulk.regex(r'\.' + build_or_pattern(exts) + '$',
                     exts=exts,
                     tags=['extension', category])

    rebulk.defaults(clear=True,
                    name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=bare_name_solver)

    # Bare-word form: 'sub' and 'ass' are excluded from the subtitle words.
    bare_subtitles = [ext for ext in subtitles if ext not in ('sub', 'ass')]
    word_groups = (
        (bare_subtitles, 'subtitle'),
        (videos, 'video'),
        (torrent, 'torrent'),
        (nzb, 'nzb'),
    )
    for words, category in word_groups:
        rebulk.string(*words, tags=[category])

    return rebulk
Esempio n. 17
0
def website():
    """
    Build the Rebulk object that detects the ``website`` property.

    Three case-insensitive patterns are registered, each capturing the host
    name between non-alphanumeric boundaries:

    * one or more safe subdomains, one or more further labels, any listed TLD;
    * optional safe subdomains, a single label, a safe TLD;
    * optional safe subdomains, a single label, one or more safe prefixes,
      any listed TLD.

    A ``PreferTitleOverWebsite`` rule then removes website matches that are
    followed by a season, episode or year match.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="website")

    # Close the resource stream explicitly instead of leaving it to the
    # garbage collector.
    tld_stream = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
    try:
        tld_lines = tld_stream.readlines()
    finally:
        tld_stream.close()

    # Lines containing b'--' are skipped, then the first remaining line is
    # dropped (presumably the file's header line -- confirm against the data).
    tlds = [line.strip().decode('utf-8')
            for line in tld_lines
            if b'--' not in line][1:]  # All registered domain extension

    safe_tlds = ['com', 'org', 'net']  # For sure a website extension
    safe_subdomains = ['www']  # For sure a website subdomain
    safe_prefix = ['co', 'com', 'org', 'net']  # Those words before a tlds are sure

    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 children=True)
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
                 r'\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches

            :param match: candidate match following a website match
            :return: True when the match is named season, episode or year
            """
            return any(name in ['season', 'episode', 'year'] for name in match.names)

        def when(self, matches, context):
            """
            Collect the website matches that have a valid follower.

            :param matches: current matches
            :param context: processing context (unused here)
            :return: website matches to remove
            """
            to_remove = []
            for website_match in matches.named('website'):
                suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                if suffix:
                    to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite)

    return rebulk
Esempio n. 18
0
def screen_size(config):
    """
    Build the Rebulk object that detects ``screen_size`` and ``frame_rate``.

    Reads ``interlaced``, ``progressive``, ``frame_rates``, ``min_ar`` and
    ``max_ar`` from ``config``. Height patterns are built from the configured
    heights with named groups ``width``, ``height``, ``scan_type`` and
    ``frame_rate``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def four_k_solver(match, other):
        """Use default resolution against screen_size, else keep this match."""
        if other.name == 'screen_size':
            return '__default__'
        return match

    def resolution_solver(match, other):
        """Use default resolution against screen_size, else keep the other."""
        if other.name == 'screen_size':
            return '__default__'
        return other

    interlaced = frozenset(config['interlaced'])
    progressive = frozenset(config['progressive'])
    frame_rates = frozenset(config['frame_rates'])
    min_ar = config['min_ar']
    max_ar = config['max_ar']

    rebulk = Rebulk()
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.defaults(
        name='screen_size',
        validator=seps_surround,
        abbreviations=[dash],
        disabled=lambda context: is_disabled(context, 'screen_size'))

    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')

    # Optional "<width>x" / "<width>*" prefix.
    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    optional_frame_rate = frame_rate_pattern + '?'
    progressive_base = res_pattern + progressive_pattern

    height_patterns = (
        res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + optional_frame_rate,
        progressive_base + r'(?P<scan_type>p)' + optional_frame_rate,
        progressive_base + r'(?P<scan_type>p)?(?:hd)',
        progressive_base + r'(?P<scan_type>p)?x?',
    )
    for height_pattern in height_patterns:
        rebulk.regex(height_pattern)

    rebulk.string('4k', value='2160p', conflict_solver=four_k_solver)
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=resolution_solver)

    # Stand-alone frame rate, e.g. followed by "p" or "fps".
    rebulk.regex(frame_rate_pattern + '-?(?:p|fps)',
                 name='frame_rate',
                 formatter=FrameRate.fromstring,
                 disabled=lambda context: is_disabled(context, 'frame_rate'))

    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar),
                 ScreenSizeOnlyOne, ResolveScreenSizeConflicts)

    return rebulk
Esempio n. 19
0
def streaming_service():
    """Streaming service property.

    Every pattern is registered under the ``streaming_service`` name with the
    ``seps_surround`` validator. String patterns ignore case; regex patterns
    use ``re.IGNORECASE`` and the ``dash`` abbreviation.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk()
    builder = builder.string_defaults(ignore_case=True)
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder.defaults(name='streaming_service', validator=seps_surround)

    literal = builder.string
    pattern = builder.regex

    # (registration method, patterns, reported value), listed in registration order.
    registrations = (
        (literal, ('AE', 'A&E'), 'A&E'),
        (literal, ('AMBC',), 'ABC'),
        (literal, ('AMZN', 'AmazonPrime'), 'Amazon Prime'),
        (pattern, ('Amazon-Prime',), 'Amazon Prime'),
        (literal, ('AS', 'AdultSwim'), 'Adult Swim'),
        (pattern, ('Adult-Swim',), 'Adult Swim'),
        (literal, ('iP', 'BBCiPlayer'), 'BBC iPlayer'),
        (pattern, ('BBC-iPlayer',), 'BBC iPlayer'),
        (literal, ('CBS',), 'CBS'),
        (literal, ('CC', 'ComedyCentral'), 'Comedy Central'),
        (pattern, ('Comedy-Central',), 'Comedy Central'),
        (literal, ('CR', 'CrunchyRoll'), 'Crunchy Roll'),
        (pattern, ('Crunchy-Roll',), 'Crunchy Roll'),
        (literal, ('CW', 'TheCW'), 'The CW'),
        (pattern, ('The-CW',), 'The CW'),
        (literal, ('DISC', 'Discovery'), 'Discovery'),
        (literal, ('DSNY', 'Disney'), 'Disney'),
        (literal, ('EPIX', 'ePix'), 'ePix'),
        (literal, ('HBO', 'HBOGo'), 'HBO Go'),
        (pattern, ('HBO-Go',), 'HBO Go'),
        (literal, ('HIST', 'History'), 'History'),
        (literal, ('IFC', 'IFC'), 'IFC'),
        (literal, ('PBS', 'PBS'), 'PBS'),
        (literal, ('NATG', 'NationalGeographic'), 'National Geographic'),
        (pattern, ('National-Geographic',), 'National Geographic'),
        (literal, ('NBA', 'NBATV'), 'NBA TV'),
        (pattern, ('NBA-TV',), 'NBA TV'),
        (literal, ('NBC',), 'NBC'),
        (literal, ('NFL',), 'NFL'),
        (literal, ('NICK', 'Nickelodeon'), 'Nickelodeon'),
        (literal, ('NF', 'Netflix'), 'Netflix'),
        (literal, ('SESO', 'SeeSo'), 'SeeSo'),
        (literal, ('SPKE', 'SpikeTV', 'Spike TV'), 'Spike TV'),
        (literal, ('SYFY', 'Syfy'), 'Syfy'),
        (literal, ('TFOU', 'TFou'), 'TFou'),
        (literal, ('TVL', 'TVLand', 'TV Land'), 'TV Land'),
        (literal, ('UFC',), 'UFC'),
    )
    for register, patterns, value in registrations:
        register(*patterns, value=value)

    builder.rules(ValidateStreamingService)

    return builder
Esempio n. 20
0
def screen_size():
    """
    Builder for rebulk object.

    Registers a fixed list of ``screen_size`` patterns (reported as 360p up to
    4K), then a generic ``<digits>x<digits>`` pattern tagged ``resolution``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    digits_re = re.compile(r'\d+')

    def solve_conflict(match, other):
        """
        Conflict solver for most screen_size.
        """
        if other.name != 'screen_size':
            return '__default__'
        if 'resolution' in other.tags:
            # Workaround for strings such as "720 x 432", which match both the 720p
            # pattern and the generic resolution pattern: compare the last number of
            # this match with the beginning of the resolution value.
            last_number = digits_re.findall(match.raw)[-1]
            if other.value.startswith(last_number):
                return match
        return other

    def join_dimensions(value):
        """Join every run of digits found in the value with 'x'."""
        return 'x'.join(digits_re.findall(value))

    def resolution_conflict_solver(match, other):
        """Return '__default__' against another screen_size, the other match otherwise."""
        if other.name == 'screen_size':
            return '__default__'
        return other

    builder = Rebulk()
    builder = builder.string_defaults(ignore_case=True)
    builder = builder.regex_defaults(flags=re.IGNORECASE)
    builder.defaults(name="screen_size", validator=seps_surround, conflict_solver=solve_conflict)

    # Optional "<width>x" / "<width>*" prefix shared by all fixed patterns.
    width_prefix = r"(?:\d{3,}(?:x|\*))?"
    fixed_sizes = (
        (r"360(?:i|p?x?)", "360p"),
        (r"368(?:i|p?x?)", "368p"),
        (r"480(?:i|p?x?)", "480p"),
        (r"576(?:i|p?x?)", "576p"),
        (r"720(?:i|p?(?:50|60)?x?)", "720p"),
        (r"720(?:p(?:50|60)?x?)", "720p"),
        (r"720p?hd", "720p"),
        (r"900(?:i|p?x?)", "900p"),
        (r"1080i", "1080i"),
        (r"1080p?x?", "1080p"),
        (r"1080(?:p(?:50|60)?x?)", "1080p"),
        (r"1080p?hd", "1080p"),
        (r"2160(?:i|p?x?)", "4K"),
    )
    for height_pattern, reported in fixed_sizes:
        builder.regex(width_prefix + height_pattern, value=reported)
    builder.string('4k', value='4K')

    builder.defaults(name="screen_size", validator=seps_surround)
    builder.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
                  formatter=join_dimensions,
                  abbreviations=[dash],
                  tags=['resolution'],
                  conflict_solver=resolution_conflict_solver)

    builder.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)

    return builder
Esempio n. 21
0
def size(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers two case-insensitive regex patterns (integer and decimal amounts
    followed by ``mb``/``gb``/``tb``) under the ``size`` name; matches are
    formatted with ``Size.fromstring``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def size_disabled(context):
        """Delegate to ``is_disabled`` for the 'size' property."""
        return is_disabled(context, 'size')

    builder = Rebulk(disabled=size_disabled)
    builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder.defaults(name='size', validator=seps_surround)

    size_patterns = (r'\d+-?[mgt]b', r'\d+\.\d+-?[mgt]b')
    builder.regex(*size_patterns, formatter=Size.fromstring, tags=['release-group-prefix'])

    return builder
Esempio n. 22
0
def country(config, common_words):
    """
    Builder for rebulk object.

    Also installs a ``GuessitCountryConverter`` built from ``config['synonyms']``
    as the ``'guessit'`` entry of ``babelfish.country_converters``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def country_disabled(context):
        """Delegate to ``is_disabled`` for the 'country' property."""
        return is_disabled(context, 'country')

    def no_allowed_countries(context):
        """True when the context has no truthy 'allowed_countries' entry."""
        return not context.get('allowed_countries')

    def find_countries(string, context=None):
        """
        Find countries in given string.
        """
        allowed_countries = None
        if context:
            allowed_countries = context.get('allowed_countries')
        finder = CountryFinder(allowed_countries, common_words)
        return finder.find(string)

    def solve_conflict(match, other):
        """
        Return ``other`` only when it is a language and this country is US or GB.

        Source comment: prefer language and any other property over country if
        not US or GB.
        """
        if other.name != 'language':
            return match
        if match.value not in (babelfish.Country('US'), babelfish.Country('GB')):
            return match
        return other

    builder = Rebulk(disabled=country_disabled)
    builder = builder.defaults(name='country')

    builder.functional(
        find_countries,
        conflict_solver=solve_conflict,
        properties={'country': [None]},
        disabled=no_allowed_countries)

    babelfish.country_converters['guessit'] = GuessitCountryConverter(
        config['synonyms'])

    return builder
Esempio n. 23
0
def edition(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    The patterns themselves come from ``config.get('edition')`` and are
    registered through ``load_config_patterns``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def edition_disabled(context):
        """Delegate to ``is_disabled`` for the 'edition' property."""
        return is_disabled(context, 'edition')

    builder = Rebulk(disabled=edition_disabled)
    with_regex_defaults = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    with_regex_defaults.string_defaults(ignore_case=True)
    builder.defaults(name='edition', validator=seps_surround)

    edition_patterns = config.get('edition')
    load_config_patterns(builder, edition_patterns)

    return builder
Esempio n. 24
0
def country(config, common_words):
    """
    Builder for rebulk object.

    Besides building the patterns, this registers a ``GuessitCountryConverter``
    (created from ``config['synonyms']``) under the ``'guessit'`` key of
    ``babelfish.country_converters``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def is_country_disabled(context):
        """Delegate to ``is_disabled`` for the 'country' property."""
        return is_disabled(context, 'country')

    def lacks_allowed_countries(context):
        """True when 'allowed_countries' is missing or falsy in the context."""
        return not context.get('allowed_countries')

    def find_countries(string, context=None):
        """
        Find countries in given string.
        """
        if context:
            allowed = context.get('allowed_countries')
        else:
            allowed = None
        return CountryFinder(allowed, common_words).find(string)

    def country_conflict_solver(match, other):
        """
        Return ``match`` unless ``other`` is a language and the country is US or GB.

        Source comment: prefer language and any other property over country if
        not US or GB.
        """
        if other.name == 'language':
            if match.value in (babelfish.Country('US'), babelfish.Country('GB')):
                return other
        return match

    builder = Rebulk(disabled=is_country_disabled)
    builder = builder.defaults(name='country')

    builder.functional(find_countries,
                       conflict_solver=country_conflict_solver,
                       properties={'country': [None]},
                       disabled=lacks_allowed_countries)

    synonyms = config['synonyms']
    babelfish.country_converters['guessit'] = GuessitCountryConverter(synonyms)

    return builder
Esempio n. 25
0
def size():
    """
    Builder for rebulk object.

    Registers two case-insensitive regex patterns for amounts followed by
    ``mb``/``gb``/``tb`` under the ``size`` name.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # A '.' that follows a digit and is followed by a non-digit character.
    dot_before_unit = r'(?<=\d)[.](?=[^\d])'

    def format_size(value):
        """Uppercase the value and drop a dot sitting between a digit and a non-digit."""
        uppercased = value.upper()
        return re.sub(dot_before_unit, '', uppercased)

    builder = Rebulk()
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder.defaults(name='size', validator=seps_surround)

    size_patterns = (r'\d+\.?[mgt]b', r'\d+\.\d+[mgt]b')
    builder.regex(*size_patterns, formatter=format_size, tags=['release-group-prefix'])

    return builder
Esempio n. 26
0
def groups(config):
    """
    Builder for rebulk object.

    Registers a functional marker pattern named ``group`` that reports the span
    of every delimited group found in the input string. Opening delimiters are
    read from ``config['starting']`` and the matching closing delimiters, at the
    same index, from ``config['ending']``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    :raises ConfigurationException: if ``starting`` and ``ending`` differ in length
    """
    rebulk = Rebulk()
    rebulk.defaults(name="group", marker=True)

    starting = config['starting']
    ending = config['ending']

    if len(starting) != len(ending):
        raise ConfigurationException("Starting and ending groups must have the same length")

    def mark_groups(input_string):
        """
        Functional pattern to mark groups (...), [...] and {...}.

        :param input_string:
        :return: list of ``(start, end)`` tuples, ``end`` being exclusive
        """
        # One independent stack per delimiter type. ``([], ) * n`` would repeat a
        # reference to a single list, so every type would share one stack and an
        # opening "(" could be closed by "]".
        openings = tuple([] for _ in starting)

        ret = []
        for index, char in enumerate(input_string):
            start_type = starting.find(char)
            if start_type > -1:
                openings[start_type].append(index)

            end_type = ending.find(char)
            # A closing delimiter without a pending opening of the same type is ignored.
            if end_type > -1 and openings[end_type]:
                ret.append((openings[end_type].pop(), index + 1))
        return ret

    rebulk.functional(mark_groups)
    return rebulk
Esempio n. 27
0
def container(config):
    """
    Builder for rebulk object.

    Two families of ``container`` patterns are registered: regex patterns for a
    trailing ``.<ext>`` (tagged ``extension``), then plain string patterns
    validated with ``seps_surround``. Extension lists are read from the
    ``subtitles``, ``info``, ``videos``, ``torrent`` and ``nzb`` config keys.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def container_disabled(context):
        """Delegate to ``is_disabled`` for the 'container' property."""
        return is_disabled(context, 'container')

    def strip_separators(value):
        """Strip separator characters from both ends of the value."""
        return value.strip(seps)

    def extension_conflict_solver(match, other):
        """Conflict solver used by the extension regex patterns."""
        if other.name in ('source', 'video_codec'):
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def lowercase(s):
        """Lowercase the matched value."""
        return s.lower()

    def string_conflict_solver(match, other):
        """Conflict solver used by the plain string patterns."""
        if other.name in ('source', 'video_codec'):
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    builder = Rebulk(disabled=container_disabled)
    builder = builder.regex_defaults(flags=re.IGNORECASE)
    builder = builder.string_defaults(ignore_case=True)
    builder.defaults(name='container',
                     formatter=strip_separators,
                     tags=['extension'],
                     conflict_solver=extension_conflict_solver)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    # File extension patterns: a literal dot, one of the extensions, end of string.
    extension_groups = (
        (subtitles, 'subtitle'),
        (info, 'info'),
        (videos, 'video'),
        (torrent, 'torrent'),
        (nzb, 'nzb'),
    )
    for exts, tag in extension_groups:
        builder.regex(r'\.' + build_or_pattern(exts) + '$', exts=exts, tags=['extension', tag])

    builder.defaults(name='container',
                     validator=seps_surround,
                     formatter=lowercase,
                     conflict_solver=string_conflict_solver)

    # 'sub' and 'ass' are left out of the plain string patterns.
    subtitle_words = [sub for sub in subtitles if sub not in ('sub', 'ass')]
    builder.string(*subtitle_words, tags=['subtitle'])
    builder.string(*videos, tags=['video'])
    builder.string(*torrent, tags=['torrent'])
    builder.string(*nzb, tags=['nzb'])

    return builder
Esempio n. 28
0
def size():
    """
    Builder for rebulk object.

    Registers the ``size`` regex patterns; matched values are uppercased and
    lose a dot placed between a digit and a non-digit character.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def format_size(value):
        """Uppercase the value and remove a '.' between a digit and a non-digit."""
        shouted = value.upper()
        cleaned = re.sub(r'(?<=\d)[.](?=[^\d])', '', shouted)
        return cleaned

    builder = Rebulk()
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder.defaults(name='size', validator=seps_surround)

    integer_size = r'\d+\.?[mgt]b'
    decimal_size = r'\d+\.\d+[mgt]b'
    builder.regex(integer_size, decimal_size, formatter=format_size, tags=['release-group-prefix'])

    return builder
Esempio n. 29
0
def container():
    """
    Builder for rebulk object.

    Registers ``container`` patterns for a hard-coded set of subtitle, info,
    video, torrent and nzb extensions: first as trailing ``.<ext>`` regex
    patterns tagged ``extension``, then as plain strings validated with
    ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def strip_separators(value):
        """Strip separator characters from both ends of the value."""
        return value.strip(seps)

    def extension_conflict_solver(match, other):
        """Conflict solver used by the extension regex patterns."""
        if other.name in ['format', 'video_codec']:
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def lowercase(s):
        """Lowercase the matched value."""
        return s.lower()

    def string_conflict_solver(match, other):
        """Conflict solver used by the plain string patterns."""
        if other.name in ['format', 'video_codec']:
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    builder = Rebulk()
    builder = builder.regex_defaults(flags=re.IGNORECASE)
    builder = builder.string_defaults(ignore_case=True)
    builder.defaults(name='container',
                     formatter=strip_separators,
                     tags=['extension'],
                     conflict_solver=extension_conflict_solver)

    subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
    info = ['nfo']
    videos = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
              'iso', 'vob']
    torrent = ['torrent']
    nzb = ['nzb']

    # File extension patterns: a literal dot, one of the extensions, end of string.
    for exts, tag in ((subtitles, 'subtitle'),
                      (info, 'info'),
                      (videos, 'video'),
                      (torrent, 'torrent'),
                      (nzb, 'nzb')):
        builder.regex(r'\.' + build_or_pattern(exts) + '$', exts=exts, tags=['extension', tag])

    builder.defaults(name='container',
                     validator=seps_surround,
                     formatter=lowercase,
                     conflict_solver=string_conflict_solver)

    # 'sub' is left out of the plain string patterns.
    subtitle_words = [sub for sub in subtitles if sub not in ['sub']]
    builder.string(*subtitle_words, tags=['subtitle'])
    builder.string(*videos, tags=['video'])
    builder.string(*torrent, tags=['torrent'])
    builder.string(*nzb, tags=['nzb'])

    return builder
Esempio n. 30
0
def edition():
    """
    Builder for rebulk object.

    Registers case-insensitive ``edition`` regex patterns for the Collector,
    Special, Criterion, Deluxe and Director's cut editions.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def special_conflict_solver(match, other):
        """Return ``other`` when it is an episode_details match valued 'Special'."""
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    builder = Rebulk()
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder = builder.string_defaults(ignore_case=True)
    builder.defaults(name='edition', validator=seps_surround)

    builder.regex('collector',
                  'collector-edition',
                  'edition-collector',
                  value='Collector Edition')
    builder.regex('special-edition',
                  'edition-special',
                  value='Special Edition',
                  conflict_solver=special_conflict_solver)
    builder.regex('criterion-edition',
                  'edition-criterion',
                  value='Criterion Edition')
    builder.regex('deluxe',
                  'deluxe-edition',
                  'edition-deluxe',
                  value='Deluxe Edition')
    builder.regex('director\'?s?-cut',
                  'director\'?s?-cut-edition',
                  'edition-director\'?s?-cut',
                  value='Director\'s cut')

    return builder
Esempio n. 31
0
def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    Patterns are loaded from ``config`` through ``load_config_patterns`` and
    registered under the ``streaming_service`` name with the ``source-prefix``
    tag.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def service_disabled(context):
        """Delegate to ``is_disabled`` for the 'streaming_service' property."""
        return is_disabled(context, 'streaming_service')

    builder = Rebulk(disabled=service_disabled)
    builder = builder.string_defaults(ignore_case=True)
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder.defaults(name='streaming_service', tags=['source-prefix'])

    load_config_patterns(builder, config)

    builder.rules(ValidateStreamingService)

    return builder
Esempio n. 32
0
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Patterns come from ``config.get('bit_rate')`` and are registered under the
    ``audio_bit_rate`` name; ``BitRateTypeRule`` is then added as a rule.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def both_bit_rates_disabled(context):
        """True only when both audio_bit_rate and video_bit_rate are disabled."""
        if not is_disabled(context, 'audio_bit_rate'):
            return False
        return is_disabled(context, 'video_bit_rate')

    builder = Rebulk(disabled=both_bit_rates_disabled)
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder.defaults(name='audio_bit_rate', validator=seps_surround)

    bit_rate_patterns = config.get('bit_rate')
    load_config_patterns(builder, bit_rate_patterns)

    builder.rules(BitRateTypeRule)

    return builder
Esempio n. 33
0
def crc():
    """
    Builder for rebulk object.

    Registers a ``crc32`` regex pattern (eight hexadecimal characters) and a
    ``uuid`` functional pattern backed by ``guess_idnumber``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def crc32_conflict_solver(match, other):
        """Return ``match`` against an episode or season, '__default__' otherwise."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    def uuid_conflict_solver(match, other):
        """Return ``match`` against an episode or season, '__default__' otherwise."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    builder = Rebulk()
    builder = builder.regex_defaults(flags=re.IGNORECASE)
    builder.defaults(validator=seps_surround)

    builder.regex('(?:[a-fA-F]|[0-9]){8}',
                  name='crc32',
                  conflict_solver=crc32_conflict_solver)

    builder.functional(guess_idnumber,
                       name='uuid',
                       conflict_solver=uuid_conflict_solver)
    return builder
Esempio n. 34
0
def other():
    """
    Builder for rebulk object.

    Registers the miscellaneous ``other`` patterns (fixes, propers, screeners,
    video standards, ...) and the rules that validate them.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def complete_validator(match):
        """Accept a seps-surrounded match whose stripped, lowercased raw text is not just "complete"."""
        return seps_surround(match) and match.raw.lower().strip(seps) != "complete"

    builder = Rebulk()
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder = builder.string_defaults(ignore_case=True)
    builder.defaults(name="other", validator=seps_surround)

    builder.regex('Audio-?Fix', 'Audio-?Fixed', value='AudioFix')
    builder.regex('Sync-?Fix', 'Sync-?Fixed', value='SyncFix')
    builder.regex('Dual-?Audio', value='DualAudio')
    builder.regex('ws', 'wide-?screen', value='WideScreen')
    builder.string('Netflix', 'NF', value='Netflix')

    builder.string('Real', 'Fix', 'Fixed',
                   value='Proper',
                   tags=['has-neighbor-before', 'has-neighbor-after'])
    builder.string('Proper', 'Repack', 'Rerip', value='Proper')
    builder.string('Fansub', value='Fansub', tags='has-neighbor')
    builder.string('Fastsub', value='Fastsub', tags='has-neighbor')

    builder.regex('(?:Seasons?-)?Complete',
                  value='Complete',
                  tags=['release-group-prefix'],
                  validator=complete_validator)
    builder.string('R5', 'RC', value='R5')
    builder.regex('Pre-?Air', value='Preair')

    # Words reported as-is, with only the default validator.
    plain_words = (
        'Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
        'CC', 'LD', 'MD', 'XXX')
    for word in plain_words:
        builder.string(word, value=word)

    # Words reported as-is and tagged 'has-neighbor' / 'release-group-prefix'.
    tagged_words = (
        'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail', 'Uncut',
        'Extended', 'Extended Cut')
    for word in tagged_words:
        builder.string(word, value=word, tags=['has-neighbor', 'release-group-prefix'])

    builder.string('VO', 'OV', value='OV', tags='has-neighbor')

    # The default validator is switched off here; the match carries the
    # 'other.validate.screener' tag instead.
    builder.regex('Scr(?:eener)?', value='Screener', validator=None, tags='other.validate.screener')

    builder.rules(ValidateHasNeighbor,
                  ValidateHasNeighborAfter,
                  ValidateHasNeighborBefore,
                  ValidateScreenerRule,
                  ProperCountRule)

    return builder
Esempio n. 35
0
def other():
    """
    Builder for rebulk object.

    Declares every ``other`` pattern, case-insensitively and validated with
    ``seps_surround`` unless a pattern overrides the validator, then attaches
    the neighbor, screener and proper-count rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def not_only_complete(match):
        """Reject a match whose stripped, lowercased raw text is exactly "complete"."""
        return seps_surround(match) and match.raw.lower().strip(seps) != "complete"

    patterns = Rebulk()
    patterns = patterns.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    patterns = patterns.string_defaults(ignore_case=True)
    patterns.defaults(name="other", validator=seps_surround)

    patterns.regex('Audio-?Fix', 'Audio-?Fixed', value='AudioFix')
    patterns.regex('Sync-?Fix', 'Sync-?Fixed', value='SyncFix')
    patterns.regex('Dual-?Audio', value='DualAudio')
    patterns.regex('ws', 'wide-?screen', value='WideScreen')
    patterns.string('Netflix', 'NF', value='Netflix')

    patterns.string('Real', 'Fix', 'Fixed',
                    value='Proper',
                    tags=['has-neighbor-before', 'has-neighbor-after'])
    patterns.string('Proper', 'Repack', 'Rerip', value='Proper')
    patterns.string('Fansub', value='Fansub', tags='has-neighbor')
    patterns.string('Fastsub', value='Fastsub', tags='has-neighbor')

    patterns.regex('(?:Seasons?-)?Complete',
                   value='Complete',
                   tags=['release-group-prefix'],
                   validator=not_only_complete)
    patterns.string('R5', 'RC', value='R5')
    patterns.regex('Pre-?Air', value='Preair')

    # Each of these literals is its own value.
    untagged = ('Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL',
                'SECAM', 'NTSC', 'CC', 'LD', 'MD', 'XXX')
    for literal in untagged:
        patterns.string(literal, value=literal)

    # Same idea, with the 'has-neighbor' and 'release-group-prefix' tags.
    neighbor_tagged = ('Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', 'FINAL',
                       'Retail', 'Uncut', 'Extended', 'Extended Cut')
    for literal in neighbor_tagged:
        patterns.string(literal, value=literal, tags=['has-neighbor', 'release-group-prefix'])

    patterns.string('VO', 'OV', value='OV', tags='has-neighbor')

    # No validator for this pattern; it is tagged 'other.validate.screener'.
    patterns.regex('Scr(?:eener)?', value='Screener', validator=None, tags='other.validate.screener')

    patterns.rules(ValidateHasNeighbor,
                   ValidateHasNeighborAfter,
                   ValidateHasNeighborBefore,
                   ValidateScreenerRule,
                   ProperCountRule)

    return patterns
Esempio n. 36
0
def crc():
    """
    Builder for rebulk object.

    Declares the ``crc32`` regex pattern (eight hexadecimal characters) and
    the ``uuid`` functional pattern implemented by ``guess_idnumber``. Both are
    validated with ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def solve_crc32_conflict(match, other):
        """Return ``match`` if the other match is an episode or a season."""
        return match if other.name in ['episode', 'season'] else '__default__'

    def solve_uuid_conflict(match, other):
        """Return ``match`` if the other match is an episode or a season."""
        return match if other.name in ['episode', 'season'] else '__default__'

    patterns = Rebulk()
    patterns = patterns.regex_defaults(flags=re.IGNORECASE)
    patterns.defaults(validator=seps_surround)

    patterns.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32', conflict_solver=solve_crc32_conflict)
    patterns.functional(guess_idnumber, name='uuid', conflict_solver=solve_uuid_conflict)
    return patterns
Esempio n. 37
0
def groups():
    """
    Builder for rebulk object.

    Registers a functional marker pattern named ``group`` that reports the
    spans delimited by ``()``, ``[]`` and ``{}``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk()
    builder.defaults(name="group", marker=True)

    openers = '([{'
    closers = ')]}'

    def mark_groups(input_string):
        """
        Functional pattern to mark groups (...), [...] and {...}.

        :param input_string:
        :return: list of ``(start, end)`` tuples, ``end`` being exclusive
        """
        # One stack of pending opening positions per delimiter type.
        pending = ([], [], [])
        spans = []

        for position, char in enumerate(input_string):
            opener_kind = openers.find(char)
            if opener_kind > -1:
                pending[opener_kind].append(position)

            closer_kind = closers.find(char)
            # A closer without a pending opener of the same kind is ignored.
            if closer_kind > -1 and pending[closer_kind]:
                spans.append((pending[closer_kind].pop(), position + 1))

        return spans

    builder.functional(mark_groups)
    return builder
Esempio n. 38
0
def website():
    """
    Builder for rebulk object.

    Registers three case-insensitive ``website`` regex patterns built from the
    TLD list shipped as the ``tlds-alpha-by-domain.txt`` resource of the
    ``guessit`` package. When ``REGEX_AVAILABLE`` is true the patterns use
    ``\\L<name>`` named lists; otherwise the alternatives are inlined with
    ``build_or_pattern``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="website")

    # Read the resource and close the stream explicitly so it is not leaked.
    tld_stream = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
    try:
        tld_lines = tld_stream.readlines()
    finally:
        tld_stream.close()

    # All registered domain extension: lines containing '--' are skipped and the
    # first remaining line is dropped (presumably the file header -- confirm).
    tlds = [line.strip().decode('utf-8')
            for line in tld_lines
            if b'--' not in line][1:]

    safe_tlds = ['com', 'org', 'net']  # For sure a website extension
    safe_subdomains = ['www']  # For sure a website subdomain
    safe_prefix = ['co', 'com', 'org', 'net']  # Those words before a tlds are sure

    if REGEX_AVAILABLE:
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)+(?:[a-z-]+\.)+(?:\L<tlds>))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains, tlds=tlds, children=True)
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)*[a-z-]+\.(?:\L<safe_tlds>))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
        rebulk.regex(
            r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)*[a-z-]+\.(?:\L<safe_prefix>\.)+(?:\L<tlds>))(?:[^a-z0-9]|$)',
            safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)
    else:
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                     r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
                     r'))(?:[^a-z0-9]|$)',
                     children=True)
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                     r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
                     r'))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                     r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
                     r'\.)+(?:'+build_or_pattern(tlds) +
                     r'))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    return rebulk
Esempio n. 39
0
def screen_size(config):
    """
    Builder for rebulk object.

    Reads the ``interlaced``, ``progressive``, ``frame_rates``, ``min_ar`` and
    ``max_ar`` config keys and registers the ``screen_size`` patterns plus one
    ``frame_rate`` pattern.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    interlaced = frozenset({height for height in config['interlaced']})
    progressive = frozenset({height for height in config['progressive']})
    frame_rates = list(map(re.escape, config['frame_rates']))
    min_ar = config['min_ar']
    max_ar = config['max_ar']

    def screen_size_disabled(context):
        """Delegate to ``is_disabled`` for the 'screen_size' property."""
        return is_disabled(context, 'screen_size')

    def frame_rate_disabled(context):
        """Delegate to ``is_disabled`` for the 'frame_rate' property."""
        return is_disabled(context, 'frame_rate')

    def resolution_conflict_solver(match, other):
        """Return '__default__' against another screen_size, the other match otherwise."""
        if other.name == 'screen_size':
            return '__default__'
        return other

    builder = Rebulk()
    builder = builder.string_defaults(ignore_case=True)
    builder = builder.regex_defaults(flags=re.IGNORECASE)

    builder.defaults(name='screen_size',
                     validator=seps_surround,
                     abbreviations=[dash],
                     disabled=screen_size_disabled)

    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')

    # Optional "<width>x" / "<width>*" prefix placed in front of the height.
    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    interlaced_res = res_pattern + interlaced_pattern
    progressive_res = res_pattern + progressive_pattern
    optional_frame_rate = frame_rate_pattern + '?'

    builder.regex(interlaced_res + r'(?P<scan_type>i)' + optional_frame_rate)
    builder.regex(progressive_res + r'(?P<scan_type>p)' + optional_frame_rate)
    builder.regex(progressive_res + r'(?P<scan_type>p)?(?:hd)')
    builder.regex(progressive_res + r'(?P<scan_type>p)?x?')
    builder.string('4k', value='2160p')
    builder.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                  conflict_solver=resolution_conflict_solver)

    builder.regex(frame_rate_pattern + '(p|fps)',
                  name='frame_rate',
                  formatter=FrameRate.fromstring,
                  disabled=frame_rate_disabled)

    builder.rules(PostProcessScreenSize(progressive, min_ar, max_ar),
                  ScreenSizeOnlyOne,
                  ResolveScreenSizeConflicts)

    return builder
Esempio n. 40
0
def edition(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the hard-coded ``edition`` patterns (Collector, Special,
    Criterion, Deluxe, Limited, Theatrical, Director's Cut, Extended, ...).
    ``config`` is not read by this function.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def edition_disabled(context):
        """Delegate to ``is_disabled`` for the 'edition' property."""
        return is_disabled(context, 'edition')

    def special_conflict_solver(match, other):
        """Return ``other`` when it is an episode_details match valued 'Special'."""
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    builder = Rebulk(disabled=edition_disabled)
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder = builder.string_defaults(ignore_case=True)
    builder.defaults(name='edition', validator=seps_surround)

    builder.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
    builder.regex('special-edition', 'edition-special',
                  value='Special',
                  conflict_solver=special_conflict_solver)
    builder.string('se', value='Special', tags='has-neighbor')
    builder.string('ddc', value="Director's Definitive Cut")
    builder.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
    builder.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
    builder.regex('limited', 'limited-edition',
                  value='Limited',
                  tags=['has-neighbor', 'release-group-prefix'])
    builder.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
    builder.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                  value="Director's Cut")
    builder.regex('extended', 'extended-?cut', 'extended-?version',
                  value='Extended',
                  tags=['has-neighbor', 'release-group-prefix'])
    builder.regex(r'super-duper-cut', value='Super Duper Cut')
    builder.regex('alternat(e|ive)(?:-?Cut)?',
                  value='Alternative Cut',
                  tags=['has-neighbor', 'release-group-prefix'])

    # Literal words that are their own value.
    self_valued = ('Remastered', 'Uncensored', 'Uncut', 'Unrated')
    for word in self_valued:
        builder.string(word, value=word, tags=['has-neighbor', 'release-group-prefix'])

    builder.string('Festival',
                   value='Festival',
                   tags=['has-neighbor-before', 'has-neighbor-after'])
    builder.regex('imax', 'imax-edition', value='IMAX')
    builder.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    builder.regex('ultimate-edition', value='Ultimate')
    # The two patterns below report a list of two values.
    builder.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
    builder.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])

    return builder
Esempio n. 41
0
def edition(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Declares the built-in ``edition`` patterns; the ``config`` argument is not
    read here.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def is_edition_disabled(context):
        """Delegate to ``is_disabled`` for the 'edition' property."""
        return is_disabled(context, 'edition')

    def solve_special_conflict(match, other):
        """Return ``other`` when it is an episode_details match valued 'Special'."""
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    patterns = Rebulk(disabled=is_edition_disabled)
    patterns = patterns.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    patterns = patterns.string_defaults(ignore_case=True)
    patterns.defaults(name='edition', validator=seps_surround)

    patterns.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
    patterns.regex('special-edition', 'edition-special',
                   value='Special',
                   conflict_solver=solve_special_conflict)
    patterns.string('se', value='Special', tags='has-neighbor')
    patterns.string('ddc', value="Director's Definitive Cut")
    patterns.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
    patterns.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
    patterns.regex('limited', 'limited-edition',
                   value='Limited',
                   tags=['has-neighbor', 'release-group-prefix'])
    patterns.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
    patterns.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                   value="Director's Cut")
    patterns.regex('extended', 'extended-?cut', 'extended-?version',
                   value='Extended',
                   tags=['has-neighbor', 'release-group-prefix'])
    patterns.regex('alternat(e|ive)(?:-?Cut)?',
                   value='Alternative Cut',
                   tags=['has-neighbor', 'release-group-prefix'])

    # Each of these literals is registered with itself as the value.
    for literal in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        patterns.string(literal, value=literal, tags=['has-neighbor', 'release-group-prefix'])

    patterns.string('Festival',
                    value='Festival',
                    tags=['has-neighbor-before', 'has-neighbor-after'])
    patterns.regex('imax', 'imax-edition', value='IMAX')
    patterns.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    patterns.regex('ultimate-edition', value='Ultimate')
    # Combined editions report a list of two values.
    patterns.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
    patterns.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])

    return patterns
Esempio n. 42
0
def audio_codec(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object holding audio codec, profile and channels patterns.

    The patterns of each property are loaded from the entry of ``config``
    bearing the same name ('audio_codec', 'audio_profile', 'audio_channels').

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    secondary_names = ['audio_profile', 'audio_channels']

    def audio_codec_priority(match1, match2):
        """
        Conflict solver between audio_codec and audio_profile/audio_channels matches.

        :param match1: first match of the conflicting pair
        :type match1:
        :param match2: second match of the conflicting pair
        :type match2:
        :return: the audio_profile/audio_channels match when the other one is
            an audio_codec match, '__default__' otherwise
        :rtype:
        """
        if match1.name == 'audio_codec':
            if match2.name in secondary_names:
                return match2
        elif match1.name in secondary_names and match2.name == 'audio_codec':
            return match1
        return '__default__'

    def disabled_for(property_name):
        """Build the ``disabled`` predicate checking ``property_name`` in the context."""
        def predicate(context):
            return is_disabled(context, property_name)
        return predicate

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    # Only the audio_codec patterns receive the priority conflict solver.
    rebulk.defaults(name='audio_codec',
                    conflict_solver=audio_codec_priority,
                    disabled=disabled_for('audio_codec'))
    load_config_patterns(rebulk, config.get('audio_codec'))

    # Defaults are cleared before each of the remaining properties is loaded.
    for property_name in secondary_names:
        rebulk.defaults(clear=True,
                        name=property_name,
                        disabled=disabled_for(property_name))
        load_config_patterns(rebulk, config.get(property_name))

    rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule,
                 AudioValidatorRule, HqConflictRule,
                 AudioChannelsValidatorRule)

    return rebulk
Esempio n. 43
0
def crc(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object holding the 'crc32' and 'uuid' patterns.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    numbering_names = ['episode', 'season']

    def crc32_conflict_solver(match, other):  # pylint:disable=unused-argument
        """Return ``other`` when it is an episode/season match, '__default__' otherwise."""
        if other.name in numbering_names:
            return other
        return '__default__'

    def uuid_conflict_solver(match, other):
        """Return ``match`` when ``other`` is an episode/season match, '__default__' otherwise."""
        if other.name in numbering_names:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'crc32'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight consecutive hexadecimal characters.
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}',
                 name='crc32',
                 conflict_solver=crc32_conflict_solver)

    rebulk.functional(guess_idnumber,
                      name='uuid',
                      conflict_solver=uuid_conflict_solver)
    return rebulk
Esempio n. 44
0
def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    Each key of ``config`` is a streaming service value; the associated item
    is either a single pattern or a list of patterns. A pattern starting with
    ``re:`` is registered as a regular expression (without that marker), any
    other pattern is registered as a plain string.

    :param config: rule configuration
    :type config: dict
    :return:
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    regex_prefix = 're:'

    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith(regex_prefix):
                # The marker only flags the entry as a regular expression; it
                # must be stripped, otherwise the compiled pattern would
                # require a literal "re:" in the matched input.
                rebulk.regex(pattern[len(regex_prefix):], value=value)
            else:
                rebulk.string(pattern, value=value)

    rebulk.rules(ValidateStreamingService)

    return rebulk
Esempio n. 45
0
def crc(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object holding the 'crc32' and 'uuid' patterns.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def return_match_against_numbering(match, other):
        """Return ``match`` when ``other`` is an episode/season match, '__default__' otherwise."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'crc32'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight consecutive hexadecimal characters.
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}',
                 name='crc32',
                 conflict_solver=return_match_against_numbering)

    rebulk.functional(guess_idnumber,
                      name='uuid',
                      conflict_solver=return_match_against_numbering)
    return rebulk
Esempio n. 46
0
def audio_codec():
    """
    Builder for rebulk object.

    Registers, in this order, the audio_codec, audio_profile and
    audio_channels patterns, then the audio related rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    secondary_names = ['audio_profile', 'audio_channels']

    def audio_codec_priority(match1, match2):
        """
        Conflict solver between audio_codec and audio_profile/audio_channels matches.

        :param match1: first match of the conflicting pair
        :type match1:
        :param match2: second match of the conflicting pair
        :type match2:
        :return: the audio_profile/audio_channels match when the other one is
            an audio_codec match, '__default__' otherwise
        :rtype:
        """
        if match1.name == 'audio_codec':
            if match2.name in secondary_names:
                return match2
        elif match1.name in secondary_names and match2.name == 'audio_codec':
            return match1
        return '__default__'

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    # --- audio_codec: (value, regex patterns), registered in table order
    rebulk.defaults(name='audio_codec', conflict_solver=audio_codec_priority)
    codec_patterns = (
        ('MP3', ('MP3', 'LAME', r'LAME(?:\d)+-?(?:\d)+')),
        ('DolbyDigital', ('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DDP?')),
        ('DolbyAtmos', ('DolbyAtmos', 'Dolby-Atmos', 'Atmos')),
        ('AAC', ('AAC',)),
        ('AC3', ('AC3D?',)),
        ('FLAC', ('Flac',)),
        ('DTS', ('DTS',)),
        ('TrueHD', ('True-?HD',)),
    )
    for codec, patterns in codec_patterns:
        rebulk.regex(*patterns, value=codec)

    # --- audio_profile: every profile is tagged with a codec name
    rebulk.defaults(name='audio_profile')
    rebulk.string('HD', value='HD', tags='DTS')
    rebulk.regex('HD-?MA', value='HDMA', tags='DTS')
    for profile, codec_tag in (('HE', 'AAC'), ('LC', 'AAC'), ('HQ', 'AC3')):
        rebulk.string(profile, value=profile, tags=codec_tag)

    # --- audio_channels
    rebulk.defaults(name='audio_channels')
    separated_channels = (
        (r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', '7.1'),
        (r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', '5.1'),
        (r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', '2.0'),
    )
    for channels_regex, channels in separated_channels:
        rebulk.regex(channels_regex, value=channels, children=True)

    # Digits without any separator are tagged as weak matches.
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')

    worded_channels = (
        ('7.1', ('7ch', '8ch')),
        ('5.1', ('5ch', '6ch')),
        ('2.0', ('2ch', 'stereo')),
        ('1.0', ('1ch', 'mono')),
    )
    for channels, words in worded_channels:
        rebulk.string(*words, value=channels)

    rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule)

    return rebulk
Esempio n. 47
0
def audio_codec():
    """
    Builder for rebulk object.

    Registers, in this order, the audio_codec, audio_profile and
    audio_channels patterns, then the audio related rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    secondary_names = ['audio_profile', 'audio_channels']

    def audio_codec_priority(match1, match2):
        """
        Conflict solver between audio_codec and audio_profile/audio_channels matches.

        :param match1: first match of the conflicting pair
        :type match1:
        :param match2: second match of the conflicting pair
        :type match2:
        :return: the audio_profile/audio_channels match when the other one is
            an audio_codec match, '__default__' otherwise
        :rtype:
        """
        if match1.name == 'audio_codec':
            if match2.name in secondary_names:
                return match2
        elif match1.name in secondary_names and match2.name == 'audio_codec':
            return match1
        return '__default__'

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    # --- audio_codec: (registration method, patterns, value), in table order
    rebulk.defaults(name='audio_codec', conflict_solver=audio_codec_priority)
    codec_patterns = (
        (rebulk.regex, ('MP3', 'LAME', r'LAME(?:\d)+-?(?:\d)+'), 'MP3'),
        (rebulk.regex, ('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?'), 'AC3'),
        (rebulk.regex, ('DolbyAtmos', 'Dolby-Atmos', 'Atmos'), 'DolbyAtmos'),
        (rebulk.string, ('AAC',), 'AAC'),
        (rebulk.string, ('EAC3', 'DDP', 'DD+'), 'EAC3'),
        (rebulk.string, ('Flac',), 'FLAC'),
        (rebulk.string, ('DTS',), 'DTS'),
        (rebulk.regex, ('True-?HD',), 'TrueHD'),
    )
    for register, patterns, codec in codec_patterns:
        register(*patterns, value=codec)

    # --- audio_profile: every profile is tagged with a codec name
    rebulk.defaults(name='audio_profile')
    rebulk.string('HD', value='HD', tags='DTS')
    rebulk.regex('HD-?MA', value='HDMA', tags='DTS')
    for profile, codec_tag in (('HE', 'AAC'), ('LC', 'AAC'), ('HQ', 'AC3')):
        rebulk.string(profile, value=profile, tags=codec_tag)

    # --- audio_channels
    rebulk.defaults(name='audio_channels')
    separated_channels = (
        (r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', '7.1'),
        (r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', '5.1'),
        (r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', '2.0'),
    )
    for channels_regex, channels in separated_channels:
        rebulk.regex(channels_regex, value=channels, children=True)

    # Digits without any separator are tagged as weak matches.
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')

    worded_channels = (
        ('7.1', ('7ch', '8ch')),
        ('5.1', ('5ch', '6ch')),
        ('2.0', ('2ch', 'stereo')),
        ('1.0', ('1ch', 'mono')),
    )
    for channels, words in worded_channels:
        rebulk.string(*words, value=channels)

    rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule)

    return rebulk
Esempio n. 48
0
def format_():
    """
    Builder for rebulk object.

    Registers the regular expressions of the 'format' property, then the
    ValidateFormat rule.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def demote_other(match, other):  # pylint: disable=unused-argument
        """Return ``other`` when it is an 'other' match, '__default__' otherwise."""
        if other.name == 'other':
            return other
        return '__default__'

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='format', tags=['video-codec-prefix', 'streaming_service.suffix'])

    # (value, regex patterns), registered in table order.
    format_patterns = (
        ('VHS', ('VHS', 'VHS-?Rip')),
        ('Cam', ('CAM', 'CAM-?Rip', 'HD-?CAM')),
        ('Telesync', ('TELESYNC', 'TS', 'HD-?TS')),
        ('Workprint', ('WORKPRINT', 'WP')),
        ('Telecine', ('TELECINE', 'TC')),
        ('PPV', ('PPV', 'PPV-?Rip')),  # Pay Per View
        # TV is too common to allow matching
        ('TV', ('SD-?TV', 'SD-?TV-?Rip', 'Rip-?SD-?TV', 'TV-?Rip', 'Rip-?TV', 'TV-?(?=Dub)')),
        ('DVB', ('DVB-?Rip', 'DVB', 'PD-?TV')),
        # "DVD-?R(?:$|^E)" => DVD-Real ...
        ('DVD', ('DVD', 'DVD-?Rip', 'VIDEO-?TS', 'DVD-?R(?:$|(?!E))', 'DVD-?9', 'DVD-?5')),
        ('HDTV', ('HD-?TV', 'TV-?RIP-?HD', 'HD-?TV-?RIP', 'HD-?RIP')),
        ('VOD', ('VOD', 'VOD-?Rip')),
        ('WEBRip', ('WEB-?Rip', 'WEB-?DL-?Rip', 'WEB-?Cap')),
        ('WEB-DL', ('WEB-?DL', 'WEB-?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)')),
        ('HD-DVD', ('HD-?DVD-?Rip', 'HD-?DVD')),
        ('BluRay', ('Blu-?ray(?:-?Rip)?', 'B[DR]', 'B[DR]-?Rip', 'BD[59]', 'BD25', 'BD50')),
        ('AHDTV', ('AHDTV',)),
        ('UHDTV', ('UHD-?TV', 'UHD-?Rip')),
        ('HDTC', ('HDTC',)),
        ('SATRip', ('DSR', 'DSR?-?Rip', 'SAT-?Rip', 'DTH', 'DTH-?Rip')),
    )
    # Values whose patterns are registered with the demote_other conflict solver.
    demoted_values = ('HDTV', 'UHDTV')

    for value, patterns in format_patterns:
        if value in demoted_values:
            rebulk.regex(*patterns, value=value, conflict_solver=demote_other)
        else:
            rebulk.regex(*patterns, value=value)

    rebulk.rules(ValidateFormat)

    return rebulk
Esempio n. 49
0
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object holding the bit rate patterns.

    Patterns are registered under the 'audio_bit_rate' name; the
    BitRateTypeRule rule is registered afterwards.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def all_bit_rates_disabled(context):
        """Tell whether both 'audio_bit_rate' and 'video_bit_rate' are disabled."""
        return (is_disabled(context, 'audio_bit_rate')
                and is_disabled(context, 'video_bit_rate'))

    def bit_rate_conflict_solver(match, other):
        """Return ``match`` against a non-weak audio_channels match, ``other`` in any other case."""
        if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags:
            return match
        return other

    rebulk = Rebulk(disabled=all_bit_rates_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)

    # Integer or decimal amount followed by a k/m/g bit unit.
    rebulk.regex(r'\d+-?[kmg]b(ps|its?)',
                 r'\d+\.\d+-?[kmg]b(ps|its?)',
                 conflict_solver=bit_rate_conflict_solver,
                 formatter=BitRate.fromstring,
                 tags=['release-group-prefix'])

    rebulk.rules(BitRateTypeRule)

    return rebulk
Esempio n. 50
0
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Builder for the rebulk object holding the 'other' property patterns.

    Patterns are loaded from the 'other' entry of ``config``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def other_disabled(context):
        """Tell whether the 'other' property is disabled in the context."""
        return is_disabled(context, 'other')

    rebulk = Rebulk(disabled=other_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='other', validator=seps_surround)

    load_config_patterns(rebulk, config.get('other'))

    # Rules are registered in this exact order.
    other_rules = (
        RenameAnotherToOther,
        ValidateHasNeighbor,
        ValidateHasNeighborAfter,
        ValidateHasNeighborBefore,
        ValidateScreenerRule,
        ValidateMuxRule,
        ValidateHardcodedSubs,
        ValidateStreamingServiceNeighbor,
        ValidateAtEnd,
        ValidateReal,
        ProperCountRule,
    )
    rebulk.rules(*other_rules)

    return rebulk
Esempio n. 51
0
def video_codec():
    """
    Builder for rebulk object.

    Registers, in this order, the video_codec, video_profile, video_api and
    color_depth patterns, then the video codec related rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    # --- video_codec: (value, regex patterns), registered in table order
    rebulk.defaults(name='video_codec',
                    tags=['source-suffix', 'streaming_service.suffix'])
    codec_patterns = (
        ('RealVideo', (r'Rv\d{2}',)),
        ('MPEG-2', ('Mpeg2',)),
        ('DivX', ('DVDivX', 'DivX')),
        ('Xvid', ('XviD',)),
        ('H.264', ('[hx]-?264(?:-?AVC(?:HD)?)?',
                   'MPEG-?4(?:-?AVC(?:HD)?)',
                   'AVC(?:HD)?')),
        ('H.265', ('[hx]-?265(?:-?HEVC)?', 'HEVC')),
    )
    for codec, patterns in codec_patterns:
        rebulk.regex(*patterns, value=codec)

    # "hevc10" yields both a video_codec and a color_depth child match.
    hevc10_values = {'video_codec': 'H.265', 'color_depth': '10-bit'}
    rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)',
                 value=hevc10_values,
                 tags=['video-codec-suffix'],
                 children=True)

    # --- video_profile
    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    rebulk.defaults(name='video_profile', validator=seps_surround)

    tagged_profiles = (
        ('Baseline', ('BP',)),
        ('Extended', ('XP', 'EP')),
        ('Main', ('MP',)),
        ('High', ('HP', 'HiP')),
    )
    for profile, abbreviations in tagged_profiles:
        rebulk.string(*abbreviations, value=profile, tags='video_profile.rule')

    # no profile validation is required for the patterns below
    untagged_profiles = (
        ('Hi422P', 'High 4:2:2'),
        ('Hi444PP', 'High 4:4:4 Predictive'),
        ('Hi10P?', 'High 10'),
    )
    for profile_regex, profile in untagged_profiles:
        rebulk.regex(profile_regex, value=profile)

    # --- video_api
    rebulk.string('DXVA', value='DXVA', name='video_api')

    # --- color_depth
    rebulk.defaults(name='color_depth', validator=seps_surround)
    depth_patterns = (
        ('12-bit', ('12.?bits?',)),
        ('10-bit', ('10.?bits?', 'YUV420P10', 'Hi10P?')),
        ('8-bit', ('8.?bits?',)),
    )
    for depth, patterns in depth_patterns:
        rebulk.regex(*patterns, value=depth)

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
Esempio n. 52
0
def audio_codec():
    """
    Builder for rebulk object.

    Registers, in this order, the audio_codec, audio_profile and
    audio_channels patterns, then the audio related rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    # --- audio_codec: (value, regex patterns), registered in table order
    rebulk.defaults(name='audio_codec')
    codec_patterns = (
        ('MP3', ('MP3', 'LAME', r'LAME(?:\d)+-?(?:\d)+')),
        ('DolbyDigital', ('DolbyDigital', 'Dolby-Digital', 'DD')),
        ('DolbyAtmos', ('DolbyAtmos', 'Dolby-Atmos', 'Atmos')),
        ('AAC', ('AAC',)),
        ('AC3', ('AC3D?',)),
        ('FLAC', ('Flac',)),
        ('DTS', ('DTS',)),
        ('TrueHD', ('True-?HD',)),
    )
    for codec, patterns in codec_patterns:
        rebulk.regex(*patterns, value=codec)

    # --- audio_profile: every profile is tagged with a codec name
    rebulk.defaults(name='audio_profile')
    rebulk.string('HD', value='HD', tags='DTS')
    rebulk.regex('HD-?MA', value='HDMA', tags='DTS')
    for profile, codec_tag in (('HE', 'AAC'), ('LC', 'AAC'), ('HQ', 'AC3')):
        rebulk.string(profile, value=profile, tags=codec_tag)

    # --- audio_channels
    rebulk.defaults(name='audio_channels')
    separated_channels = (
        (r'(7[\W_]1)(?:[^\d]|$)', '7.1'),
        (r'(5[\W_]1)(?:[^\d]|$)', '5.1'),
        (r'(2[\W_]0)(?:[^\d]|$)', '2.0'),
    )
    for channels_regex, channels in separated_channels:
        rebulk.regex(channels_regex, value=channels, children=True)

    worded_channels = (
        ('7.1', ('7ch', '8ch')),
        ('5.1', ('5ch', '6ch')),
        ('2.0', ('2ch', 'stereo')),
        ('1.0', ('1ch', 'mono')),
    )
    for channels, words in worded_channels:
        rebulk.string(*words, value=channels)

    rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule)

    return rebulk
Esempio n. 53
0
def source(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object holding the 'source' property patterns.

    Most patterns also declare an 'other' named group capturing a "Rip"
    marker, so their value is a mapping keyed by group name.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
    rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])

    # Regex fragments capturing the "Rip" marker before or after the source.
    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    optional_rip = '(?:' + rip_suffix + ')?'

    def build_source_pattern(*patterns, **kwargs):
        """Wrap each pattern in a group, surrounded by the optional 'prefix' and 'suffix' keywords."""
        before = kwargs.get('prefix') or ''
        after = kwargs.get('suffix') or ''
        wrapped = []
        for pattern in patterns:
            wrapped.append(before + '(' + pattern + ')' + after)
        return wrapped

    def rip_value(source_name):
        """Return the value mapping pairing ``source_name`` with the 'Rip' other value."""
        return {'source': source_name, 'other': 'Rip'}

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Default conflict solver with 'other' property."""
        if other.name == 'other':
            return other
        return '__default__'

    # --- Analog, camera and pre-release sources
    rebulk.regex(*build_source_pattern('VHS', suffix=optional_rip), value=rip_value('VHS'))
    rebulk.regex(*build_source_pattern('CAM', suffix=optional_rip), value=rip_value('Camera'))
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=optional_rip), value=rip_value('HD Camera'))
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=optional_rip), value=rip_value('Telesync'))
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=optional_rip),
                 value=rip_value('HD Telesync'))
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=optional_rip), value=rip_value('Telecine'))
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=optional_rip),
                 value=rip_value('HD Telecine'))
    rebulk.regex(*build_source_pattern('PPV', suffix=optional_rip), value=rip_value('Pay-per-view'))

    # --- TV sources
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=optional_rip), value=rip_value('TV'))
    # TV is too common to allow matching: here the Rip suffix is mandatory
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix), value=rip_value('TV'))
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix), value=rip_value('TV'))
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=optional_rip), value=rip_value('Digital TV'))

    # --- DVD and digital master sources
    rebulk.regex(*build_source_pattern('DVD', suffix=optional_rip), value=rip_value('DVD'))
    rebulk.regex(*build_source_pattern('DM', suffix=optional_rip), value=rip_value('Digital Master'))
    # 'DVD-?R(?:$|^E)' => DVD-Real ...
    rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))', 'DVD-?9', 'DVD-?5'),
                 value='DVD')

    # --- HDTV sources
    rebulk.regex(*build_source_pattern('HD-?TV', suffix=optional_rip),
                 conflict_solver=demote_other, value=rip_value('HDTV'))
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix),
                 conflict_solver=demote_other, value=rip_value('HDTV'))
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'),
                 conflict_solver=demote_other, value=rip_value('HDTV'))

    # --- Video on demand and web sources
    rebulk.regex(*build_source_pattern('VOD', suffix=optional_rip), value=rip_value('Video on Demand'))

    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix), value=rip_value('Web'))
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=optional_rip),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    # --- HD-DVD and Blu-ray sources
    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=optional_rip), value=rip_value('HD-DVD'))

    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=optional_rip),
                 value=rip_value('Blu-ray'))
    # BRRip
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    # BRRip
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')

    # --- Analog HDTV and Ultra HDTV sources
    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=optional_rip),
                 conflict_solver=demote_other, value=rip_value('Ultra HDTV'))
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix),
                 conflict_solver=demote_other, value=rip_value('Ultra HDTV'))

    # --- Satellite sources
    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=optional_rip), value=rip_value('Satellite'))
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix), value=rip_value('Satellite'))

    rebulk.rules(ValidateSource, UltraHdBlurayRule)

    return rebulk
Esempio n. 54
0
def episodes(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """Whether season and episode rules should be enabled."""
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])

    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']

    def episodes_season_chain_breaker(matches):
        """
        Break chains if there's more than 100 offset between two neighbor values.
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match:
        :param other:
        :return:
        """
        if match.name != other.name:
            if match.name == 'episode' and other.name == 'year':
                return match
            if match.name in ('season', 'episode'):
                if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                    return match
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):

                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                            and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                        return '__default__'
                    for current in (match, other):
                        if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                            return current
        return '__default__'

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators

    max_range_gap = config['max_range_gap']

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
        """
        values = match.children.to_dict()
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if not list(sorted(values['season'])) == values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Season numbers must be in natural order to be validated.
            if not list(sorted(values['episode'])) == values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.
            :param property_name:
            :type property_name:
            :return:
            :rtype:
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    match.children.previous(current_match,
                                            lambda m: m.name == property_name + 'Separator')
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                        if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                            valid = False
                    if separator.raw in strong_discrete_separators:
                        valid = True
                        break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver,
                 disabled=is_season_episode_disabled) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators
        :param match:
        :type match:
        :return:
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113, 1e18, 3e19
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # cap 112, cap 112_114
    rebulk.chain(abbreviations=[dash],
                 tags=['see-pattern'],
                 formatter={'season': int, 'episode': int},
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'episode'))

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk
Esempio n. 55
0
def other():
    """
    Create the Rebulk object that registers the ``other`` property patterns.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def validate_complete(match):
        """
        Accept a match only if a season word group was captured before or after "Complete".

        :param match: parent match whose children are inspected
        :type match:
        :return: True when ``completeWordsBefore`` or ``completeWordsAfter`` has a child match
        :rtype: bool
        """
        kids = match.children
        return bool(kids.named('completeWordsBefore') or kids.named('completeWordsAfter'))

    builder = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder = builder.string_defaults(ignore_case=True)
    builder.defaults(name="other", validator=seps_surround)

    # Regex patterns mapped to a fixed value, registered in declaration order.
    fixed_value_regexes = (
        (('Audio-?Fix', 'Audio-?Fixed'), 'AudioFix'),
        (('Sync-?Fix', 'Sync-?Fixed'), 'SyncFix'),
        (('Dual-?Audio',), 'DualAudio'),
        (('ws', 'wide-?screen'), 'WideScreen'),
    )
    for expressions, label in fixed_value_regexes:
        builder.regex(*expressions, value=label)

    builder.string('Real', 'Fix', 'Fixed', value='Proper', tags=['has-neighbor-before', 'has-neighbor-after'])
    builder.string('Proper', 'Repack', 'Rerip', value='Proper')
    for sub_kind in ('Fansub', 'Fastsub'):
        builder.string(sub_kind, value=sub_kind, tags='has-neighbor')

    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])

    # "Complete", optionally preceded by an article and/or a season word, or followed by a season word.
    article_part = '(?P<completeArticle>' + complete_articles + '-)?'
    before_part = '(?P<completeWordsBefore>' + season_words + '-)?'
    after_part = '(?P<completeWordsAfter>-' + season_words + ')?'
    builder.regex(article_part + before_part + 'Complete' + after_part,
                  private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                  value={'other': 'Complete'},
                  tags=['release-group-prefix'],
                  validator={'__parent__': compose(seps_surround, validate_complete)})
    builder.string('R5', 'RC', value='R5')
    builder.regex('Pre-?Air', value='Preair')
    builder.regex('(?:PS-?)?Vita', value='PS Vita')

    # Keywords whose reported value is the keyword itself.
    plain_keywords = (
        'Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
        'CC', 'LD', 'MD', 'XXX',
    )
    for keyword in plain_keywords:
        builder.string(keyword, value=keyword)
    builder.string('LDTV', value='LD')

    # Keywords tagged 'has-neighbor' and 'release-group-prefix'.
    neighbor_keywords = (
        'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail', 'Uncut',
        'Extended', 'Extended Cut',
    )
    for keyword in neighbor_keywords:
        builder.string(keyword, value=keyword, tags=['has-neighbor', 'release-group-prefix'])

    builder.string('VO', 'OV', value='OV', tags='has-neighbor')

    builder.regex('Scr(?:eener)?', value='Screener', validator=None, tags='other.validate.screener')

    builder.rules(ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, ValidateScreenerRule,
                  ProperCountRule)

    return builder
Esempio n. 56
0
def video_codec(config):  # pylint:disable=unused-argument
    """
    Create the Rebulk object registering video_codec, video_profile, video_api and color_depth patterns.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def disabled_when(prop):
        """Return a ``disabled`` callback that asks ``is_disabled`` about the given property name."""
        return lambda context: is_disabled(context, prop)

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name="video_codec",
                    tags=['source-suffix', 'streaming_service.suffix'],
                    disabled=disabled_when('video_codec'))

    # (registration method, patterns, codec value), applied in declaration order.
    codec_table = (
        (rebulk.regex, (r'Rv\d{2}',), 'RealVideo'),
        (rebulk.regex, ('Mpe?g-?2', '[hx]-?262'), 'MPEG-2'),
        (rebulk.string, ("DVDivX", "DivX"), "DivX"),
        (rebulk.string, ('XviD',), 'Xvid'),
        (rebulk.regex, ('VC-?1',), 'VC-1'),
        (rebulk.string, ('VP7',), 'VP7'),
        (rebulk.string, ('VP8', 'VP80'), 'VP8'),
        (rebulk.string, ('VP9',), 'VP9'),
        (rebulk.regex, ('[hx]-?263',), 'H.263'),
        (rebulk.regex, ('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?'), 'H.264'),
        (rebulk.regex, ('[hx]-?265', 'HEVC'), 'H.265'),
    )
    for register, expressions, codec in codec_table:
        register(*expressions, value=codec)
    rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)',
                 value={'video_codec': 'H.265', 'color_depth': '10-bit'},
                 tags=['video-codec-suffix'],
                 children=True)

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    rebulk.defaults(name="video_profile",
                    validator=seps_surround,
                    disabled=disabled_when('video_profile'))

    profile_rule_tag = 'video_profile.rule'
    profile_table = (
        (('BP',), 'Baseline'),
        (('XP', 'EP'), 'Extended'),
        (('MP',), 'Main'),
        (('HP', 'HiP'), 'High'),
    )
    for names, profile in profile_table:
        rebulk.string(*names, value=profile, tags=profile_rule_tag)

    # https://en.wikipedia.org/wiki/Scalable_Video_Coding
    rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags=profile_rule_tag)
    # https://en.wikipedia.org/wiki/AVCHD
    rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags=profile_rule_tag)
    # https://en.wikipedia.org/wiki/H.265/HEVC
    rebulk.string('HEVC', value='High Efficiency Video Coding', tags=profile_rule_tag)

    # These profiles are registered without the 'video_profile.rule' tag.
    rebulk.regex('Hi422P', value='High 4:2:2')
    rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
    rebulk.regex('Hi10P?', value='High 10')  # no profile validation is required

    rebulk.string('DXVA', value='DXVA', name='video_api',
                  disabled=disabled_when('video_api'))

    rebulk.defaults(name='color_depth',
                    validator=seps_surround,
                    disabled=disabled_when('color_depth'))
    depth_table = (
        (('12.?bits?',), '12-bit'),
        (('10.?bits?', 'YUV420P10', 'Hi10P?'), '10-bit'),
        (('8.?bits?',), '8-bit'),
    )
    for expressions, depth in depth_table:
        rebulk.regex(*expressions, value=depth)

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
Esempio n. 57
0
def container():
    """
    Create the Rebulk object registering ``container`` patterns.

    Extensions are first registered as end-anchored, dot-prefixed regexes tagged
    ``extension``, then again as plain strings validated by ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def drop_leading_char(value):
        """Return the value without its first character (the dot of the extension regexes)."""
        return value[1:]

    def yield_to_other(match, other):  # pylint: disable=unused-argument
        """Return ``other`` for format/video_codec or non-extension container matches."""
        if other.name in ["format", "video_codec"]:
            return other
        if other.name == "container" and "extension" not in other.tags:
            return other
        return "__default__"

    def to_upper(s):
        """Return the upper-cased value."""
        return s.upper()

    def keep_match(match, other):
        """Return ``match`` for format/video_codec or extension-tagged container matches."""
        if other.name in ["format", "video_codec"]:
            return match
        if other.name == "container" and "extension" in other.tags:
            return match
        return "__default__"

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(
        name="container",
        formatter=drop_leading_char,
        tags=["extension"],
        conflict_solver=yield_to_other,
    )

    subtitles = ["srt", "idx", "sub", "ssa", "ass"]
    info = ["nfo"]
    videos = [
        "3g2", "3gp", "3gp2", "asf", "avi", "divx", "flv", "m4v", "mk2", "mka",
        "mkv", "mov", "mp4", "mp4a", "mpeg", "mpg", "ogg", "ogm", "ogv", "qt",
        "ra", "ram", "rm", "ts", "wav", "webm", "wma", "wmv", "iso", "vob",
    ]
    torrent = ["torrent"]

    # One extension regex per group, registered in this order.
    extension_groups = (
        (subtitles, "subtitle"),
        (info, "info"),
        (videos, "video"),
        (torrent, "torrent"),
    )
    for extensions, kind in extension_groups:
        if REGEX_AVAILABLE:
            # Named-list form; the list itself is supplied through the ``exts`` keyword.
            expression = r"\.\L<exts>$"
        else:
            expression = r"\." + build_or_pattern(extensions) + "$"
        rebulk.regex(expression, exts=extensions, tags=["extension", kind])

    rebulk.defaults(
        name="container",
        validator=seps_surround,
        formatter=to_upper,
        conflict_solver=keep_match,
    )

    # "sub" is left out of the plain-string subtitle patterns.
    rebulk.string(*(ext for ext in subtitles if ext != "sub"), tags=["subtitle"])
    rebulk.string(*videos, tags=["video"])
    rebulk.string(*torrent, tags=["torrent"])

    return rebulk
Esempio n. 58
0
def episodes():
    """
    Build the Rebulk object registering season, episode, version, count,
    episode_details and episode_format patterns, plus a hardcoded movie marker.

    Patterns are registered in order, and several ``rebulk.defaults(...)`` calls
    in between change the defaults applied to the patterns that follow them.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    # Separator capture groups are declared as private names.
    # NOTE(review): presumably private matches are hidden from the final output - confirm in rebulk docs.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns.

        Returns ``match`` when a season/episode match conflicts with a screen_size,
        video_codec, audio_codec, audio_channels, container or date match.
        When two season/episode matches come from different initiators, returns the one
        whose initiator raw text contains an 'x' (case-insensitive), checking ``match`` first.
        In every other case returns the string ``'__default__'``.

        :param match: match being evaluated
        :param other: conflicting match
        :return: ``match``, ``other`` or ``'__default__'``
        """
        if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec',
                                                                  'audio_codec', 'audio_channels',
                                                                  'container', 'date']:
            return match
        elif match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
                and match.initiator != other.initiator:
            if 'x' in match.initiator.raw.lower():
                return match
            if 'x' in other.initiator.raw.lower():
                return other
        return '__default__'

    # Characters accepted around a season/episode initiator: the shared seps plus x/X/e/E.
    season_episode_seps = []
    season_episode_seps.extend(seps)
    season_episode_seps.extend(['x', 'X', 'e', 'E'])

    def season_episode_validator(match):
        """
        Validator for season/episode matches.

        For a season/episode match whose initiator start index is non-zero, require that
        the input character at the initiator start, or the one just before it, is in
        ``season_episode_seps``. Any other match (including an initiator starting at
        index 0) is accepted.

        :param match: match to validate
        :return: True if the match is accepted
        """
        if match.name in ['season', 'episode'] and match.initiator.start:
            return match.initiator.input_string[match.initiator.start] in season_episode_seps \
                   or match.initiator.input_string[match.initiator.start - 1] in season_episode_seps
        return True

    # 01x02, 01x02x03x04
    # Three alternative chains: SxxExx (+ extra episodes), NxN (+ extra episodes), Sxx (+ extra seasons).
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 conflict_solver=season_episode_conflict_solver) \
        .defaults(validator=season_episode_validator) \
        .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .chain() \
        .regex(r'S(?P<season>\d+)') \
        .regex(r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    # From here on, patterns default to children-only matches validated with seps_surround on the parent.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']

    # Season word followed by a numeral, an optional "of <count>", then extra seasons
    # separated by '-' and/or by '+' / '&'.
    rebulk.chain(abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral}) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?(?P<seasonSeparator>-)@?(?P<season>\d+)').repeater('*') \
        .regex(r'@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+)').repeater('*')


    # Episode word followed by digits; disabled when the context type is 'episode'
    # (the numeral variant below is used in that case instead).
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter=int,
                 disabled=lambda context: context.get('type') == 'episode')

    # Episode word followed by a numeral (parsed with parse_numeral); only enabled
    # when the context type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode')

    # Season followed by an "all" word in episode position: 'other' is always formatted as 'Complete'.
    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'})

    # Same keyword arguments as the defaults() call made before the season-words chain.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # 12, 13
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    # Only enabled when the context option 'episode_prefer_number' is truthy.
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: not context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    # Only enabled when the context type is 'episode'.
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode') \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102
    # Season and episode glued together; disabled when 'episode_prefer_number' is truthy.
    # Its conflict solver returns this match when the conflicting match is named 'year'.
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                 disabled=lambda context: context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    # Standalone version marker (v2, v3, ...).
    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    # Replace the previous defaults with the separator private names only.
    # NOTE(review): assumes defaults() does not keep the earlier validator/children settings - confirm.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/episodes
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    # Post-processing rules for the matches produced above.
    rebulk.rules(EpisodeNumberSeparatorRange, SeasonSeparatorRange, RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk