Example #1
0
def part(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the ``part`` pattern.

    :param config: rule configuration; its ``prefixes`` entry is read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def part_disabled(context):
        """Report whether ``part`` is disabled for the given context."""
        return is_disabled(context, 'part')

    def validate_roman(match):
        """
        Accept the match when its raw text is int-coercable, otherwise
        only when it is surrounded by separators.

        :param match: match to validate
        :type match:
        :return: validation result
        :rtype:
        """
        return True if int_coercable(match.raw) else seps_surround(match)

    def in_range(m):
        """Accept values strictly between 0 and 100."""
        return 0 < m.value < 100

    rebulk = Rebulk(disabled=part_disabled)
    rebulk.regex_defaults(flags=re.IGNORECASE,
                          abbreviations=[dash],
                          validator={'__parent__': seps_surround})

    prefixes = config['prefixes']

    # <prefix>, an optional dash, then the numeral captured as the "part" group.
    part_pattern = build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')'
    rebulk.regex(part_pattern,
                 prefixes=prefixes,
                 validate_all=True,
                 private_parent=True,
                 children=True,
                 formatter=parse_numeral,
                 validator={'part': compose(validate_roman, in_range)})

    return rebulk
Example #2
0
def size(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the ``size`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def size_disabled(context):
        """Report whether ``size`` is disabled for the given context."""
        return is_disabled(context, 'size')

    rebulk = Rebulk(disabled=size_disabled)
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='size', validator=seps_surround)

    # Integer and decimal amounts followed by a mb/gb/tb unit.
    integer_size = r'\d+-?[mgt]b'
    decimal_size = r'\d+\.\d+-?[mgt]b'
    rebulk.regex(integer_size,
                 decimal_size,
                 formatter=Size.fromstring,
                 tags=['release-group-prefix'])

    return rebulk
Example #3
0
def edition(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object for ``edition``, loading its patterns from
    the ``edition`` entry of the configuration.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def edition_disabled(context):
        """Report whether ``edition`` is disabled for the given context."""
        return is_disabled(context, 'edition')

    rebulk = Rebulk(disabled=edition_disabled)

    regex_configured = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    regex_configured.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    edition_config = config.get('edition')
    load_config_patterns(rebulk, edition_config)

    return rebulk
Example #4
0
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the bit rate patterns.

    The builder is disabled only when both ``audio_bit_rate`` and
    ``video_bit_rate`` are disabled.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bit_rate_disabled(context):
        """Report whether both bit rate properties are disabled."""
        return (is_disabled(context, 'audio_bit_rate')
                and is_disabled(context, 'video_bit_rate'))

    def bit_rate_conflict_solver(match, other):
        """
        Return ``match`` against a non-weak ``audio_channels`` match,
        otherwise return ``other``.
        """
        if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags:
            return match
        return other

    rebulk = Rebulk(disabled=bit_rate_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)

    # Integer and decimal amounts followed by a k/m/g bit-rate unit.
    integer_rate = r'\d+-?[kmg]b(ps|its?)'
    decimal_rate = r'\d+\.\d+-?[kmg]b(ps|its?)'
    rebulk.regex(integer_rate,
                 decimal_rate,
                 conflict_solver=bit_rate_conflict_solver,
                 formatter=BitRate.fromstring,
                 tags=['release-group-prefix'])

    rebulk.rules(BitRateTypeRule)

    return rebulk
Example #5
0
def cds(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the ``cd`` / ``cd_count`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def cd_disabled(context):
        """Report whether ``cd`` is disabled for the given context."""
        return is_disabled(context, 'cd')

    def in_range(match):
        """Accept values strictly between 0 and 100."""
        return 0 < match.value < 100

    def range_validators():
        """Return a fresh validator mapping for both cd properties."""
        return {'cd': in_range, 'cd_count': in_range}

    def declared_properties():
        """Return a fresh properties mapping for both cd properties."""
        return {'cd': [None], 'cd_count': [None]}

    rebulk = Rebulk(disabled=cd_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    # "cd<N>" optionally followed by "of<M>".
    rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
                 validator=range_validators(),
                 formatter={'cd': int, 'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties=declared_properties())

    # "<M>cd" / "<M>cds".
    rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
                 validator=range_validators(),
                 formatter={'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties=declared_properties())

    return rebulk
Example #6
0
def bonus(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the ``bonus`` pattern and rule.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bonus_disabled(context):
        """Report whether ``bonus`` is disabled for the given context."""
        return is_disabled(context, 'bonus')

    def bonus_conflict_solver(match, conflicting):
        """
        Return ``match`` against a ``video_codec`` or ``episode`` match not
        tagged ``weak-episode``; otherwise return ``'__default__'``.
        """
        if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=bonus_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(r'x(\d+)',
                 name='bonus',
                 private_parent=True,
                 children=True,
                 formatter=int,
                 validator={'__parent__': seps_surround},
                 validate_all=True,
                 conflict_solver=bonus_conflict_solver)

    rebulk.rules(BonusTitleRule)

    return rebulk
Example #7
0
def container(config):
    """
    Build the Rebulk object holding the ``container`` patterns.

    File-extension regexes are registered first, then the defaults are
    cleared and replaced before the plain-string patterns are registered.

    :param config: rule configuration; the ``subtitles``, ``info``,
        ``videos``, ``torrent`` and ``nzb`` entries are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def container_disabled(context):
        """Report whether ``container`` is disabled for the given context."""
        return is_disabled(context, 'container')

    def strip_separators(value):
        """Strip separator characters from both ends of the value."""
        return value.strip(seps)

    def lowercase(s):
        """Lower-case the matched string."""
        return s.lower()

    def extension_conflict_solver(match, other):  # pylint:disable=unused-argument
        """
        Return ``other`` for source/video_codec matches and for container
        matches lacking the ``extension`` tag; otherwise ``'__default__'``.
        """
        if other.name in ('source', 'video_codec'):
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def string_conflict_solver(match, other):
        """
        Return ``match`` for source/video_codec matches and for container
        matches carrying the ``extension`` tag; otherwise ``'__default__'``.
        """
        if other.name in ('source', 'video_codec'):
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=container_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=strip_separators,
                    tags=['extension'],
                    conflict_solver=extension_conflict_solver)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    # One regex per extension family, anchored at the end of the input.
    extension_families = (
        (subtitles, 'subtitle'),
        (info, 'info'),
        (videos, 'video'),
        (torrent, 'torrent'),
        (nzb, 'nzb'),
    )
    for exts, family_tag in extension_families:
        rebulk.regex(r'\.' + build_or_pattern(exts) + '$',
                     exts=exts,
                     tags=['extension', family_tag])

    rebulk.defaults(clear=True,
                    name='container',
                    validator=seps_surround,
                    formatter=lowercase,
                    conflict_solver=string_conflict_solver)

    # 'sub' and 'ass' are left out of the plain-string subtitle patterns.
    subtitle_strings = [sub for sub in subtitles if sub not in ('sub', 'ass')]
    rebulk.string(*subtitle_strings, tags=['subtitle'])
    for names, family_tag in ((videos, 'video'), (torrent, 'torrent'), (nzb, 'nzb')):
        rebulk.string(*names, tags=[family_tag])

    return rebulk
Example #8
0
def cd(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object for ``cd``, loading its patterns from the
    given configuration.

    :param config: rule configuration, passed as-is to ``load_config_patterns``
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def cd_disabled(context):
        """Report whether ``cd`` is disabled for the given context."""
        return is_disabled(context, 'cd')

    base = Rebulk(disabled=cd_disabled)
    rebulk = base.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    load_config_patterns(rebulk, config)

    return rebulk
Example #9
0
def bonus(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object for ``bonus``, loading its patterns from the
    ``bonus`` entry of the configuration.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bonus_disabled(context):
        """Report whether ``bonus`` is disabled for the given context."""
        return is_disabled(context, 'bonus')

    base = Rebulk(disabled=bonus_disabled)
    rebulk = base.regex_defaults(name='bonus', flags=re.IGNORECASE)

    bonus_config = config.get('bonus')
    load_config_patterns(rebulk, bonus_config)

    rebulk.rules(BonusTitleRule)

    return rebulk
Example #10
0
def container(config):
    """
    Build the Rebulk object holding the ``container`` patterns.

    File-extension regexes are registered under a first set of defaults;
    ``defaults`` is then called again before the plain-string patterns
    are registered.

    :param config: rule configuration; the ``subtitles``, ``info``,
        ``videos``, ``torrent`` and ``nzb`` entries are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def container_disabled(context):
        """Report whether ``container`` is disabled for the given context."""
        return is_disabled(context, 'container')

    def strip_separators(value):
        """Strip separator characters from both ends of the value."""
        return value.strip(seps)

    def lowercase(s):
        """Lower-case the matched string."""
        return s.lower()

    def extension_conflict_solver(match, other):  # pylint:disable=unused-argument
        """
        Return ``other`` for source/video_codec matches and for container
        matches lacking the ``extension`` tag; otherwise ``'__default__'``.
        """
        if other.name in ('source', 'video_codec'):
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def string_conflict_solver(match, other):
        """
        Return ``match`` for source/video_codec matches and for container
        matches carrying the ``extension`` tag; otherwise ``'__default__'``.
        """
        if other.name in ('source', 'video_codec'):
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    def extension_regex(exts):
        """Build the end-anchored ``.<ext>`` regex for a list of extensions."""
        return r'\.' + build_or_pattern(exts) + '$'

    rebulk = Rebulk(disabled=container_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=strip_separators,
                    tags=['extension'],
                    conflict_solver=extension_conflict_solver)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    rebulk.regex(extension_regex(subtitles), exts=subtitles, tags=['extension', 'subtitle'])
    rebulk.regex(extension_regex(info), exts=info, tags=['extension', 'info'])
    rebulk.regex(extension_regex(videos), exts=videos, tags=['extension', 'video'])
    rebulk.regex(extension_regex(torrent), exts=torrent, tags=['extension', 'torrent'])
    rebulk.regex(extension_regex(nzb), exts=nzb, tags=['extension', 'nzb'])

    rebulk.defaults(name='container',
                    validator=seps_surround,
                    formatter=lowercase,
                    conflict_solver=string_conflict_solver)

    # 'sub' and 'ass' are left out of the plain-string subtitle patterns.
    subtitle_strings = [sub for sub in subtitles if sub not in ('sub', 'ass')]
    rebulk.string(*subtitle_strings, tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])
    rebulk.string(*nzb, tags=['nzb'])

    return rebulk
Example #11
0
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object for bit rates, loading its patterns from the
    ``bit_rate`` entry of the configuration.

    The builder is disabled only when both ``audio_bit_rate`` and
    ``video_bit_rate`` are disabled.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bit_rate_disabled(context):
        """Report whether both bit rate properties are disabled."""
        return (is_disabled(context, 'audio_bit_rate')
                and is_disabled(context, 'video_bit_rate'))

    base = Rebulk(disabled=bit_rate_disabled)
    rebulk = base.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)

    bit_rate_config = config.get('bit_rate')
    load_config_patterns(rebulk, bit_rate_config)

    rebulk.rules(BitRateTypeRule)

    return rebulk
Example #12
0
def cds(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the ``cd`` / ``cd_count`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def cd_disabled(context):
        """Report whether ``cd`` is disabled for the given context."""
        return is_disabled(context, 'cd')

    def in_range(match):
        """Accept values strictly between 0 and 100."""
        return 0 < match.value < 100

    rebulk = Rebulk(disabled=cd_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    # Options common to both patterns; the mappings are rebuilt per call.
    numbered_pattern = r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?'
    rebulk.regex(numbered_pattern,
                 validator={'cd': in_range, 'cd_count': in_range},
                 formatter={'cd': int, 'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})

    count_pattern = r'(?P<cd_count>\d+)-?cds?'
    rebulk.regex(count_pattern,
                 validator={'cd': in_range, 'cd_count': in_range},
                 formatter={'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})

    return rebulk
Example #13
0
def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the ``x<digits>`` bonus pattern and the ``BonusTitleRule``.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    # The parent validator has to be ``seps_surround`` itself. The previous
    # ``lambda match: seps_surround`` returned the function object, which is
    # always truthy, so the separator check never rejected anything.
    # NOTE(review): ``validate_all=True`` mirrors the other builders that pair
    # a '__parent__' validator with private_parent/children; presumably it is
    # what makes rebulk run the parent validator here - confirm against the
    # rebulk documentation.
    rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
                 validator={'__parent__': seps_surround},
                 validate_all=True,
                 conflict_solver=lambda match, conflicting: match
                 if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
                 else '__default__')

    rebulk.rules(BonusTitleRule)

    return rebulk
Example #14
0
def edition(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the hard-coded ``edition`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def edition_disabled(context):
        """Report whether ``edition`` is disabled for the given context."""
        return is_disabled(context, 'edition')

    def special_conflict_solver(match, other):  # pylint:disable=unused-argument
        """
        Return ``other`` when it is an ``episode_details`` match valued
        'Special'; otherwise return ``'__default__'``.
        """
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    def neighbor_prefix_tags():
        """Return a fresh tag list shared by several edition patterns."""
        return ['has-neighbor', 'release-group-prefix']

    rebulk = Rebulk(disabled=edition_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', "collector'?s?-edition", 'edition-collector',
                 value='Collector')
    rebulk.regex('special-edition', 'edition-special',
                 value='Special',
                 conflict_solver=special_conflict_solver)
    rebulk.string('se', value='Special', tags='has-neighbor')
    rebulk.string('ddc', value="Director's Definitive Cut")
    rebulk.regex('criterion-edition', 'edition-criterion', 'CC',
                 value='Criterion')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe',
                 value='Deluxe')
    rebulk.regex('limited', 'limited-edition',
                 value='Limited', tags=neighbor_prefix_tags())
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical',
                 value='Theatrical')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended', tags=neighbor_prefix_tags())
    rebulk.regex(r'super-duper-cut', value='Super Duper Cut')
    rebulk.regex('alternat(e|ive)(?:-?Cut)?',
                 value='Alternative Cut', tags=neighbor_prefix_tags())

    # These editions use the same text for the pattern and for the value.
    for label in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(label, value=label, tags=neighbor_prefix_tags())

    rebulk.string('Festival', value='Festival',
                  tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.regex('imax', 'imax-edition', value='IMAX')
    rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    rebulk.regex('ultimate-edition', value='Ultimate')
    rebulk.regex("ultimate-collector'?s?-edition",
                 value=['Ultimate', 'Collector'])
    rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection',
                 value=['Ultimate', 'Fan'])

    return rebulk
Example #15
0
def edition(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the hard-coded ``edition`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def edition_disabled(context):
        """Report whether ``edition`` is disabled for the given context."""
        return is_disabled(context, 'edition')

    def special_conflict_solver(match, other):  # pylint:disable=unused-argument
        """
        Return ``other`` when it is an ``episode_details`` match valued
        'Special'; otherwise return ``'__default__'``.
        """
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    def neighbor_prefix_tags():
        """Return a fresh tag list shared by several edition patterns."""
        return ['has-neighbor', 'release-group-prefix']

    rebulk = Rebulk(disabled=edition_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', "collector'?s?-edition", 'edition-collector',
                 value='Collector')
    rebulk.regex('special-edition', 'edition-special',
                 value='Special',
                 conflict_solver=special_conflict_solver)
    rebulk.string('se', value='Special', tags='has-neighbor')
    rebulk.string('ddc', value="Director's Definitive Cut")
    rebulk.regex('criterion-edition', 'edition-criterion', 'CC',
                 value='Criterion')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe',
                 value='Deluxe')
    rebulk.regex('limited', 'limited-edition',
                 value='Limited', tags=neighbor_prefix_tags())
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical',
                 value='Theatrical')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended', tags=neighbor_prefix_tags())
    rebulk.regex('alternat(e|ive)(?:-?Cut)?',
                 value='Alternative Cut', tags=neighbor_prefix_tags())

    # These editions use the same text for the pattern and for the value.
    for label in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(label, value=label, tags=neighbor_prefix_tags())

    rebulk.string('Festival', value='Festival',
                  tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.regex('imax', 'imax-edition', value='IMAX')
    rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    rebulk.regex('ultimate-edition', value='Ultimate')
    rebulk.regex("ultimate-collector'?s?-edition",
                 value=['Ultimate', 'Collector'])
    rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection',
                 value=['Ultimate', 'Fan'])

    return rebulk
Example #16
0
def crc(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the ``crc32`` and ``uuid`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def crc_disabled(context):
        """Report whether ``crc32`` is disabled for the given context."""
        return is_disabled(context, 'crc32')

    def numbering_conflict_solver(match, other):
        """
        Return ``match`` against ``episode``/``season`` matches; otherwise
        return ``'__default__'``.
        """
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=crc_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight consecutive hexadecimal characters.
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}',
                 name='crc32',
                 conflict_solver=numbering_conflict_solver)

    rebulk.functional(guess_idnumber,
                      name='uuid',
                      conflict_solver=numbering_conflict_solver)
    return rebulk
Example #17
0
def crc(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the ``crc32`` and ``uuid`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def crc_disabled(context):
        """Report whether ``crc32`` is disabled for the given context."""
        return is_disabled(context, 'crc32')

    def crc32_conflict_solver(match, other):  # pylint:disable=unused-argument
        """
        Return ``other`` when it is an ``episode``/``season`` match;
        otherwise return ``'__default__'``.
        """
        if other.name in ['episode', 'season']:
            return other
        return '__default__'

    def uuid_conflict_solver(match, other):
        """
        Return ``match`` against ``episode``/``season`` matches; otherwise
        return ``'__default__'``.
        """
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=crc_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight consecutive hexadecimal characters.
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}',
                 name='crc32',
                 conflict_solver=crc32_conflict_solver)

    rebulk.functional(guess_idnumber,
                      name='uuid',
                      conflict_solver=uuid_conflict_solver)
    return rebulk
Example #18
0
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the bit rate patterns.

    The builder is disabled only when both ``audio_bit_rate`` and
    ``video_bit_rate`` are disabled.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bit_rate_disabled(context):
        """Report whether both bit rate properties are disabled."""
        if not is_disabled(context, 'audio_bit_rate'):
            return is_disabled(context, 'audio_bit_rate')
        return is_disabled(context, 'video_bit_rate')

    def bit_rate_conflict_solver(match, other):
        """
        Return ``match`` against a non-weak ``audio_channels`` match,
        otherwise return ``other``.
        """
        if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags:
            return match
        return other

    base = Rebulk(disabled=lambda context: (is_disabled(context, 'audio_bit_rate')
                                            and is_disabled(context, 'video_bit_rate')))
    rebulk = base.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)

    # Integer and decimal amounts followed by a k/m/g bit-rate unit.
    rate_patterns = (r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)')
    rebulk.regex(*rate_patterns,
                 conflict_solver=bit_rate_conflict_solver,
                 formatter=BitRate.fromstring,
                 tags=['release-group-prefix'])

    rebulk.rules(BitRateTypeRule)

    return rebulk
Example #19
0
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Build the Rebulk object for ``other``, loading its patterns from the
    ``other`` entry of the configuration and registering its rules.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def other_disabled(context):
        """Report whether ``other`` is disabled for the given context."""
        return is_disabled(context, 'other')

    rebulk = Rebulk(disabled=other_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    other_config = config.get('other')
    load_config_patterns(rebulk, other_config)

    # Registered in this exact order.
    other_rules = (
        RenameAnotherToOther,
        ValidateHasNeighbor,
        ValidateHasNeighborAfter,
        ValidateHasNeighborBefore,
        ValidateScreenerRule,
        ValidateMuxRule,
        ValidateHardcodedSubs,
        ValidateStreamingServiceNeighbor,
        ValidateAtEnd,
        ValidateReal,
        ProperCountRule,
    )
    rebulk.rules(*other_rules)

    return rebulk
Example #20
0
def source(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the hard-coded ``source`` patterns.

    Most patterns capture an optional or mandatory "Rip" marker in an
    ``other`` group next to the source itself.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def source_disabled(context):
        """Report whether ``source`` is disabled for the given context."""
        return is_disabled(context, 'source')

    rebulk = Rebulk(disabled=source_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE,
                                   abbreviations=[dash],
                                   private_parent=True,
                                   children=True)
    rebulk.defaults(name='source',
                    tags=['video-codec-prefix', 'streaming_service.suffix'])

    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    rip_optional_suffix = '(?:' + rip_suffix + ')?'

    def build_source_pattern(*patterns, **kwargs):
        """Wrap each pattern in a group, with the optional ``prefix``/``suffix`` around it."""
        template = (kwargs.get('prefix') or '') + '({0})' + (kwargs.get('suffix') or '')
        built = []
        for pattern in patterns:
            built.append(template.format(pattern))
        return built

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Return ``other`` when it is an 'other' match, else ``'__default__'``."""
        if other.name == 'other':
            return other
        return '__default__'

    def ripped(source_name):
        """Return a fresh value mapping pairing a source name with 'Rip'."""
        return {'source': source_name, 'other': 'Rip'}

    # Tapes, cameras, telesync / telecine
    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value=ripped('VHS'))
    rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
                 value=ripped('Camera'))
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
                 value=ripped('HD Camera'))
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
                 value=ripped('Telesync'))
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
                 value=ripped('HD Telesync'))
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
                 value=ripped('Telecine'))
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
                 value=ripped('HD Telecine'))

    # Television
    rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
                 value=ripped('Pay-per-view'))
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
                 value=ripped('TV'))
    # TV is too common to allow matching
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),
                 value=ripped('TV'))
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value=ripped('TV'))
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
                 value=ripped('Digital TV'))

    # DVD and digital master
    rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
                 value=ripped('DVD'))
    rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
                 value=ripped('Digital Master'))
    rebulk.regex(
        *build_source_pattern(
            'VIDEO-?TS',
            'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
            'DVD-?9',
            'DVD-?5'),
        value='DVD')

    # HDTV
    rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix),
                 conflict_solver=demote_other,
                 value=ripped('HDTV'))
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix),
                 conflict_solver=demote_other,
                 value=ripped('HDTV'))
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'),
                 conflict_solver=demote_other,
                 value=ripped('HDTV'))

    rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
                 value=ripped('Video on Demand'))

    # Web
    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value=ripped('Web'))
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    # HD-DVD and Blu-ray
    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value=ripped('HD-DVD'))

    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50',
                                       suffix=rip_optional_suffix),
                 value=ripped('Blu-ray'))
    # BRRip
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)',
                                       '(?P<another>BR)-?(?=Mux)'),
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    # BRRip
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'),
                 value='Ultra HD Blu-ray')

    # Analog / Ultra HDTV
    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix),
                 conflict_solver=demote_other,
                 value=ripped('Ultra HDTV'))
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix),
                 conflict_solver=demote_other,
                 value=ripped('Ultra HDTV'))

    # Satellite
    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
                 value=ripped('Satellite'))
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value=ripped('Satellite'))

    rebulk.rules(ValidateSource, UltraHdBlurayRule)

    return rebulk
Example #21
0
def episodes():
    """
    Builder for rebulk object.

    Creates a Rebulk instance and registers on it the patterns that produce the
    ``season``, ``episode``, ``version``, ``count``, ``other``, ``episode_details``
    and ``episode_format`` matches, then registers the rule classes passed to the
    final ``rebulk.rules(...)`` call.

    The successive ``rebulk.defaults(...)`` calls change the default keyword
    arguments for the registrations that follow them, so the order of the
    statements in this function is significant.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements
    rebulk = Rebulk()
    # All regex patterns are case-insensitive and all string patterns ignore case.
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    # The separator groups are only needed by the rules, hence declared as private names.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        - A season/episode match wins over a screen_size, video_codec, audio_codec,
          audio_channels, container or date match.
        - Between two season/episode matches coming from different initiators, the
          one whose initiator raw text contains an ``x`` wins (``match`` is checked first).
        - In every other case ``'__default__'`` is returned.

        :param match: the match being evaluated
        :param other: the conflicting match
        :return: ``match``, ``other`` or the string ``'__default__'``
        """
        if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec',
                                                                  'audio_codec', 'audio_channels',
                                                                  'container', 'date']:
            return match
        elif match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
                and match.initiator != other.initiator:
            if 'x' in match.initiator.raw.lower():
                return match
            if 'x' in other.initiator.raw.lower():
                return other
        return '__default__'

    # Characters accepted around a season/episode initiator: the shared `seps`
    # plus the 'x' / 'e' markers in both cases.
    season_episode_seps = []
    season_episode_seps.extend(seps)
    season_episode_seps.extend(['x', 'X', 'e', 'E'])

    def season_episode_validator(match):
        """
        Validator for season/episode matches

        For a ``season`` or ``episode`` match whose initiator has a truthy ``start``
        (i.e. does not begin at index 0), the character at the initiator start, or the
        character just before it, must be one of ``season_episode_seps``.
        Any other match is accepted.

        :param match: the match to validate
        :return: True when the match is accepted
        """
        if match.name in ['season', 'episode'] and match.initiator.start:
            return match.initiator.input_string[match.initiator.start] in season_episode_seps \
                   or match.initiator.input_string[match.initiator.start - 1] in season_episode_seps
        return True

    # 01x02, 01x02x03x04
    # Three chained alternatives, all tagged 'SxxExx':
    #   1. S<season>[x|E|xE|Ex]<episode> followed by any number of extra episodes
    #   2. <season>x<episode> followed by any number of extra episodes
    #   3. S<season> followed by any number of extra seasons
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 conflict_solver=season_episode_conflict_solver) \
        .defaults(validator=season_episode_validator) \
        .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .chain() \
        .regex(r'S(?P<season>\d+)') \
        .regex(r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))').repeater('*')

    # episode_details property
    # Each keyword is registered as a string pattern whose value is the keyword itself;
    # 'Extra' / 'Extras' are both reported as 'Extras'.
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    # Defaults for the word-based patterns below: seps_surround is registered as the
    # validator under the '__parent__' key, with children=True and private_parent=True.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # Word lists used to build the alternations of the patterns below.
    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']

    # <season word> <numeral>, optionally followed by "<of word> <numeral>" (count),
    # then any number of "-N" seasons, then any number of "+N" / "&N" seasons.
    rebulk.chain(abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral}) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?(?P<seasonSeparator>-)@?(?P<season>\d+)').repeater('*') \
        .regex(r'@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+)').repeater('*')


    # <episode word> <digits>[v<version>][ of <count>]: digits-only variant,
    # disabled when the context type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter=int,
                 disabled=lambda context: context.get('type') == 'episode')

    # Same pattern but the episode may be any `numeral` (parsed with parse_numeral);
    # only enabled when the context type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode')

    # [S]<season> + x/E marker + an "all" word: the 'other' group is always
    # formatted to the constant 'Complete'.
    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'})

    # Re-apply the same defaults before registering the bare-number ("weak") patterns.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # 12, 13
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    # Only enabled when the 'episode_prefer_number' context option is truthy.
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: not context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    # Only enabled when the context type is 'episode'.
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode') \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102
    # One or two season digits glued to two episode digits. Wins over a conflicting
    # 'year' match, and is disabled when 'episode_prefer_number' is truthy.
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                 disabled=lambda context: context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    # Standalone version marker: 'v' followed by digits.
    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    # Back to the minimal defaults (only the private separator names) for the last patterns.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/episodes
    # Registered as a marker named 'hardcoded-movies'; its conflict solver always returns None.
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    # Rule classes registered on this rebulk object (defined elsewhere in the module).
    rebulk.rules(EpisodeNumberSeparatorRange, SeasonSeparatorRange, RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk
Example #22
0
def source(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the regex patterns that produce ``source`` matches (VHS, Camera,
    Telesync, TV, DVD, Web, Blu-ray, Satellite, ...). Most patterns also expose an
    ``other`` group with the value ``'Rip'``, and the BR* patterns expose an
    ``another`` group with the value ``'Reencoded'``.

    :param config: rule configuration; the ``rip_prefix`` and ``rip_suffix`` keys are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    # Every regex is case-insensitive, uses the `dash` abbreviation and is registered
    # with private_parent=True / children=True.
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE,
                                   abbreviations=[dash],
                                   private_parent=True,
                                   children=True)
    # Every pattern is named 'source' and carries both tags unless overridden;
    # the validator registered under '__parent__' accepts a match when either
    # seps_before or seps_after accepts it (see or_).
    rebulk = rebulk.defaults(
        name='source',
        tags=['video-codec-prefix', 'streaming_service.suffix'],
        validate_all=True,
        validator={'__parent__': or_(seps_before, seps_after)})

    # Regex fragments for a "Rip" marker placed before / after the source keyword.
    # NOTE(review): presumably these contain a named 'other' group, as the value
    # mappings below all provide an 'other' entry - confirm against the configuration.
    rip_prefix = config['rip_prefix']
    rip_suffix = config['rip_suffix']

    def build_source_pattern(*patterns, prefix='', suffix=''):
        """
        Helper pattern to build source pattern.

        Wraps each given pattern in a capturing group and surrounds it with
        ``prefix`` and ``suffix``.

        :param patterns: regex fragments, one per source keyword
        :param prefix: regex fragment prepended to every pattern
        :param suffix: regex fragment appended to every pattern
        :return: list with one regex string per given pattern
        """
        return [prefix + f'({pattern})' + suffix for pattern in patterns]

    def demote_other(match, other):  # pylint: disable=unused-argument
        """
        Default conflict solver with 'other' property.

        Returns ``other`` when it is an ``other`` or ``release_group`` match,
        and ``'__default__'`` for anything else.
        """
        return other if other.name in ['other', 'release_group'
                                       ] else '__default__'

    # --- Tape / camera / theater captures ---
    rebulk.regex(*build_source_pattern('VHS', suffix=optional(rip_suffix)),
                 value={
                     'source': 'VHS',
                     'other': 'Rip'
                 })
    rebulk.regex(*build_source_pattern('CAM', suffix=optional(rip_suffix)),
                 value={
                     'source': 'Camera',
                     'other': 'Rip'
                 })
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=optional(rip_suffix)),
                 value={
                     'source': 'HD Camera',
                     'other': 'Rip'
                 })
    # For TS, we remove 'streaming_service.suffix' tag to avoid "Shots" being guessed as Showtime and TS.
    rebulk.regex(*build_source_pattern('TELESYNC',
                                       'TS',
                                       suffix=optional(rip_suffix)),
                 value={
                     'source': 'Telesync',
                     'other': 'Rip'
                 },
                 tags=['video-codec-prefix'],
                 overrides=["tags"])
    rebulk.regex(*build_source_pattern('HD-?TELESYNC',
                                       'HD-?TS',
                                       suffix=optional(rip_suffix)),
                 value={
                     'source': 'HD Telesync',
                     'other': 'Rip'
                 })
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE',
                                       'TC',
                                       suffix=optional(rip_suffix)),
                 value={
                     'source': 'Telecine',
                     'other': 'Rip'
                 })
    rebulk.regex(*build_source_pattern('HD-?TELECINE',
                                       'HD-?TC',
                                       suffix=optional(rip_suffix)),
                 value={
                     'source': 'HD Telecine',
                     'other': 'Rip'
                 })
    # --- Broadcast TV ---
    rebulk.regex(*build_source_pattern('PPV', suffix=optional(rip_suffix)),
                 value={
                     'source': 'Pay-per-view',
                     'other': 'Rip'
                 })
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=optional(rip_suffix)),
                 value={
                     'source': 'TV',
                     'other': 'Rip'
                 })
    # Bare 'TV' is only accepted with a mandatory rip suffix (here) or rip prefix (next pattern).
    rebulk.regex(
        *build_source_pattern(
            'TV', suffix=rip_suffix),  # TV is too common to allow matching
        value={
            'source': 'TV',
            'other': 'Rip'
        })
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value={
                     'source': 'TV',
                     'other': 'Rip'
                 })
    # 'TV' immediately followed by 'Dub' (lookahead, 'Dub' itself is not consumed).
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB',
                                       'PD-?TV',
                                       suffix=optional(rip_suffix)),
                 value={
                     'source': 'Digital TV',
                     'other': 'Rip'
                 })
    # --- DVD / digital master ---
    rebulk.regex(*build_source_pattern('DVD', suffix=optional(rip_suffix)),
                 value={
                     'source': 'DVD',
                     'other': 'Rip'
                 })
    rebulk.regex(*build_source_pattern('DM', suffix=optional(rip_suffix)),
                 value={
                     'source': 'Digital Master',
                     'other': 'Rip'
                 })
    rebulk.regex(
        *build_source_pattern(
            'VIDEO-?TS',
            'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
            'DVD-?9',
            'DVD-?5'),
        value='DVD')

    # --- HDTV (these patterns yield to a conflicting other/release_group match via demote_other) ---
    rebulk.regex(*build_source_pattern('HD-?TV', suffix=optional(rip_suffix)),
                 conflict_solver=demote_other,
                 value={
                     'source': 'HDTV',
                     'other': 'Rip'
                 })
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix),
                 conflict_solver=demote_other,
                 value={
                     'source': 'HDTV',
                     'other': 'Rip'
                 })
    # 'TV' followed by 'Rip-HD': the whole 'Rip-HD' text is captured in the 'other' group.
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'),
                 conflict_solver=demote_other,
                 value={
                     'source': 'HDTV',
                     'other': 'Rip'
                 })

    # --- Video on demand / web ---
    rebulk.regex(*build_source_pattern('VOD', suffix=optional(rip_suffix)),
                 value={
                     'source': 'Video on Demand',
                     'other': 'Rip'
                 })

    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value={
                     'source': 'Web',
                     'other': 'Rip'
                 })
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)',
                                       suffix=optional(rip_suffix)),
                 value={
                     'source': 'Web',
                     'other': 'Rip',
                     'another': 'Rip'
                 })
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB',
                                       'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    # Bare 'WEB' is registered with the 'weak.source' tag instead of the default tags.
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    # --- HD-DVD / Blu-ray ---
    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=optional(rip_suffix)),
                 value={
                     'source': 'HD-DVD',
                     'other': 'Rip'
                 })

    rebulk.regex(*build_source_pattern('Blu-?ray',
                                       'BD',
                                       'BD[59]',
                                       'BD25',
                                       'BD50',
                                       suffix=optional(rip_suffix)),
                 value={
                     'source': 'Blu-ray',
                     'other': 'Rip'
                 })
    # 'BR' directly followed by Scr/Screener or Mux (lookahead): Blu-ray, flagged 'Reencoded'.
    rebulk.regex(
        *build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)',
                              '(?P<another>BR)-?(?=Mux)'),  # BRRip
        value={
            'source': 'Blu-ray',
            'another': 'Reencoded'
        })
    rebulk.regex(
        *build_source_pattern('(?P<another>BR)', suffix=rip_suffix),  # BRRip
        value={
            'source': 'Blu-ray',
            'other': 'Rip',
            'another': 'Reencoded'
        })

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'),
                 value='Ultra HD Blu-ray')

    # --- Analog / Ultra HDTV ---
    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=optional(rip_suffix)),
                 conflict_solver=demote_other,
                 value={
                     'source': 'Ultra HDTV',
                     'other': 'Rip'
                 })
    # Bare 'UHD' needs the mandatory rip suffix to be taken as a source.
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix),
                 conflict_solver=demote_other,
                 value={
                     'source': 'Ultra HDTV',
                     'other': 'Rip'
                 })

    # --- Satellite ---
    rebulk.regex(*build_source_pattern('DSR',
                                       'DTH',
                                       suffix=optional(rip_suffix)),
                 value={
                     'source': 'Satellite',
                     'other': 'Rip'
                 })
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={
                     'source': 'Satellite',
                     'other': 'Rip'
                 })

    # Rule classes registered on this rebulk object (defined elsewhere in the module).
    rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource,
                 UltraHdBlurayRule)

    return rebulk
Example #23
0
def website(config):
    """
    Builder for rebulk object.

    Registers three ``website`` regex patterns built from the registered TLD list
    (read from ``tlds-alpha-by-domain.txt``, located three directory levels above
    this module) and from the configured safe sub-domains / TLDs / prefixes, a
    private string pattern for the configured website prefixes, and the
    ``PreferTitleOverWebsite`` and ``ValidateWebsitePrefix`` rules.

    :param config: rule configuration; the ``safe_tlds``, ``safe_subdomains``,
        ``safe_prefixes`` and ``prefixes`` keys are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(
        ignore_case=True)
    rebulk.defaults(name="website")

    tld_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
        'tlds-alpha-by-domain.txt')
    # The file MUST be opened in binary mode: each line is filtered with a bytes
    # literal and then decoded. In text mode (the previous behaviour) Python 3
    # yields str lines, so the ``b'--' not in tld`` test raises TypeError and
    # ``str`` has no ``decode`` method.
    with open(tld_path, 'rb') as tld_file:
        # Lines containing '--' are skipped, then the first remaining line is
        # dropped (presumably the header line of the file - confirm).
        tlds = [
            tld.strip().decode('utf-8') for tld in tld_file.readlines()
            if b'--' not in tld
        ][1:]  # All registered domain extension

    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Those words before a tlds are sure
    website_prefixes = config['prefixes']

    # <safe subdomain>.(<label>.)+<any registered tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:' + build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 children=True)
    # [<safe subdomain>.]*<label>.<safe tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains,
                 safe_tlds=safe_tlds,
                 children=True)
    # [<safe subdomain>.]*<label>.(<safe prefix>.)+<any registered tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_prefix) +
                 r'\.)+(?:' + build_or_pattern(tlds) + r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains,
                 safe_prefix=safe_prefix,
                 tlds=tlds,
                 children=True)

    # Private matches tagged 'website.prefix', only accepted when surrounded by separators.
    rebulk.string(*website_prefixes,
                  validator=seps_surround,
                  private=True,
                  tags=['website.prefix'])

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.

        A website match is removed when it does not start with one of the safe
        sub-domains / prefixes, is followed by a season, episode or year match,
        and is not located inside a 'group' marker.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches
            """
            return match.named('season', 'episode', 'year')

        def when(self, matches, context):
            to_remove = []
            for website_match in matches.named('website'):
                # Websites starting with a known-safe sub-domain or prefix are always kept.
                safe = any(
                    website_match.value.lower().startswith(safe_start)
                    for safe_start in safe_subdomains + safe_prefix)
                if not safe:
                    suffix = matches.next(
                        website_match, PreferTitleOverWebsite.valid_followers,
                        0)
                    if suffix:
                        group = matches.markers.at_match(
                            website_match,
                            lambda marker: marker.name == 'group', 0)
                        if not group:
                            to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

    return rebulk
Example #24
0
def episodes():
    """
    Builder for rebulk object.

    Creates a Rebulk instance and registers on it the patterns that produce the
    ``season``, ``episode``, ``version``, ``count``, ``other``, ``episode_details``
    and ``episode_format`` matches, then registers the rule classes passed to the
    final ``rebulk.rules(...)`` call.

    Most patterns are written twice: as a single regex when ``REGEX_AVAILABLE`` is
    truthy (those patterns repeat a named group and use the ``\\L<name>`` named-list
    syntax), and otherwise as a ``rebulk.chain`` of simpler regexes built with
    ``build_or_pattern``. Both variants of a pair describe the same input shapes.

    The successive ``rebulk.defaults(...)`` calls change the default keyword
    arguments for the registrations that follow them, so the order of the
    statements in this function is significant.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    #pylint: disable=too-many-branches,too-many-statements
    rebulk = Rebulk()
    # All regex patterns are case-insensitive and all string patterns ignore case.
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    # The separator groups are only needed by the rules, hence declared as private names.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # 01x02, 01x02x03x04
    # The conflict solver (identical in both branches) lets a season/episode match win
    # over screen_size, video_codec, audio_codec, audio_channels, container and date
    # matches, and returns '__default__' otherwise.
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)' +
                     r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))*',
                     # S01E02, S01x02, S01E02E03, S01Ex02, S01xE02, SO1Ex02Ex03
                     r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)' +
                     r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))*',
                     # S01
                     r'S(?P<season>\d+)' +
                     r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))*',
                     formatter={'season': int, 'episode': int},
                     tags=['SxxExx'],
                     abbreviations=[alt_dash],
                     children=True,
                     private_parent=True,
                     conflict_solver=lambda match, other: match
                     if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec',
                                                                               'audio_codec', 'audio_channels',
                                                                               'container', 'date']
                     else '__default__')
    else:
        rebulk.chain(formatter={'season': int, 'episode': int},
                     tags=['SxxExx'],
                     abbreviations=[alt_dash],
                     children=True,
                     private_parent=True,
                     conflict_solver=lambda match, other: match
                     if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec',
                                                                               'audio_codec', 'audio_channels',
                                                                               'container', 'date']
                     else '__default__') \
            .defaults(validator=None)\
            .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
            .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
            .chain() \
            .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
            .regex(r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))').repeater('*') \
            .chain() \
            .regex(r'S(?P<season>\d+)') \
            .regex(r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))').repeater('*')

    # episode_details property
    # Each keyword is registered as a string pattern whose value is the keyword itself;
    # 'Extra' / 'Extras' are both reported as 'Extras'.
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    # Defaults for the word-based patterns below: seps_surround is registered as the
    # validator under the '__parent__' key, with children=True and private_parent=True.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # Word lists used either as named lists (REGEX_AVAILABLE) or through build_or_pattern.
    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']

    # <season word> <numeral>, optionally followed by "<of word> <numeral>" (count),
    # then any number of "-N" seasons, then any number of "+N" / "&N" seasons.
    if REGEX_AVAILABLE:
        rebulk.regex(r'\L<season_words>@?(?P<season>' + numeral + ')' +
                     r'(?:@?\L<of_words>@?(?P<count>' + numeral + '))?' +
                     r'(?:@?(?P<seasonSeparator>-)@?(?P<season>\d+))*' +
                     r'(?:@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+))*',
                     of_words=of_words,
                     season_words=season_words,  # Season 1, # Season one
                     abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral})
    else:
        rebulk.chain(abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral})\
                .defaults(validator=None)\
                .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
                .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
                .regex(r'@?(?P<seasonSeparator>-)@?(?P<season>\d+)').repeater('*') \
                .regex(r'@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+)').repeater('*')

    # <episode word> <digits>[v<version>][ of <count>]: digits-only variant,
    # disabled when the context type is 'episode'.
    if REGEX_AVAILABLE:
        rebulk.regex(r'\L<episode_words>-?(?P<episode>\d+)' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?\L<of_words>?-?(?P<count>\d+))?',
                     of_words=of_words,
                     episode_words=episode_words,  # Episode 4
                     abbreviations=[dash], formatter=int,
                     disabled=lambda context: context.get('type') == 'episode')
    else:
        rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                     abbreviations=[dash], formatter=int,
                     disabled=lambda context: context.get('type') == 'episode')

    # Same pattern but the episode may be any `numeral` (parsed with parse_numeral);
    # only enabled when the context type is 'episode'.
    if REGEX_AVAILABLE:
        rebulk.regex(r'\L<episode_words>-?(?P<episode>' + numeral + ')' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?\L<of_words>?-?(?P<count>\d+))?',
                     of_words=of_words,
                     episode_words=episode_words,  # Episode 4
                     abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int},
                     disabled=lambda context: context.get('type') != 'episode')
    else:
        rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?'+ build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                     abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int},
                     disabled=lambda context: context.get('type') != 'episode')

    # [S]<season> + x/E marker + an "all" word: the 'other' group is always
    # formatted to the constant 'Complete'.
    if REGEX_AVAILABLE:
        rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>\L<all_words>)',
                     tags=['SxxExx'],
                     all_words=all_words,
                     abbreviations=[dash],
                     validator=None,
                     formatter={'season': int, 'other': lambda match: 'Complete'})
    else:
        rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>'+build_or_pattern(all_words)+')',
                     tags=['SxxExx'],
                     abbreviations=[dash],
                     validator=None,
                     formatter={'season': int, 'other': lambda match: 'Complete'})

    # Re-apply the same defaults before registering the bare-number ("weak") patterns.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    if REGEX_AVAILABLE:
        # 12, 13
        rebulk.regex(r'(?P<episode>\d{2})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{2}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'(?P<episode>\d{2})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    if REGEX_AVAILABLE:
        # 012, 013
        rebulk.regex(r'0(?P<episode>\d{1,2})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'0(?P<episode>\d{1,2})') \
            .regex(r'v(?P<version>\d+)').repeater('?') \
            .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # The 3-4 digit pattern is only enabled when the 'episode_prefer_number'
    # context option is truthy.
    if REGEX_AVAILABLE:
        # 112, 113
        rebulk.regex(r'(?P<episode>\d{3,4})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{3,4}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: not context.get('episode_prefer_number', False))
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: not context.get('episode_prefer_number', False)) \
            .defaults(validator=None) \
            .regex(r'(?P<episode>\d{3,4})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # The single-digit pattern is only enabled when the context type is 'episode'.
    if REGEX_AVAILABLE:
        # 1, 2, 3
        rebulk.regex(r'(?P<episode>\d)' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{1,2}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: context.get('type') != 'episode')
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: context.get('type') != 'episode') \
            .defaults(validator=None) \
            .regex(r'(?P<episode>\d)')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    if REGEX_AVAILABLE:
        rebulk.regex(r'e(?P<episode>\d{1,4})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4}))*',
                     formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'e(?P<episode>\d{1,4})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    if REGEX_AVAILABLE:
        rebulk.regex(r'ep-?(?P<episode>\d{1,4})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4}))*',
                     abbreviations=[dash],
                     formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'ep-?(?P<episode>\d{1,4})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102
    # One or two season digits glued to two episode digits. Wins over a conflicting
    # 'year' match, and is disabled when 'episode_prefer_number' is truthy.
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>x|-)(?P<episode>\d{2}))*',
                     tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                     formatter={'season': int, 'episode': int, 'version': int},
                     conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                     disabled=lambda context: context.get('episode_prefer_number', False))
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                     formatter={'season': int, 'episode': int, 'version': int},
                     conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                     disabled=lambda context: context.get('episode_prefer_number', False))\
            .defaults(validator=None)\
            .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    # Standalone version marker: 'v' followed by digits.
    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    # Back to the minimal defaults (only the private separator names) for the last patterns.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    #TODO: List of words
    # detached of X count (season/episode)
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<episode>\d+)?-?\L<of_words>-?(?P<count>\d+)-?\L<episode_words>?', of_words=of_words,
                     episode_words=episode_words, abbreviations=[dash], children=True, private_parent=True,
                     formatter=int)
    else:
        rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                     r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                     abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/episodes
    # Registered as a marker named 'hardcoded-movies'; its conflict solver always returns None.
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    # Rule classes registered on this rebulk object (defined elsewhere in the module).
    rebulk.rules(EpisodeNumberSeparatorRange, SeasonSeparatorRange, RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk
Example #25
0
def source(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the ``source`` patterns.

    Most patterns accept a "Rip" marker next to the source keyword. The marker
    is captured in a named ``other`` group (``another`` is used by the BR and
    WEBCap variants) and receives its own entry in the ``value`` mapping.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash],
                                   private_parent=True, children=True)
    rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])

    # Regex fragments for the "Rip" marker: before the keyword, after it, or optionally after it.
    leading_rip = '(?P<other>Rip)-?'
    trailing_rip = '-?(?P<other>Rip)'
    maybe_trailing_rip = '(?:' + trailing_rip + ')?'

    def wrap(*alternatives, **affixes):
        """Put each alternative in a capturing group framed by the optional ``prefix``/``suffix``."""
        before = affixes.get('prefix') or ''
        after = affixes.get('suffix') or ''
        template = before + '({0})' + after
        return [template.format(alternative) for alternative in alternatives]

    def ripped(label):
        """Return a new value mapping: *label* for ``source`` and 'Rip' for ``other``."""
        return {'source': label, 'other': 'Rip'}

    def other_or_default(match, other):  # pylint: disable=unused-argument
        """Conflict solver returning the conflicting match when it is named 'other', else '__default__'."""
        return other if other.name == 'other' else '__default__'

    rebulk.regex(*wrap('VHS', suffix=maybe_trailing_rip), value=ripped('VHS'))
    rebulk.regex(*wrap('CAM', suffix=maybe_trailing_rip), value=ripped('Camera'))
    rebulk.regex(*wrap('HD-?CAM', suffix=maybe_trailing_rip), value=ripped('HD Camera'))
    rebulk.regex(*wrap('TELESYNC', 'TS', suffix=maybe_trailing_rip), value=ripped('Telesync'))
    rebulk.regex(*wrap('HD-?TELESYNC', 'HD-?TS', suffix=maybe_trailing_rip), value=ripped('HD Telesync'))
    rebulk.regex(*wrap('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*wrap('TELECINE', 'TC', suffix=maybe_trailing_rip), value=ripped('Telecine'))
    rebulk.regex(*wrap('HD-?TELECINE', 'HD-?TC', suffix=maybe_trailing_rip), value=ripped('HD Telecine'))
    rebulk.regex(*wrap('PPV', suffix=maybe_trailing_rip), value=ripped('Pay-per-view'))
    rebulk.regex(*wrap('SD-?TV', suffix=maybe_trailing_rip), value=ripped('TV'))
    # A bare "TV" is too common to match on its own: here the Rip marker is mandatory.
    rebulk.regex(*wrap('TV', suffix=trailing_rip), value=ripped('TV'))
    rebulk.regex(*wrap('TV', 'SD-?TV', prefix=leading_rip), value=ripped('TV'))
    rebulk.regex(*wrap('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*wrap('DVB', 'PD-?TV', suffix=maybe_trailing_rip), value=ripped('Digital TV'))
    rebulk.regex(*wrap('DVD', suffix=maybe_trailing_rip), value=ripped('DVD'))
    rebulk.regex(*wrap('DM', suffix=maybe_trailing_rip), value=ripped('Digital Master'))
    # The (?!E) lookahead stops "DVD-R" from matching when an "E" follows (e.g. "DVD-Real ...").
    rebulk.regex(*wrap('VIDEO-?TS', 'DVD-?R(?:$|(?!E))', 'DVD-?9', 'DVD-?5'), value='DVD')

    rebulk.regex(*wrap('HD-?TV', suffix=maybe_trailing_rip),
                 value=ripped('HDTV'), conflict_solver=other_or_default)
    rebulk.regex(*wrap('TV-?HD', suffix=trailing_rip),
                 value=ripped('HDTV'), conflict_solver=other_or_default)
    rebulk.regex(*wrap('TV', suffix='-?(?P<other>Rip-?HD)'),
                 value=ripped('HDTV'), conflict_solver=other_or_default)

    rebulk.regex(*wrap('VOD', suffix=maybe_trailing_rip), value=ripped('Video on Demand'))

    rebulk.regex(*wrap('WEB', 'WEB-?DL', suffix=trailing_rip), value=ripped('Web'))
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*wrap('WEB-?(?P<another>Cap)', suffix=maybe_trailing_rip),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*wrap('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    rebulk.regex(*wrap('HD-?DVD', suffix=maybe_trailing_rip), value=ripped('HD-DVD'))

    rebulk.regex(*wrap('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=maybe_trailing_rip),
                 value=ripped('Blu-ray'))
    # BR followed by Screener/Mux, and BRRip below, also carry a 'Reencoded' value for ``another``.
    rebulk.regex(*wrap('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    rebulk.regex(*wrap('(?P<another>BR)', suffix=trailing_rip),  # BRRip
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*wrap('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')

    rebulk.regex(*wrap('AHDTV'), value='Analog HDTV')
    rebulk.regex(*wrap('UHD-?TV', suffix=maybe_trailing_rip),
                 value=ripped('Ultra HDTV'), conflict_solver=other_or_default)
    rebulk.regex(*wrap('UHD', suffix=trailing_rip),
                 value=ripped('Ultra HDTV'), conflict_solver=other_or_default)

    rebulk.regex(*wrap('DSR', 'DTH', suffix=maybe_trailing_rip), value=ripped('Satellite'))
    rebulk.regex(*wrap('DSR?', 'SAT', suffix=trailing_rip), value=ripped('Satellite'))

    rebulk.rules(ValidateSource, UltraHdBlurayRule)

    return rebulk
Example #26
0
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Build the Rebulk object holding the ``other`` patterns.

    All patterns are registered under the name ``other`` and, unless a call
    overrides it, validated with ``seps_surround``. Registration order is kept
    exactly as declared below.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    def streaming_tags(before=(), after=()):
        """Return a new tag list: *before*, the two streaming-service tags, then *after*."""
        return list(before) + ['streaming_service.prefix', 'streaming_service.suffix'] + list(after)

    for label, expressions in (('Audio Fixed', ('Audio-?Fix', 'Audio-?Fixed')),
                               ('Sync Fixed', ('Sync-?Fix', 'Sync-?Fixed')),
                               ('Dual Audio', ('Dual', 'Dual-?Audio')),
                               ('Widescreen', ('ws', 'wide-?screen')),
                               ('Reencoded', ('Re-?Enc(?:oded)?',))):
        rebulk.regex(*expressions, value=label)

    # Proper / Repack / Rerip and their "Real" variants all produce the value 'Proper'.
    rebulk.string('Repack', 'Rerip', value='Proper', tags=streaming_tags())
    rebulk.string('Proper', value='Proper', tags=streaming_tags(before=['has-neighbor']))

    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=streaming_tags(after=['real']))
    rebulk.regex('Real', value='Proper',
                 tags=streaming_tags(before=['has-neighbor'], after=['real']))

    rebulk.string('Fix', 'Fixed', value='Fix',
                  tags=streaming_tags(before=['has-neighbor-before', 'has-neighbor-after']))
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix', tags=streaming_tags())
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix', tags=streaming_tags())

    for keyword, label in (('Fansub', 'Fan Subtitled'), ('Fastsub', 'Fast Subtitled')):
        rebulk.string(keyword, value=label, tags='has-neighbor')

    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Accept the match only when a season word was captured before or after "Complete".
        :param match:
        :type match:
        :return: True if 'completeWordsBefore' or 'completeWordsAfter' has at least one child match
        :rtype: bool
        """
        parts = match.children
        return bool(parts.named('completeWordsBefore') or parts.named('completeWordsAfter'))

    complete_pattern = ''.join([
        '(?P<completeArticle>', complete_articles, '-)?',
        '(?P<completeWordsBefore>', season_words, '-)?',
        'Complete',
        '(?P<completeWordsAfter>-', season_words, ')?',
    ])
    rebulk.regex(complete_pattern,
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': compose(seps_surround, validate_complete)})
    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
    rebulk.regex('(HD)(?P<another>Rip)',
                 value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True,
                 children=True,
                 validator={'__parent__': seps_surround},
                 validate_all=True)

    # Keywords whose value is the keyword itself.
    for keyword in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(keyword, value=keyword)
    rebulk.string('3D', value='3D', tags='has-neighbor')

    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    for label, keywords in (('High Resolution', ('HR',)),
                            ('Line Dubbed', ('LD',)),
                            ('Mic Dubbed', ('MD',)),
                            ('Micro HD', ('mHD', 'HDLight')),
                            ('Low Definition', ('LDTV',)),
                            ('High Frame Rate', ('HFR',))):
        rebulk.string(*keywords, value=label)
    # The HD family overrides the default validator with None.
    rebulk.string('HD', value='HD', validator=None, tags=streaming_tags())
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None, tags=streaming_tags())
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None, tags=streaming_tags())
    rebulk.regex('Upscaled?', value='Upscaled')

    for keyword in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail', 'Colorized', 'Internal'):
        rebulk.string(keyword, value=keyword, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('LiNE', value='Line Audio',
                 tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')
    for label, keywords in (('Converted', ('CONVERT',)),
                            ('Documentary', ('DOCU', 'DOKU')),
                            ('Open Matte', ('OM',)),
                            ('Straight to Video', ('STV',)),
                            ('Original Aspect Ratio', ('OAR',))):
        rebulk.string(*keywords, value=label, tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])

    for coast in ('East', 'West'):
        rebulk.regex(r'(?:Live-)?(?:Episode-)?{0}-?(?:Coast-)?Feed'.format(coast),
                     value='{0} Coast Feed'.format(coast))

    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')

    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    for label, expression in (('HDR10', 'HDR(?:10)?'),
                              ('Dolby Vision', 'Dolby-?Vision'),
                              ('BT.2020', 'BT-?2020')):
        rebulk.regex(expression, value=label, tags='uhdbluray-neighbor')

    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Extras', value='Extras', tags='has-neighbor')
    rebulk.regex('Digital-?Extras?', value='Extras')
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
                 ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
                 ValidateAtEnd, ValidateReal, ProperCountRule)

    return rebulk
Example #27
0
def episodes():
    """
    Builder for rebulk object.

    Registers the season/episode patterns (``S01E02``, ``01x02``, season and
    episode words, bare numbers, ...), the ``episode_details`` and
    ``episode_format`` keywords, the hardcoded-movie marker, and the
    post-processing rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    def episodes_season_chain_breaker(matches):
        """
        Break chains if there's more than 100 offset between two neighbor values.

        Only the last two 'episode' matches and the last two 'season' matches are compared.

        :param matches: matches collected so far
        :type matches:
        :return: True when the chain must be broken
        :rtype: bool
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > 100:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > 100:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match: match being checked
        :param other: conflicting match
        :return: ``match``, ``other`` or ``'__default__'`` to defer to the default solver
        """
        if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec',
                                                                  'audio_codec', 'audio_channels',
                                                                  'container', 'date']:
            return match
        if match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
                and match.initiator != other.initiator:
            # Two season/episode matches coming from different parent matches:
            # check the 'weak-episode' tag first, then the presence of an 'x' in the raw parent text.
            if 'weak-episode' in match.tags:
                return match
            if 'weak-episode' in other.tags:
                return other
            if 'x' in match.initiator.raw.lower():
                return match
            if 'x' in other.initiator.raw.lower():
                return other
        return '__default__'

    season_episode_seps = []
    season_episode_seps.extend(seps)
    season_episode_seps.extend(['x', 'X', 'e', 'E'])

    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'eps', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']
    season_markers = ["S"]
    season_ep_markers = ["x"]
    episode_markers = ["xE", "Ex", "EP", "E", "x"]
    range_separators = ['-', '~', 'to', 'a']
    weak_discrete_separators = [sep for sep in seps if sep not in range_separators]
    strong_discrete_separators = ['+', '&', 'and', 'et']
    discrete_separators = strong_discrete_separators + weak_discrete_separators

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)

        :param match: parent match whose children hold the season/episode values
        :return: True when the ordering is acceptable
        :rtype: bool
        """
        values = match.children.to_dict(implicit=True)
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if sorted(values['season']) != values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if sorted(values['episode']) != values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.
            :param property_name: 'season' or 'episode'
            :type property_name: str
            :return: False when two values joined by a weak discrete separator are not consecutive
            :rtype: bool
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    # Some registered patterns capture several values without any
                    # '<property>Separator' group (e.g. the repeated SxxExx regex), so
                    # the lookup may find nothing; skip the separator checks in that case
                    # instead of failing on ``separator.raw``.
                    if separator is not None:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not current_match.value - previous_match.value == 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            # A strong separator validates the whole chain, whatever came before.
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver) \
        .regex(build_or_pattern(season_markers) + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers) + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers) +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers) +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers) + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators

        A raw value that can be coerced to an int is accepted directly; anything
        else must pass ``seps_surround``.

        :param match:
        :type match:
        :return:
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    # Season words followed by a numeral, optional "of <count>" and further seasons
    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman}) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?(?P<seasonSeparator>' +
               build_or_pattern(range_separators + discrete_separators + ['@'], escape=True) +
               r')@?(?P<season>\d+)').repeater('*')

    # Digits-only variant, disabled when the context type is 'episode'
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter=int,
                 disabled=lambda context: context.get('type') == 'episode')

    # Numeral variant, enabled only when the context type is 'episode'
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode')

    # A season number followed by an "All" word: the 'other' group is formatted as 'Complete'
    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # 12, 13
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113 (only when the 'episode_prefer_number' context option is set)
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: not context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3 (only when the context type is 'episode')
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode') \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102 (disabled when the 'episode_prefer_number' context option is set)
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                 disabled=lambda context: context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/eps
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk
Example #28
0
def video_codec(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the video patterns.

    Four property names are registered in turn by switching the defaults:
    ``video_codec``, ``video_profile``, ``video_api`` (one pattern) and
    ``color_depth``. Each one is disabled through ``is_disabled`` with its own
    property name.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def disabled_when(property_name):
        """Build the ``disabled`` callback that passes *property_name* to ``is_disabled``."""
        return lambda context: is_disabled(context, property_name)

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    # --- video_codec -------------------------------------------------------
    rebulk.defaults(name="video_codec",
                    tags=['source-suffix', 'streaming_service.suffix'],
                    disabled=disabled_when('video_codec'))

    rebulk.regex(r'Rv\d{2}', value='RealVideo')
    rebulk.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
    rebulk.string("DVDivX", "DivX", value="DivX")
    rebulk.string('XviD', value='Xvid')
    rebulk.regex('VC-?1', value='VC-1')
    for label, keywords in (('VP7', ('VP7',)),
                            ('VP8', ('VP8', 'VP80')),
                            ('VP9', ('VP9',))):
        rebulk.string(*keywords, value=label)
    for label, expressions in (('H.263', ('[hx]-?263',)),
                               ('H.264', ('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?')),
                               ('H.265', ('[hx]-?265', 'HEVC'))):
        rebulk.regex(*expressions, value=label)
    # "hevc10" carries both a codec and a color depth, each in its own named group.
    rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)',
                 value={'video_codec': 'H.265', 'color_depth': '10-bit'},
                 tags=['video-codec-suffix'],
                 children=True)

    # --- video_profile -----------------------------------------------------
    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    rebulk.defaults(name="video_profile",
                    validator=seps_surround,
                    disabled=disabled_when('video_profile'))

    for label, keywords in (('Baseline', ('BP',)),
                            ('Extended', ('XP', 'EP')),
                            ('Main', ('MP',)),
                            ('High', ('HP', 'HiP'))):
        rebulk.string(*keywords, value=label, tags='video_profile.rule')

    # https://en.wikipedia.org/wiki/Scalable_Video_Coding
    rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/AVCHD
    rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/H.265/HEVC
    rebulk.string('HEVC', value='High Efficiency Video Coding', tags='video_profile.rule')

    rebulk.regex('Hi422P', value='High 4:2:2')
    rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
    rebulk.regex('Hi10P?', value='High 10')  # no profile validation is required

    # --- video_api ---------------------------------------------------------
    rebulk.string('DXVA', value='DXVA', name='video_api',
                  disabled=disabled_when('video_api'))

    # --- color_depth -------------------------------------------------------
    rebulk.defaults(name='color_depth',
                    validator=seps_surround,
                    disabled=disabled_when('color_depth'))
    for label, expressions in (('12-bit', ('12.?bits?',)),
                               ('10-bit', ('10.?bits?', 'YUV420P10', 'Hi10P?')),
                               ('8-bit', ('8.?bits?',))):
        rebulk.regex(*expressions, value=label)

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
Example #29
0
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Builder for rebulk object.

    Registers, in a fixed order, every string and regex pattern reported under
    the ``other`` name, then attaches the rules that post-process those matches.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def streaming_tags(*extra):
        """
        Return a new list holding the two ``streaming_service`` tags followed by ``extra``.

        A new list is built on every call so that no two patterns share a tag list.
        """
        return ['streaming_service.prefix', 'streaming_service.suffix'] + list(extra)

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    # Plain regex -> value registrations relying only on the defaults above.
    for patterns, label in (
            (('Audio-?Fix', 'Audio-?Fixed'), 'Audio Fixed'),
            (('Sync-?Fix', 'Sync-?Fixed'), 'Sync Fixed'),
            (('Dual', 'Dual-?Audio'), 'Dual Audio'),
            (('ws', 'wide-?screen'), 'Widescreen'),
            (('Re-?Enc(?:oded)?',), 'Reencoded')):
        rebulk.regex(*patterns, value=label)

    # Proper / Fix variants, all carrying the streaming service tags.
    rebulk.string('Proper', 'Repack', 'Rerip', value='Proper', tags=streaming_tags())
    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper', tags=streaming_tags('real'))
    rebulk.string('Fix', 'Fixed', value='Fix',
                  tags=['has-neighbor-before', 'has-neighbor-after'] + streaming_tags())
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix', tags=streaming_tags())
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix', tags=streaming_tags())

    for pattern, label in (('Fansub', 'Fan Subtitled'), ('Fastsub', 'Fast Subtitled')):
        rebulk.string(pattern, value=label, tags='has-neighbor')

    season_alternation = build_or_pattern(["seasons?", "series?"])
    article_alternation = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Make sure a season word is defined before or after ``Complete``.

        :param match: match whose children are inspected
        :return: True when a ``completeWordsBefore`` or ``completeWordsAfter`` child exists
        :rtype: bool
        """
        found = match.children
        if found.named('completeWordsBefore'):
            return True
        return bool(found.named('completeWordsAfter'))

    complete_pattern = ''.join((
        '(?P<completeArticle>', article_alternation, '-)?',
        '(?P<completeWordsBefore>', season_alternation, '-)?',
        'Complete',
        '(?P<completeWordsAfter>-', season_alternation, ')?',
    ))
    rebulk.regex(complete_pattern,
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': compose(seps_surround, validate_complete)})

    for pattern, label in (('R5', 'Region 5'), ('RC', 'Region C')):
        rebulk.string(pattern, value=label)
    for pattern, label in (('Pre-?Air', 'Preair'), ('(?:PS-?)?Vita', 'PS Vita')):
        rebulk.regex(pattern, value=label)

    # The unnamed (HD) group and the named ``another`` group each receive a value
    # from this mapping; RenameAnotherToOther is registered at the end of this builder.
    rebulk.regex('(HD)(?P<another>Rip)',
                 value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True,
                 children=True,
                 validator={'__parent__': seps_surround},
                 validate_all=True)

    # Patterns whose reported value is the pattern text itself.
    for label in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(label, value=label)

    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    for patterns, label in (
            (('HR',), 'High Resolution'),
            (('LD',), 'Line Dubbed'),
            (('MD',), 'Mic Dubbed'),
            (('mHD', 'HDLight'), 'Micro HD'),
            (('LDTV',), 'Low Definition'),
            (('HFR',), 'High Frame Rate')):
        rebulk.string(*patterns, value=label)

    # The HD family overrides the default validator with None.
    rebulk.string('HD', value='HD', validator=None, tags=streaming_tags())
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None, tags=streaming_tags())
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None, tags=streaming_tags())
    rebulk.regex('Upscaled?', value='Upscaled')

    for label in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail', 'Colorized', 'Internal'):
        rebulk.string(label, value=label, tags=['has-neighbor', 'release-group-prefix'])

    rebulk.regex('LiNE', value='Line Audio',
                 tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')

    for patterns, label in (
            (('CONVERT',), 'Converted'),
            (('DOCU', 'DOKU'), 'Documentary'),
            (('OM',), 'Open Matte'),
            (('STV',), 'Straight to Video'),
            (('OAR',), 'Original Aspect Ratio')):
        rebulk.string(*patterns, value=label, tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])

    for coast in ('East', 'West'):
        feed_pattern = r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed'
        rebulk.regex(feed_pattern, value=coast + ' Coast Feed')

    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')

    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    for pattern, label in (
            ('HDR(?:10)?', 'HDR10'),
            ('Dolby-?Vision', 'Dolby Vision'),
            ('BT-?2020', 'BT.2020')):
        rebulk.regex(pattern, value=label, tags='uhdbluray-neighbor')

    for patterns, label in (
            (('Sample',), 'Sample'),
            (('Proof',), 'Proof'),
            (('Obfuscated', 'Scrambled'), 'Obfuscated')):
        rebulk.string(*patterns, value=label, tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther,
                 ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
                 ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs,
                 ValidateStreamingServiceNeighbor, ValidateAtEnd,
                 ProperCountRule)

    return rebulk
Example #30
0
def website(config):
    """
    Builder for rebulk object.

    Registers three website regexes assembled from the bundled TLD file and the
    ``safe_tlds``, ``safe_subdomains`` and ``safe_prefixes`` entries of ``config``,
    the private ``website.prefix`` strings taken from ``config['prefixes']``, and
    the rules that post-process website matches.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
        kept_entries = []
        for raw_line in tld_file.readlines():
            # Lines containing a double dash are left out of the list.
            if b'--' in raw_line:
                continue
            kept_entries.append(raw_line.strip().decode('utf-8'))
        # All registered domain extension; the first retained entry is dropped
        # (presumably the file's header line -- confirm against the data file).
        tlds = kept_entries[1:]

    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Those words before a tlds are sure
    website_prefixes = config['prefixes']

    # Literal fragments shared by the website regexes below.
    opening = r'(?:[^a-z0-9]|^)((?:'
    closing = r'))(?:[^a-z0-9]|$)'
    label_bridge = r'\.)*[a-z-]+\.(?:'

    # At least one safe subdomain, then any registered extension.
    rebulk.regex(''.join((opening, build_or_pattern(safe_subdomains),
                          r'\.)+(?:[a-z-]+\.)+(?:', build_or_pattern(tlds),
                          closing)),
                 children=True)
    # Optional safe subdomains, one label, then a safe extension.
    rebulk.regex(''.join((opening, build_or_pattern(safe_subdomains),
                          label_bridge, build_or_pattern(safe_tlds),
                          closing)),
                 safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
    # Optional safe subdomains, one label, a safe prefix, then any registered extension.
    rebulk.regex(''.join((opening, build_or_pattern(safe_subdomains),
                          label_bridge, build_or_pattern(safe_prefix),
                          r'\.)+(?:', build_or_pattern(tlds),
                          closing)),
                 safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    rebulk.string(*website_prefixes,
                  validator=seps_surround, private=True, tags=['website.prefix'])

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Tell whether a match following a website is a season, episode or year.
            """
            for name in match.names:
                if name in ('season', 'episode', 'year'):
                    return True
            return False

        def when(self, matches, context):
            rejected = []
            for candidate in matches.named('website'):
                # Websites starting with a safe subdomain or safe prefix are always kept.
                if any(candidate.value.lower().startswith(safe_start)
                       for safe_start in safe_subdomains + safe_prefix):
                    continue
                # Otherwise drop the website when a season/episode/year match follows it.
                if matches.next(candidate, PreferTitleOverWebsite.valid_followers, 0):
                    rejected.append(candidate)
            return rejected

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

    return rebulk