def part(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``part`` pattern.

    :param config: rule configuration; its ``prefixes`` entry is read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def part_disabled(context):
        """Delegate to ``is_disabled`` for the ``part`` property."""
        return is_disabled(context, 'part')

    def validate_roman(match):
        """
        Validate a numeral match: accepted directly when its raw text is
        int-coercable, otherwise it must be surrounded by separators.

        :param match: match to validate
        :return: validation result
        """
        return True if int_coercable(match.raw) else seps_surround(match)

    def part_in_range(match):
        """Accept only values strictly between 0 and 100."""
        return 0 < match.value < 100

    rebulk = Rebulk(disabled=part_disabled)
    rebulk.regex_defaults(flags=re.IGNORECASE,
                          abbreviations=[dash],
                          validator={'__parent__': seps_surround})

    prefixes = config['prefixes']
    part_pattern = build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')'

    rebulk.regex(part_pattern,
                 prefixes=prefixes,
                 validate_all=True,
                 private_parent=True,
                 children=True,
                 formatter=parse_numeral,
                 validator={'part': compose(validate_roman, part_in_range)})

    return rebulk
def size(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``size`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def size_disabled(context):
        """Delegate to ``is_disabled`` for the ``size`` property."""
        return is_disabled(context, 'size')

    # Integer and decimal amounts followed by an optional dash and mb/gb/tb.
    integer_size = r'\d+-?[mgt]b'
    decimal_size = r'\d+\.\d+-?[mgt]b'

    rebulk = Rebulk(disabled=size_disabled)
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='size', validator=seps_surround)
    rebulk.regex(integer_size,
                 decimal_size,
                 formatter=Size.fromstring,
                 tags=['release-group-prefix'])

    return rebulk
def edition(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``edition`` patterns.

    The patterns themselves come from ``config.get('edition')`` and are
    registered through ``load_config_patterns``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def edition_disabled(context):
        """Delegate to ``is_disabled`` for the ``edition`` property."""
        return is_disabled(context, 'edition')

    rebulk = Rebulk(disabled=edition_disabled)

    regex_configured = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    regex_configured.string_defaults(ignore_case=True)

    rebulk.defaults(name='edition', validator=seps_surround)

    edition_patterns = config.get('edition')
    load_config_patterns(rebulk, edition_patterns)

    return rebulk
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the bit rate patterns.

    Matches are named ``audio_bit_rate`` by default; ``BitRateTypeRule`` is
    added to the returned object.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def both_bit_rates_disabled(context):
        """Disabled only when audio AND video bit rates are both disabled."""
        return is_disabled(context, 'audio_bit_rate') and is_disabled(context, 'video_bit_rate')

    def bit_rate_conflict_solver(match, other):
        """Return ``match`` against a non-weak audio_channels, else ``other``."""
        if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags:
            return match
        return other

    integer_rate = r'\d+-?[kmg]b(ps|its?)'
    decimal_rate = r'\d+\.\d+-?[kmg]b(ps|its?)'

    rebulk = Rebulk(disabled=both_bit_rates_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
    rebulk.regex(integer_rate,
                 decimal_rate,
                 conflict_solver=bit_rate_conflict_solver,
                 formatter=BitRate.fromstring,
                 tags=['release-group-prefix'])

    rebulk.rules(BitRateTypeRule)

    return rebulk
def cds(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``cd`` / ``cd_count`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def cd_disabled(context):
        """Delegate to ``is_disabled`` for the ``cd`` property."""
        return is_disabled(context, 'cd')

    def between_1_and_99(match):
        """Accept only values strictly between 0 and 100."""
        return 0 < match.value < 100

    rebulk = Rebulk(disabled=cd_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    # "cd<N>" optionally followed by "of<M>"
    rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
                 validator={'cd': between_1_and_99, 'cd_count': between_1_and_99},
                 formatter={'cd': int, 'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})

    # "<M>cd" / "<M>cds"
    rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
                 validator={'cd': between_1_and_99, 'cd_count': between_1_and_99},
                 formatter={'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})

    return rebulk
def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``bonus`` pattern.

    ``BonusTitleRule`` is added to the returned object.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bonus_disabled(context):
        """Delegate to ``is_disabled`` for the ``bonus`` property."""
        return is_disabled(context, 'bonus')

    def bonus_conflict_solver(match, conflicting):
        """Return ``match`` against video_codec/episode unless tagged weak-episode."""
        if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=bonus_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(r'x(\d+)',
                 name='bonus',
                 private_parent=True,
                 children=True,
                 formatter=int,
                 validator={'__parent__': seps_surround},
                 validate_all=True,
                 conflict_solver=bonus_conflict_solver)

    rebulk.rules(BonusTitleRule)

    return rebulk
def container(config):
    """
    Builder for the rebulk object registering the ``container`` patterns.

    File-extension regexes are registered first (tagged ``extension``), then
    the defaults are cleared and replaced before plain strings are registered.

    :param config: rule configuration; the ``subtitles``, ``info``,
        ``videos``, ``torrent`` and ``nzb`` entries are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def container_disabled(context):
        """Delegate to ``is_disabled`` for the ``container`` property."""
        return is_disabled(context, 'container')

    def strip_separators(value):
        """Strip separator characters from both ends of the value."""
        return value.strip(seps)

    def lowercase(value):
        """Lower-case the matched value."""
        return value.lower()

    def extension_conflict_solver(match, other):  # pylint: disable=unused-argument
        """Return ``other`` against source/video_codec or a non-extension container."""
        if other.name in ('source', 'video_codec'):
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def name_conflict_solver(match, other):
        """Return ``match`` against source/video_codec or an extension container."""
        if other.name in ('source', 'video_codec'):
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=container_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=strip_separators,
                    tags=['extension'],
                    conflict_solver=extension_conflict_solver)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    # ".<ext>" anchored at the end of the input
    extension_groups = (
        (subtitles, 'subtitle'),
        (info, 'info'),
        (videos, 'video'),
        (torrent, 'torrent'),
        (nzb, 'nzb'),
    )
    for extensions, kind in extension_groups:
        rebulk.regex(r'\.' + build_or_pattern(extensions) + '$',
                     exts=extensions,
                     tags=['extension', kind])

    rebulk.defaults(clear=True,
                    name='container',
                    validator=seps_surround,
                    formatter=lowercase,
                    conflict_solver=name_conflict_solver)

    # 'sub' and 'ass' are left out of the plain-string subtitle names
    subtitle_names = [sub for sub in subtitles if sub not in ('sub', 'ass')]
    rebulk.string(*subtitle_names, tags=['subtitle'])
    for names, kind in ((videos, 'video'), (torrent, 'torrent'), (nzb, 'nzb')):
        rebulk.string(*names, tags=[kind])

    return rebulk
def cd(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``cd`` patterns.

    The whole ``config`` object is handed to ``load_config_patterns``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def cd_disabled(context):
        """Delegate to ``is_disabled`` for the ``cd`` property."""
        return is_disabled(context, 'cd')

    regex_options = {'flags': re.IGNORECASE, 'abbreviations': [dash]}

    rebulk = Rebulk(disabled=cd_disabled)
    rebulk = rebulk.regex_defaults(**regex_options)

    load_config_patterns(rebulk, config)

    return rebulk
def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``bonus`` patterns.

    Patterns come from ``config.get('bonus')``; ``BonusTitleRule`` is added
    to the returned object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bonus_disabled(context):
        """Delegate to ``is_disabled`` for the ``bonus`` property."""
        return is_disabled(context, 'bonus')

    rebulk = Rebulk(disabled=bonus_disabled)
    rebulk = rebulk.regex_defaults(name='bonus', flags=re.IGNORECASE)

    bonus_patterns = config.get('bonus')
    load_config_patterns(rebulk, bonus_patterns)

    rebulk.rules(BonusTitleRule)

    return rebulk
def container(config):
    """
    Builder for the rebulk object registering the ``container`` patterns.

    File-extension regexes are registered first (tagged ``extension``), then
    new defaults are set before plain strings are registered.

    :param config: rule configuration; the ``subtitles``, ``info``,
        ``videos``, ``torrent`` and ``nzb`` entries are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)

    def prefer_other(match, other):  # pylint: disable=unused-argument
        """Return ``other`` against source/video_codec or a non-extension container."""
        beaten_by_name = other.name in ('source', 'video_codec')
        if beaten_by_name or (other.name == 'container' and 'extension' not in other.tags):
            return other
        return '__default__'

    def prefer_match(match, other):
        """Return ``match`` against source/video_codec or an extension container."""
        wins_by_name = other.name in ('source', 'video_codec')
        if wins_by_name or (other.name == 'container' and 'extension' in other.tags):
            return match
        return '__default__'

    def register_extensions(extensions, kind):
        """Register the ".<ext>" end-anchored regex for one group of extensions."""
        rebulk.regex(r'\.' + build_or_pattern(extensions) + '$',
                     exts=extensions,
                     tags=['extension', kind])

    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=prefer_other)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    register_extensions(subtitles, 'subtitle')
    register_extensions(info, 'info')
    register_extensions(videos, 'video')
    register_extensions(torrent, 'torrent')
    register_extensions(nzb, 'nzb')

    rebulk.defaults(name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=prefer_match)

    # 'sub' and 'ass' are left out of the plain-string subtitle names
    plain_subtitles = [sub for sub in subtitles if sub not in ('sub', 'ass')]
    rebulk.string(*plain_subtitles, tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])
    rebulk.string(*nzb, tags=['nzb'])

    return rebulk
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the bit rate patterns.

    Patterns come from ``config.get('bit_rate')`` and are named
    ``audio_bit_rate`` by default; ``BitRateTypeRule`` is added to the
    returned object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def both_bit_rates_disabled(context):
        """Disabled only when audio AND video bit rates are both disabled."""
        return is_disabled(context, 'audio_bit_rate') and is_disabled(context, 'video_bit_rate')

    rebulk = Rebulk(disabled=both_bit_rates_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)

    bit_rate_patterns = config.get('bit_rate')
    load_config_patterns(rebulk, bit_rate_patterns)

    rebulk.rules(BitRateTypeRule)

    return rebulk
def cds(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``cd`` / ``cd_count`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def valid_disc_number(match):
        """Accept only values strictly between 0 and 100."""
        return 0 < match.value < 100

    def shared_options():
        """Options common to both patterns, built fresh for every registration."""
        return {
            'validator': {'cd': valid_disc_number, 'cd_count': valid_disc_number},
            'children': True,
            'private_parent': True,
            'properties': {'cd': [None], 'cd_count': [None]},
        }

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    # "cd<N>" optionally followed by "of<M>"
    rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
                 formatter={'cd': int, 'cd_count': int},
                 **shared_options())

    # "<M>cd" / "<M>cds"
    rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
                 formatter={'cd_count': int},
                 **shared_options())

    return rebulk
def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``bonus`` pattern.

    Registers the ``x<number>`` regex as the ``bonus`` property and adds
    ``BonusTitleRule`` to the returned object.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    def bonus_conflict_solver(match, conflicting):
        """Return ``match`` against video_codec/episode unless tagged weak-episode."""
        if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags:
            return match
        return '__default__'

    # The validator must be seps_surround itself. The previous
    # ``lambda match: seps_surround`` returned the function object, which is
    # always truthy, so the separator check was never performed.
    # validate_all=True mirrors the other builders in this file that pair a
    # '__parent__' validator with children=True / private_parent=True.
    rebulk.regex(r'x(\d+)',
                 name='bonus',
                 private_parent=True,
                 children=True,
                 formatter=int,
                 validator={'__parent__': seps_surround},
                 validate_all=True,
                 conflict_solver=bonus_conflict_solver)

    rebulk.rules(BonusTitleRule)

    return rebulk
def edition(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the hard-coded ``edition`` patterns.

    The patterns are described in a table and registered in table order.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    def special_conflict_solver(match, other):  # pylint: disable=unused-argument
        """Return ``other`` when it is an episode_details match valued 'Special'."""
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    regex, string = rebulk.regex, rebulk.string

    # (registration method, patterns, keyword options)
    registrations = [
        (regex, ('collector', "collector'?s?-edition", 'edition-collector'),
         {'value': 'Collector'}),
        (regex, ('special-edition', 'edition-special'),
         {'value': 'Special', 'conflict_solver': special_conflict_solver}),
        (string, ('se',),
         {'value': 'Special', 'tags': 'has-neighbor'}),
        (string, ('ddc',),
         {'value': "Director's Definitive Cut"}),
        (regex, ('criterion-edition', 'edition-criterion', 'CC'),
         {'value': 'Criterion'}),
        (regex, ('deluxe', 'deluxe-edition', 'edition-deluxe'),
         {'value': 'Deluxe'}),
        (regex, ('limited', 'limited-edition'),
         {'value': 'Limited', 'tags': ['has-neighbor', 'release-group-prefix']}),
        (regex, (r'theatrical-cut', r'theatrical-edition', r'theatrical'),
         {'value': 'Theatrical'}),
        (regex, (r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC'),
         {'value': "Director's Cut"}),
        (regex, ('extended', 'extended-?cut', 'extended-?version'),
         {'value': 'Extended', 'tags': ['has-neighbor', 'release-group-prefix']}),
        (regex, (r'super-duper-cut',),
         {'value': 'Super Duper Cut'}),
        (regex, ('alternat(e|ive)(?:-?Cut)?',),
         {'value': 'Alternative Cut', 'tags': ['has-neighbor', 'release-group-prefix']}),
    ]
    # One string pattern per value, each with its own tags list.
    registrations.extend(
        (string, (value,), {'value': value, 'tags': ['has-neighbor', 'release-group-prefix']})
        for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated')
    )
    registrations.extend([
        (string, ('Festival',),
         {'value': 'Festival', 'tags': ['has-neighbor-before', 'has-neighbor-after']}),
        (regex, ('imax', 'imax-edition'),
         {'value': 'IMAX'}),
        (regex, ('fan-edit(?:ion)?', 'fan-collection'),
         {'value': 'Fan'}),
        (regex, ('ultimate-edition',),
         {'value': 'Ultimate'}),
        (regex, ("ultimate-collector'?s?-edition",),
         {'value': ['Ultimate', 'Collector']}),
        (regex, ('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection'),
         {'value': ['Ultimate', 'Fan']}),
    ])

    for register, patterns, options in registrations:
        register(*patterns, **options)

    return rebulk
def edition(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the hard-coded ``edition`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def edition_disabled(context):
        """Delegate to ``is_disabled`` for the ``edition`` property."""
        return is_disabled(context, 'edition')

    def special_conflict_solver(match, other):  # pylint: disable=unused-argument
        """Return ``other`` when it is an episode_details match valued 'Special'."""
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    def neighbor_tags():
        """Return a fresh tags list so that no two patterns share one list object."""
        return ['has-neighbor', 'release-group-prefix']

    rebulk = Rebulk(disabled=edition_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', "collector'?s?-edition", 'edition-collector',
                 value='Collector')
    rebulk.regex('special-edition', 'edition-special',
                 value='Special',
                 conflict_solver=special_conflict_solver)
    rebulk.string('se', value='Special', tags='has-neighbor')
    rebulk.string('ddc', value="Director's Definitive Cut")
    rebulk.regex('criterion-edition', 'edition-criterion', 'CC',
                 value='Criterion')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe',
                 value='Deluxe')
    rebulk.regex('limited', 'limited-edition',
                 value='Limited', tags=neighbor_tags())
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical',
                 value='Theatrical')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended', tags=neighbor_tags())
    rebulk.regex('alternat(e|ive)(?:-?Cut)?',
                 value='Alternative Cut', tags=neighbor_tags())

    # Each of these words is its own value.
    for word in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(word, value=word, tags=neighbor_tags())

    rebulk.string('Festival', value='Festival',
                  tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.regex('imax', 'imax-edition', value='IMAX')
    rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    rebulk.regex('ultimate-edition', value='Ultimate')
    rebulk.regex("ultimate-collector'?s?-edition",
                 value=['Ultimate', 'Collector'])
    rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection',
                 value=['Ultimate', 'Fan'])

    return rebulk
def crc(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``crc32`` and ``uuid`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def crc_disabled(context):
        """Delegate to ``is_disabled`` for the ``crc32`` property."""
        return is_disabled(context, 'crc32')

    def crc32_conflict_solver(match, other):
        """Return ``match`` when the conflicting match is an episode or a season."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    def uuid_conflict_solver(match, other):
        """Return ``match`` when the conflicting match is an episode or a season."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=crc_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight hexadecimal characters
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}',
                 name='crc32',
                 conflict_solver=crc32_conflict_solver)

    rebulk.functional(guess_idnumber,
                      name='uuid',
                      conflict_solver=uuid_conflict_solver)

    return rebulk
def crc(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the ``crc32`` and ``uuid`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def crc_disabled(context):
        """Delegate to ``is_disabled`` for the ``crc32`` property."""
        return is_disabled(context, 'crc32')

    def crc32_conflict_solver(match, other):  # pylint: disable=unused-argument
        """Return ``other`` when the conflicting match is an episode or a season."""
        if other.name in ['episode', 'season']:
            return other
        return '__default__'

    def uuid_conflict_solver(match, other):
        """Return ``match`` when the conflicting match is an episode or a season."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=crc_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight hexadecimal characters
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}',
                 name='crc32',
                 conflict_solver=crc32_conflict_solver)

    rebulk.functional(guess_idnumber,
                      name='uuid',
                      conflict_solver=uuid_conflict_solver)

    return rebulk
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object registering the bit rate patterns.

    Matches are named ``audio_bit_rate`` by default; ``BitRateTypeRule`` is
    added to the returned object.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def solve_conflict(match, other):
        """Return ``match`` against a non-weak audio_channels, else ``other``."""
        against_strong_channels = (other.name == 'audio_channels'
                                   and 'weak-audio_channels' not in other.tags)
        return match if against_strong_channels else other

    rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'audio_bit_rate') and
                                              is_disabled(context, 'video_bit_rate')))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)

    # Integer and decimal amounts followed by an optional dash and a unit
    bit_rate_patterns = (r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)')
    pattern_options = {
        'conflict_solver': solve_conflict,
        'formatter': BitRate.fromstring,
        'tags': ['release-group-prefix'],
    }
    rebulk.regex(*bit_rate_patterns, **pattern_options)

    rebulk.rules(BitRateTypeRule)

    return rebulk
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Builder for the rebulk object registering the ``other`` patterns.

    Patterns come from ``config.get('other')``; the validation and renaming
    rules are added afterwards.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def other_disabled(context):
        """Delegate to ``is_disabled`` for the ``other`` property."""
        return is_disabled(context, 'other')

    rebulk = Rebulk(disabled=other_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    other_patterns = config.get('other')
    load_config_patterns(rebulk, other_patterns)

    # Passed to rebulk.rules in this exact order.
    other_rules = (
        RenameAnotherToOther,
        ValidateHasNeighbor,
        ValidateHasNeighborAfter,
        ValidateHasNeighborBefore,
        ValidateScreenerRule,
        ValidateMuxRule,
        ValidateHardcodedSubs,
        ValidateStreamingServiceNeighbor,
        ValidateAtEnd,
        ValidateReal,
        ProperCountRule,
    )
    rebulk.rules(*other_rules)

    return rebulk
def source(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the hard-coded ``source`` regex patterns. Named groups in the
    patterns additionally yield ``other`` / ``another`` children whose values
    are given by the ``value`` dict of each registration. ``ValidateSource``
    and ``UltraHdBlurayRule`` are added to the returned object.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
    rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])

    # "Rip" captured as the 'other' group, before or after the source token.
    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    # Same suffix wrapped in an optional non-capturing group.
    rip_optional_suffix = '(?:' + rip_suffix + ')?'

    def build_source_pattern(*patterns, **kwargs):
        """
        Helper pattern to build source pattern.

        Each pattern is wrapped in a capturing group and surrounded by the
        optional ``prefix`` / ``suffix`` keyword arguments (a missing or falsy
        value is treated as an empty string).

        :param patterns: regex fragments to wrap
        :param kwargs: optional ``prefix`` and ``suffix`` regex fragments
        :return: one full pattern string per input pattern
        :rtype: list
        """
        prefix_format = kwargs.get('prefix') or ''
        suffix_format = kwargs.get('suffix') or ''

        string_format = prefix_format + '({0})' + suffix_format
        return [string_format.format(pattern) for pattern in patterns]

    def demote_other(match, other):  # pylint: disable=unused-argument
        """
        Default conflict solver with 'other' property.

        Returns ``other`` when the conflicting match is named 'other',
        otherwise '__default__'.
        """
        return other if other.name == 'other' else '__default__'

    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value={'source': 'VHS', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
                 value={'source': 'Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
                 value={'source': 'HD Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
                 value={'source': 'Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
                 value={'source': 'HD Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
                 value={'source': 'Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
                 value={'source': 'HD Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
                 value={'source': 'Pay-per-view', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),  # TV is too common to allow matching
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value={'source': 'TV', 'other': 'Rip'})
    # "TV" only when followed by "Dub" (lookahead, "Dub" itself is not consumed)
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'Digital TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
                 value={'source': 'DVD', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
                 value={'source': 'Digital Master', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
                                       'DVD-?9', 'DVD-?5'),
                 value='DVD')

    rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
                 value={'source': 'Video on Demand', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value={'source': 'Web', 'other': 'Rip'})
    # WEBCap is a synonym of WEBRip, mostly used by non-English releases
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    # Bare "WEB", tagged 'weak.source'
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value={'source': 'HD-DVD', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),  # BRRip
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),  # BRRip
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')

    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})

    rebulk.rules(ValidateSource, UltraHdBlurayRule)

    return rebulk
def episodes():
    """
    Builder for rebulk object.

    Registers the season / episode / version / count patterns, the
    ``episode_details`` and ``episode_format`` patterns, a marker for a
    hard-coded movie title, and the related post-processing rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        Returns ``match`` when a season/episode match conflicts with one of the
        listed technical properties. When two season/episode matches come from
        different initiators, returns whichever one has an 'x' in its
        initiator raw text (``match`` is checked first). Otherwise returns
        '__default__'.

        :param match: season/episode match being solved
        :param other: conflicting match
        :return: ``match``, ``other`` or '__default__'
        """
        if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec', 'audio_codec',
                                                                  'audio_channels', 'container', 'date']:
            return match
        elif match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
                and match.initiator != other.initiator:
            if 'x' in match.initiator.raw.lower():
                return match
            if 'x' in other.initiator.raw.lower():
                return other
        return '__default__'

    # Separators accepted around a season/episode initiator: the regular
    # separators plus 'x' and 'e' in both cases.
    season_episode_seps = []
    season_episode_seps.extend(seps)
    season_episode_seps.extend(['x', 'X', 'e', 'E'])

    def season_episode_validator(match):
        """
        Validator for season/episode matches

        When the initiator does not start at index 0, the character at the
        initiator start or the one just before it must be one of
        ``season_episode_seps``. Any other match is accepted.

        :param match: match to validate
        :return: validation result
        """
        if match.name in ['season', 'episode'] and match.initiator.start:
            return match.initiator.input_string[match.initiator.start] in season_episode_seps \
                   or match.initiator.input_string[match.initiator.start - 1] in season_episode_seps
        return True

    # 01x02, 01x02x03x04
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 conflict_solver=season_episode_conflict_solver) \
        .defaults(validator=season_episode_validator) \
        .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .chain() \
        .regex(r'S(?P<season>\d+)') \
        .regex(r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    # From here on, patterns are validated through their parent with seps_surround by default.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']

    # <season word> <numeral> [<of word> <numeral>] followed by optional extra seasons
    rebulk.chain(abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral}) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?(?P<seasonSeparator>-)@?(?P<season>\d+)').repeater('*') \
        .regex(r'@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+)').repeater('*')

    # Digits-only variant, disabled when the context type is 'episode'
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter=int,
                 disabled=lambda context: context.get('type') == 'episode')

    # Numeral variant, enabled only when the context type is 'episode'
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode')

    # Season followed by an "All" word: the 'other' group is formatted to 'Complete'
    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # 12, 13
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113 (only when 'episode_prefer_number' is set in the context)
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: not context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3 (only when the context type is 'episode')
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode') \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102 (disabled when 'episode_prefer_number' is set in the context)
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                 disabled=lambda context: context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    # Standalone version marker (v2, v3, ...)
    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/episodes
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange, SeasonSeparatorRange, RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk
def source(config):  # pylint:disable=unused-argument
    """
    Create the Rebulk object that detects the ``source`` property.

    :param config: rule configuration; ``rip_prefix`` and ``rip_suffix`` are read from it
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(
        flags=re.IGNORECASE,
        abbreviations=[dash],
        private_parent=True,
        children=True
    ).defaults(
        name='source',
        tags=['video-codec-prefix', 'streaming_service.suffix'],
        validate_all=True,
        validator={'__parent__': or_(seps_before, seps_after)}
    )

    rip_prefix = config['rip_prefix']
    rip_suffix = config['rip_suffix']

    def build_source_pattern(*patterns, prefix='', suffix=''):
        """Wrap each pattern in a capturing group, framed by prefix and suffix."""
        wrapped = []
        for pattern in patterns:
            wrapped.append(prefix + '({})'.format(pattern) + suffix)
        return wrapped

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Conflict solver yielding to 'other' and 'release_group' matches."""
        if other.name in ('other', 'release_group'):
            return other
        return '__default__'

    def rip_value(label):
        """Build a fresh value mapping for a source that may carry a Rip marker."""
        return {'source': label, 'other': 'Rip'}

    rebulk.regex(*build_source_pattern('VHS', suffix=optional(rip_suffix)),
                 value=rip_value('VHS'))
    rebulk.regex(*build_source_pattern('CAM', suffix=optional(rip_suffix)),
                 value=rip_value('Camera'))
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=optional(rip_suffix)),
                 value=rip_value('HD Camera'))
    # For TS, we remove 'streaming_service.suffix' tag to avoid "Shots" being guessed as Showtime and TS.
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=optional(rip_suffix)),
                 value=rip_value('Telesync'),
                 tags=['video-codec-prefix'],
                 overrides=["tags"])
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=optional(rip_suffix)),
                 value=rip_value('HD Telesync'))
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=optional(rip_suffix)),
                 value=rip_value('Telecine'))
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=optional(rip_suffix)),
                 value=rip_value('HD Telecine'))
    rebulk.regex(*build_source_pattern('PPV', suffix=optional(rip_suffix)),
                 value=rip_value('Pay-per-view'))
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=optional(rip_suffix)),
                 value=rip_value('TV'))
    # TV is too common to allow matching without a mandatory rip suffix
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),
                 value=rip_value('TV'))
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value=rip_value('TV'))
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=optional(rip_suffix)),
                 value=rip_value('Digital TV'))
    rebulk.regex(*build_source_pattern('DVD', suffix=optional(rip_suffix)),
                 value=rip_value('DVD'))
    rebulk.regex(*build_source_pattern('DM', suffix=optional(rip_suffix)),
                 value=rip_value('Digital Master'))
    rebulk.regex(
        *build_source_pattern(
            'VIDEO-?TS',
            'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
            'DVD-?9',
            'DVD-?5'),
        value='DVD')

    rebulk.regex(*build_source_pattern('HD-?TV', suffix=optional(rip_suffix)),
                 conflict_solver=demote_other,
                 value=rip_value('HDTV'))
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix),
                 conflict_solver=demote_other,
                 value=rip_value('HDTV'))
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'),
                 conflict_solver=demote_other,
                 value=rip_value('HDTV'))

    rebulk.regex(*build_source_pattern('VOD', suffix=optional(rip_suffix)),
                 value=rip_value('Video on Demand'))

    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value=rip_value('Web'))
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=optional(rip_suffix)),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=optional(rip_suffix)),
                 value=rip_value('HD-DVD'))

    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50',
                                       suffix=optional(rip_suffix)),
                 value=rip_value('Blu-ray'))
    # BRRip
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)',
                                       '(?P<another>BR)-?(?=Mux)'),
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    # BRRip
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'),
                 value='Ultra HD Blu-ray')

    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=optional(rip_suffix)),
                 conflict_solver=demote_other,
                 value=rip_value('Ultra HDTV'))
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix),
                 conflict_solver=demote_other,
                 value=rip_value('Ultra HDTV'))

    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=optional(rip_suffix)),
                 value=rip_value('Satellite'))
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value=rip_value('Satellite'))

    rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)

    return rebulk
def website(config):
    """
    Builder for rebulk object.

    Registers three website regexes (safe subdomain + any TLD, any name +
    safe TLD, any name + safe prefix + any TLD), the private website prefix
    strings, and the rules that post-process website matches.

    :param config: rule configuration; the keys ``safe_tlds``,
        ``safe_subdomains``, ``safe_prefixes`` and ``prefixes`` are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    tld_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
        'tlds-alpha-by-domain.txt')
    # The file must be opened in binary mode: the comprehension below filters
    # with a bytes literal and decodes each line explicitly. In text mode
    # (the previous behavior) Python 3 yields ``str`` lines, so the
    # ``b'--' not in tld`` test raises TypeError.
    with open(tld_path, 'rb') as tld_file:
        tlds = [
            tld.strip().decode('utf-8')
            for tld in tld_file.readlines()
            if b'--' not in tld  # skip entries containing '--'
        ][1:]  # All registered domain extension (first kept line is dropped)

    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Those words before a tlds are sure
    website_prefixes = config['prefixes']

    # One or more safe subdomains, then any labels, then any registered TLD.
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:' + build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 children=True)
    # Optional safe subdomains, one label, then a safe TLD.
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
    # Optional safe subdomains, one label, safe prefix(es), then any registered TLD.
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_prefix) +
                 r'\.)+(?:' + build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    rebulk.string(*website_prefixes,
                  validator=seps_surround, private=True, tags=['website.prefix'])

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches
            """
            return match.named('season', 'episode', 'year')

        def when(self, matches, context):
            to_remove = []
            for website_match in matches.named('website'):
                # A website starting with a safe subdomain/prefix is always kept.
                if any(website_match.value.lower().startswith(safe_start)
                       for safe_start in safe_subdomains + safe_prefix):
                    continue

                suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                if not suffix:
                    continue

                # Followed by season/episode/year and not located in a
                # 'group' marker: drop the website match.
                group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
                if not group:
                    to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

    return rebulk
def episodes():
    """
    Builder for rebulk object.

    Registers the season/episode patterns (``SxxExx``, ``1x02``, season and
    episode words followed by a number, bare numbers, ``eNN``/``epNN``,
    "N of M" counts), the ``episode_details`` and ``episode_format``
    keywords, a hardcoded movie marker, and finally the validation rules.

    Most patterns are declared twice: as one regular expression when
    ``REGEX_AVAILABLE`` is true, and as a ``rebulk.chain`` of smaller
    patterns otherwise.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    #pylint: disable=too-many-branches,too-many-statements
    # NOTE(review): REGEX_AVAILABLE is defined outside this block; presumably it
    # is true when a regex engine supporting \L<name> word lists and repeated
    # named groups is in use - confirm where the flag is set.
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # 01x02, 01x02x03x04
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)' +
                     r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))*',
                     # S01E02, S01x02, S01E02E03, S01Ex02, S01xE02, SO1Ex02Ex03
                     r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)' +
                     r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))*',
                     # S01
                     r'S(?P<season>\d+)' +
                     r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))*',
                     formatter={'season': int, 'episode': int},
                     tags=['SxxExx'],
                     abbreviations=[alt_dash],
                     children=True,
                     private_parent=True,
                     # season/episode matches win over the listed technical properties
                     conflict_solver=lambda match, other: match
                     if match.name in ['season', 'episode'] and other.name in
                     ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
                     else '__default__')
    else:
        # Same three patterns, each expressed as a chain with a repeated tail.
        rebulk.chain(formatter={'season': int, 'episode': int},
                     tags=['SxxExx'],
                     abbreviations=[alt_dash],
                     children=True,
                     private_parent=True,
                     conflict_solver=lambda match, other: match
                     if match.name in ['season', 'episode'] and other.name in
                     ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
                     else '__default__') \
            .defaults(validator=None)\
            .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
            .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
            .chain() \
            .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
            .regex(r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))').repeater('*') \
            .chain() \
            .regex(r'S(?P<season>\d+)') \
            .regex(r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    # From here on, parent matches must be surrounded by separators.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # Word lists used by the patterns below.
    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']

    # <season word> N [of M] with optional '-' / '+' / '&' separated seasons
    if REGEX_AVAILABLE:
        rebulk.regex(r'\L<season_words>@?(?P<season>' + numeral + ')' +
                     r'(?:@?\L<of_words>@?(?P<count>' + numeral + '))?' +
                     r'(?:@?(?P<seasonSeparator>-)@?(?P<season>\d+))*' +
                     r'(?:@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+))*',
                     of_words=of_words,
                     season_words=season_words,  # Season 1, # Season one
                     abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral})
    else:
        rebulk.chain(abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral})\
            .defaults(validator=None)\
            .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
            .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
            .regex(r'@?(?P<seasonSeparator>-)@?(?P<season>\d+)').repeater('*') \
            .regex(r'@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+)').repeater('*')

    # <episode word> N, digits only: disabled when the context type is 'episode'
    # (the numeral-aware variant just below takes over in that case).
    if REGEX_AVAILABLE:
        rebulk.regex(r'\L<episode_words>-?(?P<episode>\d+)' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?\L<of_words>?-?(?P<count>\d+))?',
                     of_words=of_words,
                     episode_words=episode_words,  # Episode 4
                     abbreviations=[dash], formatter=int,
                     disabled=lambda context: context.get('type') == 'episode')
    else:
        rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                     abbreviations=[dash], formatter=int,
                     disabled=lambda context: context.get('type') == 'episode')

    # <episode word> N, accepting the `numeral` pattern: only enabled when the
    # context type is 'episode'.
    if REGEX_AVAILABLE:
        rebulk.regex(r'\L<episode_words>-?(?P<episode>' + numeral + ')' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?\L<of_words>?-?(?P<count>\d+))?',
                     of_words=of_words,
                     episode_words=episode_words,  # Episode 4
                     abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int},
                     disabled=lambda context: context.get('type') != 'episode')
    else:
        rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?'+ build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                     abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int},
                     disabled=lambda context: context.get('type') != 'episode')

    # Season followed by an "all" word: the `other` child is always
    # formatted to 'Complete'.
    if REGEX_AVAILABLE:
        rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>\L<all_words>)',
                     tags=['SxxExx'],
                     all_words=all_words,
                     abbreviations=[dash],
                     validator=None,
                     formatter={'season': int, 'other': lambda match: 'Complete'})
    else:
        rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>'+build_or_pattern(all_words)+')',
                     tags=['SxxExx'],
                     abbreviations=[dash],
                     validator=None,
                     formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    if REGEX_AVAILABLE:
        # 12, 13
        rebulk.regex(r'(?P<episode>\d{2})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{2}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'(?P<episode>\d{2})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    if REGEX_AVAILABLE:
        # 012, 013
        rebulk.regex(r'0(?P<episode>\d{1,2})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'0(?P<episode>\d{1,2})') \
            .regex(r'v(?P<version>\d+)').repeater('?') \
            .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 3-4 digit episode numbers are only enabled when the context sets
    # 'episode_prefer_number'.
    if REGEX_AVAILABLE:
        # 112, 113
        rebulk.regex(r'(?P<episode>\d{3,4})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{3,4}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: not context.get('episode_prefer_number', False))
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: not context.get('episode_prefer_number', False)) \
            .defaults(validator=None) \
            .regex(r'(?P<episode>\d{3,4})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # Single-digit episode numbers are only enabled when the context type
    # is 'episode'.
    if REGEX_AVAILABLE:
        # 1, 2, 3
        rebulk.regex(r'(?P<episode>\d)' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{1,2}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: context.get('type') != 'episode')
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: context.get('type') != 'episode') \
            .defaults(validator=None) \
            .regex(r'(?P<episode>\d)')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    if REGEX_AVAILABLE:
        rebulk.regex(r'e(?P<episode>\d{1,4})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4}))*',
                     formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'e(?P<episode>\d{1,4})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    if REGEX_AVAILABLE:
        rebulk.regex(r'ep-?(?P<episode>\d{1,4})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4}))*',
                     abbreviations=[dash],
                     formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'ep-?(?P<episode>\d{1,4})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102
    # Compact season+episode form: disabled when the context sets
    # 'episode_prefer_number'; wins conflicts against 'year' matches.
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>x|-)(?P<episode>\d{2}))*',
                     tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                     formatter={'season': int, 'episode': int, 'version': int},
                     conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                     disabled=lambda context: context.get('episode_prefer_number', False))
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                     formatter={'season': int, 'episode': int, 'version': int},
                     conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                     disabled=lambda context: context.get('episode_prefer_number', False))\
            .defaults(validator=None)\
            .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    # Standalone version marker (v2, v3, ...)
    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    #TODO: List of words
    # detached of X count (season/episode)
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<episode>\d+)?-?\L<of_words>-?(?P<count>\d+)-?\L<episode_words>?',
                     of_words=of_words, episode_words=episode_words,
                     abbreviations=[dash], children=True, private_parent=True, formatter=int)
    else:
        rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                     r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                     abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/episodes
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange, SeasonSeparatorRange, RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk
def source(config):  # pylint:disable=unused-argument
    """
    Create the Rebulk object that detects the ``source`` property.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE,
                                   abbreviations=[dash],
                                   private_parent=True,
                                   children=True)
    rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])

    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    rip_optional_suffix = '(?:{0})?'.format(rip_suffix)

    def build_source_pattern(*patterns, **kwargs):
        """Wrap each pattern in a capturing group, framed by the optional prefix/suffix."""
        template = (kwargs.get('prefix') or '') + '({0})' + (kwargs.get('suffix') or '')
        wrapped = []
        for pattern in patterns:
            wrapped.append(template.format(pattern))
        return wrapped

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Conflict solver yielding to 'other' matches."""
        if other.name == 'other':
            return other
        return '__default__'

    def rip_value(label):
        """Build a fresh value mapping for a source that may carry a Rip marker."""
        return {'source': label, 'other': 'Rip'}

    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value=rip_value('VHS'))
    rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
                 value=rip_value('Camera'))
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
                 value=rip_value('HD Camera'))
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
                 value=rip_value('Telesync'))
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
                 value=rip_value('HD Telesync'))
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
                 value=rip_value('Telecine'))
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
                 value=rip_value('HD Telecine'))
    rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
                 value=rip_value('Pay-per-view'))
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
                 value=rip_value('TV'))
    # TV is too common to allow matching without a mandatory rip suffix
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),
                 value=rip_value('TV'))
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value=rip_value('TV'))
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
                 value=rip_value('Digital TV'))
    rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
                 value=rip_value('DVD'))
    rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
                 value=rip_value('Digital Master'))
    rebulk.regex(*build_source_pattern('VIDEO-?TS',
                                       'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
                                       'DVD-?9',
                                       'DVD-?5'),
                 value='DVD')

    rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix),
                 conflict_solver=demote_other,
                 value=rip_value('HDTV'))
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix),
                 conflict_solver=demote_other,
                 value=rip_value('HDTV'))
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'),
                 conflict_solver=demote_other,
                 value=rip_value('HDTV'))

    rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
                 value=rip_value('Video on Demand'))

    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value=rip_value('Web'))
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value=rip_value('HD-DVD'))

    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50',
                                       suffix=rip_optional_suffix),
                 value=rip_value('Blu-ray'))
    # BRRip
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)',
                                       '(?P<another>BR)-?(?=Mux)'),
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    # BRRip
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'),
                 value='Ultra HD Blu-ray')

    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix),
                 conflict_solver=demote_other,
                 value=rip_value('Ultra HDTV'))
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix),
                 conflict_solver=demote_other,
                 value=rip_value('Ultra HDTV'))

    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
                 value=rip_value('Satellite'))
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value=rip_value('Satellite'))

    rebulk.rules(ValidateSource, UltraHdBlurayRule)

    return rebulk
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Create the Rebulk object that detects the ``other`` property.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    # Fixes, dual audio, widescreen, re-encodes
    rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
    rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
    rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
    rebulk.regex('ws', 'wide-?screen', value='Widescreen')
    rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')

    # Proper / Real / Fix family
    rebulk.string('Repack', 'Rerip', value='Proper',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Proper', value='Proper',
                  tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])

    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
    rebulk.regex('Real', value='Proper',
                 tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])

    rebulk.string('Fix', 'Fixed', value='Fix',
                  tags=['has-neighbor-before', 'has-neighbor-after',
                        'streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])

    rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
    rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')

    season_alternatives = build_or_pattern(["seasons?", "series?"])
    article_alternatives = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Accept a "Complete" match only when a season word was captured
        before or after it.

        :param match: parent match to check
        :return: True if one of the season-word children exists
        :rtype: bool
        """
        found = match.children
        return bool(found.named('completeWordsBefore') or found.named('completeWordsAfter'))

    complete_pattern = ''.join([
        '(?P<completeArticle>', article_alternatives, '-)?',
        '(?P<completeWordsBefore>', season_alternatives, '-)?',
        'Complete',
        '(?P<completeWordsAfter>-', season_alternatives, ')?',
    ])
    rebulk.regex(complete_pattern,
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': compose(seps_surround, validate_complete)})
    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
    rebulk.regex('(HD)(?P<another>Rip)',
                 value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True,
                 children=True,
                 validator={'__parent__': seps_surround},
                 validate_all=True)

    # Keywords whose value is the keyword itself
    for keyword in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(keyword, value=keyword)

    rebulk.string('3D', value='3D', tags='has-neighbor')

    # Quality / definition markers
    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    rebulk.string('HR', value='High Resolution')
    rebulk.string('LD', value='Line Dubbed')
    rebulk.string('MD', value='Mic Dubbed')
    rebulk.string('mHD', 'HDLight', value='Micro HD')
    rebulk.string('LDTV', value='Low Definition')
    rebulk.string('HFR', value='High Frame Rate')
    rebulk.string('HD', value='HD', validator=None,
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Upscaled?', value='Upscaled')

    # Keywords that need a neighbor and may precede a release group
    for keyword in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail', 'Colorized', 'Internal'):
        rebulk.string(keyword, value=keyword, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('LiNE', value='Line Audio',
                 tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')
    rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
    rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
    rebulk.string('OM', value='Open Matte', tags='has-neighbor')
    rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
    rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])

    for side in ('East', 'West'):
        rebulk.regex(r'(?:Live-)?(?:Episode-)?' + side + '-?(?:Coast-)?Feed',
                     value=side + ' Coast Feed')

    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')

    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

    # Dynamic range / colour space markers
    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
    rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
    rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')

    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Extras', value='Extras', tags='has-neighbor')
    rebulk.regex('Digital-?Extras?', value='Extras')
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(
        RenameAnotherToOther,
        ValidateHasNeighbor,
        ValidateHasNeighborAfter,
        ValidateHasNeighborBefore,
        ValidateScreenerRule,
        ValidateMuxRule,
        ValidateHardcodedSubs,
        ValidateStreamingServiceNeighbor,
        ValidateAtEnd,
        ValidateReal,
        ProperCountRule)

    return rebulk
def episodes():
    """
    Builder for rebulk object.

    Registers the season/episode patterns (``S01E02``, ``01x02``, worded
    seasons/episodes, weak bare numbers, detached counts, ...) together with
    the rules that post-process the resulting matches.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    def episodes_season_chain_breaker(matches):
        """
        Break chains if there's more than 100 offset between two neighbor values.

        :param matches: matches collected so far in the chain
        :type matches:
        :return: True if the chain must be broken
        :rtype: bool
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > 100:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > 100:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match: the season/episode match being considered
        :param other: the conflicting match
        :return: one of the two matches, or '__default__' when this solver
            has no opinion
        """
        if match.name in ['season', 'episode']:
            if other.name in ['screen_size', 'video_codec', 'audio_codec',
                              'audio_channels', 'container', 'date']:
                return match
            if other.name in ['season', 'episode'] and match.initiator != other.initiator:
                if 'weak-episode' in match.tags:
                    return match
                if 'weak-episode' in other.tags:
                    return other
                if 'x' in match.initiator.raw.lower():
                    return match
                if 'x' in other.initiator.raw.lower():
                    return other
        return '__default__'

    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'eps', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']
    season_markers = ["S"]
    season_ep_markers = ["x"]
    episode_markers = ["xE", "Ex", "EP", "E", "x"]
    range_separators = ['-', '~', 'to', 'a']
    weak_discrete_separators = [sep for sep in seps if sep not in range_separators]
    strong_discrete_separators = ['+', '&', 'and', 'et']
    discrete_separators = strong_discrete_separators + weak_discrete_separators

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)

        :param match: parent match whose children are checked
        :return: True if the children are ordered and consecutive enough
        :rtype: bool
        """
        values = match.children.to_dict(implicit=True)
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if sorted(values['season']) != values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if sorted(values['episode']) != values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.

            :param property_name: 'season' or 'episode'
            :type property_name: str
            :return: False if a weak discrete separator joins non consecutive values
            :rtype: bool
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    # Two values are not necessarily joined by a separator match
                    # (the first SxxExx pattern may simply be repeated), so the
                    # lookup result is checked before reading its raw text.
                    if separator is not None:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not current_match.value - previous_match.value == 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            # A strong separator validates the whole chain.
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver) \
        .regex(build_or_pattern(season_markers) + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers) + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers) +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers) +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers) + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators

        A match whose raw text is int coercable is always accepted.

        :param match: numeral match to validate
        :type match:
        :return: True if the match is accepted
        :rtype: bool
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman}) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?(?P<seasonSeparator>' +
               build_or_pattern(range_separators + discrete_separators + ['@'], escape=True) +
               r')@?(?P<season>\d+)').repeater('*')

    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter=int,
                 disabled=lambda context: context.get('type') == 'episode')

    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode')

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # 12, 13
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: not context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode') \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                 disabled=lambda context: context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/eps
    rebulk.regex('OSS-?117', abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange(range_separators), SeasonSeparatorRange(range_separators),
                 RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
                 EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk
def video_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers, in this order, the patterns for the ``video_codec``,
    ``video_profile``, ``video_api`` and ``color_depth`` properties.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def disabled_for(property_name):
        """Build the ``disabled`` callback checking ``property_name`` in the context."""
        return lambda context: is_disabled(context, property_name)

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    # Bound once so the tables below can name the registration method to use.
    as_regex = rebulk.regex
    as_string = rebulk.string

    rebulk.defaults(name="video_codec",
                    tags=['source-suffix', 'streaming_service.suffix'],
                    disabled=disabled_for('video_codec'))

    # (registration method, patterns, value) -- order is the registration order.
    codec_table = (
        (as_regex, (r'Rv\d{2}',), 'RealVideo'),
        (as_regex, ('Mpe?g-?2', '[hx]-?262'), 'MPEG-2'),
        (as_string, ("DVDivX", "DivX"), "DivX"),
        (as_string, ('XviD',), 'Xvid'),
        (as_regex, ('VC-?1',), 'VC-1'),
        (as_string, ('VP7',), 'VP7'),
        (as_string, ('VP8', 'VP80'), 'VP8'),
        (as_string, ('VP9',), 'VP9'),
        (as_regex, ('[hx]-?263',), 'H.263'),
        (as_regex, ('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?'), 'H.264'),
        (as_regex, ('[hx]-?265', 'HEVC'), 'H.265'),
    )
    for register, patterns, codec in codec_table:
        register(*patterns, value=codec)

    # "hevc10" carries both the codec and the color depth.
    as_regex('(?P<video_codec>hevc)(?P<color_depth>10)',
             value={'video_codec': 'H.265', 'color_depth': '10-bit'},
             tags=['video-codec-suffix'],
             children=True)

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    rebulk.defaults(name="video_profile",
                    validator=seps_surround,
                    disabled=disabled_for('video_profile'))

    # Profiles tagged 'video_profile.rule'.
    # SC/SVC: https://en.wikipedia.org/wiki/Scalable_Video_Coding
    # AVC(HD): https://en.wikipedia.org/wiki/AVCHD
    # HEVC: https://en.wikipedia.org/wiki/H.265/HEVC
    tagged_profile_table = (
        (as_string, ('BP',), 'Baseline'),
        (as_string, ('XP', 'EP'), 'Extended'),
        (as_string, ('MP',), 'Main'),
        (as_string, ('HP', 'HiP'), 'High'),
        (as_string, ('SC', 'SVC'), 'Scalable Video Coding'),
        (as_regex, ('AVC(?:HD)?',), 'Advanced Video Codec High Definition'),
        (as_string, ('HEVC',), 'High Efficiency Video Coding'),
    )
    for register, patterns, profile in tagged_profile_table:
        register(*patterns, value=profile, tags='video_profile.rule')

    # Profiles registered without the 'video_profile.rule' tag.
    for pattern, profile in (('Hi422P', 'High 4:2:2'),
                             ('Hi444PP', 'High 4:4:4 Predictive'),
                             ('Hi10P?', 'High 10')):  # no profile validation is required
        as_regex(pattern, value=profile)

    as_string('DXVA', value='DXVA', name='video_api',
              disabled=disabled_for('video_api'))

    rebulk.defaults(name='color_depth',
                    validator=seps_surround,
                    disabled=disabled_for('color_depth'))
    for patterns, depth in ((('12.?bits?',), '12-bit'),
                            (('10.?bits?', 'YUV420P10', 'Hi10P?'), '10-bit'),
                            (('8.?bits?',), '8-bit')):
        as_regex(*patterns, value=depth)

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Builder for rebulk object.

    Registers every pattern of the ``other`` property, then the rules that
    validate or rename the resulting matches.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Each helper returns a NEW list so that no two patterns share a tag list.
    def streaming_tags():
        """Return a fresh list with the two streaming_service tags."""
        return ['streaming_service.prefix', 'streaming_service.suffix']

    def neighbor_group_tags():
        """Return a fresh ['has-neighbor', 'release-group-prefix'] list."""
        return ['has-neighbor', 'release-group-prefix']

    def at_end_tags():
        """Return a fresh ['at-end', 'not-a-release-group'] list."""
        return ['at-end', 'not-a-release-group']

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    for patterns, label in ((('Audio-?Fix', 'Audio-?Fixed'), 'Audio Fixed'),
                            (('Sync-?Fix', 'Sync-?Fixed'), 'Sync Fixed'),
                            (('Dual', 'Dual-?Audio'), 'Dual Audio'),
                            (('ws', 'wide-?screen'), 'Widescreen'),
                            (('Re-?Enc(?:oded)?',), 'Reencoded')):
        rebulk.regex(*patterns, value=label)

    rebulk.string('Proper', 'Repack', 'Rerip', value='Proper', tags=streaming_tags())
    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=streaming_tags() + ['real'])
    rebulk.string('Fix', 'Fixed', value='Fix',
                  tags=['has-neighbor-before', 'has-neighbor-after'] + streaming_tags())
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix', tags=streaming_tags())
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix', tags=streaming_tags())

    for word, label in (('Fansub', 'Fan Subtitled'),
                        ('Fastsub', 'Fast Subtitled')):
        rebulk.string(word, value=label, tags='has-neighbor')

    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Make sure a season word is defined before or after "Complete".

        :param match: parent match of the "Complete" pattern
        :type match:
        :return: True if one of the season word groups matched
        :rtype: bool
        """
        children = match.children
        return bool(children.named('completeWordsBefore') or children.named('completeWordsAfter'))

    complete_pattern = ''.join((
        '(?P<completeArticle>', complete_articles, '-)?',
        '(?P<completeWordsBefore>', season_words, '-)?',
        'Complete',
        '(?P<completeWordsAfter>-', season_words, ')?',
    ))
    rebulk.regex(complete_pattern,
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': compose(seps_surround, validate_complete)})

    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
    rebulk.regex('(HD)(?P<another>Rip)',
                 value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True,
                 children=True,
                 validator={'__parent__': seps_surround},
                 validate_all=True)

    # Words whose value is the word itself.
    for word in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(word, value=word)

    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    for words, label in ((('HR',), 'High Resolution'),
                         (('LD',), 'Line Dubbed'),
                         (('MD',), 'Mic Dubbed'),
                         (('mHD', 'HDLight'), 'Micro HD'),
                         (('LDTV',), 'Low Definition'),
                         (('HFR',), 'High Frame Rate')):
        rebulk.string(*words, value=label)

    # Definition markers are registered without validator.
    rebulk.string('HD', value='HD', validator=None, tags=streaming_tags())
    for patterns, label in ((('Full-?HD', 'FHD'), 'Full HD'),
                            (('Ultra-?(?:HD)?', 'UHD'), 'Ultra HD')):
        rebulk.regex(*patterns, value=label, validator=None, tags=streaming_tags())
    rebulk.regex('Upscaled?', value='Upscaled')

    for word in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail', 'Colorized', 'Internal'):
        rebulk.string(word, value=word, tags=neighbor_group_tags())

    rebulk.regex('LiNE', value='Line Audio',
                 tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')

    for words, label in ((('CONVERT',), 'Converted'),
                         (('DOCU', 'DOKU'), 'Documentary'),
                         (('OM',), 'Open Matte'),
                         (('STV',), 'Straight to Video'),
                         (('OAR',), 'Original Aspect Ratio')):
        rebulk.string(*words, value=label, tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=neighbor_group_tags())

    for coast in ('East', 'West'):
        feed_pattern = r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed'
        rebulk.regex(feed_pattern, value=coast + ' Coast Feed')

    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')

    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    for pattern, label in (('HDR(?:10)?', 'HDR10'),
                           ('Dolby-?Vision', 'Dolby Vision'),
                           ('BT-?2020', 'BT.2020')):
        rebulk.regex(pattern, value=label, tags='uhdbluray-neighbor')

    for words, label in ((('Sample',), 'Sample'),
                         (('Proof',), 'Proof'),
                         (('Obfuscated', 'Scrambled'), 'Obfuscated')):
        rebulk.string(*words, value=label, tags=at_end_tags())
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther,
                 ValidateHasNeighbor,
                 ValidateHasNeighborAfter,
                 ValidateHasNeighborBefore,
                 ValidateScreenerRule,
                 ValidateMuxRule,
                 ValidateHardcodedSubs,
                 ValidateStreamingServiceNeighbor,
                 ValidateAtEnd,
                 ProperCountRule)

    return rebulk
def website(config):
    """
    Builder for rebulk object.

    Reads the TLD list shipped as ``tlds-alpha-by-domain.txt`` and the
    ``safe_tlds``, ``safe_subdomains``, ``safe_prefixes`` and ``prefixes``
    entries of ``config`` to build the ``website`` patterns.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
        raw_lines = tld_file.readlines()
    # Lines containing '--' are ignored, then the first remaining entry is dropped.
    kept_entries = [raw.strip().decode('utf-8') for raw in raw_lines if b'--' not in raw]
    tlds = kept_entries[1:]  # All registered domain extension

    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Those words before a tlds are sure
    website_prefixes = config['prefixes']

    # Fragments shared by the three website expressions.
    subdomain_alt = build_or_pattern(safe_subdomains)
    tld_alt = build_or_pattern(tlds)
    opening = r'(?:[^a-z0-9]|^)((?:' + subdomain_alt
    closing = r'))(?:[^a-z0-9]|$)'

    # safe subdomain(s) + any domain + any registered tld
    rebulk.regex(opening + r'\.)+(?:[a-z-]+\.)+(?:' + tld_alt + closing,
                 children=True)

    # optional safe subdomain(s) + one label + safe tld
    rebulk.regex(opening + r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_tlds) + closing,
                 safe_subdomains=safe_subdomains,
                 safe_tlds=safe_tlds,
                 children=True)

    # optional safe subdomain(s) + one label + safe prefix(es) + any registered tld
    rebulk.regex(opening + r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_prefix) +
                 r'\.)+(?:' + tld_alt + closing,
                 safe_subdomains=safe_subdomains,
                 safe_prefix=safe_prefix,
                 tlds=tlds,
                 children=True)

    rebulk.string(*website_prefixes,
                  validator=seps_surround, private=True, tags=['website.prefix'])

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches
            """
            followers = ['season', 'episode', 'year']
            return any(name in followers for name in match.names)

        def when(self, matches, context):
            def starts_safely(candidate):
                """Tell if the website value starts with a safe subdomain or prefix."""
                return any(candidate.value.lower().startswith(safe_start)
                           for safe_start in safe_subdomains + safe_prefix)

            # A website not starting safely and directly followed by a
            # season/episode/year match is removed.
            return [candidate for candidate in matches.named('website')
                    if not starts_safely(candidate)
                    and matches.next(candidate, PreferTitleOverWebsite.valid_followers, 0)]

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

    return rebulk