def part():
    """
    Builder for rebulk object.

    Registers one ``part`` regex made of a ``pt``/``part`` prefix, an optional
    dash and a numeral group whose value is formatted by ``parse_numeral``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(
        flags=re.IGNORECASE,
        abbreviations=[dash],
        validator={'__parent__': seps_surround}
    )

    prefixes = ['pt', 'part']

    # With REGEX_AVAILABLE the named list syntax is used; otherwise an
    # explicit alternation is built from the prefixes.
    prefix_pattern = r'\L<prefixes>' if REGEX_AVAILABLE else build_or_pattern(prefixes)

    rebulk.regex(
        prefix_pattern + r'-?(' + numeral + r')',
        prefixes=prefixes,
        name='part',
        validate_all=True,
        private_parent=True,
        children=True,
        formatter=parse_numeral
    )

    return rebulk
def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the ``x<digits>`` bonus pattern and the ``BonusTitleRule`` rule.
    The whole builder is disabled when ``is_disabled(context, 'bonus')`` is true.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bonus_disabled(context):
        return is_disabled(context, 'bonus')

    def solve_conflict(match, conflicting):
        # Hand back this match when it collides with a video codec or a
        # non-weak episode; otherwise defer to the default conflict handling.
        strong_conflict = (conflicting.name in ('video_codec', 'episode')
                           and 'weak-episode' not in conflicting.tags)
        if strong_conflict:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=bonus_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(
        r'x(\d+)',
        name='bonus',
        private_parent=True,
        children=True,
        formatter=int,
        validator={'__parent__': seps_surround},
        validate_all=True,
        conflict_solver=solve_conflict
    )

    rebulk.rules(BonusTitleRule)

    return rebulk
def release_group():
    """
    Builder for rebulk object.

    Only rules are registered here: ``SceneReleaseGroup``,
    ``AnimeReleaseGroup`` and ``ExpectedReleaseGroup``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    return rebulk.rules(SceneReleaseGroup, AnimeReleaseGroup, ExpectedReleaseGroup)
def expected(input_string, context):
    """
    Expected property functional pattern.

    Every entry found under ``context_key`` in the context is searched in
    ``input_string``. Entries starting with ``re:`` are treated as regular
    expressions (spaces replaced by dashes, case-insensitive); all other
    entries are searched as plain text after every separator of ``seps`` has
    been replaced by a space in both the entry and the input.

    :param input_string: string to search in
    :type input_string: str
    :param context: matching context holding the expected values
    :type context: dict
    :return: match spans for regex entries and ``start``/``end``/``value``
        dicts for plain entries
    :rtype: list
    """
    ret = []

    # Separator-normalised copy of the input, built at most once. It is kept
    # apart from ``input_string`` so that regex entries are always matched
    # against the untouched input, whatever the order of the entries.
    normalized_input = None

    # A missing or empty context entry simply yields no match.
    for search in context.get(context_key) or ():
        if search.startswith('re:'):
            pattern = search[3:].replace(' ', '-')
            matches = Rebulk().regex(pattern, abbreviations=[dash], flags=re.IGNORECASE) \
                .matches(input_string, context)
            ret.extend(match.span for match in matches)
        else:
            if normalized_input is None:
                normalized_input = input_string
                for sep in seps:
                    normalized_input = normalized_input.replace(sep, ' ')
            needle = search
            for sep in seps:
                needle = needle.replace(sep, ' ')
            for start in find_all(normalized_input, needle, ignore_case=True):
                # The reported value is the entry as given, not its normalised form.
                ret.append({'start': start, 'end': start + len(needle), 'value': search})
    return ret
def path():
    """
    Builder for rebulk object.

    Registers a functional ``path`` marker pattern that splits the input on
    ``/`` and ``\\`` characters.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="path", marker=True)

    def mark_path(input_string, context):
        """
        Functional pattern to mark path elements.

        :param input_string: string to split into path elements
        :param context: matching context; ``name_only`` marks the whole input as one element
        :return: list of ``(start, end)`` tuples, one per path element
        """
        if context.get('name_only', False):
            return [(0, len(input_string))]

        # Separator positions, plus virtual separators just before the first
        # character and just after the last one.
        boundaries = sorted(
            list(find_all(input_string, '/'))
            + list(find_all(input_string, '\\'))
            + [-1, len(input_string)]
        )
        return [(left + 1, right) for left, right in zip(boundaries, boundaries[1:])]

    rebulk.functional(mark_path)
    return rebulk
def part(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the ``part`` regex built from ``config['prefixes']``, an optional
    dash and a numeral. The numeral must pass ``validate_roman`` and format to
    a value strictly between 0 and 100.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def part_disabled(context):
        return is_disabled(context, 'part')

    rebulk = Rebulk(disabled=part_disabled)
    rebulk.regex_defaults(
        flags=re.IGNORECASE,
        abbreviations=[dash],
        validator={'__parent__': seps_surround}
    )

    prefixes = config['prefixes']

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators

        :param match: match to validate
        :type match:
        :return: True when the raw text is int-coercable, else the result of ``seps_surround``
        :rtype:
        """
        return True if int_coercable(match.raw) else seps_surround(match)

    def in_part_range(match):
        return 0 < match.value < 100

    pattern = build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')'
    rebulk.regex(
        pattern,
        prefixes=prefixes,
        validate_all=True,
        private_parent=True,
        children=True,
        formatter=parse_numeral,
        validator={'part': compose(validate_roman, in_part_range)}
    )

    return rebulk
def type_():
    """
    Builder for rebulk object.

    Registers the ``TypeProcessor`` rule only.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    return rebulk.rules(TypeProcessor)
def mimetype():
    """
    Builder for rebulk object.

    Registers the ``Mimetype`` rule only.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    return rebulk.rules(Mimetype)
def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    ``config`` maps each streaming service value to one pattern or a list of
    patterns. A pattern is either a string or a dict with a ``pattern`` key
    plus extra pattern options. A pattern is registered as a regular
    expression when it starts with ``re:`` or when its dict carries a true
    ``regex`` option; otherwise it is registered as a plain string.

    :param config: rule configuration
    :type config: dict
    :return:
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    regex_prefix = 're:'

    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if isinstance(pattern, dict):
                # Work on a copy: the configuration must stay untouched so the
                # builder can be run again, and the 'pattern' entry must not be
                # forwarded as a pattern option.
                kwargs = dict(pattern)
                pattern = kwargs.pop('pattern')
            else:
                kwargs = {}
            regex = kwargs.pop('regex', False)
            if pattern.startswith(regex_prefix):
                # Strip the marker only when it is really there; a dict pattern
                # flagged with regex=True has no marker to remove.
                regex = True
                pattern = pattern[len(regex_prefix):]
            if regex:
                rebulk.regex(pattern, value=value, **kwargs)
            else:
                rebulk.string(pattern, value=value, **kwargs)

    rebulk.rules(ValidateStreamingService)

    return rebulk
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers bit rate patterns under the ``audio_bit_rate`` name together
    with the ``BitRateTypeRule`` rule. The builder is disabled only when both
    ``audio_bit_rate`` and ``video_bit_rate`` are disabled.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bit_rate_disabled(context):
        return (is_disabled(context, 'audio_bit_rate')
                and is_disabled(context, 'video_bit_rate'))

    def solve_conflict(match, other):
        # Hand back this match only against non-weak audio channels; in every
        # other case hand back the other match.
        if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags:
            return match
        return other

    rebulk = Rebulk(disabled=bit_rate_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)

    rebulk.regex(
        r'\d+-?[kmg]b(ps|its?)',
        r'\d+\.\d+-?[kmg]b(ps|its?)',
        conflict_solver=solve_conflict,
        formatter=BitRate.fromstring,
        tags=['release-group-prefix']
    )

    rebulk.rules(BitRateTypeRule)

    return rebulk
def rebulk_builder(config):
    """
    Default builder for main Rebulk object used by api.

    Each property builder receives its own section of ``config`` (an empty
    dict when the section is missing) and its result is added to the main
    object, in the order listed below.

    :param config: full rule configuration, one section per property builder
    :type config: dict
    :return: Main Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()

    common_words = frozenset(config.get('common_words', {}))

    def register(builder, section, *extra):
        # Build one child Rebulk from its config section and attach it.
        rebulk.rebulk(builder(config.get(section, {}), *extra))

    register(path, 'path')
    register(groups, 'groups')

    register(episodes, 'episodes')
    register(container, 'container')
    register(source, 'source')
    register(video_codec, 'video_codec')
    register(audio_codec, 'audio_codec')
    register(screen_size, 'screen_size')
    register(website, 'website')
    register(date, 'date')
    register(title, 'title')
    register(episode_title, 'episode_title')
    register(language, 'language', common_words)
    register(country, 'country', common_words)
    register(release_group, 'release_group')
    register(streaming_service, 'streaming_service')
    register(other, 'other')
    register(size, 'size')
    register(bit_rate, 'bit_rate')
    register(edition, 'edition')
    register(cd, 'cd')
    register(bonus, 'bonus')
    register(film, 'film')
    register(part, 'part')
    register(crc, 'crc')

    register(processors, 'processors')

    register(mimetype, 'mimetype')
    register(type_, 'type')

    def customize_properties(properties):
        """
        Customize default rebulk properties
        """
        # The generic 'count' entry is replaced by the two names exposed here.
        count = properties.pop('count')

        properties['season_count'] = count
        properties['episode_count'] = count

        return properties

    rebulk.customize_properties = customize_properties

    return rebulk
def streaming_service(config):
    """Streaming service property.

    ``config`` maps each streaming service value to one pattern or a list of
    patterns. Patterns starting with ``re:`` are registered as regular
    expressions, all others as plain strings.

    :param config: rule configuration
    :type config: dict
    :return:
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    regex_prefix = 're:'

    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith(regex_prefix):
                # The prefix is only a marker in the configuration; it must not
                # become part of the regular expression itself.
                rebulk.regex(pattern[len(regex_prefix):], value=value)
            else:
                rebulk.string(pattern, value=value)

    rebulk.rules(ValidateStreamingService)

    return rebulk
def streaming_service():
    """Streaming service property.

    Registers a fixed table of streaming service abbreviations and names under
    the ``streaming_service`` name, then the ``ValidateStreamingService`` rule.

    :return:
    :rtype: Rebulk
    """
    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['format-prefix'])

    # (value, string patterns, optional regex pattern); entries are registered
    # in table order, the regex right after the strings of the same service.
    services = (
        ('A&E', ('AE', 'A&E'), None),
        ('ABC', ('AMBC',), None),
        ('AMC', ('AMC',), None),
        ('Amazon Prime', ('AMZN', 'AmazonPrime'), 'Amazon-Prime'),
        ('Adult Swim', ('AS', 'AdultSwim'), 'Adult-Swim'),
        ('BBC iPlayer', ('iP', 'BBCiPlayer'), 'BBC-iPlayer'),
        ('CBS', ('CBS',), None),
        ('Comedy Central', ('CC', 'ComedyCentral'), 'Comedy-Central'),
        ('Crunchy Roll', ('CR', 'CrunchyRoll'), 'Crunchy-Roll'),
        ('The CW', ('CW', 'TheCW'), 'The-CW'),
        ('Discovery', ('DISC', 'Discovery'), None),
        ('DIY Network', ('DIY',), None),
        ('Disney', ('DSNY', 'Disney'), None),
        ('ePix', ('EPIX', 'ePix'), None),
        ('HBO Go', ('HBO', 'HBOGo'), 'HBO-Go'),
        ('History', ('HIST', 'History'), None),
        ('Investigation Discovery', ('ID',), None),
        ('IFC', ('IFC', 'IFC'), None),
        ('PBS', ('PBS', 'PBS'), None),
        ('National Geographic', ('NATG', 'NationalGeographic'), 'National-Geographic'),
        ('NBA TV', ('NBA', 'NBATV'), 'NBA-TV'),
        ('NBC', ('NBC',), None),
        ('NFL', ('NFL',), None),
        ('Nickelodeon', ('NICK', 'Nickelodeon'), None),
        ('Netflix', ('NF', 'Netflix'), None),
        ('iTunes', ('iTunes',), None),
        ('RTÉ One', ('RTE',), None),
        ('SeeSo', ('SESO', 'SeeSo'), None),
        ('Spike TV', ('SPKE', 'SpikeTV', 'Spike TV'), None),
        ('Syfy', ('SYFY', 'Syfy'), None),
        ('TFou', ('TFOU', 'TFou'), None),
        ('TLC', ('TLC',), None),
        ('TV3 Ireland', ('TV3',), None),
        ('TV4 Sweeden', ('TV4',), None),
        ('TV Land', ('TVL', 'TVLand', 'TV Land'), None),
        ('UFC', ('UFC',), None),
        ('USA Network', ('USAN',), None),
    )

    for value, strings, regex in services:
        rebulk.string(*strings, value=value)
        if regex is not None:
            rebulk.regex(regex, value=value)

    rebulk.rules(ValidateStreamingService)

    return rebulk
def video_codec():
    """
    Builder for rebulk object.

    Registers ``video_codec`` patterns, then ``video_profile`` patterns (which
    must be surrounded by separators), one ``video_api`` pattern and finally
    the ``ValidateVideoCodec`` and ``VideoProfileRule`` rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    # --- codecs ---
    rebulk.defaults(name="video_codec")
    rebulk.regex(r"Rv\d{2}", value="Real")
    rebulk.regex("Mpeg2", value="Mpeg2")
    rebulk.regex("DVDivX", "DivX", value="DivX")
    rebulk.regex("XviD", value="XviD")
    rebulk.regex(
        "[hx]-?264(?:-?AVC(HD)?)?",
        "MPEG-?4(?:-?AVC(HD)?)",
        "AVCHD",
        value="h264"
    )
    rebulk.regex("[hx]-?265(?:-?HEVC)?", "HEVC", value="h265")

    # --- profiles ---
    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    rebulk.defaults(name="video_profile", validator=seps_surround)

    profile_tag = 'video_profile.rule'

    rebulk.regex('10.?bit', 'Hi10P', value='10bit')
    rebulk.regex('8.?bit', value='8bit')

    rebulk.string('BP', value='BP', tags=profile_tag)
    rebulk.string('XP', 'EP', value='XP', tags=profile_tag)
    rebulk.string('MP', value='MP', tags=profile_tag)
    rebulk.string('HP', 'HiP', value='HP', tags=profile_tag)
    rebulk.regex('Hi422P', value='Hi422P', tags=profile_tag)
    rebulk.regex('Hi444PP', value='Hi444PP', tags=profile_tag)

    # --- api ---
    rebulk.string('DXVA', value='DXVA', name='video_api')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
def country(config, common_words):
    """
    Builder for rebulk object.

    Registers a functional ``country`` pattern backed by ``CountryFinder`` and
    installs a ``guessit`` country converter in babelfish, built from
    ``config['synonyms']``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def country_disabled(context):
        return is_disabled(context, 'country')

    rebulk = Rebulk(disabled=country_disabled)
    rebulk = rebulk.defaults(name='country')

    def find_countries(string, context=None):
        """
        Find countries in given string.
        """
        if context:
            allowed_countries = context.get('allowed_countries')
        else:
            allowed_countries = None
        finder = CountryFinder(allowed_countries, common_words)
        return finder.find(string)

    def solve_conflict(match, other):
        # Prefer language and any other property over country if not US or GB.
        is_us_or_gb = match.value in (babelfish.Country('US'), babelfish.Country('GB'))
        if other.name == 'language' and is_us_or_gb:
            return other
        return match

    def no_allowed_countries(context):
        return not context.get('allowed_countries')

    rebulk.functional(
        find_countries,
        conflict_solver=solve_conflict,
        properties={'country': [None]},
        disabled=no_allowed_countries
    )

    babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])

    return rebulk
def edition():
    """
    Builder for rebulk object.

    Registers the ``edition`` patterns; every match must be surrounded by
    separators.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    def special_edition_conflict(match, other):  # pylint:disable=unused-argument
        # Hand back the other match when it is the 'Special' episode detail;
        # otherwise defer to the default conflict handling.
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    rebulk.regex(
        'collector', 'collector-edition', 'edition-collector',
        value='Collector Edition'
    )
    rebulk.regex(
        'special-edition', 'edition-special',
        value='Special Edition',
        conflict_solver=special_edition_conflict
    )
    rebulk.regex(
        'criterion-edition', 'edition-criterion',
        value='Criterion Edition'
    )
    rebulk.regex(
        'deluxe', 'deluxe-edition', 'edition-deluxe',
        value='Deluxe Edition'
    )
    rebulk.regex(
        "director'?s?-cut", "director'?s?-cut-edition", "edition-director'?s?-cut",
        value="Director's cut"
    )

    return rebulk
def edition():
    """
    Builder for rebulk object.

    Registers the ``edition`` patterns; every match must be surrounded by
    separators.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    def special_edition_conflict(match, other):  # pylint:disable=unused-argument
        # Hand back the other match when it is the 'Special' episode detail;
        # otherwise defer to the default conflict handling.
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    def neighbor_tags():
        # A fresh list per pattern, so no two patterns share the same object.
        return ['has-neighbor', 'release-group-prefix']

    rebulk.regex('collector', 'collector-edition', 'edition-collector', value='Collector Edition')
    rebulk.regex('special-edition', 'edition-special', value='Special Edition',
                 conflict_solver=special_edition_conflict)
    rebulk.string('se', value='Special Edition', tags='has-neighbor')
    rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
    rebulk.regex('limited', 'limited-edition', value='Limited Edition', tags=neighbor_tags())
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical Edition')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version', value='Extended', tags=neighbor_tags())
    rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=neighbor_tags())

    # Editions whose pattern and value are the same word.
    for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(value, value=value, tags=neighbor_tags())

    rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])

    return rebulk
def format_():
    """
    Builder for rebulk object.

    Registers the ``format`` regular expressions from a fixed table and the
    ``ValidateFormat`` rule.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name="format")

    # (value, patterns), registered in table order.
    formats = (
        ("VHS", ("VHS", "VHS-?Rip")),
        ("Cam", ("CAM", "CAM-?Rip", "HD-?CAM")),
        ("Telesync", ("TELESYNC", "TS", "HD-?TS")),
        ("Workprint", ("WORKPRINT", "WP")),
        ("Telecine", ("TELECINE", "TC")),
        # Pay Per View
        ("PPV", ("PPV", "PPV-?Rip")),
        # TV is too common to allow matching
        ("TV", ("SD-?TV", "SD-?TV-?Rip", "Rip-?SD-?TV", "TV-?Rip", "Rip-?TV")),
        ("DVB", ("DVB-?Rip", "DVB", "PD-?TV")),
        # "DVD-?R(?:$|^E)" => DVD-Real ...
        ("DVD", ("DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))", "DVD-?9", "DVD-?5")),
        ("HDTV", ("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP")),
        ("VOD", ("VOD", "VOD-?Rip")),
        ("WEBRip", ("WEB-?Rip", "WEB-?DL-?Rip", "WEB-?Cap")),
        ("WEB-DL", ("WEB-?DL", "WEB-?HD", "WEB")),
        ("HD-DVD", ("HD-?DVD-?Rip", "HD-?DVD")),
        ("BluRay", ("Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]", "BD25", "BD50")),
        ("AHDTV", ("AHDTV",)),
        ("HDTC", ("HDTC",)),
        ("SATRip", ("DSR", "DSR?-?Rip", "SAT-?Rip", "DTH", "DTH-?Rip")),
    )

    for value, patterns in formats:
        rebulk.regex(*patterns, value=value)

    rebulk.rules(ValidateFormat)

    return rebulk
def website():
    """
    Builder for rebulk object.

    Registers three ``website`` regular expressions built from the domain
    extensions listed in the ``tlds-alpha-by-domain.txt`` resource of the
    ``guessit`` package. When ``REGEX_AVAILABLE`` is true the named list
    syntax is used; otherwise explicit alternations are built.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Local import: only needed here, to release the resource stream.
    from contextlib import closing

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="website")

    # Read the whole resource, then close the stream instead of leaking it.
    with closing(resource_stream('guessit', 'tlds-alpha-by-domain.txt')) as tld_stream:
        tld_lines = tld_stream.readlines()

    # Lines containing '--' are dropped, then the first remaining line is
    # skipped (presumably the file's header line - verify against the resource).
    tlds = [line.strip().decode('utf-8') for line in tld_lines if b'--' not in line][1:]  # All registered domain extension

    safe_tlds = ['com', 'org', 'net']  # For sure a website extension
    safe_subdomains = ['www']  # For sure a website subdomain
    safe_prefix = ['co', 'com', 'org', 'net']  # Those words before a tlds are sure

    if REGEX_AVAILABLE:
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)+(?:[a-z-]+\.)+(?:\L<tlds>))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains, tlds=tlds, children=True)
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)*[a-z-]+\.(?:\L<safe_tlds>))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
        rebulk.regex(
            r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)*[a-z-]+\.(?:\L<safe_prefix>\.)+(?:\L<tlds>))(?:[^a-z0-9]|$)',
            safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)
    else:
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                     r'\.)+(?:[a-z-]+\.)+(?:' + build_or_pattern(tlds) +
                     r'))(?:[^a-z0-9]|$)',
                     children=True)
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                     r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_tlds) +
                     r'))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
        rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                     r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_prefix) +
                     r'\.)+(?:' + build_or_pattern(tlds) +
                     r'))(?:[^a-z0-9]|$)',
                     safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    return rebulk
def screen_size():
    """
    Builder for rebulk object.

    Registers fixed-height ``screen_size`` patterns (optionally preceded by a
    width and ``x``/``*``), the ``4k`` alias, a generic ``<width>x<height>``
    resolution pattern tagged ``resolution``, and the ``ScreenSizeOnlyOne`` and
    ``RemoveScreenSizeConflicts`` rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    _digits_re = re.compile(r'\d+')

    def conflict_solver(match, other):
        """
        Conflict solver for most screen_size.
        """
        if other.name != 'screen_size':
            return '__default__'
        if 'resolution' in other.tags:
            # The chtouile to solve conflict in "720 x 432" string matching both 720p pattern
            last_number = _digits_re.findall(match.raw)[-1]
            if other.value.startswith(last_number):
                return match
        return other

    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver)

    # Optional "<width>x" / "<width>*" part shared by every fixed-height pattern.
    width_prefix = r'(?:\d{3,}(?:x|\*))?'

    # (height part of the pattern, value), registered in table order.
    heights = (
        (r'360(?:i)', '360i'),
        (r'360(?:p?x?)', '360p'),
        (r'368(?:p?x?)', '368p'),
        (r'480(?:i)', '480i'),
        (r'480(?:p?x?)', '480p'),
        (r'576(?:i)', '576i'),
        (r'576(?:p?x?)', '576p'),
        (r'720(?:p?(?:50|60)?x?)', '720p'),
        (r'720(?:p(?:50|60)?x?)', '720p'),
        (r'720p?hd', '720p'),
        (r'900(?:i)', '900i'),
        (r'900(?:p?x?)', '900p'),
        (r'1080i', '1080i'),
        (r'1080p?x?', '1080p'),
        (r'1080(?:p(?:50|60)?x?)', '1080p'),
        (r'1080p?hd', '1080p'),
        (r'2160(?:p?x?)', '2160p'),
    )
    for height_pattern, value in heights:
        rebulk.regex(width_prefix + height_pattern, value=value)

    rebulk.string('4k', value='2160p')
    rebulk.regex(width_prefix + r'4320(?:p?x?)', value='4320p')

    def format_resolution(value):
        # Join every run of digits found in the matched text with 'x'.
        return 'x'.join(_digits_re.findall(value))

    def resolution_conflict(match, other):  # pylint:disable=unused-argument
        if other.name == 'screen_size':
            return '__default__'
        return other

    rebulk.defaults(name="screen_size", validator=seps_surround)
    rebulk.regex(
        r'\d{3,}-?(?:x|\*)-?\d{3,}',
        formatter=format_resolution,
        abbreviations=[dash],
        tags=['resolution'],
        conflict_solver=resolution_conflict
    )

    rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)

    return rebulk
def audio_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Loads the ``audio_codec``, ``audio_profile`` and ``audio_channels``
    patterns from the sections of the same name in ``config`` and registers
    the audio related rules.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    secondary_names = ['audio_profile', 'audio_channels']

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec

        :param match1: first conflicting match
        :type match1:
        :param match2: second conflicting match
        :type match2:
        :return: the profile/channels match of the pair, or '__default__'
        :rtype:
        """
        if match1.name == 'audio_codec' and match2.name in secondary_names:
            return match2
        if match1.name in secondary_names and match2.name == 'audio_codec':
            return match1
        return '__default__'

    def disabled_for(name):
        # Build the per-property "disabled" callback.
        return lambda context: is_disabled(context, name)

    rebulk.defaults(name='audio_codec',
                    conflict_solver=audio_codec_priority,
                    disabled=disabled_for('audio_codec'))
    load_config_patterns(rebulk, config.get('audio_codec'))

    rebulk.defaults(clear=True,
                    name='audio_profile',
                    disabled=disabled_for('audio_profile'))
    load_config_patterns(rebulk, config.get('audio_profile'))

    rebulk.defaults(clear=True,
                    name="audio_channels",
                    disabled=disabled_for('audio_channels'))
    load_config_patterns(rebulk, config.get('audio_channels'))

    rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
                 AudioChannelsValidatorRule)

    return rebulk
def processors():
    """
    Builder for rebulk object.

    Only rules are registered here, in the order given below.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    return rebulk.rules(
        EnlargeGroupMatches,
        EquivalentHoles,
        RemoveLessSpecificSeasonEpisode,
        RemoveAmbiguous,
        SeasonYear,
        Processors,
        StripSeparators
    )
def audio_codec():
    """
    Builder for rebulk object.

    Registers the hard-coded ``audio_codec``, ``audio_profile`` and
    ``audio_channels`` patterns, then the audio related rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    secondary_names = ['audio_profile', 'audio_channels']

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec

        :param match1: first conflicting match
        :type match1:
        :param match2: second conflicting match
        :type match2:
        :return: the profile/channels match of the pair, or '__default__'
        :rtype:
        """
        if match1.name == 'audio_codec' and match2.name in secondary_names:
            return match2
        if match1.name in secondary_names and match2.name == 'audio_codec':
            return match1
        return '__default__'

    # --- codecs ---
    rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)

    rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
    rebulk.regex("Dolby", "DolbyDigital", "Dolby-Digital", "DDP?", value="DolbyDigital")
    rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
    rebulk.regex("AAC", value="AAC")
    rebulk.regex("AC3D?", value="AC3")
    rebulk.regex("Flac", value="FLAC")
    rebulk.regex("DTS", value="DTS")
    rebulk.regex("True-?HD", value="TrueHD")

    # --- profiles, each tagged with a codec name ---
    rebulk.defaults(name="audio_profile")

    rebulk.string("HD", value="HD", tags="DTS")
    rebulk.regex("HD-?MA", value="HDMA", tags="DTS")
    rebulk.string("HE", value="HE", tags="AAC")
    rebulk.string("LC", value="LC", tags="AAC")
    rebulk.string("HQ", value="HQ", tags="AC3")

    # --- channels ---
    rebulk.defaults(name="audio_channels")

    rebulk.regex(r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', value='7.1', children=True)
    rebulk.regex(r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', value='5.1', children=True)
    rebulk.regex(r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', value='2.0', children=True)

    # Bare two-digit forms are tagged as weak and need a separator after them.
    weak_tag = 'weak-audio_channels'
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags=weak_tag)
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags=weak_tag)
    rebulk.string('20', value='2.0', validator=seps_after, tags=weak_tag)

    rebulk.string('7ch', '8ch', value='7.1')
    rebulk.string('5ch', '6ch', value='5.1')
    rebulk.string('2ch', 'stereo', value='2.0')
    rebulk.string('1ch', 'mono', value='1.0')

    rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule)

    return rebulk
def container():
    """
    Builder for rebulk object.

    Registers ``container`` patterns twice: first as file extensions (a dot,
    the extension, end of string; tagged ``extension``), then as plain words
    that must be surrounded by separators.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)

    def extension_conflict(match, other):  # pylint:disable=unused-argument
        # Hand back the other match when it is a format, a video codec or a
        # container that is not an extension; else use the default handling.
        if other.name in ['format', 'video_codec'] or (
                other.name == 'container' and 'extension' not in other.tags):
            return other
        return '__default__'

    def word_conflict(match, other):
        # Hand back this match when the other one is a format, a video codec
        # or an extension container; else use the default handling.
        if other.name in ['format', 'video_codec'] or (
                other.name == 'container' and 'extension' in other.tags):
            return match
        return '__default__'

    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=extension_conflict)

    subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
    info = ['nfo']
    videos = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
              'iso', 'vob']
    torrent = ['torrent']
    nzb = ['nzb']

    # Extension form: ".<ext>" anchored at the end of the string.
    for exts, tag in ((subtitles, 'subtitle'), (info, 'info'), (videos, 'video'),
                      (torrent, 'torrent'), (nzb, 'nzb')):
        rebulk.regex(r'\.' + build_or_pattern(exts) + '$', exts=exts, tags=['extension', tag])

    rebulk.defaults(name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=word_conflict)

    # Word form: 'sub' is left out of the subtitle words.
    subtitle_words = [sub for sub in subtitles if sub != 'sub']
    rebulk.string(*subtitle_words, tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])
    rebulk.string(*nzb, tags=['nzb'])

    return rebulk
def rules():
    """Return all our custom rules to be applied to the guessit api.

    IMPORTANT:
     - DO NOT define priority or dependency in each rule. Just define order here.
     - Only allowed dependency is TypeProcessor because we want to apply rules for certain types only
    """
    custom_rules = Rebulk()
    return custom_rules.rules(
        RenamePartsToEpisodeNumbers,
        AppendPartToMovieTile,
        AppendLineToMovieTitle,
        AppendUsToMovieTitle,
        PrependXxxToMovieTitle,
        VhsAsMovieTitle
    )
def container(config):
    """
    Builder for rebulk object.

    Registers ``container`` patterns twice from the extension lists found in
    ``config``: first as file extensions (a dot, the extension, end of string;
    tagged ``extension``), then as plain words that must be surrounded by
    separators.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def container_disabled(context):
        return is_disabled(context, 'container')

    def extension_conflict(match, other):  # pylint:disable=unused-argument
        # Hand back the other match when it is a source, a video codec or a
        # container that is not an extension; else use the default handling.
        if other.name in ('source', 'video_codec') or (
                other.name == 'container' and 'extension' not in other.tags):
            return other
        return '__default__'

    def word_conflict(match, other):
        # Hand back this match when the other one is a source, a video codec
        # or an extension container; else use the default handling.
        if other.name in ('source', 'video_codec') or (
                other.name == 'container' and 'extension' in other.tags):
            return match
        return '__default__'

    rebulk = Rebulk(disabled=container_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=extension_conflict)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    # Extension form: ".<ext>" anchored at the end of the string.
    for exts, tag in ((subtitles, 'subtitle'), (info, 'info'), (videos, 'video'),
                      (torrent, 'torrent'), (nzb, 'nzb')):
        rebulk.regex(r'\.' + build_or_pattern(exts) + '$', exts=exts, tags=['extension', tag])

    rebulk.defaults(clear=True,
                    name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=word_conflict)

    # Word form: 'sub' and 'ass' are left out of the subtitle words.
    subtitle_words = [sub for sub in subtitles if sub not in ('sub', 'ass')]
    rebulk.string(*subtitle_words, tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])
    rebulk.string(*nzb, tags=['nzb'])

    return rebulk
def episode_title():
    """
    Builder for rebulk object.

    Only rules are registered here, in the order given below.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    return Rebulk().rules(
        EpisodeTitleFromPosition,
        AlternativeTitleReplace,
        TitleToEpisodeTitle,
        Filepart3EpisodeTitle,
        Filepart2EpisodeTitle
    )
def release_group(config):
    """
    Builder for rebulk object.

    Registers a functional ``release_group`` pattern for the values given under
    ``expected_group`` in the context, plus the release group rules, which
    receive the ``clean_groupname`` helper defined below.

    :param config: rule configuration; ``forbidden_names`` and ``ignored_seps`` are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    forbidden_groupnames = config['forbidden_names']

    groupname_ignore_seps = config['ignored_seps']
    # Every separator except the ignored ones.
    groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])

    def clean_groupname(string):
        """
        Removes and strip separators from input_string

        Forbidden names found at the start or at the end of the string, next
        to a separator, are removed as well.

        :param string: raw release group text
        :type string: str
        :return: cleaned release group text
        :rtype: str
        """
        string = string.strip(groupname_seps)

        # Ignored separators are stripped too, unless they wrap the whole
        # string or also occur inside it.
        wrapped = (string.endswith(tuple(groupname_ignore_seps))
                   and string.startswith(tuple(groupname_ignore_seps)))
        if not wrapped \
                and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
            string = string.strip(groupname_ignore_seps)

        for forbidden in forbidden_groupnames:
            if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
                string = string[len(forbidden):]
                string = string.strip(groupname_seps)
            if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
                # Drop the forbidden suffix. The slice must count from the end;
                # string[:len(forbidden)] would keep a same-length prefix instead.
                string = string[:-len(forbidden)]
                string = string.strip(groupname_seps)
        return string

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))

    expected_group = build_expected_function('expected_group')

    rebulk.functional(expected_group,
                      name='release_group',
                      tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_group'))

    return rebulk.rules(
        DashSeparatedReleaseGroup(clean_groupname),
        SceneReleaseGroup(clean_groupname),
        AnimeReleaseGroup
    )
def screen_size(config):
    """
    Builder for rebulk object.

    Builds the ``screen_size`` patterns from the ``interlaced``,
    ``progressive`` and ``frame_rates`` lists of ``config``, adds the ``4k``
    alias, a generic ``<width>x<height>`` pattern and a ``frame_rate``
    pattern, then registers the screen size rules.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    interlaced = frozenset(config['interlaced'])
    progressive = frozenset(config['progressive'])
    frame_rates = frozenset(config['frame_rates'])
    min_ar = config['min_ar']
    max_ar = config['max_ar']

    def screen_size_disabled(context):
        return is_disabled(context, 'screen_size')

    def frame_rate_disabled(context):
        return is_disabled(context, 'frame_rate')

    def four_k_conflict(match, other):
        if other.name == 'screen_size':
            return '__default__'
        return match

    def resolution_conflict(match, other):  # pylint:disable=unused-argument
        if other.name == 'screen_size':
            return '__default__'
        return other

    rebulk = Rebulk()
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name='screen_size',
                    validator=seps_surround,
                    abbreviations=[dash],
                    disabled=screen_size_disabled)

    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')

    # Optional "<width>x" / "<width>*" part placed before every height.
    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'

    # (height alternation, text following the height), in registration order.
    height_patterns = (
        (interlaced_pattern, r'(?P<scan_type>i)' + frame_rate_pattern + '?'),
        (progressive_pattern, r'(?P<scan_type>p)' + frame_rate_pattern + '?'),
        (progressive_pattern, r'(?P<scan_type>p)?(?:hd)'),
        (progressive_pattern, r'(?P<scan_type>p)?x?'),
    )
    for height_pattern, tail in height_patterns:
        rebulk.regex(res_pattern + height_pattern + tail)

    rebulk.string('4k', value='2160p', conflict_solver=four_k_conflict)
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=resolution_conflict)

    rebulk.regex(frame_rate_pattern + '-?(?:p|fps)',
                 name='frame_rate',
                 formatter=FrameRate.fromstring,
                 disabled=frame_rate_disabled)

    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)

    return rebulk
def rebulk_builder():
    """
    Default builder for main Rebulk object used by api.

    Every property builder is called without arguments and its result is
    added to the main object, in the order listed below.

    :return: Main Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()

    builders = (
        path,
        groups,

        episodes,
        container,
        format_,
        video_codec,
        audio_codec,
        screen_size,
        website,
        date,
        title,
        episode_title,
        language,
        country,
        release_group,
        streaming_service,
        other,
        size,
        edition,
        cds,
        bonus,
        film,
        part,
        crc,

        processors,

        mimetype,
        type_,
    )
    for builder in builders:
        rebulk.rebulk(builder())

    def customize_properties(properties):
        """
        Customize default rebulk properties
        """
        # The generic 'count' entry is replaced by the two names exposed here.
        count = properties.pop('count')

        properties['season_count'] = count
        properties['episode_count'] = count

        return properties

    rebulk.customize_properties = customize_properties

    return rebulk