def __init__(self):
        """Register the patterns that guess episode numbers from digit runs.

        Everything is registered on ``self.container``.  The generic digit
        patterns are switched on or off by the ``episode_prefer_number``
        option through their ``disabler`` callbacks.
        """
        Transformer.__init__(self, 15)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        episode_words = ['episodes?']

        def _formater(episode_number):
            """Turn a matched digit run into an episode number or a season/episode pair.

            Returns ``None`` when the number is a valid year or when the
            derived season is greater than 50, the number itself when it is
            100 or less, and otherwise a ``{'season', 'episodeNumber'}`` dict
            obtained by splitting the number at the hundreds.
            """
            epnum = parse_numeral(episode_number)
            if valid_year(epnum):
                return None
            if epnum <= 100:
                return epnum
            season, epnum = divmod(epnum, 100)
            # episodes which have a season > 50 are most likely errors
            # (Simpson is at 25!)
            if season > 50:
                return None
            return {'season': season, 'episodeNumber': epnum}

        def _disabled_when_number_preferred(options):
            # Disabled only when options are given and the flag is truthy.
            return options.get('episode_prefer_number') if options else False

        def _disabled_unless_number_preferred(options):
            # Disabled when no options are given or the flag is falsy.
            return not options.get('episode_prefer_number') if options else True

        # 2 to 4 digits, possibly split into season + episode by _formater.
        self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6,
                                         formatter=_formater, disabler=_disabled_when_number_preferred)
        # Exactly 4 digits: registered without a disabler.
        self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6,
                                         formatter=_formater)
        # 1 to 3 digits preceded by a non-digit character.
        self.container.register_property('episodeNumber', r'[^0-9](\d{1,3})', confidence=0.6,
                                         formatter=parse_numeral, disabler=_disabled_unless_number_preferred)
        # An episode word followed by a numeral and then a non-digit.
        self.container.register_property(None,
                                         '(' + build_or_pattern(episode_words) + sep + '?(?P<episodeNumber>' + numeral + '))[^0-9]',
                                         confidence=0.4, formatter=parse_numeral)
        # "<episodeNumber> of <episodeCount>", using any of the `of_separators`.
        self.container.register_property(None,
                                         r'(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')',
                                         confidence=0.6, formatter=parse_numeral)
        # 1 to 3 digits at the very start of the string, followed by a separator.
        self.container.register_property('episodeNumber', r'^' + sep + r'?(\d{1,3})' + sep, confidence=0.4,
                                         formatter=parse_numeral, disabler=_disabled_unless_number_preferred)
        # 1 to 3 digits at the very end of the string, preceded by a separator.
        self.container.register_property('episodeNumber', sep + r'(\d{1,3})' + sep + '?$', confidence=0.4,
                                         formatter=parse_numeral, disabler=_disabled_unless_number_preferred)
    def __init__(self):
        """Register the regular expressions that recognise a ``website``.

        Three patterns are registered on ``self.container``; they differ in
        which part of the host name (subdomain, extension, or the word before
        the extension) has to come from a "safe" list.
        """
        Transformer.__init__(self, 45)

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        # All registered domain extension
        tlds_pattern = build_or_pattern(TLDS)
        # For sure a website extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])
        # For sure a website subdomain
        safe_subdomains_pattern = build_or_pattern(['www'])
        # Those words before a tlds are sure
        safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])

        # Raw strings throughout: '\.' in a plain literal is an invalid escape.
        # At least one safe subdomain, any labels, then any known extension.
        self.container.register_property(
            'website',
            r'(?:' + safe_subdomains_pattern + r'\.)+' +
            r'(?:[a-z-]+\.)+' +
            r'(?:' + tlds_pattern + r')+')
        # Optional safe subdomain, one label, then a safe extension.
        self.container.register_property(
            'website',
            r'(?:' + safe_subdomains_pattern + r'\.)*' +
            r'[a-z-]+\.' +
            r'(?:' + safe_tlds_pattern + r')+')
        # Optional safe subdomain, one label, a safe prefix word, then any
        # known extension.
        self.container.register_property(
            'website',
            r'(?:' + safe_subdomains_pattern + r'\.)*' +
            r'[a-z-]+\.' +
            r'(?:' + safe_prefix_tlds_pattern + r'\.)+' +
            r'(?:' + tlds_pattern + r')+')
Example #3
0
    def __init__(self):
        """Load the TLD list and register the ``website`` patterns.

        The extensions are read from the ``tlds-alpha-by-domain.txt`` resource
        of the ``guessit`` package and combined into the regular expressions
        registered on ``self.container``.
        """
        Transformer.__init__(self, 45)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        tlds = []

        f = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
        # try/finally so the stream is released even if reading or decoding fails.
        try:
            # The first two lines of the resource are consumed without being used.
            f.readline()
            next(f)
            for tld in f:
                tld = tld.strip()
                # Entries containing '--' are left out
                # (presumably punycode 'XN--' names -- TODO confirm).
                if b'--' in tld:
                    continue
                tlds.append(tld.decode("utf-8"))
        finally:
            f.close()

        tlds_pattern = build_or_pattern(tlds)  # All registered domain extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])  # For sure a website extension
        safe_subdomains_pattern = build_or_pattern(['www'])  # For sure a website subdomain
        safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])  # Those words before a tlds are sure

        # Raw strings throughout: '\.' in a plain literal is an invalid escape.
        # At least one safe subdomain, any labels, then any known extension.
        self.container.register_property('website', r'(?:' + safe_subdomains_pattern + r'\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
        # Optional safe subdomain, one label, then a safe extension.
        self.container.register_property('website', r'(?:' + safe_subdomains_pattern + r'\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
        # Optional safe subdomain, one label, a safe prefix word, then any known extension.
        self.container.register_property('website', r'(?:' + safe_subdomains_pattern + r'\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')
Example #4
0
    def __init__(self):
        """Register the ``website`` patterns built from the ``TLDS`` list."""
        Transformer.__init__(self, 45)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        tlds_pattern = build_or_pattern(TLDS)  # All registered domain extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])  # For sure a website extension
        safe_subdomains_pattern = build_or_pattern(['www'])  # For sure a website subdomain
        safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])  # Those words before a tlds are sure

        # Shared fragments, written as raw strings so that '\.' is not an
        # invalid string escape.
        required_subdomain = r'(?:' + safe_subdomains_pattern + r'\.)+'
        optional_subdomain = r'(?:' + safe_subdomains_pattern + r'\.)*'
        single_label = r'[a-z-]+\.'
        any_tld = r'(?:' + tlds_pattern + r')+'

        # At least one safe subdomain, any labels, then any known extension.
        self.container.register_property('website', required_subdomain + r'(?:[a-z-]+\.)+' + any_tld)
        # Optional safe subdomain, one label, then a safe extension.
        self.container.register_property('website', optional_subdomain + single_label + r'(?:' + safe_tlds_pattern + r')+')
        # Optional safe subdomain, one label, a safe prefix word, then any known extension.
        self.container.register_property('website', optional_subdomain + single_label + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + any_tld)
Example #5
0
    def __init__(self):
        """Read the TLD resource file and register the ``website`` patterns.

        The extensions come from ``tlds-alpha-by-domain.txt`` in the
        ``guessit`` package; the resulting patterns are registered on
        ``self.container``.
        """
        Transformer.__init__(self, 45)

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        f = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
        # Close the stream even if reading or decoding raises.
        try:
            # Two leading lines are consumed and discarded before the list
            # is read.
            f.readline()
            next(f)
            # Lines containing '--' are skipped (presumably punycode 'XN--'
            # names -- TODO confirm); the rest are stripped and decoded.
            tlds = [
                line.strip().decode("utf-8") for line in f
                if b'--' not in line
            ]
        finally:
            f.close()

        # All registered domain extension
        tlds_pattern = build_or_pattern(tlds)
        # For sure a website extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])
        # For sure a website subdomain
        safe_subdomains_pattern = build_or_pattern(['www'])
        # Those words before a tlds are sure
        safe_prefix_tlds_pattern = build_or_pattern(
            ['co', 'com', 'org', 'net'])

        # Raw strings throughout: '\.' in a plain literal is an invalid escape.
        # At least one safe subdomain, any labels, then any known extension.
        self.container.register_property(
            'website',
            r'(?:' + safe_subdomains_pattern + r'\.)+' +
            r'(?:[a-z-]+\.)+' +
            r'(?:' + tlds_pattern + r')+')
        # Optional safe subdomain, one label, then a safe extension.
        self.container.register_property(
            'website',
            r'(?:' + safe_subdomains_pattern + r'\.)*' +
            r'[a-z-]+\.' +
            r'(?:' + safe_tlds_pattern + r')+')
        # Optional safe subdomain, one label, a safe prefix word, then any
        # known extension.
        self.container.register_property(
            'website',
            r'(?:' + safe_subdomains_pattern + r'\.)*' +
            r'[a-z-]+\.' +
            r'(?:' + safe_prefix_tlds_pattern + r'\.)+' +
            r'(?:' + tlds_pattern + r')+')
Example #6
0
    def __init__(self):
        """Register release-property patterns and their quality ratings.

        Patterns are registered on ``self.container`` (a ``PropertiesContainer``)
        and ratings on ``self.qualities`` (a ``QualitiesContainer``).
        """
        Transformer.__init__(self, 35)

        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()

        def register_property(propname, props, **kwargs):
            """Register every entry of ``props`` under ``propname``.

            ``props`` is a dict of {canonical_form: patterns}. ``patterns`` is
            either a list of patterns or a ``(patterns, extra_kwargs)`` tuple;
            ``extra_kwargs`` are merged over the shared ``kwargs`` for that
            entry only.
            """
            for canonical_form, patterns in props.items():
                if isinstance(patterns, tuple):
                    # (patterns, per-entry keyword arguments)
                    patterns2, pattern_kwarg = patterns
                    if kwargs:
                        current_kwarg = dict(kwargs)
                        current_kwarg.update(pattern_kwarg)
                    else:
                        current_kwarg = dict(pattern_kwarg)
                    current_kwarg['canonical_form'] = canonical_form
                    self.container.register_property(propname, *patterns2, **current_kwarg)
                elif kwargs:
                    # Copy so the shared kwargs are not mutated between entries.
                    current_kwarg = dict(kwargs)
                    current_kwarg['canonical_form'] = canonical_form
                    self.container.register_property(propname, *patterns, **current_kwarg)
                else:
                    self.container.register_property(propname, *patterns, canonical_form=canonical_form)

        def register_quality(propname, quality_dict):
            """Register each {canonical_form: quality} pair for ``propname``."""
            for canonical_form, quality in quality_dict.items():
                self.qualities.register_quality(propname, canonical_form, quality)

        register_property('container', {'mp4': ['MP4']})

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
                                     'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
                                     #'Telesync': ['TELESYNC', 'PDVD'],
                                     'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
                                     'Workprint': ['WORKPRINT', 'WP'],
                                     'Telecine': ['TELECINE', 'TC'],
                                     'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                                     'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                     'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                     'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'],
                                     'VOD': ['VOD', 'VOD-Rip'],
                                     'WEBRip': ['WEB-Rip'],
                                     'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
                                     'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'],
                                     'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                     })

        # Ratings for the 'format' canonical forms registered above.
        register_quality('format', {'VHS': -100,
                                    'Cam': -90,
                                    'Telesync': -80,
                                    'Workprint': -70,
                                    'Telecine': -60,
                                    'PPV': -50,
                                    'TV': -30,
                                    'DVB': -20,
                                    'DVD': 0,
                                    'HDTV': 20,
                                    'VOD': 40,
                                    'WEBRip': 50,
                                    'WEB-DL': 60,
                                    'HD-DVD': 80,
                                    'BluRay': 100
                                    })

        # Named screen sizes: an optional "<3+ digits><separator>" prefix followed
        # by the height and an optional i/p/x suffix.
        # NOTE(review): these are non-raw strings, so '\\|' reaches the regex
        # engine as '\|' (a literal pipe) rather than "backslash OR ..." --
        # confirm whether a backslash alternative was intended.
        register_property('screenSize', {'360p': ['(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)'],
                                         '368p': ['(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)'],
                                         '480p': ['(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)'],
                                         #'480p': (['hr'], {'confidence': 0.2}), # duplicate dict key
                                         '576p': ['(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)'],
                                         '720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'],
                                         '900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'],
                                         '1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'],
                                         '1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080p?x?'],
                                         '4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)']
                                         },
                          validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))

        class ResolutionValidator(object):
            """Make sure our match is surrounded by separators, or by another entry"""
            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                # NOTE(review): the string literal below holds the separator
                # check described by the class docstring, but it is not
                # executed; this validator currently accepts every match.
                """
                span = _get_span(prop, match)
                span = _trim_span(span, string[span[0]:span[1]])
                start, end = span

                sep_start = start <= 0 or string[start - 1] in sep
                sep_end = end >= len(string) or string[end] in sep
                start_by_other = start in entry_end
                end_by_other = end in entry_start
                if (sep_start or start_by_other) and (sep_end or end_by_other):
                    return True
                return False
                """
                return True

        _digits_re = re.compile('\d+')

        def resolution_formatter(value):
            # Join every digit run found in the match with 'x'.
            digits = _digits_re.findall(value)
            return 'x'.join(digits)

        # Free-form "<width>x<height>" resolutions (also '*' as separator, with
        # optional dashes around it), formatted by resolution_formatter.
        self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))

        # Ratings for the named screen sizes.
        register_quality('screenSize', {'360p': -300,
                                        '368p': -200,
                                        '480p': -100,
                                        '576p': 0,
                                        '720p': 100,
                                        '900p': 130,
                                        '1080i': 180,
                                        '1080p': 200,
                                        '4K': 400
                                        })

        _videoCodecProperty = {'Real': ['Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
                               'Mpeg2': ['Mpeg2'],
                               'DivX': ['DVDivX', 'DivX'],
                               'XviD': ['XviD'],
                               'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
                               'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
                               }

        register_property('videoCodec', _videoCodecProperty)

        register_quality('videoCodec', {'Real': -50,
                                        'Mpeg2': -30,
                                        'DivX': -10,
                                        'XviD': 0,
                                        'h264': 100,
                                        'h265': 150
                                        })

        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        # Most profiles carry a LeavesValidator whose lambda checks for a
        # 'videoCodec' entry in the node's guess; the bit-depth ones do not.
        self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
        self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
        self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))

        # NOTE(review): '8bit' is registered above but has no rating here.
        register_quality('videoProfile', {'BP': -20,
                                          'XP': -10,
                                          'MP': 0,
                                          'HP': 10,
                                          '10bit': 15,
                                          'Hi422P': 25,
                                          'Hi444PP': 35
                                          })

        # has nothing to do here (or on filenames for that matter), but some
        # releases use it and it helps to identify release groups, so we adapt
        register_property('videoApi', {'DXVA': ['DXVA']})

        register_property('audioCodec', {'MP3': ['MP3', 'LAME', 'LAME(?:\d)+-(?:\d)+'],
                                         'DolbyDigital': ['DD'],
                                         'AAC': ['AAC'],
                                         'AC3': ['AC3'],
                                         'Flac': ['FLAC'],
                                         'DTS': (['DTS'], {'validator': LeftValidator()}),
                                         'TrueHD': ['True-HD']
                                         })

        register_quality('audioCodec', {'MP3': 10,
                                        'DolbyDigital': 30,
                                        'AAC': 35,
                                        'AC3': 40,
                                        'Flac': 45,
                                        'DTS': 60,
                                        'TrueHD': 70
                                        })

        # Each audio profile's lambda checks the node's guessed 'audioCodec'
        # against one specific codec (DTS, AAC or AC3).
        self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
        self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
        self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
        self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
        self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AC3']))

        register_quality('audioProfile', {'HD': 20,
                                          'HDMA': 50,
                                          'LC': 0,
                                          'HQ': 0,
                                          'HE': 20
                                          })

        register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch', '8ch'],
                                            '5.1': ['5[\W_]1', '5ch', '6ch'],
                                            '2.0': ['2[\W_]0', '2ch', 'stereo'],
                                            '1.0': ['1[\W_]0', '1ch', 'mono']
                                            })

        register_quality('audioChannels', {'7.1': 200,
                                           '5.1': 100,
                                           '2.0': 0,
                                           '1.0': -100
                                           })

        self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

        # Eight hexadecimal characters.
        self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)

        # "pt"/"part" followed by a numeral and then a non-digit character.
        part_words = ['pt', 'part']
        self.container.register_property(None, '(' + build_or_pattern(part_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)

        register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                    'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
                                    'DualAudio': ['Dual-Audio'],
                                    'WideScreen': ['ws', 'wide-screen'],
                                    'Netflix': ['Netflix', 'NF']
                                    })

        self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
        self.container.register_property('other', 'Fansub', canonical_form='Fansub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', 'Fastsub', canonical_form='Fastsub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
        self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
        self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')
        self.container.register_property('other', 'CC')  # Close Caption
        self.container.register_property('other', 'LD', 'MD')  # Line/Mic Dubbed

        self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
                                                     'DDC',
                                                     'HR', 'PAL', 'SECAM', 'NTSC')
        self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())

        # Any already-registered 'format' pattern followed by "Scr"/"Screener"
        # (with an optional dash) is also registered as a Screener.
        for prop in self.container.get_properties('format'):
            self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')

        # Every known subtitle/info/video extension is registered as a
        # 'container' with confidence 0.3.
        for exts in (subtitle_exts, info_exts, video_exts):
            for container in exts:
                self.container.register_property('container', container, confidence=0.3)
Example #7
0
    def __init__(self):
        Transformer.__init__(self, 20)

        range_separators = ['-', 'to', 'a']
        discrete_separators = ['&', 'and', 'et']
        of_separators = ['of', 'sur', '/', '\\']

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        discrete_sep = sep
        for range_separator in range_separators:
            discrete_sep = discrete_sep.replace(range_separator, '')
        discrete_separators.append(discrete_sep)
        all_separators = list(range_separators)
        all_separators.extend(discrete_separators)

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        range_separators_re = re.compile(build_or_pattern(range_separators),
                                         re.IGNORECASE)
        discrete_separators_re = re.compile(
            build_or_pattern(discrete_separators), re.IGNORECASE)
        all_separators_re = re.compile(build_or_pattern(all_separators),
                                       re.IGNORECASE)
        of_separators_re = re.compile(
            build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words_re = re.compile(build_or_pattern(season_words),
                                     re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words),
                                      re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers),
                                       re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers),
                                        re.IGNORECASE)

        def list_parser(value,
                        propertyListName,
                        discrete_separators_re=discrete_separators_re,
                        range_separators_re=range_separators_re,
                        allow_discrete=False,
                        fill_gaps=False):
            """Parse ``value`` into a single number or a list of numbers.

            ``value`` is split on ``discrete_separators_re``; pieces that belong
            to the same range (joined by ``range_separators_re``) are glued
            back together and expanded into every number of the range.

            Returns ``None`` when nothing was parsed, the number itself when
            only one remains, and otherwise a dict
            ``{None: first_number, propertyListName: all_numbers}``.

            When ``allow_discrete`` is false, a number lower than the last one
            kept is dropped.  When ``fill_gaps`` is true the result becomes the
            full range from the smallest to the largest number.
            """
            discrete_elements = [
                x.strip() for x in discrete_separators_re.split(value)
                if x != ''
            ]

            # Re-assemble ranges that were cut apart by the discrete split.
            proper_discrete_elements = []
            i = 0
            while i < len(discrete_elements):
                if i < len(discrete_elements) - 2 and range_separators_re.match(
                        discrete_elements[i + 1]):
                    # The next piece starts with a range separator: merge the
                    # three pieces into a single range expression.
                    proper_discrete_elements.append(discrete_elements[i] +
                                                    discrete_elements[i + 1] +
                                                    discrete_elements[i + 2])
                    i += 3
                else:
                    match = range_separators_re.search(discrete_elements[i])
                    if match and match.start() == 0:
                        # This piece starts with a range separator, so it
                        # continues the previously emitted element.  Address
                        # the last emitted element directly: after a merge
                        # above, the output list is shorter than ``i`` and
                        # indexing it with ``i - 1`` would be out of range.
                        proper_discrete_elements[-1] += discrete_elements[i]
                    elif match and match.end() == len(discrete_elements[i]):
                        # This piece ends with a range separator: attach the
                        # following piece as the end of the range.
                        # NOTE(review): raises IndexError if this is the last
                        # piece -- confirm whether that input can occur.
                        proper_discrete_elements.append(discrete_elements[i] +
                                                        discrete_elements[i + 1])
                    else:
                        proper_discrete_elements.append(discrete_elements[i])
                    i += 1

            ret = []

            for discrete_element in proper_discrete_elements:
                range_values = [
                    x.strip()
                    for x in range_separators_re.split(discrete_element)
                    if x != ''
                ]
                if len(range_values) > 1:
                    # Expand every consecutive pair of bounds, both included.
                    for x in range(len(range_values) - 1):
                        start_range_ep = parse_numeral(range_values[x])
                        end_range_ep = parse_numeral(range_values[x + 1])
                        for range_ep in range(start_range_ep,
                                              end_range_ep + 1):
                            if range_ep not in ret:
                                ret.append(range_ep)
                else:
                    discrete_value = parse_numeral(discrete_element)
                    if discrete_value not in ret:
                        ret.append(discrete_value)

            if len(ret) > 1:
                if not allow_discrete:
                    # Keep only the numbers that do not go below the last
                    # number kept so far.
                    valid_ret = [ret[0]]
                    for candidate in ret[1:]:
                        if not candidate < valid_ret[-1]:
                            valid_ret.append(candidate)
                    ret = valid_ret
                if fill_gaps:
                    ret = list(range(min(ret), max(ret) + 1))
                if len(ret) > 1:
                    return {None: ret[0], propertyListName: ret}
            if len(ret) > 0:
                return ret[0]
            return None

        def episode_parser_x(value):
            """Parse an episode list whose items are separated by ``x`` (any case)."""
            x_separator_re = re.compile('x', re.IGNORECASE)
            return list_parser(value, 'episodeList',
                               discrete_separators_re=x_separator_re)

        def episode_parser_e(value):
            """Parse an episode list whose items are separated by ``e`` (any case).

            Gaps between the lowest and highest episode are filled in.
            """
            e_separator_re = re.compile('e', re.IGNORECASE)
            return list_parser(value, 'episodeList',
                               discrete_separators_re=e_separator_re,
                               fill_gaps=True)

        def episode_parser(value):
            """Parse ``value`` with the default separators into an ``episodeList``."""
            return list_parser(value, propertyListName='episodeList')

        def season_parser(value):
            """Parse ``value`` with the default separators into a ``seasonList``."""
            return list_parser(value, propertyListName='seasonList')

        class ResolutionCollisionValidator(object):
            """Accept a match only when its second group is shorter than 3 characters."""

            def validate(self, prop, string, node, match, entry_start,
                         entry_end):
                """Return True when group 2 of ``match`` has at most two characters."""
                second_group = match.group(2)
                return len(second_group) <= 2

        self.container.register_property(None,
                                         r'(' + season_words_re.pattern + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + season_words_re.pattern + '?)',
                                         confidence=1.0,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            season_words_re.pattern + '?)' + sep,
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=ChainedValidator(
                DefaultValidator(),
                FormatterValidator(
                    'season', lambda x: len(x) > 1
                    if hasattr(x, '__len__') else False)))

        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' +
            digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral +
            ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_e,
                'season': season_parser
            },
            validator=NoValidator())
        #self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            sep + r'((?P<season>' + digital_numeral + ')' + sep + '' +
            '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep +
            '[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'((?P<season>' + digital_numeral + ')' +
            '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' +
            digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=NoValidator())

        self.container.register_property(None,
                                         r'((?P<episodeNumber>' +
                                         digital_numeral + ')' + sep +
                                         '?v(?P<version>\d+))',
                                         confidence=0.6,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep + '?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep +
            '?v(?P<version>\d+))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            episode_words_re.pattern + '?)',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + ')' + sep + '?v(?P<version>\d+))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + ')' + sep + '?v(?P<version>\d+))',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        self.container.register_property('episodeNumber',
                                         r'^ ?(\d{2})' + sep,
                                         confidence=0.4,
                                         formatter=parse_numeral)
        self.container.register_property('episodeNumber',
                                         r'^ ?(\d{2})' + sep,
                                         confidence=0.4,
                                         formatter=parse_numeral)
        self.container.register_property('episodeNumber',
                                         r'^ ?0(\d{1,2})' + sep,
                                         confidence=0.4,
                                         formatter=parse_numeral)
        self.container.register_property('episodeNumber',
                                         sep + r'(\d{2}) ?$',
                                         confidence=0.4,
                                         formatter=parse_numeral)
        self.container.register_property('episodeNumber',
                                         sep + r'0(\d{1,2}) ?$',
                                         confidence=0.4,
                                         formatter=parse_numeral)

        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral +
            ')(?:' + sep + '?(?:episodes?|eps?))?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral +
            ')' + sep + '?' + of_separators_re.pattern + sep +
            '?(?P<episodeCount>' + numeral + '))',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(None,
                                         r'((?:seasons?|saisons?|s)' + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + of_separators_re.pattern + sep +
                                         '?(?P<seasonCount>' + numeral + '))',
                                         confidence=0.7,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?P<season>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral +
            ')' + sep + '?(?:seasons?|saisons?|s))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_canonical_properties('other',
                                                     'FiNAL',
                                                     'Complete',
                                                     validator=WeakValidator())

        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral +
            ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'other': lambda x: 'Complete',
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
Example #8
0
    def __init__(self):
        """Register release-property patterns and their quality scores.

        Patterns are registered on ``self.container`` and per-value scores on
        ``self.qualities``. Statements are kept in their original order so
        that the registration order of the properties does not change.

        Every regex fragment containing a backslash is written as a raw
        string; the resulting pattern text is identical to what the previous
        non-raw literals produced, without relying on invalid escape
        sequences.
        """
        # NOTE(review): 35 is handed to Transformer.__init__ unchanged;
        # presumably a priority/ordering value - confirm against Transformer.
        Transformer.__init__(self, 35)

        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()

        def register_property(propname, props, **kwargs):
            """Register every ``{canonical_form: patterns}`` entry of *props*.

            A value is either a list of patterns, or a ``(patterns, options)``
            tuple whose options override *kwargs* for that entry only.
            ``canonical_form`` is always set from the dict key.
            """
            for canonical_form, patterns in props.items():
                entry_kwargs = dict(kwargs)
                if isinstance(patterns, tuple):
                    patterns, pattern_kwargs = patterns
                    entry_kwargs.update(pattern_kwargs)
                entry_kwargs['canonical_form'] = canonical_form
                self.container.register_property(propname, *patterns, **entry_kwargs)

        def register_quality(propname, quality_dict):
            """Register every ``{canonical_form: quality}`` entry of *quality_dict*."""
            for canonical_form, quality in quality_dict.items():
                self.qualities.register_quality(propname, canonical_form, quality)

        register_property('container', {'mp4': ['MP4']})

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
                                     'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
                                     #'Telesync': ['TELESYNC', 'PDVD'],
                                     'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
                                     'Workprint': ['WORKPRINT', 'WP'],
                                     'Telecine': ['TELECINE', 'TC'],
                                     'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                                     'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                     'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                     'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'],
                                     'VOD': ['VOD', 'VOD-Rip'],
                                     'WEBRip': ['WEB-Rip'],
                                     'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
                                     'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'],
                                     'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                     })

        register_quality('format', {'VHS': -100,
                                    'Cam': -90,
                                    'Telesync': -80,
                                    'Workprint': -70,
                                    'Telecine': -60,
                                    'PPV': -50,
                                    'TV': -30,
                                    'DVB': -20,
                                    'DVD': 0,
                                    'HDTV': 20,
                                    'VOD': 40,
                                    'WEBRip': 50,
                                    'WEB-DL': 60,
                                    'HD-DVD': 80,
                                    'BluRay': 100
                                    })

        # Optional "<width><separator>" prefix shared by all screenSize
        # patterns. The accepted separators are the two characters "|/",
        # "x" or "*".
        # NOTE(review): the previous non-raw literal spelled the first
        # alternative '\\|', which is the regex `\|` (a literal pipe), not a
        # backslash alternative. That behaviour is preserved here; a
        # backslash separator was probably intended - confirm before changing.
        width_prefix = r'(?:\d{3,}(?:\|\/|x|\*))?'

        register_property('screenSize', {'360p': [width_prefix + '360(?:i|p?x?)'],
                                         '368p': [width_prefix + '368(?:i|p?x?)'],
                                         '480p': [width_prefix + '480(?:i|p?x?)'],
                                         #'480p': (['hr'], {'confidence': 0.2}), # duplicate dict key
                                         '576p': [width_prefix + '576(?:i|p?x?)'],
                                         '720p': [width_prefix + '720(?:i|p?x?)'],
                                         '900p': [width_prefix + '900(?:i|p?x?)'],
                                         '1080i': [width_prefix + '1080i'],
                                         '1080p': [width_prefix + '1080p?x?'],
                                         '4K': [width_prefix + '2160(?:i|p?x?)']
                                         },
                          validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))

        class ResolutionValidator(object):
            """Validator for free-form resolutions; currently accepts every match.

            The separator/neighbour check that used to live here is disabled
            and kept below as a comment for reference.
            """
            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                """Return True unconditionally (the original check is disabled)."""
                # Disabled implementation: make sure the match is surrounded
                # by separators, or by another entry.
                #
                # span = _get_span(prop, match)
                # span = _trim_span(span, string[span[0]:span[1]])
                # start, end = span
                #
                # sep_start = start <= 0 or string[start - 1] in sep
                # sep_end = end >= len(string) or string[end] in sep
                # start_by_other = start in entry_end
                # end_by_other = end in entry_start
                # if (sep_start or start_by_other) and (sep_end or end_by_other):
                #     return True
                # return False
                return True

        _digits_re = re.compile(r'\d+')

        def resolution_formatter(value):
            """Join every run of digits found in *value* with 'x'."""
            digits = _digits_re.findall(value)
            return 'x'.join(digits)

        self.container.register_property('screenSize', r'\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))

        register_quality('screenSize', {'360p': -300,
                                        '368p': -200,
                                        '480p': -100,
                                        '576p': 0,
                                        '720p': 100,
                                        '900p': 130,
                                        '1080i': 180,
                                        '1080p': 200,
                                        '4K': 400
                                        })

        _videoCodecProperty = {'Real': [r'Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
                               'Mpeg2': ['Mpeg2'],
                               'DivX': ['DVDivX', 'DivX'],
                               'XviD': ['XviD'],
                               'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
                               'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
                               }

        register_property('videoCodec', _videoCodecProperty)

        register_quality('videoCodec', {'Real': -50,
                                        'Mpeg2': -30,
                                        'DivX': -10,
                                        'XviD': 0,
                                        'h264': 100,
                                        'h265': 150
                                        })

        def needs_video_codec():
            """Return a new LeavesValidator whose check is ``'videoCodec' in node.guess``."""
            return LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])

        def needs_audio_codec(codec):
            """Return a new LeavesValidator whose check is ``node.guess.get('audioCodec') == codec``."""
            return LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == codec])

        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        self.container.register_property('videoProfile', 'BP', validator=needs_video_codec())
        self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=needs_video_codec())
        self.container.register_property('videoProfile', 'MP', validator=needs_video_codec())
        self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=needs_video_codec())
        self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
        self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
        self.container.register_property('videoProfile', 'Hi422P', validator=needs_video_codec())
        self.container.register_property('videoProfile', 'Hi444PP', validator=needs_video_codec())

        register_quality('videoProfile', {'BP': -20,
                                          'XP': -10,
                                          'MP': 0,
                                          'HP': 10,
                                          '10bit': 15,
                                          'Hi422P': 25,
                                          'Hi444PP': 35
                                          })

        # has nothing to do here (or on filenames for that matter), but some
        # releases use it and it helps to identify release groups, so we adapt
        register_property('videoApi', {'DXVA': ['DXVA']})

        register_property('audioCodec', {'MP3': ['MP3', 'LAME', r'LAME(?:\d)+-(?:\d)+'],
                                         'DolbyDigital': ['DD'],
                                         'AAC': ['AAC'],
                                         'AC3': ['AC3'],
                                         'Flac': ['FLAC'],
                                         'DTS': (['DTS'], {'validator': LeftValidator()}),
                                         'TrueHD': ['True-HD']
                                         })

        register_quality('audioCodec', {'MP3': 10,
                                        'DolbyDigital': 30,
                                        'AAC': 35,
                                        'AC3': 40,
                                        'Flac': 45,
                                        'DTS': 60,
                                        'TrueHD': 70
                                        })

        self.container.register_property('audioProfile', 'HD', validator=needs_audio_codec('DTS'))
        self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=needs_audio_codec('DTS'))
        self.container.register_property('audioProfile', 'HE', validator=needs_audio_codec('AAC'))
        self.container.register_property('audioProfile', 'LC', validator=needs_audio_codec('AAC'))
        self.container.register_property('audioProfile', 'HQ', validator=needs_audio_codec('AC3'))

        register_quality('audioProfile', {'HD': 20,
                                          'HDMA': 50,
                                          'LC': 0,
                                          'HQ': 0,
                                          'HE': 20
                                          })

        register_property('audioChannels', {'7.1': [r'7[\W_]1', '7ch', '8ch'],
                                            '5.1': [r'5[\W_]1', '5ch', '6ch'],
                                            '2.0': [r'2[\W_]0', '2ch', 'stereo'],
                                            '1.0': [r'1[\W_]0', '1ch', 'mono']
                                            })

        register_quality('audioChannels', {'7.1': 200,
                                           '5.1': 100,
                                           '2.0': 0,
                                           '1.0': -100
                                           })

        self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

        # Eight hexadecimal characters.
        self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)

        weak_episode_words = ['pt', 'part']
        self.container.register_property(None, '(' + build_or_pattern(weak_episode_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)

        register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                    'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
                                    'DualAudio': ['Dual-Audio'],
                                    'WideScreen': ['ws', 'wide-screen'],
                                    'Netflix': ['Netflix', 'NF']
                                    })

        self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=NeighborValidator())
        self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
        self.container.register_property('other', 'Fansub', canonical_form='Fansub')
        self.container.register_property('other', 'Fastsub', canonical_form='Fastsub')
        self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
        self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
        self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')

        self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
                                                     'DDC',
                                                     'HR', 'PAL', 'SECAM', 'NTSC')
        self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())

        # Every registered 'format' pattern followed by "Scr"/"Screener" is
        # additionally registered as other=Screener.
        for prop in self.container.get_properties('format'):
            self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')

        # Each known file extension is also registered as a low-confidence
        # 'container' pattern.
        for exts in (subtitle_exts, info_exts, video_exts):
            for container in exts:
                self.container.register_property('container', container, confidence=0.3)
Example #9
0
    def __init__(self):
        """Register the season/episode regular expressions on ``self.container``.

        Builds the separator/word/marker regexes, defines the list parsers
        used as formatters, then registers every pattern in the original
        order. The pattern strings produced are identical to the previous
        ones; only the way they are assembled changed (shared fragments, raw
        strings for parts containing backslashes).
        """
        # NOTE(review): 20 is handed to Transformer.__init__ unchanged;
        # presumably a priority/ordering value - confirm against Transformer.
        Transformer.__init__(self, 20)

        range_separators = ['-', 'to', 'a']
        discrete_separators = ['&', 'and', 'et']
        of_separators = ['of', 'sur', '/', '\\']

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        # `sep` with every range separator removed also counts as a discrete
        # separator.
        discrete_sep = sep
        for range_separator in range_separators:
            discrete_sep = discrete_sep.replace(range_separator, '')
        discrete_separators.append(discrete_sep)
        all_separators = list(range_separators)
        all_separators.extend(discrete_separators)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
        discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
        all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

        def list_parser(value, propertyListName, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
            """Parse a textual list/range of numerals such as ``1-3`` or ``1 & 2``.

            :param value: matched text to parse.
            :param propertyListName: key under which the full list is returned.
            :param discrete_separators_re: regex splitting *value* into elements.
            :param range_separators_re: regex marking a range inside an element.
            :param allow_discrete: when False, values lower than the last kept
                value are dropped.
            :param fill_gaps: when True, the result becomes the full integer
                range from its minimum to its maximum.
            :return: ``None`` when nothing was parsed, the single value when
                only one remains, otherwise
                ``{None: first_value, propertyListName: all_values}``.
            """
            discrete_elements = [x.strip() for x in discrete_separators_re.split(value) if x != '']

            # Re-attach range separators that the discrete split isolated
            # from their operands ("1", "-", "3" -> "1-3").
            proper_discrete_elements = []
            i = 0
            while i < len(discrete_elements):
                if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]):
                    proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2])
                    i += 3
                else:
                    element = discrete_elements[i]
                    match = range_separators_re.search(element)
                    if match and match.start() == 0 and proper_discrete_elements:
                        # Leading separator: continue the previous element.
                        # The previous element is the LAST one collected; it
                        # was formerly addressed as [i-1], which raised
                        # IndexError as soon as an earlier three-way merge
                        # had made the two lists differ in length.
                        proper_discrete_elements[-1] = proper_discrete_elements[-1] + element
                    elif match and match.end() == len(element) and i + 1 < len(discrete_elements):
                        # Trailing separator: join with the next element.
                        # The next element is still visited on its own, as
                        # before; duplicates are removed further down.
                        proper_discrete_elements.append(element + discrete_elements[i + 1])
                    else:
                        proper_discrete_elements.append(element)
                    i += 1

            discrete_elements = proper_discrete_elements

            ret = []

            for discrete_element in discrete_elements:
                range_values = [x.strip() for x in range_separators_re.split(discrete_element) if x != '']
                if len(range_values) > 1:
                    # Expand every consecutive pair of bounds, inclusive.
                    for low_text, high_text in zip(range_values, range_values[1:]):
                        start_range_ep = parse_numeral(low_text)
                        end_range_ep = parse_numeral(high_text)
                        for range_ep in range(start_range_ep, end_range_ep + 1):
                            if range_ep not in ret:
                                ret.append(range_ep)
                else:
                    discrete_value = parse_numeral(discrete_element)
                    if discrete_value not in ret:
                        ret.append(discrete_value)

            if len(ret) > 1:
                if not allow_discrete:
                    # Keep the first value, then only values that are not
                    # lower than the last value kept.
                    valid_ret = [ret[0]]
                    for candidate in ret[1:]:
                        if candidate >= valid_ret[-1]:
                            valid_ret.append(candidate)
                    ret = valid_ret
                if fill_gaps:
                    ret = list(range(min(ret), max(ret) + 1))
                if len(ret) > 1:
                    return {None: ret[0], propertyListName: ret}
            if ret:
                return ret[0]
            return None

        # Compiled once instead of on every parser call.
        x_separator_re = re.compile('x', re.IGNORECASE)
        e_separator_re = re.compile('e', re.IGNORECASE)

        def episode_parser_x(value):
            """Parse an episode list whose elements are separated by 'x'."""
            return list_parser(value, 'episodeList', discrete_separators_re=x_separator_re)

        def episode_parser_e(value):
            """Parse an episode list whose elements are separated by 'e', filling gaps."""
            return list_parser(value, 'episodeList', discrete_separators_re=e_separator_re, fill_gaps=True)

        def episode_parser(value):
            """Parse an episode list using the default separators."""
            return list_parser(value, 'episodeList')

        def season_parser(value):
            """Parse a season list using the default separators."""
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Accept a match only when its second group is shorter than 3 characters.

            NOTE(review): going by the name, this presumably keeps
            resolutions such as 1280x720 from being read as season x
            episode - confirm.
            """
            def validate(self, prop, string, node, match, entry_start, entry_end):
                return len(match.group(2)) < 3 #limit

        # Shared pattern fragments; concatenating them yields exactly the
        # same pattern text as the previous inline expressions.
        opt_sep = sep + '?'
        numeral_list = digital_numeral + '(?:' + opt_sep + all_separators_re.pattern + opt_sep + digital_numeral + ')*'
        version_suffix = opt_sep + r'v(?P<version>\d+)'
        of_pattern = opt_sep + of_separators_re.pattern + opt_sep

        # --- season words ("season 1", "saison 1-3", ...) ---
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + opt_sep + '(?P<season>' + numeral + ')' + opt_sep + season_words_re.pattern + '?)',
            confidence=1.0,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + opt_sep + '(?P<season>' + numeral_list + ')' + opt_sep + season_words_re.pattern + '?)' + sep,
            confidence=1.0,
            formatter={None: parse_numeral, 'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

        # --- combined season/episode forms (s01e02, 1x02) ---
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + opt_sep + '(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + opt_sep + '[e-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser},
            validator=NoValidator())
        self.container.register_property(
            None,
            sep + r'((?P<season>' + digital_numeral + ')' + sep + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' + numeral_list + '))',
            confidence=0.6,
            formatter={None: parse_numeral, 'season': season_parser},
            validator=NoValidator())

        # --- episode number, optionally with a version ("12v2", "ep 12") ---
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + digital_numeral + ')' + version_suffix + ')',
            confidence=0.6,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + opt_sep + '(?P<episodeNumber>' + numeral + ')' + opt_sep + ')',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + opt_sep + '(?P<episodeNumber>' + numeral + ')' + version_suffix + ')',
            confidence=0.7,
            formatter=parse_numeral)

        # --- episode markers / words followed by a list of numbers ---
        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + numeral_list + '))',
            confidence=0.6,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + opt_sep + '(?P<episodeNumber>' + numeral_list + ')' + opt_sep + episode_words_re.pattern + '?)',
            confidence=0.8,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + version_suffix + ')',
            confidence=0.6,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + opt_sep + '(?P<episodeNumber>' + digital_numeral + ')' + version_suffix + ')',
            confidence=0.8,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        # --- bare two-digit numbers at the start or end of the string ---
        # NOTE(review): the first pattern is registered twice, exactly as in
        # the original; the effect of a duplicate registration on
        # PropertiesContainer is not visible here, so it is left in place.
        self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)

        # --- "X of Y" counts for episodes and seasons ---
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + numeral + ')' + of_pattern + '(?P<episodeCount>' + numeral + ')(?:' + opt_sep + '(?:episodes?|eps?))?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:episodes?|eps?)' + opt_sep + '(?P<episodeNumber>' + numeral + ')' + of_pattern + '(?P<episodeCount>' + numeral + '))',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:seasons?|saisons?|s)' + opt_sep + '(?P<season>' + numeral + ')' + of_pattern + '(?P<seasonCount>' + numeral + '))',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?P<season>' + numeral + ')' + of_pattern + '(?P<seasonCount>' + numeral + ')' + opt_sep + '(?:seasons?|saisons?|s))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

        # "<season>xAll" is reported as other=Complete for that season.
        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
Example #10
0
# Module-level property registrations on `container`.
# NOTE(review): going by the calls below, the arguments appear to be
# (property name, canonical form, *patterns) - confirm against the
# register_property signature in use here.
container.register_property('format', 'HDTV', 'HD-TV')
container.register_property('format', 'DVB', 'DVB-Rip', 'DVB', 'PD-TV')
container.register_property('format', 'WEBRip', 'WEB-Rip')
container.register_property('format', 'VHS', 'VHS')
container.register_property('format', 'WEB-DL', 'WEB-DL')

# The screenSize patterns are raw strings so that `\d`, `\/` and `\*` are no
# longer invalid escape sequences in a normal literal. The pattern text is
# unchanged: the previous '\\|' spelled the regex `\|` (a literal pipe), and
# that is what is written here.
# NOTE(review): a backslash separator was probably intended there - confirm
# before changing the pattern.
container.register_property('screenSize', '360p', r'(?:\d{3,}(?:\|\/|x|\*))?360(?:i|p?x?)')
container.register_property('screenSize', '368p', r'(?:\d{3,}(?:\|\/|x|\*))?368(?:i|p?x?)')
container.register_property('screenSize', '480p', r'(?:\d{3,}(?:\|\/|x|\*))?480(?:i|p?x?)')
container.register_property('screenSize', '576p', r'(?:\d{3,}(?:\|\/|x|\*))?576(?:i|p?x?)')
container.register_property('screenSize', '720p', r'(?:\d{3,}(?:\|\/|x|\*))?720(?:i|p?x?)')
# NOTE(review): the 1080p pattern below can also match the text "1080i",
# overlapping with the 1080i pattern; left as in the original.
container.register_property('screenSize', '1080i', r'(?:\d{3,}(?:\|\/|x|\*))?1080i(?:i|p?x?)')
container.register_property('screenSize', '1080p', r'(?:\d{3,}(?:\|\/|x|\*))?1080(?:i|p?x?)')
container.register_property('screenSize', '4K', r'(?:\d{3,}(?:\|\/|x|\*))?2160(?:i|p?x?)')

# Suffixes accepted after a codec name, e.g. "XviD-HP".
profile_pattern = build_or_pattern(["BS", "EP", "MP", "HP", "AVC"])

container.register_property('videoCodec', 'XviD', 'XviD', 'XviD-' + profile_pattern)
container.register_property('videoCodec', 'DivX', 'DVDivX', 'DivX', 'DivX-' + profile_pattern)
container.register_property('videoCodec', 'h264', '[hx]-264', '[hx]-264-' + profile_pattern)

container.register_property('videoCodec', 'Rv10', 'Rv10')
container.register_property('videoCodec', 'Mpeg2', 'Mpeg2')

# has nothing to do here (or on filenames for that matter), but some
# releases use it and it helps to identify release groups, so we adapt
container.register_property('videoApi', 'DXVA', 'DXVA')

container.register_property('audioCodec', 'AC3', 'AC3')
container.register_property('audioCodec', 'DTS', 'DTS', 'DTS-HD')
container.register_property('audioCodec', 'AAC', 'HE-AAC', 'AAC-HE', 'LC-AAC', 'AAC-LC', 'AAC')
    def __init__(self):
        """Create the container holding the season/episode regular expressions.

        Every pattern is assembled by string concatenation from the shared
        fragments ``sep``, ``numeral``, ``digital_numeral`` and
        ``all_separators_re`` (defined outside this method) and registered on
        ``self.container`` together with a confidence, a formatter and,
        for some patterns, an explicit validator.
        """
        # NOTE(review): 20 is handed straight to Transformer.__init__;
        # presumably the transformer priority -- confirm.
        Transformer.__init__(self, 20)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

        def episode_parser_x(value):
            """Parse an episode list whose discrete separator is 'x'."""
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))

        def episode_parser_e(value):
            """Parse an episode list whose discrete separator is 'e' (fill_gaps enabled)."""
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile('e', re.IGNORECASE), fill_gaps=True)

        def episode_parser(value):
            """Parse an episode list using list_parser's default separators."""
            return list_parser(value, 'episodeList')

        def season_parser(value):
            """Parse a season list using list_parser's default separators."""
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Validator for the "<season>x<episode>" patterns registered below."""

            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                # Invalidate when season or episode is more than 100.
                # NOTE(review): as written the match is accepted as soon as
                # EITHER value is below 100 -- confirm that 'or' is intended.
                try:
                    season_value = season_parser(match.group(2))
                    episode_value = episode_parser_x(match.group(3))
                    return season_value < 100 or episode_value < 100
                except Exception:
                    # This may occur for 1xAll or patterns like this.
                    # Only ordinary errors are treated as "valid";
                    # KeyboardInterrupt / SystemExit are no longer swallowed.
                    return True

        # "season 1", "saison 2", ... and lists of seasons introduced by a season word.
        self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)
        self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep, confidence=1.0, formatter={None: parse_numeral, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

        # Season marker followed by 'e'-prefixed episode numbers ("s01e02", "s01e02e03", "s01e02-03").
        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser}, validator=NoValidator())
        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser, 'season': season_parser}, validator=NoValidator())

        # "<season>x<episode>" forms, with and without separators around the 'x'.
        self.container.register_property(None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'season': season_parser}, validator=NoValidator())

        # Episode numbers carrying a version suffix ("12v2", "ep 12 v2") and bare versions.
        # Fragments containing \d are raw strings so they are not invalid Python escapes.
        self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)
        self.container.register_property('version', sep + r'(V\d+)' + sep, confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)

        # Episode lists introduced by an episode marker or an episode word.
        self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        # Low-confidence two-digit / zero-padded numbers at the very start or end of the string.
        self.container.register_property('episodeNumber', r'^' + sep + r'+(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
        self.container.register_property('episodeNumber', r'^' + sep + r'+0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
        self.container.register_property('episodeNumber', sep + r'(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)
        self.container.register_property('episodeNumber', sep + r'0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)

        # "<n> of <count>" forms for episodes and seasons.
        self.container.register_property(None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))', confidence=0.7, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

        # "<season>xAll": the 'other' group is always formatted as 'Complete'.
        self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))', confidence=1.0, formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
Example #12
0
from guessit.patterns.numeral import parse_numeral

# Strings that join the two ends of a range.
range_separators = ['-', 'to', 'a']
# Strings that sit between individually listed values.
discrete_separators = ['&', 'and', 'et']
# Dot cannot serve as a discrete_separator
excluded_separators = ['.']

# Derive the generic discrete separator from ``sep`` by deleting every
# range-separator string and every excluded string from its text.
discrete_sep = sep
for range_separator in range_separators:
    discrete_sep = discrete_sep.replace(range_separator, '')
for excluded_separator in excluded_separators:
    discrete_sep = discrete_sep.replace(excluded_separator, '')
discrete_separators += [discrete_sep]

# Range separators first, then the (now extended) discrete separators.
all_separators = range_separators + discrete_separators

# One case-insensitive alternation regex per separator family.
range_separators_re, discrete_separators_re, all_separators_re = (
    re.compile(build_or_pattern(separator_group), re.IGNORECASE)
    for separator_group in (range_separators, discrete_separators, all_separators)
)


def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
    discrete_elements = filter(lambda x: x != '', discrete_separators_re.split(value))
    discrete_elements = [x.strip() for x in discrete_elements]

    proper_discrete_elements = []
    i = 0
    while i < len(discrete_elements):
        if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]):
            proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2])
            i += 3
        else:
    def __init__(self):
        """Create the container holding the weak (number-only) episode patterns.

        Patterns are assembled from the shared fragments ``sep``, ``numeral``
        and ``all_separators_re`` (defined outside this method) and registered
        on ``self.container``. Some registrations carry a ``disabler`` callable
        that receives the options mapping and consults its
        ``episode_prefer_number`` entry.
        """
        # NOTE(review): 15 is handed straight to Transformer.__init__;
        # presumably the transformer priority -- confirm.
        Transformer.__init__(self, 15)

        of_separators = ["of", "sur", "/", "\\"]
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False, remove_duplicates=True)

        episode_words = ["episodes?"]

        def episode_list_parser(value):
            """Parse *value* as an episode list via list_parser."""
            return list_parser(value, "episodeList")

        def season_episode_parser(episode_number):
            """Interpret a bare number as an episode, or as season + episode.

            Returns None when the number passes ``valid_year`` or when the
            derived season exceeds 50; returns the number itself when it is
            not greater than 100; otherwise returns a dict with
            ``season = n // 100`` and ``episodeNumber = n % 100``.
            """
            epnum = parse_numeral(episode_number)
            if valid_year(epnum):
                return None
            if epnum > 100:
                season, epnum = epnum // 100, epnum % 100
                # episodes which have a season > 50 are most likely errors
                # (Simpson is at 25!)
                if season > 50:
                    return None
                return {"season": season, "episodeNumber": epnum}
            return epnum

        def number_not_preferred(options):
            """Disabler: true unless options has a truthy 'episode_prefer_number'."""
            return not options.get("episode_prefer_number") if options else True

        # One 2-3 digit number, optionally followed by more of them joined by
        # any list/range separator. Raw strings keep \d from being an invalid
        # Python string escape; the pattern text itself is unchanged.
        episode_list = (
            r"(\d{2,3}(?:" + sep + "?" + all_separators_re.pattern + sep + r"?\d{2,3})*)"
        )

        self.container.register_property(
            ["episodeNumber", "season"],
            "[0-9]{2,4}",
            confidence=0.6,
            formatter=season_episode_parser,
            disabler=lambda options: options.get("episode_prefer_number") if options else False,
        )
        self.container.register_property(
            ["episodeNumber", "season"], "[0-9]{4}", confidence=0.6, formatter=season_episode_parser
        )
        # "episode 12" style: episode word followed by a numeral.
        self.container.register_property(
            None,
            "(" + build_or_pattern(episode_words) + sep + "?(?P<episodeNumber>" + numeral + "))[^0-9]",
            confidence=0.4,
            formatter=parse_numeral,
        )
        # "<n> of <count>".
        self.container.register_property(
            None,
            r"(?P<episodeNumber>" + numeral + ")"
            + sep + "?" + of_separators_re.pattern + sep
            + "?(?P<episodeCount>" + numeral + ")",
            confidence=0.6,
            formatter=parse_numeral,
        )
        # Episode list preceded by a non-digit character.
        self.container.register_property(
            "episodeNumber",
            "[^0-9]" + episode_list,
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=number_not_preferred,
        )
        # Episode list at the very start of the string.
        self.container.register_property(
            "episodeNumber",
            r"^" + sep + "?" + episode_list + sep,
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=number_not_preferred,
        )
        # Episode list at the very end of the string.
        self.container.register_property(
            "episodeNumber",
            sep + episode_list + sep + "?$",
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=number_not_preferred,
        )
Example #14
0
    def __init__(self):
        """Create the container holding the season/episode regular expressions.

        Patterns are assembled from the shared fragments ``sep``, ``numeral``,
        ``digital_numeral`` and ``all_separators_re`` (defined outside this
        method). Sub-patterns that occur in several registrations are named
        once below; every regex fragment containing a backslash escape is a
        raw string so that it is not an invalid Python string escape.
        """
        # NOTE(review): 20 is handed straight to Transformer.__init__;
        # presumably the transformer priority -- confirm.
        Transformer.__init__(self, 20)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(
            build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        season_words_re = re.compile(build_or_pattern(season_words),
                                     re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words),
                                      re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers),
                                       re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers),
                                        re.IGNORECASE)

        def episode_parser_x(value):
            """Parse an episode list whose discrete separator is 'x'."""
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=re.compile(
                                   'x', re.IGNORECASE))

        def episode_parser_e(value):
            """Parse an episode list whose discrete separator is 'e' (fill_gaps enabled)."""
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=re.compile(
                                   'e', re.IGNORECASE),
                               fill_gaps=True)

        def episode_parser(value):
            """Parse an episode list using list_parser's default separators."""
            return list_parser(value, 'episodeList')

        def season_parser(value):
            """Parse a season list using list_parser's default separators."""
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Accept a match only when its second group is shorter than 3 characters."""

            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                # NOTE(review): group 2 is presumably the <season> capture of
                # the patterns this validator is attached to -- that holds
                # only if the fragments before it add no capturing groups.
                return len(match.group(2)) < 3  # limit

        register = self.container.register_property

        # --- shared sub-patterns -------------------------------------------
        season_word = season_words_re.pattern
        episode_word = episode_words_re.pattern
        season_marker = season_markers_re.pattern
        episode_marker = episode_markers_re.pattern
        # A digital numeral optionally followed by more of them, each joined
        # by any list/range separator with optional ``sep`` on both sides.
        numeral_list = (digital_numeral + '(?:' + sep + '?' +
                        all_separators_re.pattern + sep + '?' +
                        digital_numeral + ')*')
        # Optional ``sep`` then "v<digits>"; also closes the outer group.
        version_suffix = sep + r'?v(?P<version>\d+))'
        # An "of" separator with optional ``sep`` on both sides.
        of_link = sep + '?' + of_separators_re.pattern + sep + '?'
        # Season marker + season, then the start of an 'e'-prefixed episode group.
        marker_season_episode = (r'(' + season_marker + '(?P<season>' +
                                 digital_numeral + ')[^0-9]?' + sep +
                                 '?(?P<episodeNumber>(?:e')
        # Lists of exactly-two-digit numbers and of zero-prefixed numbers.
        two_digit_list = (r'(\d{2}(?:' + sep + '?' +
                          all_separators_re.pattern + sep + r'?\d{2})*)')
        zero_padded_list = (r'0(\d{1,2}(?:' + sep + '?' +
                            all_separators_re.pattern + sep +
                            r'?0\d{1,2})*)')

        # --- season word + season number / season list -----------------------
        register(None,
                 r'(' + season_word + sep + '?(?P<season>' + numeral + ')' +
                 sep + '?' + season_word + '?)',
                 confidence=1.0,
                 formatter=parse_numeral)
        register(None,
                 r'(' + season_word + sep + '?(?P<season>' + numeral_list +
                 ')' + sep + '?' + season_word + '?)' + sep,
                 confidence=1.0,
                 formatter={
                     None: parse_numeral,
                     'season': season_parser
                 },
                 validator=ChainedValidator(
                     DefaultValidator(),
                     FormatterValidator(
                         'season', lambda x: len(x) > 1
                         if hasattr(x, '__len__') else False)))

        # --- season marker followed by 'e'-prefixed episodes -----------------
        register(None,
                 marker_season_episode + digital_numeral + '(?:' + sep +
                 '?[e-]' + digital_numeral + ')*)))',
                 confidence=1.0,
                 formatter={
                     None: parse_numeral,
                     'episodeNumber': episode_parser_e,
                     'season': season_parser
                 },
                 validator=NoValidator())
        register(None,
                 marker_season_episode + numeral_list + ')))',
                 confidence=1.0,
                 formatter={
                     None: parse_numeral,
                     'episodeNumber': episode_parser,
                     'season': season_parser
                 },
                 validator=NoValidator())

        # --- "<season>x<episode>" forms ----------------------------------------
        register(None,
                 sep + r'((?P<season>' + digital_numeral + ')' + sep +
                 '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' +
                 sep + '[x-]' + digital_numeral + ')*)))',
                 confidence=1.0,
                 formatter={
                     None: parse_numeral,
                     'episodeNumber': episode_parser_x,
                     'season': season_parser
                 },
                 validator=ChainedValidator(DefaultValidator(),
                                            ResolutionCollisionValidator()))
        register(None,
                 r'((?P<season>' + digital_numeral + ')' +
                 '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' +
                 digital_numeral + ')*)))',
                 confidence=1.0,
                 formatter={
                     None: parse_numeral,
                     'episodeNumber': episode_parser_x,
                     'season': season_parser
                 },
                 validator=ChainedValidator(DefaultValidator(),
                                            ResolutionCollisionValidator()))
        register(None,
                 r'(' + season_marker + '(?P<season>' + numeral_list + '))',
                 confidence=0.6,
                 formatter={
                     None: parse_numeral,
                     'season': season_parser
                 },
                 validator=NoValidator())

        # --- versions ------------------------------------------------------------
        register(None,
                 r'((?P<episodeNumber>' + digital_numeral + ')' +
                 version_suffix,
                 confidence=0.6,
                 formatter=parse_numeral)
        register('version',
                 sep + r'(V\d+)' + sep,
                 confidence=0.6,
                 formatter=parse_numeral,
                 validator=NoValidator())
        register(None,
                 r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral +
                 ')' + sep + '?)',
                 confidence=0.7,
                 formatter=parse_numeral)
        register(None,
                 r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral +
                 ')' + version_suffix,
                 confidence=0.7,
                 formatter=parse_numeral)

        # --- episode marker / episode word + episode list -----------------------
        register(None,
                 r'(' + episode_marker + '(?P<episodeNumber>' + numeral_list +
                 '))',
                 confidence=0.6,
                 formatter={
                     None: parse_numeral,
                     'episodeNumber': episode_parser
                 })
        register(None,
                 r'(' + episode_word + sep + '?(?P<episodeNumber>' +
                 numeral_list + ')' + sep + '?' + episode_word + '?)',
                 confidence=0.8,
                 formatter={
                     None: parse_numeral,
                     'episodeNumber': episode_parser
                 })

        register(None,
                 r'(' + episode_marker + '(?P<episodeNumber>' +
                 digital_numeral + ')' + version_suffix,
                 confidence=0.6,
                 formatter={
                     None: parse_numeral,
                     'episodeNumber': episode_parser
                 })
        register(None,
                 r'(' + episode_word + sep + '?(?P<episodeNumber>' +
                 digital_numeral + ')' + version_suffix,
                 confidence=0.8,
                 formatter={
                     None: parse_numeral,
                     'episodeNumber': episode_parser
                 })

        # --- bare numbers anchored at the start or end of the string ------------
        register('episodeNumber',
                 r'^' + sep + '+' + two_digit_list + sep,
                 confidence=0.4,
                 formatter=episode_parser)
        register('episodeNumber',
                 r'^' + sep + '+' + zero_padded_list + sep,
                 confidence=0.4,
                 formatter=episode_parser)
        register('episodeNumber',
                 sep + two_digit_list + sep + '+$',
                 confidence=0.4,
                 formatter=episode_parser)
        register('episodeNumber',
                 sep + zero_padded_list + sep + '+$',
                 confidence=0.4,
                 formatter=episode_parser)

        # --- "<n> of <count>" forms ------------------------------------------------
        register(None,
                 r'((?P<episodeNumber>' + numeral + ')' + of_link +
                 '(?P<episodeCount>' + numeral + ')(?:' + sep +
                 '?(?:episodes?|eps?))?)',
                 confidence=0.7,
                 formatter=parse_numeral)
        register(None,
                 r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' +
                 numeral + ')' + of_link + '(?P<episodeCount>' + numeral +
                 '))',
                 confidence=0.7,
                 formatter=parse_numeral)
        register(None,
                 r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' +
                 numeral + ')' + of_link + '(?P<seasonCount>' + numeral +
                 '))',
                 confidence=0.7,
                 formatter=parse_numeral)
        register(None,
                 r'((?P<season>' + numeral + ')' + of_link +
                 '(?P<seasonCount>' + numeral + ')' + sep +
                 '?(?:seasons?|saisons?|s))',
                 confidence=0.7,
                 formatter=parse_numeral)

        self.container.register_canonical_properties('other',
                                                     'FiNAL',
                                                     'Complete',
                                                     validator=WeakValidator())

        # "<season>xAll": the 'other' group is always formatted as 'Complete'.
        register(None,
                 r'[^0-9]((?P<season>' + digital_numeral +
                 ')[^0-9 .-]?-?(?P<other>xAll))',
                 confidence=1.0,
                 formatter={
                     None: parse_numeral,
                     'other': lambda x: 'Complete',
                     'season': season_parser
                 },
                 validator=ChainedValidator(DefaultValidator(),
                                            ResolutionCollisionValidator()))
Example #15
0
    def __init__(self):
        Transformer.__init__(self, 35)

        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()

        def register_property(propname, props, **kwargs):
            """Register every entry of *props* on ``self.container``.

            *props* maps a canonical form either to a list of patterns or to
            a ``(patterns, extra_kwargs)`` tuple. The keyword arguments of
            each registration are built from *kwargs*, then the entry's own
            ``extra_kwargs`` (which override *kwargs*), then
            ``canonical_form`` (which overrides both).
            """
            for canonical_form, entry in props.items():
                options = dict(kwargs)
                if isinstance(entry, tuple):
                    # Tuple entries carry their own keyword overrides.
                    entry, overrides = entry
                    options.update(overrides)
                options["canonical_form"] = canonical_form
                self.container.register_property(propname, *entry, **options)

        def register_quality(propname, quality_dict):
            """Register each ``canonical_form -> quality`` pair of *quality_dict*
            for *propname* on ``self.qualities``."""
            for entry in quality_dict.items():
                canonical_form, quality = entry
                self.qualities.register_quality(propname, canonical_form, quality)

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property(
            "format",
            {
                "VHS": ["VHS", "VHS-Rip"],
                "Cam": ["CAM", "CAMRip", "HD-CAM"],
                #'Telesync': ['TELESYNC', 'PDVD'],
                "Telesync": (["TS", "HD-TS"], {"confidence": 0.4}),
                "Workprint": ["WORKPRINT", "WP"],
                "Telecine": ["TELECINE", "TC"],
                "PPV": ["PPV", "PPV-Rip"],  # Pay Per View
                "TV": ["SD-TV", "SD-TV-Rip", "Rip-SD-TV", "TV-Rip", "Rip-TV"],
                "DVB": ["DVB-Rip", "DVB", "PD-TV"],
                "DVD": ["DVD", "DVD-Rip", "VIDEO-TS", "DVD-R", "DVD-9", "DVD-5"],
                "HDTV": ["HD-TV", "TV-RIP-HD", "HD-TV-RIP", "HD-RIP"],
                "VOD": ["VOD", "VOD-Rip"],
                "WEBRip": ["WEB-Rip"],
                "WEB-DL": ["WEB-DL", "WEB-HD", "WEB"],
                "HD-DVD": ["HD-DVD-Rip", "HD-DVD"],
                "BluRay": ["Blu-ray(?:-Rip)?", "B[DR]", "B[DR]-Rip", "BD[59]", "BD25", "BD50"],
            },
        )

        register_quality(
            "format",
            {
                "VHS": -100,
                "Cam": -90,
                "Telesync": -80,
                "Workprint": -70,
                "Telecine": -60,
                "PPV": -50,
                "TV": -30,
                "DVB": -20,
                "DVD": 0,
                "HDTV": 20,
                "VOD": 40,
                "WEBRip": 50,
                "WEB-DL": 60,
                "HD-DVD": 80,
                "BluRay": 100,
            },
        )

        register_property(
            "screenSize",
            {
                "360p": ["(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)"],
                "368p": ["(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)"],
                "480p": ["(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)"],
                #'480p': (['hr'], {'confidence': 0.2}), # duplicate dict key
                "576p": ["(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)"],
                "720p": ["(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)"],
                "900p": ["(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)"],
                "1080i": ["(?:\d{3,}(?:\\|\/|x|\*))?1080i"],
                "1080p": ["(?:\d{3,}(?:\\|\/|x|\*))?1080p?x?"],
                "4K": ["(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)"],
            },
            validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()),
        )

        # Matches each run of decimal digits. A raw string is used so that
        # \d is a regex escape rather than an invalid Python string escape.
        _digits_re = re.compile(r"\d+")

        def resolution_formatter(value):
            """Return the digit runs found in *value*, joined with "x".

            Any characters between the digit runs are discarded; a value
            without digits yields an empty string.
            """
            return "x".join(_digits_re.findall(value))

        self.container.register_property(
            "screenSize", "\d{3,4}-?[x\*]-?\d{3,4}", canonical_from_pattern=False, formatter=resolution_formatter
        )

        register_quality(
            "screenSize",
            {
                "360p": -300,
                "368p": -200,
                "480p": -100,
                "576p": 0,
                "720p": 100,
                "900p": 130,
                "1080i": 180,
                "1080p": 200,
                "4K": 400,
            },
        )

        _videoCodecProperty = {
            "Real": ["Rv\d{2}"],  # http://en.wikipedia.org/wiki/RealVideo
            "Mpeg2": ["Mpeg2"],
            "DivX": ["DVDivX", "DivX"],
            "XviD": ["XviD"],
            "h264": ["[hx]-264(?:-AVC)?", "MPEG-4(?:-AVC)"],
            "h265": ["[hx]-265(?:-HEVC)?", "HEVC"],
        }

        register_property("videoCodec", _videoCodecProperty)

        register_quality("videoCodec", {"Real": -50, "Mpeg2": -30, "DivX": -10, "XviD": 0, "h264": 100, "h265": 150})

        # Video profiles.  References:
        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        def _with_video_codec():
            """Return a new validator whose predicate is ``"videoCodec" in node.guess``."""
            return LeavesValidator(lambdas=[lambda node: "videoCodec" in node.guess])

        # Each guarded registration gets its own validator instance.
        # NOTE(review): presumably this keeps short tokens such as "BP"/"MP"
        # from matching unless a video codec was guessed too -- confirm
        # against LeavesValidator.
        self.container.register_property("videoProfile", "BP", validator=_with_video_codec())
        self.container.register_property(
            "videoProfile", "XP", "EP", canonical_form="XP", validator=_with_video_codec()
        )
        self.container.register_property("videoProfile", "MP", validator=_with_video_codec())
        self.container.register_property(
            "videoProfile", "HP", "HiP", canonical_form="HP", validator=_with_video_codec()
        )

        # Bit-depth markers are registered without the codec validator.
        self.container.register_property("videoProfile", "10.?bit", "Hi10P", canonical_form="10bit")
        self.container.register_property("videoProfile", "8.?bit", canonical_form="8bit")

        self.container.register_property("videoProfile", "Hi422P", validator=_with_video_codec())
        self.container.register_property("videoProfile", "Hi444PP", validator=_with_video_codec())

        # Quality score per canonical profile (MP is the zero point).
        _video_profile_quality = {
            "BP": -20,
            "XP": -10,
            "MP": 0,
            "HP": 10,
            "10bit": 15,
            "Hi422P": 25,
            "Hi444PP": 35,
        }
        register_quality("videoProfile", _video_profile_quality)

        # Strictly speaking this does not belong here (nor in file names at
        # all), but some releases carry it and it helps to identify release
        # groups, so it is recognised anyway.
        _video_api_patterns = {"DXVA": ["DXVA"]}
        register_property("videoApi", _video_api_patterns)

        # Canonical audio codec name -> patterns.  A value may also be a
        # ``(patterns, extra_kwargs)`` tuple, as used for DTS below.  The LAME
        # version pattern is a raw string so that "\d" is not treated as a
        # Python string escape.
        register_property(
            "audioCodec",
            {
                "MP3": ["MP3", "LAME", r"LAME(?:\d)+-(?:\d)+"],
                "DolbyDigital": ["DD"],
                "AAC": ["AAC"],
                "AC3": ["AC3"],
                "Flac": ["FLAC"],
                "DTS": (["DTS"], {"validator": LeftValidator()}),
                "TrueHD": ["True-HD"],
            },
        )

        # Quality score per canonical audio codec.
        register_quality(
            "audioCodec", {"MP3": 10, "DolbyDigital": 30, "AAC": 35, "AC3": 40, "Flac": 45, "DTS": 60, "TrueHD": 70}
        )

        def _only_with_audio_codec(codec_name):
            """Return a new validator whose predicate is ``node.guess.get("audioCodec") == codec_name``."""
            return LeavesValidator(lambdas=[lambda node: node.guess.get("audioCodec") == codec_name])

        # Audio profiles; each one is tied to a specific audio codec through
        # its validator predicate.
        # DTS profiles
        self.container.register_property("audioProfile", "HD", validator=_only_with_audio_codec("DTS"))
        self.container.register_property(
            "audioProfile", "HD-MA", canonical_form="HDMA", validator=_only_with_audio_codec("DTS")
        )
        # AAC profiles
        self.container.register_property("audioProfile", "HE", validator=_only_with_audio_codec("AAC"))
        self.container.register_property("audioProfile", "LC", validator=_only_with_audio_codec("AAC"))
        # AC3 profile
        self.container.register_property("audioProfile", "HQ", validator=_only_with_audio_codec("AC3"))

        # Quality score per canonical audio profile.
        _audio_profile_quality = {"HD": 20, "HDMA": 50, "LC": 0, "HQ": 0, "HE": 20}
        register_quality("audioProfile", _audio_profile_quality)

        # Canonical channel layout -> patterns.  The "<n>[\W_]<m>" patterns
        # accept any single non-word character or underscore between the two
        # digits; they are raw strings so that "\W" is not treated as a Python
        # string escape.
        register_property(
            "audioChannels",
            {
                "7.1": [r"7[\W_]1", "7ch", "8ch"],
                "5.1": [r"5[\W_]1", "5ch", "6ch"],
                "2.0": [r"2[\W_]0", "2ch", "stereo"],
                "1.0": [r"1[\W_]0", "1ch", "mono"],
            },
        )

        # Quality score per channel layout (2.0 is the zero point).
        register_quality("audioChannels", {"7.1": 200, "5.1": 100, "2.0": 0, "1.0": -100})

        # "Minisode" / "Minisodes" both canonicalise to "Minisode".
        self.container.register_property("episodeFormat", r"Minisodes?", canonical_form="Minisode")

        # Exactly eight hexadecimal characters.
        _crc32_pattern = "(?:[a-fA-F]|[0-9]){8}"
        self.container.register_property("crc32", _crc32_pattern, enhance=False, canonical_from_pattern=False)

        # A part word, an optional separator and a numeral (captured as the
        # named group "part"), followed by one non-digit character.
        part_words = ["pt", "part"]
        _part_pattern = "(" + build_or_pattern(part_words) + sep + "?(?P<part>" + numeral + "))[^0-9]"
        self.container.register_property(
            None, _part_pattern, enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral
        )

        # Miscellaneous release flags, all stored under the "other" property.
        _other_flag_patterns = {
            "AudioFix": ["Audio-Fix", "Audio-Fixed"],
            "SyncFix": ["Sync-Fix", "Sync-Fixed"],
            "DualAudio": ["Dual-Audio"],
            "WideScreen": ["ws", "wide-screen"],
            "Netflix": ["Netflix", "NF"],
        }
        register_property("other", _other_flag_patterns)

        # "Real" and "Fix" canonicalise to "Proper", but only through the
        # full-match + neighbor validator chain.
        self.container.register_property(
            "other",
            "Real",
            "Fix",
            canonical_form="Proper",
            validator=ChainedValidator(FullMatchValidator(), NeighborValidator()),
        )
        self.container.register_property("other", "Proper", "Repack", "Rerip", canonical_form="Proper")

        # Fansub / Fastsub use the same validator chain, with a fresh chain
        # built for each registration.
        for _sub_kind in ("Fansub", "Fastsub"):
            self.container.register_property(
                "other",
                _sub_kind,
                canonical_form=_sub_kind,
                validator=ChainedValidator(FullMatchValidator(), NeighborValidator()),
            )

        # "Complete", optionally preceded by "Season"/"Seasons" and a separator.
        _complete_pattern = "(?:Seasons?" + sep + "?)?Complete"
        self.container.register_property("other", _complete_pattern, canonical_form="Complete")
        self.container.register_property("other", "R5", "RC", canonical_form="R5")
        self.container.register_property("other", "Pre-Air", "Preair", canonical_form="Preair")
        self.container.register_property("other", "CC")  # Closed Caption
        self.container.register_property("other", "LD", "MD")  # Line/Mic Dubbed

        # Flags registered through register_canonical_properties with no
        # extra options.
        _plain_flags = ("Screener", "Remux", "3D", "HD", "mHD", "HDLight", "HQ", "DDC", "HR", "PAL", "SECAM", "NTSC")
        self.container.register_canonical_properties("other", *_plain_flags)

        # Flags registered the same way, but with a WeakValidator.
        _weak_flags = ("Limited", "Complete", "Classic", "Unrated", "LiNE", "Bonus", "Trailer")
        self.container.register_canonical_properties("other", *_weak_flags, validator=WeakValidator())

        # For every property already registered under "format", also register
        # an "other" pattern made of that format's pattern immediately followed
        # by "Scr"/"Screener" (with an optional leading hyphen), canonicalised
        # as "Screener".
        for prop in self.container.get_properties("format"):
            self.container.register_property("other", prop.pattern + "(-?Scr(?:eener)?)", canonical_form="Screener")

        # Register every entry of subtitle_exts, info_exts and video_exts as a
        # "container" property with a confidence of 0.3.
        for exts in (subtitle_exts, info_exts, video_exts):
            for container in exts:
                self.container.register_property("container", container, confidence=0.3)