예제 #1
0
 def guess_date(string, node=None, options=None):
     """Search ``string`` for a date and wrap it as a guess.

     :param string: text handed to ``search_date``.
     :param node: not used by this function.
     :param options: optional mapping; its ``date_year_first`` and
         ``date_day_first`` entries are forwarded to ``search_date``
         (``False`` is forwarded for both when ``options`` is falsy).
     :return: ``({'date': date}, span)`` when a date was found and
         ``DefaultValidator.validate_string`` accepts its span, otherwise
         ``(None, None)``.
     """
     year_first = options.get('date_year_first') if options else False
     day_first = options.get('date_day_first') if options else False
     date, span = search_date(string, year_first, day_first)

     if not (date and span):
         return None, None
     # ensure we have a separator before and after date
     if not DefaultValidator.validate_string(string, span):
         return None, None
     return {'date': date}, span
예제 #2
0
    def __init__(self):
        """Create the property and quality tables used by this transformer.

        Calls ``Transformer.__init__`` with 35, creates ``self.container``
        (a ``PropertiesContainer``) and ``self.qualities`` (a
        ``QualitiesContainer``), then registers, in this order: container,
        format, screenSize, videoCodec, videoProfile, videoApi, audioCodec,
        audioProfile, audioChannels, episodeFormat, crc32, part, other and
        finally the known file extensions as low-confidence containers.

        All regular-expression fragments are written as raw strings so that
        backslash escapes reach the regex engine unchanged.
        """
        Transformer.__init__(self, 35)

        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()

        def register_property(propname, props, **kwargs):
            """Register every ``{canonical_form: patterns}`` entry of ``props``.

            A value is either a list of patterns, or a
            ``(patterns, extra_kwargs)`` tuple whose ``extra_kwargs`` override
            ``kwargs`` for that entry only. ``canonical_form`` is always set
            to the dict key.
            """
            for canonical_form, patterns in props.items():
                current_kwarg = dict(kwargs)
                if isinstance(patterns, tuple):
                    patterns, pattern_kwarg = patterns
                    current_kwarg.update(pattern_kwarg)
                current_kwarg['canonical_form'] = canonical_form
                self.container.register_property(propname, *patterns, **current_kwarg)

        def register_quality(propname, quality_dict):
            """Register every ``{canonical_form: quality}`` entry of ``quality_dict``."""
            for canonical_form, quality in quality_dict.items():
                self.qualities.register_quality(propname, canonical_form, quality)

        def video_codec_required():
            """Return a new ``LeavesValidator`` whose lambda checks for 'videoCodec' in ``node.guess``."""
            return LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])

        def audio_codec_required(codec):
            """Return a new ``LeavesValidator`` whose lambda checks ``node.guess['audioCodec'] == codec``."""
            return LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == codec])

        register_property('container', {'mp4': ['MP4']})

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
                                     'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
                                     #'Telesync': ['TELESYNC', 'PDVD'],
                                     'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
                                     'Workprint': ['WORKPRINT', 'WP'],
                                     'Telecine': ['TELECINE', 'TC'],
                                     'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                                     'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                     'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                     'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'],
                                     'VOD': ['VOD', 'VOD-Rip'],
                                     'WEBRip': ['WEB-Rip'],
                                     'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
                                     'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'],
                                     'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                     })

        register_quality('format', {'VHS': -100,
                                    'Cam': -90,
                                    'Telesync': -80,
                                    'Workprint': -70,
                                    'Telecine': -60,
                                    'PPV': -50,
                                    'TV': -30,
                                    'DVB': -20,
                                    'DVD': 0,
                                    'HDTV': 20,
                                    'VOD': 40,
                                    'WEBRip': 50,
                                    'WEB-DL': 60,
                                    'HD-DVD': 80,
                                    'BluRay': 100
                                    })

        # Optional "<width><separator>" prefix in front of the height, where
        # the separator is a backslash, a slash, 'x' or '*'.  This must be a
        # raw string: in a normal literal '\\|' collapses to '\|' (an escaped
        # pipe), which silently turned the first two alternatives into the
        # literal text '|/'.
        width_prefix = r'(?:\d{3,}(?:\\|\/|x|\*))?'

        register_property('screenSize', {'360p': [width_prefix + r'360(?:i|p?x?)'],
                                         '368p': [width_prefix + r'368(?:i|p?x?)'],
                                         '480p': [width_prefix + r'480(?:i|p?x?)'],
                                         #'480p': (['hr'], {'confidence': 0.2}), # duplicate dict key
                                         '576p': [width_prefix + r'576(?:i|p?x?)'],
                                         '720p': [width_prefix + r'720(?:i|p?x?)'],
                                         '900p': [width_prefix + r'900(?:i|p?x?)'],
                                         '1080i': [width_prefix + r'1080i'],
                                         '1080p': [width_prefix + r'1080p?x?'],
                                         '4K': [width_prefix + r'2160(?:i|p?x?)']
                                         },
                          validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))

        class ResolutionValidator(object):
            """Validator for explicit WIDTHxHEIGHT matches; currently accepts everything."""
            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                """Always return True.

                The separator / neighbouring-entry check below is disabled and
                kept only for reference:
                """
                # span = _get_span(prop, match)
                # span = _trim_span(span, string[span[0]:span[1]])
                # start, end = span
                #
                # sep_start = start <= 0 or string[start - 1] in sep
                # sep_end = end >= len(string) or string[end] in sep
                # start_by_other = start in entry_end
                # end_by_other = end in entry_start
                # if (sep_start or start_by_other) and (sep_end or end_by_other):
                #     return True
                # return False
                return True

        _digits_re = re.compile(r'\d+')

        def resolution_formatter(value):
            """Join every run of digits found in ``value`` with 'x'."""
            digits = _digits_re.findall(value)
            return 'x'.join(digits)

        self.container.register_property('screenSize', r'\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))

        register_quality('screenSize', {'360p': -300,
                                        '368p': -200,
                                        '480p': -100,
                                        '576p': 0,
                                        '720p': 100,
                                        '900p': 130,
                                        '1080i': 180,
                                        '1080p': 200,
                                        '4K': 400
                                        })

        _videoCodecProperty = {'Real': [r'Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
                               'Mpeg2': ['Mpeg2'],
                               'DivX': ['DVDivX', 'DivX'],
                               'XviD': ['XviD'],
                               'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
                               'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
                               }

        register_property('videoCodec', _videoCodecProperty)

        register_quality('videoCodec', {'Real': -50,
                                        'Mpeg2': -30,
                                        'DivX': -10,
                                        'XviD': 0,
                                        'h264': 100,
                                        'h265': 150
                                        })

        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        self.container.register_property('videoProfile', 'BP', validator=video_codec_required())
        self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=video_codec_required())
        self.container.register_property('videoProfile', 'MP', validator=video_codec_required())
        self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=video_codec_required())
        self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
        self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
        self.container.register_property('videoProfile', 'Hi422P', validator=video_codec_required())
        self.container.register_property('videoProfile', 'Hi444PP', validator=video_codec_required())

        register_quality('videoProfile', {'BP': -20,
                                          'XP': -10,
                                          'MP': 0,
                                          'HP': 10,
                                          '10bit': 15,
                                          'Hi422P': 25,
                                          'Hi444PP': 35
                                          })

        # has nothing to do here (or on filenames for that matter), but some
        # releases use it and it helps to identify release groups, so we adapt
        register_property('videoApi', {'DXVA': ['DXVA']})

        register_property('audioCodec', {'MP3': ['MP3', 'LAME', r'LAME(?:\d)+-(?:\d)+'],
                                         'DolbyDigital': ['DD'],
                                         'AAC': ['AAC'],
                                         'AC3': ['AC3'],
                                         'Flac': ['FLAC'],
                                         'DTS': (['DTS'], {'validator': LeftValidator()}),
                                         'TrueHD': ['True-HD']
                                         })

        register_quality('audioCodec', {'MP3': 10,
                                        'DolbyDigital': 30,
                                        'AAC': 35,
                                        'AC3': 40,
                                        'Flac': 45,
                                        'DTS': 60,
                                        'TrueHD': 70
                                        })

        self.container.register_property('audioProfile', 'HD', validator=audio_codec_required('DTS'))
        self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=audio_codec_required('DTS'))
        self.container.register_property('audioProfile', 'HE', validator=audio_codec_required('AAC'))
        self.container.register_property('audioProfile', 'LC', validator=audio_codec_required('AAC'))
        self.container.register_property('audioProfile', 'HQ', validator=audio_codec_required('AC3'))

        register_quality('audioProfile', {'HD': 20,
                                          'HDMA': 50,
                                          'LC': 0,
                                          'HQ': 0,
                                          'HE': 20
                                          })

        register_property('audioChannels', {'7.1': [r'7[\W_]1', '7ch', '8ch'],
                                            '5.1': [r'5[\W_]1', '5ch', '6ch'],
                                            '2.0': [r'2[\W_]0', '2ch', 'stereo'],
                                            '1.0': [r'1[\W_]0', '1ch', 'mono']
                                            })

        register_quality('audioChannels', {'7.1': 200,
                                           '5.1': 100,
                                           '2.0': 0,
                                           '1.0': -100
                                           })

        self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

        self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)

        part_words = ['pt', 'part']
        self.container.register_property(None, '(' + build_or_pattern(part_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)

        register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                    'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
                                    'DualAudio': ['Dual-Audio'],
                                    'WideScreen': ['ws', 'wide-screen'],
                                    'Netflix': ['Netflix', 'NF']
                                    })

        self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
        self.container.register_property('other', 'Fansub', canonical_form='Fansub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', 'Fastsub', canonical_form='Fastsub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator()))
        self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
        self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
        self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')
        self.container.register_property('other', 'CC')  # Close Caption
        self.container.register_property('other', 'LD', 'MD')  # Line/Mic Dubbed

        self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
                                                     'DDC',
                                                     'HR', 'PAL', 'SECAM', 'NTSC')
        self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())

        # Every registered format pattern followed by "Scr"/"Screener" is a Screener.
        for prop in self.container.get_properties('format'):
            self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')

        # Known file extensions double as low-confidence container guesses.
        for exts in (subtitle_exts, info_exts, video_exts):
            for container in exts:
                self.container.register_property('container', container, confidence=0.3)
예제 #3
0
    def __init__(self):
        """Register the season / episode number patterns of this transformer.

        Calls ``Transformer.__init__`` with 20, builds the separator, word and
        marker regexes, defines the list parsers used as formatters, and
        registers every pattern on ``self.container`` (a
        ``PropertiesContainer`` created with ``enhance=False`` and
        ``canonical_from_pattern=False``). Registrations are kept in their
        original order.
        """
        Transformer.__init__(self, 20)

        range_separators = ['-', 'to', 'a']
        discrete_separators = ['&', 'and', 'et']
        of_separators = ['of', 'sur', '/', '\\']

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        # ``sep`` with every range separator removed also counts as a
        # discrete separator.
        discrete_sep = sep
        for range_separator in range_separators:
            discrete_sep = discrete_sep.replace(range_separator, '')
        discrete_separators.append(discrete_sep)
        all_separators = list(range_separators)
        all_separators.extend(discrete_separators)

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        range_separators_re = re.compile(build_or_pattern(range_separators),
                                         re.IGNORECASE)
        discrete_separators_re = re.compile(
            build_or_pattern(discrete_separators), re.IGNORECASE)
        all_separators_re = re.compile(build_or_pattern(all_separators),
                                       re.IGNORECASE)
        of_separators_re = re.compile(
            build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words_re = re.compile(build_or_pattern(season_words),
                                     re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words),
                                      re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers),
                                       re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers),
                                        re.IGNORECASE)

        # Compiled once here rather than on every parser call.
        x_separator_re = re.compile('x', re.IGNORECASE)
        e_separator_re = re.compile('e', re.IGNORECASE)

        def list_parser(value,
                        propertyListName,
                        discrete_separators_re=discrete_separators_re,
                        range_separators_re=range_separators_re,
                        allow_discrete=False,
                        fill_gaps=False):
            """Parse ``value`` into one number or a list of numbers.

            ``value`` is split on ``discrete_separators_re``; pieces joined by
            a range separator are glued back together and expanded into every
            number of the range (bounds parsed with ``parse_numeral``).

            :param propertyListName: key under which the full list is returned.
            :param allow_discrete: when false, values smaller than the last
                kept value are dropped.
            :param fill_gaps: when true, the result becomes the full range
                from the minimum to the maximum value.
            :return: ``{None: first, propertyListName: values}`` when more
                than one value remains, the single value when exactly one
                remains, ``None`` when nothing was parsed.
            """
            discrete_elements = [
                x.strip() for x in discrete_separators_re.split(value)
                if x != ''
            ]

            proper_discrete_elements = []
            i = 0
            while i < len(discrete_elements):
                if i < len(discrete_elements) - 2 and range_separators_re.match(
                        discrete_elements[i + 1]):
                    # "<a>", "<range sep>", "<b>" -> one range element
                    proper_discrete_elements.append(discrete_elements[i] +
                                                    discrete_elements[i + 1] +
                                                    discrete_elements[i + 2])
                    i += 3
                else:
                    match = range_separators_re.search(discrete_elements[i])
                    if match and match.start() == 0:
                        # Element starts with a range separator: glue it onto
                        # the previously collected element. ``i`` indexes
                        # ``discrete_elements``, not this list (which is
                        # shorter after any merge), so address the last item
                        # directly instead of using ``i - 1``.
                        proper_discrete_elements[-1] = (
                            proper_discrete_elements[-1] +
                            discrete_elements[i])
                    elif match and match.end() == len(discrete_elements[i]):
                        # Element ends with a range separator: glue the next
                        # element onto it.
                        proper_discrete_elements.append(discrete_elements[i] +
                                                        discrete_elements[i +
                                                                          1])
                    else:
                        proper_discrete_elements.append(discrete_elements[i])
                    i += 1

            discrete_elements = proper_discrete_elements

            ret = []

            for discrete_element in discrete_elements:
                range_values = [
                    x.strip()
                    for x in range_separators_re.split(discrete_element)
                    if x != ''
                ]
                if len(range_values) > 1:
                    for x in range(0, len(range_values) - 1):
                        start_range_ep = parse_numeral(range_values[x])
                        end_range_ep = parse_numeral(range_values[x + 1])
                        for range_ep in range(start_range_ep,
                                              end_range_ep + 1):
                            if range_ep not in ret:
                                ret.append(range_ep)
                else:
                    discrete_value = parse_numeral(discrete_element)
                    if discrete_value not in ret:
                        ret.append(discrete_value)

            if len(ret) > 1:
                if not allow_discrete:
                    # replace discrete elements by ranges: keep only values
                    # that are not smaller than the last kept one
                    valid_ret = [ret[0]]
                    for candidate in ret[1:]:
                        if not candidate < valid_ret[-1]:
                            valid_ret.append(candidate)
                    ret = valid_ret
                if fill_gaps:
                    ret = list(range(min(ret), max(ret) + 1))
                if len(ret) > 1:
                    return {None: ret[0], propertyListName: ret}
            if len(ret) > 0:
                return ret[0]
            return None

        def episode_parser_x(value):
            """Parse an 'x'-separated episode list into 'episodeList'."""
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=x_separator_re)

        def episode_parser_e(value):
            """Parse an 'e'-separated episode list into 'episodeList', filling gaps."""
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=e_separator_re,
                               fill_gaps=True)

        def episode_parser(value):
            """Parse an episode list using the default separators."""
            return list_parser(value, 'episodeList')

        def season_parser(value):
            """Parse a season list using the default separators."""
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Accept a match only when its second group is shorter than 3 characters."""

            def validate(self, prop, string, node, match, entry_start,
                         entry_end):
                return len(match.group(2)) < 3  #limit

        self.container.register_property(None,
                                         r'(' + season_words_re.pattern + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + season_words_re.pattern + '?)',
                                         confidence=1.0,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            season_words_re.pattern + '?)' + sep,
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=ChainedValidator(
                DefaultValidator(),
                # only accept when the formatted season has more than one item
                FormatterValidator(
                    'season', lambda x: len(x) > 1
                    if hasattr(x, '__len__') else False)))

        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' +
            digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral +
            ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_e,
                'season': season_parser
            },
            validator=NoValidator())
        #self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            sep + r'((?P<season>' + digital_numeral + ')' + sep +
            '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep +
            '[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'((?P<season>' + digital_numeral + ')' +
            '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' +
            digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=NoValidator())

        self.container.register_property(None,
                                         r'((?P<episodeNumber>' +
                                         digital_numeral + ')' + sep +
                                         r'?v(?P<version>\d+))',
                                         confidence=0.6,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep + '?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep +
            r'?v(?P<version>\d+))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            episode_words_re.pattern + '?)',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        # Bare two-digit numbers at the very start or end of the string.
        # NOTE(review): the first pattern is registered twice, exactly as in
        # the original code; confirm whether the duplicate can be dropped.
        bare_episode_patterns = (
            r'^ ?(\d{2})' + sep,
            r'^ ?(\d{2})' + sep,
            r'^ ?0(\d{1,2})' + sep,
            sep + r'(\d{2}) ?$',
            sep + r'0(\d{1,2}) ?$',
        )
        for bare_episode_pattern in bare_episode_patterns:
            self.container.register_property('episodeNumber',
                                             bare_episode_pattern,
                                             confidence=0.4,
                                             formatter=parse_numeral)

        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral +
            ')(?:' + sep + '?(?:episodes?|eps?))?)',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral +
            ')' + sep + '?' + of_separators_re.pattern + sep +
            '?(?P<episodeCount>' + numeral + '))',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(None,
                                         r'((?:seasons?|saisons?|s)' + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + of_separators_re.pattern + sep +
                                         '?(?P<seasonCount>' + numeral + '))',
                                         confidence=0.7,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?P<season>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral +
            ')' + sep + '?(?:seasons?|saisons?|s))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_canonical_properties('other',
                                                     'FiNAL',
                                                     'Complete',
                                                     validator=WeakValidator())

        # "<season>xAll" marks a complete season.
        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral +
            ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'other': lambda x: 'Complete',
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
예제 #4
0
    def __init__(self):
        Transformer.__init__(self, 20)

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        def episode_parser(value):
            """Parse an episode expression into one number or a list of numbers.

            ``value`` is split on ASCII letters; each remaining piece is
            either a single numeral or a dash-separated chain of numerals,
            which is expanded into every number between consecutive bounds
            (bounds parsed with ``parse_numeral``).

            :return: ``{None: first, 'episodeList': numbers}`` when more than
                one number was parsed, the single number when exactly one was
                parsed, ``None`` when nothing was parsed.
            """
            ret = []
            for letters_elt in re_split('[a-zA-Z]', value):
                if not letters_elt:
                    continue
                dashed_values = [x for x in letters_elt.split('-') if x]
                if len(dashed_values) > 1:
                    bounds = [parse_numeral(x) for x in dashed_values]
                    chain = []
                    # Walk consecutive bounds. The previous loop re-read
                    # bounds 0 and 1 on every pass, so "1-2-3" produced
                    # [1, 2, 1, 2] instead of [1, 2, 3].
                    for start_dash_ep, end_dash_ep in zip(bounds, bounds[1:]):
                        for dash_ep in range(start_dash_ep, end_dash_ep + 1):
                            # the shared bound of two adjacent ranges must
                            # only be listed once
                            if dash_ep not in chain:
                                chain.append(dash_ep)
                    ret.extend(chain)
                else:
                    ret.append(parse_numeral(letters_elt))
            if len(ret) > 1:
                return {
                    None: ret[0],
                    'episodeList': ret
                }  # TODO: Should support seasonList also
            if ret:
                return ret[0]
            return None

        class ResolutionCollisionValidator(object):
            def validate(self, prop, string, node, match, entry_start,
                         entry_end):
                return len(match.group(2)) < 3

        self.container.register_property(None,
                                         r'((?:season|saison)' + sep +
                                         '?(?P<season>' + numeral + '))',
                                         confidence=1.0,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(s(?P<season>' + digital_numeral + ')[^0-9]?' + sep +
            '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep +
            '?[e-]' + digital_numeral + ')*)))[^0-9]',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            },
            validator=NoValidator())
        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral +
            ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' +
            sep + '?[x-]' + digital_numeral + ')*)))[^0-9]',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        self.container.register_property(None,
                                         r'(s(?P<season>' + digital_numeral +
                                         '))[^0-9]',
                                         confidence=0.6,
                                         formatter=parse_numeral,
                                         validator=NoValidator())
        self.container.register_property(None,
                                         r'((?P<episodeNumber>' +
                                         digital_numeral + ')v[23])',
                                         confidence=0.6,
                                         formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]',
            confidence=0.7,
            formatter=parse_numeral)
        self.container.register_property(None,
                                         r'(e(?P<episodeNumber>' +
                                         digital_numeral + '))',
                                         confidence=0.6,
                                         formatter=parse_numeral)
        self.container.register_property(None,
                                         r'\A ?((?P<episodeNumber>' + '\d{2}' +
                                         '))' + sep,
                                         confidence=0.4,
                                         formatter=parse_numeral)
        self.container.register_property(None,
                                         r'\A ?(0(?P<episodeNumber>' + '\d+' +
                                         '))' + sep,
                                         confidence=0.4,
                                         formatter=parse_numeral)

        self.container.register_canonical_properties('other',
                                                     'FiNAL',
                                                     'Complete',
                                                     validator=WeakValidator())
예제 #5
0
 def guess_date(string, node=None, options=None):
     """Look for a date in ``string``.

     ``options`` may provide ``date_year_first`` and ``date_day_first``,
     which are forwarded to ``search_date``; without options both are
     ``False``.  ``node`` is accepted but not used here.

     Returns ``({'date': date}, span)`` when a date is found and
     ``DefaultValidator.validate_string`` accepts its span, otherwise
     ``(None, None)``.
     """
     year_first = False
     day_first = False
     if options:
         year_first = options.get('date_year_first')
         day_first = options.get('date_day_first')

     date, span = search_date(string, year_first, day_first)
     if not (date and span):
         return None, None
     # ensure we have a separator before and after date
     if not DefaultValidator.validate_string(string, span):
         return None, None
     return {'date': date}, span
예제 #6
0
    def __init__(self):
        """Register season, episode, version and count patterns.

        Calls ``Transformer.__init__`` with ``20`` and fills
        ``self.container`` (a ``PropertiesContainer`` created with
        ``enhance=False`` and ``canonical_from_pattern=False``) with the
        regular expressions that populate the ``season``, ``episodeNumber``,
        ``version``, ``episodeCount``, ``seasonCount`` and ``other``
        properties.

        NOTE(review): ``sep``, ``numeral``, ``digital_numeral`` and
        ``all_separators_re`` come from outside this method; their exact
        patterns are not visible here.
        """
        Transformer.__init__(self, 20)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(
            build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        season_words_re = re.compile(build_or_pattern(season_words),
                                     re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words),
                                      re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers),
                                       re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers),
                                        re.IGNORECASE)

        # Compiled once here instead of on every parser call.
        x_separator_re = re.compile('x', re.IGNORECASE)
        e_separator_re = re.compile('e', re.IGNORECASE)

        def episode_parser_x(value):
            """Parse an episode list whose discrete separator is ``x``."""
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=x_separator_re)

        def episode_parser_e(value):
            """Parse an ``e``-separated episode list with ``fill_gaps=True``."""
            return list_parser(value,
                               'episodeList',
                               discrete_separators_re=e_separator_re,
                               fill_gaps=True)

        def episode_parser(value):
            """Parse an episode list with ``list_parser``'s defaults."""
            return list_parser(value, 'episodeList')

        def season_parser(value):
            """Parse a season list with ``list_parser``'s defaults."""
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Accept a match only if its second group is under 3 characters.

            NOTE(review): presumably this keeps video resolutions such as
            ``1920x1080`` from being read as season x episode - confirm.
            """

            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                return len(match.group(2)) < 3  # limit

        # --- season word + single number ("season 2", "2 seasons", ...) ---
        self.container.register_property(None,
                                         r'(' + season_words_re.pattern + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + season_words_re.pattern + '?)',
                                         confidence=1.0,
                                         formatter=parse_numeral)
        # --- season word + list of numbers; only kept when the parsed
        # season value has a length greater than one ---
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            season_words_re.pattern + '?)' + sep,
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=ChainedValidator(
                DefaultValidator(),
                FormatterValidator(
                    'season', lambda x: len(x) > 1
                    if hasattr(x, '__len__') else False)))

        # --- s<season>e<episode>[e<episode>|-<episode>]* ---
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' +
            digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral +
            ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_e,
                'season': season_parser
            },
            validator=NoValidator())
        # --- s<season>e<episode> followed by separator-delimited episodes ---
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser,
                'season': season_parser
            },
            validator=NoValidator())

        # --- <season> x <episode> with separators around the "x" ---
        self.container.register_property(
            None,
            sep + r'((?P<season>' + digital_numeral + ')' + sep +
            '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep +
            '[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        # --- <season>x<episode> written without separators ---
        self.container.register_property(
            None,
            r'((?P<season>' + digital_numeral + ')' +
            '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' +
            digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))
        # --- season marker + list of seasons ---
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=NoValidator())

        # --- <episode>v<version>; raw literals keep "\d" a valid escape,
        # the pattern text itself is unchanged ---
        self.container.register_property(None,
                                         r'((?P<episodeNumber>' +
                                         digital_numeral + ')' + sep +
                                         r'?v(?P<version>\d+))',
                                         confidence=0.6,
                                         formatter=parse_numeral)
        # --- standalone V<version> between separators ---
        self.container.register_property('version',
                                         sep + r'(V\d+)' + sep,
                                         confidence=0.6,
                                         formatter=parse_numeral,
                                         validator=NoValidator())
        # --- ep<episode> ---
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' +
            sep + '?)',
            confidence=0.7,
            formatter=parse_numeral)
        # --- ep<episode>v<version> ---
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' +
            sep + r'?v(?P<version>\d+))',
            confidence=0.7,
            formatter=parse_numeral)

        # --- episode marker + list of episodes ---
        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        # --- episode word + list of episodes ---
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern +
            sep + '?' + digital_numeral + ')*)' + sep + '?' +
            episode_words_re.pattern + '?)',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        # --- episode marker + <episode>v<version> ---
        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' +
            digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        # --- episode word + <episode>v<version> ---
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' +
            digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        # --- bare two-digit numbers anchored at the start of the string ---
        self.container.register_property('episodeNumber',
                                         r'^' + sep + r'+(\d{2}' + '(?:' +
                                         sep + '?' +
                                         all_separators_re.pattern + sep +
                                         '?' + r'\d{2}' + ')*)' + sep,
                                         confidence=0.4,
                                         formatter=episode_parser)
        # --- zero-prefixed numbers anchored at the start of the string ---
        self.container.register_property(
            'episodeNumber',
            r'^' + sep + r'+0(\d{1,2}' + '(?:' + sep + '?' +
            all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep,
            confidence=0.4,
            formatter=episode_parser)
        # --- bare two-digit numbers anchored at the end of the string ---
        self.container.register_property('episodeNumber',
                                         sep + r'(\d{2}' + '(?:' + sep + '?' +
                                         all_separators_re.pattern + sep +
                                         '?' + r'\d{2}' + ')*)' + sep + '+$',
                                         confidence=0.4,
                                         formatter=episode_parser)
        # --- zero-prefixed numbers anchored at the end of the string ---
        self.container.register_property(
            'episodeNumber',
            sep + r'0(\d{1,2}' + '(?:' + sep + '?' +
            all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep +
            '+$',
            confidence=0.4,
            formatter=episode_parser)

        # --- "<episode> of <count>" optionally followed by an episode word ---
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral +
            ')(?:' + sep + '?(?:episodes?|eps?))?)',
            confidence=0.7,
            formatter=parse_numeral)
        # --- "episode <episode> of <count>" ---
        self.container.register_property(
            None,
            r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral +
            ')' + sep + '?' + of_separators_re.pattern + sep +
            '?(?P<episodeCount>' + numeral + '))',
            confidence=0.7,
            formatter=parse_numeral)
        # --- "season <season> of <count>" ---
        self.container.register_property(None,
                                         r'((?:seasons?|saisons?|s)' + sep +
                                         '?(?P<season>' + numeral + ')' + sep +
                                         '?' + of_separators_re.pattern + sep +
                                         '?(?P<seasonCount>' + numeral + '))',
                                         confidence=0.7,
                                         formatter=parse_numeral)
        # --- "<season> of <count> seasons" ---
        self.container.register_property(
            None,
            r'((?P<season>' + numeral + ')' + sep + '?' +
            of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral +
            ')' + sep + '?(?:seasons?|saisons?|s))',
            confidence=0.7,
            formatter=parse_numeral)

        self.container.register_canonical_properties('other',
                                                     'FiNAL',
                                                     'Complete',
                                                     validator=WeakValidator())

        # --- "<season>xAll": the "other" group is always reported as
        # 'Complete' by its formatter ---
        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral +
            ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'other': lambda x: 'Complete',
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(),
                                       ResolutionCollisionValidator()))