class GuessitConverter(babelfish.LanguageReverseConverter):
    """
    Reverse converter turning free-form language names into language triples.

    Resolution order in :meth:`reverse`: a ``language(country)`` or
    ``language-country`` form, then the synonyms declared in ``SYN``, then the
    stock babelfish constructors.
    """

    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self):
        # Every synonym is stored lowercased so lookups are case-insensitive.
        self.guessit_exceptions = {
            synonym.lower(): (alpha3, country, None)
            for (alpha3, country), synonyms in SYN.items()
            for synonym in synonyms
        }

    @property
    def codes(self):
        """
        Union of the codes of the babelfish ``alpha3b``, ``alpha2``, ``name`` and
        ``opensubtitles`` language converters, the ``name`` country converter and
        the local synonyms.
        """
        known = babelfish.language_converters['alpha3b'].codes
        for converter_name in ('alpha2', 'name', 'opensubtitles'):
            known = known | babelfish.language_converters[converter_name].codes
        known = known | babelfish.country_converters['name'].codes
        return known | frozenset(self.guessit_exceptions)

    def convert(self, alpha3, country=None, script=None):
        """
        Build a babelfish language from its parts and return its string form.
        """
        language = babelfish.Language(alpha3, country, script)
        return str(language)

    def reverse(self, name):  # pylint:disable=arguments-differ
        """
        Resolve ``name`` to an ``(alpha3, country, script)`` tuple.

        :param name: language name to resolve
        :return: tuple describing the language
        :raise babelfish.LanguageReverseError: when nothing recognises ``name``
        """
        # The country patterns are tried on the name as given, before lowercasing.
        country_match = None
        for pattern in (GuessitConverter._with_country_regexp,
                        GuessitConverter._with_country_regexp2):
            country_match = pattern.match(name)
            if country_match:
                break

        lowered = name.lower()

        if country_match:
            language = babelfish.Language.fromguessit(country_match.group(1).strip())
            language.country = babelfish.Country.fromguessit(country_match.group(2).strip())
            alpha3 = language.alpha3
            country_code = language.country.alpha2 if language.country else None
            return alpha3, country_code, language.script or None

        # Synonyms are consulted before babelfish so that they can override
        # whatever the generic constructors would return.
        if lowered in self.guessit_exceptions:
            return self.guessit_exceptions[lowered]

        constructors = (
            babelfish.Language,
            babelfish.Language.fromalpha3b,
            babelfish.Language.fromalpha2,
            babelfish.Language.fromname,
            babelfish.Language.fromopensubtitles,
        )
        for build in constructors:
            try:
                language = build(lowered)
                return language.alpha3, language.country, language.script
            except (ValueError, babelfish.LanguageReverseError):
                continue

        raise babelfish.LanguageReverseError(lowered)
class GuessitConverter(babelfish.LanguageReverseConverter):
    """
    Reverse converter turning free-form language names into language triples.

    The synonyms given to the constructor take precedence over the stock
    babelfish constructors.
    """

    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self, synonyms):
        """
        :param synonyms: mapping of ``alpha3`` or ``alpha3_country`` codes to an
            iterable of synonym strings
        """
        self.guessit_exceptions = {}
        for code, names in synonyms.items():
            # A code containing an underscore carries a country part.
            alpha3, country = code.split('_') if '_' in code else (code, None)
            for synonym in names:
                self.guessit_exceptions[synonym.lower()] = (alpha3, country, None)

    @property
    def codes(self):
        """
        Union of the codes of the babelfish ``alpha3b``, ``alpha2``, ``name`` and
        ``opensubtitles`` language converters, the ``name`` country converter and
        the configured synonyms.
        """
        known = babelfish.language_converters['alpha3b'].codes
        for converter_name in ('alpha2', 'name', 'opensubtitles'):
            known = known | babelfish.language_converters[converter_name].codes
        known = known | babelfish.country_converters['name'].codes
        return known | frozenset(self.guessit_exceptions)

    def convert(self, alpha3, country=None, script=None):
        """
        Build a babelfish language from its parts and return its string form.
        """
        language = babelfish.Language(alpha3, country, script)
        return str(language)

    def reverse(self, name):  # pylint:disable=arguments-renamed
        """
        Resolve ``name`` to an ``(alpha3, country, script)`` tuple.

        :param name: language name to resolve (matched case-insensitively)
        :return: tuple describing the language
        :raise babelfish.LanguageReverseError: when nothing recognises ``name``
        """
        lowered = name.lower()

        # Synonyms are consulted before babelfish so that they can override
        # whatever the generic constructors would return.
        if lowered in self.guessit_exceptions:
            return self.guessit_exceptions[lowered]

        constructors = (
            babelfish.Language,
            babelfish.Language.fromalpha3b,
            babelfish.Language.fromalpha2,
            babelfish.Language.fromname,
            babelfish.Language.fromopensubtitles,
            babelfish.Language.fromietf,
        )
        for build in constructors:
            try:
                language = build(lowered)
                return language.alpha3, language.country, language.script
            except (ValueError, babelfish.LanguageReverseError):
                continue

        raise babelfish.LanguageReverseError(lowered)
def screen_size():
    """
    Builder for rebulk object.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    digits_re = re.compile(r'\d+')

    def conflict_solver(match, other):
        """
        Conflict solver used by the named screen sizes.
        """
        if other.name != 'screen_size':
            return '__default__'
        if 'resolution' in other.tags:
            # "720 x 432" matches both the 720p pattern and the WxH pattern:
            # keep this match when the resolution value starts with its last number.
            last_number = digits_re.findall(match.raw)[-1]
            if other.value.startswith(last_number):
                return match
        return other

    def format_resolution(value):
        """
        Join every number found in the raw value with 'x'.
        """
        return 'x'.join(digits_re.findall(value))

    def resolution_conflict_solver(match, other):  # pylint:disable=unused-argument
        """
        Conflict solver used by the generic WxH pattern.
        """
        return '__default__' if other.name == 'screen_size' else other

    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(
        flags=re.IGNORECASE)
    rebulk.defaults(name="screen_size", validator=seps_surround,
                    conflict_solver=conflict_solver)

    # Optional "<width>x" or "<width>*" prefix shared by every named size.
    width = r'(?:\d{3,}(?:x|\*))?'
    named_sizes = (
        ('regex', width + r'360(?:i)', '360i'),
        ('regex', width + r'360(?:p?x?)', '360p'),
        ('regex', width + r'368(?:p?x?)', '368p'),
        ('regex', width + r'480(?:i)', '480i'),
        ('regex', width + r'480(?:p?x?)', '480p'),
        ('regex', width + r'576(?:i)', '576i'),
        ('regex', width + r'576(?:p?x?)', '576p'),
        ('regex', width + r'720(?:p?(?:50|60)?x?)', '720p'),
        ('regex', width + r'720(?:p(?:50|60)?x?)', '720p'),
        ('regex', width + r'720p?hd', '720p'),
        ('regex', width + r'900(?:i)', '900i'),
        ('regex', width + r'900(?:p?x?)', '900p'),
        ('regex', width + r'1080i', '1080i'),
        ('regex', width + r'1080p?x?', '1080p'),
        ('regex', width + r'1080(?:p(?:50|60)?x?)', '1080p'),
        ('regex', width + r'1080p?hd', '1080p'),
        ('regex', width + r'2160(?:p?x?)', '2160p'),
        ('string', '4k', '2160p'),
        ('regex', width + r'4320(?:p?x?)', '4320p'),
    )
    # Registration order is kept exactly as listed.
    for kind, pattern, size in named_sizes:
        register = rebulk.string if kind == 'string' else rebulk.regex
        register(pattern, value=size)

    # The generic WxH pattern is registered under defaults without the shared solver.
    rebulk.defaults(name="screen_size", validator=seps_surround)
    rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
                 formatter=format_resolution,
                 abbreviations=[dash],
                 tags=['resolution'],
                 conflict_solver=resolution_conflict_solver)

    rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)

    return rebulk
class Quantity(object):
    """
    A magnitude paired with units, parsed from strings such as ``1.5GB``.
    """

    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')

    def __init__(self, magnitude, units):
        self.magnitude = magnitude
        self.units = units

    @classmethod
    @abstractmethod
    def parse_units(cls, value):
        """
        Turn the raw units text into its proper notation.

        Subclasses provide the implementation; this base version always raises.

        :param value: units text captured by ``parser_re`` (``None`` when absent)
        :raise NotImplementedError: always
        """
        raise NotImplementedError

    @classmethod
    def fromstring(cls, string):
        """
        Build a quantity from its textual form.

        :param string: text starting with a number, optionally followed by units
        :return: new instance of ``cls``
        """
        parsed = cls.parser_re.match(string).groupdict()
        raw_magnitude = parsed['magnitude']
        try:
            # Prefer an integer magnitude, fall back to float when int() refuses.
            magnitude = int(raw_magnitude)
        except ValueError:
            magnitude = float(raw_magnitude)

        return cls(magnitude, cls.parse_units(parsed['units']))

    def __hash__(self):
        # Hash follows the string form, consistent with equality against str.
        text = str(self)
        return hash(text)

    def __eq__(self, other):
        if isinstance(other, str):
            return str(self) == other
        if isinstance(other, self.__class__):
            return self.magnitude == other.magnitude and self.units == other.units
        return NotImplemented

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        class_name = self.__class__.__name__
        return f'<{class_name} [{self}]>'

    def __str__(self):
        magnitude, units = self.magnitude, self.units
        return f'{magnitude}{units}'
def screen_size():
    """
    Builder for rebulk object.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    digits_re = re.compile(r'\d+')

    def conflict_solver(match, other):
        """
        Conflict solver used by the named screen sizes.
        """
        if other.name != 'screen_size':
            return '__default__'
        if 'resolution' in other.tags:
            # "720 x 432" matches both the 720p pattern and the WxH pattern:
            # keep this match when the resolution value starts with its last number.
            last_number = digits_re.findall(match.raw)[-1]
            if other.value.startswith(last_number):
                return match
        return other

    def format_resolution(value):
        """
        Join every number found in the raw value with 'x'.
        """
        return 'x'.join(digits_re.findall(value))

    def resolution_conflict_solver(match, other):  # pylint:disable=unused-argument
        """
        Conflict solver used by the generic WxH pattern.
        """
        return '__default__' if other.name == 'screen_size' else other

    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="screen_size", validator=seps_surround,
                    conflict_solver=conflict_solver)

    # Optional "<width>x" or "<width>*" prefix shared by every named size.
    width = r'(?:\d{3,}(?:x|\*))?'
    named_sizes = (
        ('regex', width + r'360(?:i|p?x?)', '360p'),
        ('regex', width + r'368(?:i|p?x?)', '368p'),
        ('regex', width + r'480(?:i|p?x?)', '480p'),
        ('regex', width + r'576(?:i|p?x?)', '576p'),
        ('regex', width + r'720(?:i|p?(?:50|60)?x?)', '720p'),
        ('regex', width + r'720(?:p(?:50|60)?x?)', '720p'),
        ('regex', width + r'720p?hd', '720p'),
        ('regex', width + r'900(?:i|p?x?)', '900p'),
        ('regex', width + r'1080i', '1080i'),
        ('regex', width + r'1080p?x?', '1080p'),
        ('regex', width + r'1080(?:p(?:50|60)?x?)', '1080p'),
        ('regex', width + r'1080p?hd', '1080p'),
        ('regex', width + r'2160(?:i|p?x?)', '4K'),
        ('string', '4k', '4K'),
    )
    # Registration order is kept exactly as listed.
    for kind, pattern, size in named_sizes:
        register = rebulk.string if kind == 'string' else rebulk.regex
        register(pattern, value=size)

    # The generic WxH pattern is registered under defaults without the shared solver.
    rebulk.defaults(name="screen_size", validator=seps_surround)
    rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
                 formatter=format_resolution,
                 abbreviations=[dash],
                 tags=['resolution'],
                 conflict_solver=resolution_conflict_solver)

    rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)

    return rebulk
#!/usr/bin/env python # -*- coding: utf-8 -*- """ Date """ from dateutil import parser from rebulk.remodule import re _dsep = r'[-/ \.]' _dsep_bis = r'[-/ \.x]' date_regexps = [ # pylint:disable=consider-using-f-string re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE), # pylint:disable=consider-using-f-string re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE), # pylint:disable=consider-using-f-string re.compile( r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE), # pylint:disable=consider-using-f-string re.compile( r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE), # pylint:disable=consider-using-f-string re.compile( r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE), # pylint:disable=consider-using-f-string re.compile(
class TestYml(object):
    """
    Run tests from yaml files.

    Multiple input strings having same expected results can be chained.
    A leading ``-`` on an expected key negates that expectation; a leading ``-``
    on an input string is forwarded to ``EntryResult`` as its negate flag; a
    leading ``+`` on an input string additionally runs the global-span check.
    """

    options_re = re.compile(r'^([ \+-]+)(.*)')

    files, ids = files_and_ids(filename_predicate)

    @staticmethod
    def set_default(expected, default):
        """
        Copy into ``expected`` every key of ``default`` it does not define yet.

        :param expected: expectation mapping, updated in place
        :param default: mapping of default expectations, may be empty or ``None``
        """
        if default:
            for k, v in default.items():
                if k not in expected:
                    expected[k] = v

    @pytest.mark.parametrize('filename', files, ids=ids)
    def test(self, filename, caplog):
        """
        Check every entry of one yaml file and assert the collected results.

        :param filename: yaml file, relative to ``__location__``
        :param caplog: pytest log-capture fixture
        """
        # NOTE(review): newer pytest exposes this as ``caplog.set_level`` - confirm
        # against the pytest version this suite is pinned to before changing it.
        caplog.setLevel(logging.INFO)
        with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
            # NOTE(review): yaml.load with a custom loader is only safe for trusted
            # input; presumably these are repository fixtures - confirm.
            data = yaml.load(infile, OrderedDictYAMLLoader)
        entries = Results()

        # Chained inputs: an entry without expectation inherits the one of the
        # closest following entry that has one, hence the backwards walk.
        last_expected = None
        for string, expected in reversed(list(data.items())):
            if expected is None:
                data[string] = last_expected
            else:
                last_expected = expected

        default = data.pop('__default__', None)

        for string, expected in data.items():
            TestYml.set_default(expected, default)
            entry = self.check_data(filename, string, expected)
            entries.append(entry)
        entries.assert_ok()

    def check_data(self, filename, string, expected):
        """
        Normalise one entry, check it and log the outcome.

        :param filename: yaml file the entry comes from (used in log messages)
        :param string: input string, possibly prefixed with option markers
        :param expected: expectation mapping
        :return: the entry result, or ``None`` when ``string_predicate`` rejects
            the string
        """
        if six.PY2 and isinstance(string, six.text_type):
            string = string.encode('utf-8')
            # Iterate over a snapshot, values are replaced while looping.
            for k, v in list(expected.items()):
                if isinstance(v, six.text_type):
                    expected[k] = v.encode('utf-8')
        if not isinstance(string, str):
            string = str(string)
        if not string_predicate or string_predicate(string):  # pylint: disable=not-callable
            entry = self.check(string, expected)
            if entry.ok:
                logger.debug('[%s] %s', filename, entry)
            elif entry.warning:
                logger.warning('[%s] %s', filename, entry)
            elif entry.error:
                logger.error('[%s] %s', filename, entry)
                for line in entry.details:
                    logger.error('[%s] %s', filename, ' ' * 4 + line)
            return entry
        # NOTE(review): the caller appends this None to its results - confirm
        # that Results copes with it when a string_predicate is configured.
        return None

    def check(self, string, expected):
        """
        Run guessit on ``string`` and compare the result with ``expected``.

        :param string: input string, possibly prefixed with option markers
        :param expected: expectation mapping; its ``options`` key holds guessit options
        :return: populated ``EntryResult``
        """
        negates, global_, string = self.parse_token_options(string)

        options = expected.get('options')
        if options is None:
            options = {}
        if not isinstance(options, dict):
            options = parse_options(options)
        if 'implicit' not in options:
            options['implicit'] = True
        try:
            result = guessit(string, options)
        except Exception as exc:
            logger.error('[%s] Exception: %s', string, exc)
            raise

        entry = EntryResult(string, negates)

        if global_:
            self.check_global(string, result, entry)

        self.check_expected(result, expected, entry)

        return entry

    def parse_token_options(self, string):
        """
        Split leading option markers from a token.

        :param string: token possibly starting with spaces, ``+`` or ``-``
        :return: ``(negates, global_, string)`` where ``string`` has the markers removed
        """
        matches = self.options_re.search(string)
        negates = False
        global_ = False
        if matches:
            string = matches.group(2)
            markers = matches.group(1)
            negates = '-' in markers
            global_ = '+' in markers
        return negates, global_, string

    def check_global(self, string, result, entry):
        """
        Record an issue on ``entry`` when the matches do not span the whole string.
        """
        global_span = []
        for result_matches in result.matches.values():
            for result_match in result_matches:
                if not global_span:
                    global_span = list(result_match.span)
                else:
                    if global_span[0] > result_match.span[0]:
                        global_span[0] = result_match.span[0]
                    if global_span[1] < result_match.span[1]:
                        global_span[1] = result_match.span[1]
        if global_span and global_span[1] - global_span[0] < len(string):
            entry.others.append("Match is not global")

    def is_same(self, value, expected):
        """
        Compare a result value with its expectation, ignoring order.

        Expected values are converted through ``fromguessit`` when the result
        holds babelfish languages or countries.

        :return: ``True`` when both sides hold the same set of values
        """
        values = set(value) if is_iterable(value) else {value}
        expecteds = set(expected) if is_iterable(expected) else {expected}
        if len(values) != len(expecteds):
            return False
        if not values:
            # Both sides are empty: nothing to convert, and peeking at the first
            # element below would raise StopIteration.
            return True
        first = next(iter(values))
        if isinstance(first, babelfish.Language):
            # pylint: disable=no-member
            expecteds = {babelfish.Language.fromguessit(expected) for expected in expecteds}
        elif isinstance(first, babelfish.Country):
            # pylint: disable=no-member
            expecteds = {babelfish.Country.fromguessit(expected) for expected in expecteds}
        return values == expecteds

    def check_expected(self, result, expected, entry):
        """
        Sort every expectation into ``valid``, ``different`` or ``missing`` on
        ``entry`` and record result keys that were not expected as ``extra``.
        """
        if expected:
            for expected_key, expected_value in expected.items():
                if expected_key and expected_key != 'options' and expected_value is not None:
                    negates_key, _, result_key = self.parse_token_options(expected_key)
                    if result_key in result.keys():
                        # The result is always read through result_key: expected_key
                        # may still carry its '-' marker and is not a result key.
                        if not self.is_same(result[result_key], expected_value):
                            if negates_key:
                                entry.valid.append((expected_key, expected_value))
                            else:
                                entry.different.append(
                                    (expected_key, expected_value, result[result_key]))
                        else:
                            if negates_key:
                                entry.different.append(
                                    (expected_key, expected_value, result[result_key]))
                            else:
                                entry.valid.append((expected_key, expected_value))
                    elif not negates_key:
                        entry.missing.append((expected_key, expected_value))

        for result_key, result_value in result.items():
            if result_key not in expected.keys():
                entry.extra.append((result_key, result_value))
#!/usr/bin/env python # -*- coding: utf-8 -*- """ Date """ from dateutil import parser from rebulk.remodule import re _dsep = r'[-/ \.]' _dsep_bis = r'[-/ \.x]' date_regexps = [ re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE), re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE), re.compile( r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE), re.compile( r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE), re.compile( r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE), re.compile( r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE), re.compile( r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE) ]
#!/usr/bin/env python # -*- coding: utf-8 -*- """ Date """ from dateutil import parser from rebulk.remodule import re _dsep = r'[-/ \.]' _dsep_bis = r'[-/ \.x]' date_regexps = [ re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE), re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE), re.compile(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE), re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE), re.compile(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE), re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE), re.compile(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE)] def valid_year(year): """Check if number is a valid year""" return 1920 <= year < 2030 def _is_int(string): """ Check if the input string is an integer
conflict_solver=lambda match, other: match if other.name in ['episode', 'season'] else '__default__') rebulk.functional(guess_idnumber, name='uuid', conflict_solver=lambda match, other: match if other.name in ['episode', 'season'] else '__default__') return rebulk _DIGIT = 0 _LETTER = 1 _OTHER = 2 _idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})') # 1.0, (0, 0)) def guess_idnumber(string): """ Guess id number function :param string: :type string: :return: :rtype: """ # pylint:disable=invalid-name ret = [] matches = list(_idnum.finditer(string)) for match in matches:
re_ += word re_ += ')' return re_ word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list) numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')' __romanNumeralMap = (('M', 1000), ('CM', 900), ('D', 500), ('CD', 400), ('C', 100), ('XC', 90), ('L', 50), ('XL', 40), ('X', 10), ('IX', 9), ('V', 5), ('IV', 4), ('I', 1)) __romanNumeralPattern = re.compile('^' + roman_numeral + '$') def __parse_roman(value): """ convert Roman numeral to integer :param value: Value to parse :type value: string :return: :rtype: """ if not __romanNumeralPattern.search(value): raise ValueError('Invalid Roman numeral: %s' % value) result = 0
class TestYml(object):
    """
    Yaml-driven tests: every entry of every yaml file becomes one parametrized case.

    Consecutive input strings may share one expected result: an entry without
    expectation inherits the one of the closest following entry that has one.
    A leading ``-`` on an expected key negates that expectation; a leading ``+``
    on an input string additionally runs the global-span check.
    """

    options_re = re.compile(r'^([ +-]+)(.*)')

    def _get_unique_id(self, collection, base_id):
        """
        Return ``base_id``, suffixed with ``-2``, ``-3``... until it is absent
        from ``collection``.
        """
        candidate = base_id
        counter = 2
        while candidate in collection:
            candidate = base_id + "-" + str(counter)
            counter += 1
        return candidate

    def pytest_generate_tests(self, metafunc):
        """
        Parametrize ``yml_test_case`` with one ``(filename, string, expected)``
        tuple per yaml entry.
        """
        if 'yml_test_case' not in metafunc.fixturenames:
            return

        cases = []
        case_ids = []
        seen_ids = set()

        for filename, _ in zip(*files_and_ids()):
            with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
                data = yaml.load(infile, OrderedDictYAMLLoader)

            # Walk backwards so that chained inputs pick up the expectation
            # that follows them.
            inherited = None
            for string, expected in reversed(list(data.items())):
                if expected is None:
                    data[string] = inherited
                else:
                    inherited = expected

            default = data.pop('__default__', None)

            for string, expected in data.items():
                TestYml.set_default(expected, default)
                string = TestYml.fix_encoding(string)

                cases.append((filename, string, expected))
                case_id = self._get_unique_id(seen_ids, '[' + filename + '] ' + str(string))
                seen_ids.add(case_id)
                case_ids.append(case_id)

        metafunc.parametrize('yml_test_case', cases, ids=case_ids)

    @staticmethod
    def set_default(expected, default):
        """
        Copy into ``expected`` every key of ``default`` it does not define yet.
        """
        if not default:
            return
        for key, fallback in default.items():
            if key not in expected:
                expected[key] = fallback

    @classmethod
    def fix_encoding(cls, string):
        """
        Return ``string`` unchanged when it is a ``str``, its ``str()`` form otherwise.
        """
        return string if isinstance(string, str) else str(string)

    def test_entry(self, yml_test_case):
        """
        Check one yaml entry and fail when its result is flagged as an error.
        """
        filename, string, expected = yml_test_case
        outcome = self.check_data(filename, string, expected)
        assert not outcome.error

    def check_data(self, filename, string, expected):
        """
        Check one entry and log the outcome at a level matching its status.

        :return: the entry result
        """
        entry = self.check(string, expected)
        if entry.ok:
            logger.debug('[%s] %s', filename, entry)
        elif entry.warning:
            logger.warning('[%s] %s', filename, entry)
        elif entry.error:
            logger.error('[%s] %s', filename, entry)
            for detail in entry.details:
                logger.error('[%s] %s', filename, ' ' * 4 + detail)
        return entry

    def check(self, string, expected):
        """
        Run guessit on ``string`` and compare the result with ``expected``.

        :param string: input string, possibly prefixed with option markers
        :param expected: expectation mapping; its ``options`` key holds guessit options
        :return: populated ``EntryResult``
        """
        negates, global_, string = self.parse_token_options(string)

        raw_options = expected.get('options')
        if raw_options is None:
            options = {}
        elif isinstance(raw_options, dict):
            options = raw_options
        else:
            options = parse_options(raw_options)

        try:
            result = guessit(string, options)
        except Exception as exc:
            logger.error('[%s] Exception: %s', string, exc)
            raise exc

        entry = EntryResult(string, negates)
        if global_:
            self.check_global(string, result, entry)
        self.check_expected(result, expected, entry)
        return entry

    def parse_token_options(self, string):
        """
        Split leading option markers from a token.

        :return: ``(negates, global_, string)`` where ``string`` has the markers removed
        """
        found = self.options_re.search(string)
        if not found:
            return False, False, string
        markers = found.group(1)
        return '-' in markers, '+' in markers, found.group(2)

    def check_global(self, string, result, entry):
        """
        Record an issue on ``entry`` when the matches do not span the whole string.
        """
        spans = [
            found.span
            for group in result.matches.values()
            for found in group
        ]
        if not spans:
            return
        start = min(span[0] for span in spans)
        end = max(span[1] for span in spans)
        if end - start < len(string):
            entry.others.append("Match is not global")

    def is_same(self, value, expected):
        """
        Compare a result value with its expectation, ignoring order.

        Expected values are converted through ``fromguessit`` when the result
        holds babelfish languages or countries.
        """
        values = set(value) if is_iterable(value) else {value}
        expecteds = set(expected) if is_iterable(expected) else {expected}
        if len(values) != len(expecteds):
            return False

        sample = next(iter(values))
        if isinstance(sample, babelfish.Language):
            # pylint: disable=no-member
            expecteds = {babelfish.Language.fromguessit(item) for item in expecteds}
        elif isinstance(sample, babelfish.Country):
            # pylint: disable=no-member
            expecteds = {babelfish.Country.fromguessit(item) for item in expecteds}
        return values == expecteds

    def check_expected(self, result, expected, entry):
        """
        Sort every expectation into ``valid``, ``different`` or ``missing`` on
        ``entry`` and record result keys that were not expected as ``extra``.
        """
        if expected:
            for raw_key, wanted in expected.items():
                if not raw_key or raw_key == 'options' or wanted is None:
                    continue

                negated, _, key = self.parse_token_options(raw_key)
                if key not in result.keys():
                    if not negated:
                        entry.missing.append((raw_key, wanted))
                    continue

                # An expectation holds when the values agree, or when they
                # disagree and the key was negated.
                agrees = self.is_same(result[key], wanted)
                if bool(agrees) != negated:
                    entry.valid.append((raw_key, wanted))
                else:
                    entry.different.append((raw_key, wanted, result[key]))

        for found_key, found_value in result.items():
            if found_key not in expected.keys():
                entry.extra.append((found_key, found_value))
('M', 1000), ('CM', 900), ('D', 500), ('CD', 400), ('C', 100), ('XC', 90), ('L', 50), ('XL', 40), ('X', 10), ('IX', 9), ('V', 5), ('IV', 4), ('I', 1) ) __romanNumeralPattern = re.compile('^' + roman_numeral + '$') def __parse_roman(value): """ convert Roman numeral to integer :param value: Value to parse :type value: string :return: :rtype: """ if not __romanNumeralPattern.search(value): raise ValueError('Invalid Roman numeral: %s' % value) result = 0