Beispiel #1
0
class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    """
    Reverse converter resolving free-form language names to
    ``(alpha3, country, script)`` triples.
    """

    # "language(country)" and "language-country" notations
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self):
        # Flatten the SYN table into a lowercase synonym -> triple lookup
        self.guessit_exceptions = {
            synonym.lower(): (alpha3, country, None)
            for (alpha3, country), synonyms in SYN.items()
            for synonym in synonyms
        }

    @property
    def codes(self):  # pylint: disable=missing-docstring
        """
        Union of the codes of the babelfish converters used by ``reverse``
        and of the registered synonyms.
        """
        known = babelfish.language_converters['alpha3b'].codes
        for converter_name in ('alpha2', 'name', 'opensubtitles'):
            known = known | babelfish.language_converters[converter_name].codes
        known = known | babelfish.country_converters['name'].codes
        return known | frozenset(self.guessit_exceptions.keys())

    def convert(self, alpha3, country=None, script=None):
        """
        Build a babelfish Language from its parts and return its string form.
        """
        language = babelfish.Language(alpha3, country, script)
        return str(language)

    def reverse(self, name):  # pylint:disable=arguments-differ
        """
        Resolve ``name`` to a ``(alpha3, country, script)`` tuple.

        :param name: language name or code, optionally carrying a country
        :raise babelfish.LanguageReverseError: when nothing recognizes the name
        """
        converter = GuessitConverter
        # The country notations are matched against the name as given
        with_country = converter._with_country_regexp.match(name)
        if not with_country:
            with_country = converter._with_country_regexp2.match(name)

        if with_country:
            language_part, country_part = with_country.groups()
            lang = babelfish.Language.fromguessit(language_part.strip())
            lang.country = babelfish.Country.fromguessit(country_part.strip())
            alpha3 = lang.alpha3
            country_code = lang.country.alpha2 if lang.country else None
            return alpha3, country_code, lang.script or None

        key = name.lower()

        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        if key in self.guessit_exceptions:
            return self.guessit_exceptions[key]

        factories = (
            babelfish.Language,
            babelfish.Language.fromalpha3b,
            babelfish.Language.fromalpha2,
            babelfish.Language.fromname,
            babelfish.Language.fromopensubtitles,
        )
        for factory in factories:
            try:
                language = factory(key)
                return language.alpha3, language.country, language.script
            except (ValueError, babelfish.LanguageReverseError):
                continue

        raise babelfish.LanguageReverseError(key)
Beispiel #2
0
class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    """
    Reverse converter resolving free-form language names to
    ``(alpha3, country, script)`` triples, with configurable synonyms.
    """

    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self, synonyms):
        """
        :param synonyms: mapping of ``alpha3`` or ``alpha3_country`` codes to
            the list of names that must resolve to that code
        """
        exceptions = {}
        for code, names in synonyms.items():
            # "xxx_YY" carries a country part, a bare code does not
            alpha3, country = code.split('_') if '_' in code else (code, None)
            exceptions.update(
                (synonym.lower(), (alpha3, country, None)) for synonym in names)
        self.guessit_exceptions = exceptions

    @property
    def codes(self):  # pylint: disable=missing-docstring
        """
        Union of the codes of several babelfish converters and of the
        registered synonyms.
        """
        known = babelfish.language_converters['alpha3b'].codes
        for converter_name in ('alpha2', 'name', 'opensubtitles'):
            known = known | babelfish.language_converters[converter_name].codes
        known = known | babelfish.country_converters['name'].codes
        return known | frozenset(self.guessit_exceptions.keys())

    def convert(self, alpha3, country=None, script=None):
        """
        Build a babelfish Language from its parts and return its string form.
        """
        language = babelfish.Language(alpha3, country, script)
        return str(language)

    def reverse(self, name):  # pylint:disable=arguments-renamed
        """
        Resolve ``name`` to a ``(alpha3, country, script)`` tuple.

        :param name: language name or code, matched case-insensitively
        :raise babelfish.LanguageReverseError: when nothing recognizes the name
        """
        key = name.lower()

        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        if key in self.guessit_exceptions:
            return self.guessit_exceptions[key]

        factories = (
            babelfish.Language,
            babelfish.Language.fromalpha3b,
            babelfish.Language.fromalpha2,
            babelfish.Language.fromname,
            babelfish.Language.fromopensubtitles,
            babelfish.Language.fromietf,
        )
        for factory in factories:
            try:
                language = factory(key)
                return language.alpha3, language.country, language.script
            except (ValueError, babelfish.LanguageReverseError):
                continue

        raise babelfish.LanguageReverseError(key)
Beispiel #3
0
def screen_size():
    """
    Builder for rebulk object.
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    digits = re.compile(r'\d+')

    def conflict_solver(match, other):
        """
        Conflict solver for most screen_size.
        """
        if other.name != 'screen_size':
            return '__default__'
        if 'resolution' in other.tags:
            # "720 x 432" matches both the 720p pattern and the resolution
            # pattern: compare the last number of this match with the other value
            last_number = digits.findall(match.raw)[-1]
            if other.value.startswith(last_number):
                return match
        return other

    def resolution_conflict_solver(match, other):
        """
        Conflict solver for the WIDTHxHEIGHT pattern.
        """
        if other.name == 'screen_size':
            return '__default__'
        return other

    def format_resolution(value):
        """
        Join every number found in the raw value with 'x'.
        """
        return 'x'.join(digits.findall(value))

    rebulk = Rebulk().string_defaults(ignore_case=True)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="screen_size",
                    validator=seps_surround,
                    conflict_solver=conflict_solver)

    # Optional "<width>x" / "<width>*" prefix shared by every height pattern
    width_prefix = r'(?:\d{3,}(?:x|\*))?'
    height_patterns = (
        ('360(?:i)', '360i'),
        ('360(?:p?x?)', '360p'),
        ('368(?:p?x?)', '368p'),
        ('480(?:i)', '480i'),
        ('480(?:p?x?)', '480p'),
        ('576(?:i)', '576i'),
        ('576(?:p?x?)', '576p'),
        ('720(?:p?(?:50|60)?x?)', '720p'),
        ('720(?:p(?:50|60)?x?)', '720p'),
        ('720p?hd', '720p'),
        ('900(?:i)', '900i'),
        ('900(?:p?x?)', '900p'),
        ('1080i', '1080i'),
        ('1080p?x?', '1080p'),
        ('1080(?:p(?:50|60)?x?)', '1080p'),
        ('1080p?hd', '1080p'),
        ('2160(?:p?x?)', '2160p'),
    )
    # Registration order is kept exactly as declared
    for height_pattern, label in height_patterns:
        rebulk.regex(width_prefix + height_pattern, value=label)
    rebulk.string('4k', value='2160p')
    rebulk.regex(width_prefix + '4320(?:p?x?)', value='4320p')

    rebulk.defaults(name="screen_size", validator=seps_surround)
    rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
                 formatter=format_resolution,
                 abbreviations=[dash],
                 tags=['resolution'],
                 conflict_solver=resolution_conflict_solver)

    rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)

    return rebulk
Beispiel #4
0
class Quantity(object):
    """
    A magnitude paired with its units.
    """

    # Leading integer or decimal magnitude, then an optional non-digit unit part
    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')

    def __init__(self, magnitude, units):
        self.magnitude, self.units = magnitude, units

    @classmethod
    @abstractmethod
    def parse_units(cls, value):
        """
        Turn the raw unit text into a proper unit notation.

        Must be overridden; this base implementation always raises.
        """
        raise NotImplementedError

    @classmethod
    def fromstring(cls, string):
        """
        Build a quantity from its textual form.

        :param string: text starting with a magnitude, optionally followed by units
        :return: instance of ``cls`` with an int magnitude when the text parses
            as one, a float magnitude otherwise
        """
        fields = cls.parser_re.match(string).groupdict()
        raw_magnitude = fields['magnitude']
        try:
            number = int(raw_magnitude)
        except ValueError:
            # Not a plain integer: the pattern then guarantees a decimal number
            number = float(raw_magnitude)
        return cls(number, cls.parse_units(fields['units']))

    def __hash__(self):
        # Hash on the string form so that it agrees with equality against str
        return hash(str(self))

    def __eq__(self, other):
        if isinstance(other, str):
            return str(self) == other
        if isinstance(other, self.__class__):
            return self.magnitude == other.magnitude and self.units == other.units
        return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return f'<{self.__class__.__name__} [{self}]>'

    def __str__(self):
        return f'{self.magnitude}{self.units}'
Beispiel #5
0
def screen_size():
    """
    Builder for rebulk object.
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    number_re = re.compile(r'\d+')

    def conflict_solver(match, other):
        """
        Conflict solver for most screen_size.
        """
        if other.name != 'screen_size':
            return '__default__'
        # "720 x 432" matches both the 720p pattern and the resolution pattern:
        # keep this match when the other value starts with its last number
        if 'resolution' in other.tags and other.value.startswith(
                number_re.findall(match.raw)[-1]):
            return match
        return other

    def keep_default_against_screen_size(match, other):
        """
        Conflict solver for the WIDTHxHEIGHT pattern.
        """
        return '__default__' if other.name == 'screen_size' else other

    def join_numbers(value):
        """
        Join every number found in the raw value with 'x'.
        """
        return 'x'.join(number_re.findall(value))

    rebulk = Rebulk().string_defaults(ignore_case=True)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver)

    # (pattern, reported value) pairs, registered in this exact order
    named_sizes = (
        (r"(?:\d{3,}(?:x|\*))?360(?:i|p?x?)", "360p"),
        (r"(?:\d{3,}(?:x|\*))?368(?:i|p?x?)", "368p"),
        (r"(?:\d{3,}(?:x|\*))?480(?:i|p?x?)", "480p"),
        (r"(?:\d{3,}(?:x|\*))?576(?:i|p?x?)", "576p"),
        (r"(?:\d{3,}(?:x|\*))?720(?:i|p?(?:50|60)?x?)", "720p"),
        (r"(?:\d{3,}(?:x|\*))?720(?:p(?:50|60)?x?)", "720p"),
        (r"(?:\d{3,}(?:x|\*))?720p?hd", "720p"),
        (r"(?:\d{3,}(?:x|\*))?900(?:i|p?x?)", "900p"),
        (r"(?:\d{3,}(?:x|\*))?1080i", "1080i"),
        (r"(?:\d{3,}(?:x|\*))?1080p?x?", "1080p"),
        (r"(?:\d{3,}(?:x|\*))?1080(?:p(?:50|60)?x?)", "1080p"),
        (r"(?:\d{3,}(?:x|\*))?1080p?hd", "1080p"),
        (r"(?:\d{3,}(?:x|\*))?2160(?:i|p?x?)", "4K"),
    )
    for pattern, label in named_sizes:
        rebulk.regex(pattern, value=label)
    rebulk.string('4k', value='4K')

    rebulk.defaults(name="screen_size", validator=seps_surround)
    rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
                 formatter=join_numbers,
                 abbreviations=[dash],
                 tags=['resolution'],
                 conflict_solver=keep_default_against_screen_size)

    rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)

    return rebulk
Beispiel #6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Date
"""
from dateutil import parser

from rebulk.remodule import re

# Character class for a separator between date components:
# hyphen, slash, space or dot
_dsep = r'[-/ \.]'
# Same separator set, additionally accepting a literal 'x'
_dsep_bis = r'[-/ \.x]'

date_regexps = [
    # pylint:disable=consider-using-f-string
    re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE),
    # pylint:disable=consider-using-f-string
    re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE),
    # pylint:disable=consider-using-f-string
    re.compile(
        r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' %
        (_dsep, _dsep), re.IGNORECASE),
    # pylint:disable=consider-using-f-string
    re.compile(
        r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' %
        (_dsep, _dsep), re.IGNORECASE),
    # pylint:disable=consider-using-f-string
    re.compile(
        r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' %
        (_dsep_bis, _dsep), re.IGNORECASE),
    # pylint:disable=consider-using-f-string
    re.compile(
Beispiel #7
0
class TestYml(object):
    """
    Run tests from yaml files.

    Multiple input strings having same expected results can be chained.
    An input string or an expected key may start with option markers
    (see ``options_re``): ``-`` negates the check and ``+`` requires the
    matches to span the whole input string.
    """

    # Optional leading run of spaces, '+' and '-' markers, then the payload
    options_re = re.compile(r'^([ \+-]+)(.*)')

    files, ids = files_and_ids(filename_predicate)

    @staticmethod
    def set_default(expected, default):
        """
        Fill ``expected`` with the entries of ``default`` it does not define.

        :param expected: expected values of one entry, updated in place
        :param default: default expected values, may be empty or None
        """
        if default:
            for k, v in default.items():
                if k not in expected:
                    expected[k] = v

    @pytest.mark.parametrize('filename', files, ids=ids)
    def test(self, filename, caplog):
        """
        Check every entry of one yaml file, then assert on the collected results.

        :param filename: yaml file name, relative to ``__location__``
        :param caplog: log capture fixture
        """
        caplog.setLevel(logging.INFO)
        with open(os.path.join(__location__, filename), 'r',
                  encoding='utf-8') as infile:
            data = yaml.load(infile, OrderedDictYAMLLoader)
        entries = Results()

        # Chained inputs: an entry without expected values inherits those of
        # the next entry that defines some, hence the reversed walk.
        last_expected = None
        for string, expected in reversed(list(data.items())):
            if expected is None:
                data[string] = last_expected
            else:
                last_expected = expected

        default = None
        try:
            default = data['__default__']
            del data['__default__']
        except KeyError:
            pass

        for string, expected in data.items():
            TestYml.set_default(expected, default)
            entry = self.check_data(filename, string, expected)
            # Inputs rejected by string_predicate produce no entry
            if entry is not None:
                entries.append(entry)
        entries.assert_ok()

    def check_data(self, filename, string, expected):
        """
        Check one input string and log the outcome.

        :param filename: yaml file name, used as log prefix
        :param string: input string, possibly prefixed with option markers
        :param expected: expected values for this input
        :return: the entry result, or None when ``string_predicate`` rejects
            the input
        """
        if six.PY2 and isinstance(string, six.text_type):
            string = string.encode('utf-8')
            converts = []
            for k, v in expected.items():
                if isinstance(v, six.text_type):
                    v = v.encode('utf-8')
                    converts.append((k, v))
            for k, v in converts:
                expected[k] = v
        if not isinstance(string, str):
            string = str(string)
        if string_predicate and not string_predicate(string):  # pylint: disable=not-callable
            # Returning here keeps ``entry`` from being read while unbound
            return None
        entry = self.check(string, expected)
        if entry.ok:
            logger.debug('[%s] %s', filename, entry)
        elif entry.warning:
            logger.warning('[%s] %s', filename, entry)
        elif entry.error:
            logger.error('[%s] %s', filename, entry)
            for line in entry.details:
                logger.error('[%s] %s', filename, ' ' * 4 + line)
        return entry

    def check(self, string, expected):
        """
        Run guessit on one input and compare the result with ``expected``.

        :param string: input string, possibly prefixed with option markers
        :param expected: expected values; its ``options`` key, when present,
            is passed to guessit (parsed first when it is not a dict)
        :return: EntryResult describing the comparison
        """
        negates, global_, string = self.parse_token_options(string)

        options = expected.get('options')
        if options is None:
            options = {}
        if not isinstance(options, dict):
            options = parse_options(options)
        if 'implicit' not in options:
            options['implicit'] = True
        try:
            result = guessit(string, options)
        except Exception as exc:
            logger.error('[%s] Exception: %s', string, exc)
            raise exc

        entry = EntryResult(string, negates)

        if global_:
            self.check_global(string, result, entry)

        self.check_expected(result, expected, entry)

        return entry

    def parse_token_options(self, string):
        """
        Split leading option markers from a string.

        :param string: input string or expected key
        :return: ``(negates, global_, string)`` where ``negates`` is True when
            a '-' marker is present, ``global_`` when a '+' marker is present,
            and ``string`` is the text following the markers
        """
        matches = self.options_re.search(string)
        negates = False
        global_ = False
        if matches:
            string = matches.group(2)
            for opt in matches.group(1):
                if '-' in opt:
                    negates = True
                if '+' in opt:
                    global_ = True
        return negates, global_, string

    def check_global(self, string, result, entry):
        """
        Record in ``entry.others`` when the matches do not span the whole string.

        :param string: input string without its option markers
        :param result: guessit result exposing ``matches``
        :param entry: entry result to update
        """
        global_span = []
        for result_matches in result.matches.values():
            for result_match in result_matches:
                if not global_span:
                    global_span = list(result_match.span)
                else:
                    if global_span[0] > result_match.span[0]:
                        global_span[0] = result_match.span[0]
                    if global_span[1] < result_match.span[1]:
                        global_span[1] = result_match.span[1]
        if global_span and global_span[1] - global_span[0] < len(string):
            entry.others.append("Match is not global")

    def is_same(self, value, expected):
        """
        Compare a result value with an expected value, ignoring order.

        Expected values are converted with ``fromguessit`` when the result
        holds babelfish Language or Country objects.

        :return: True when both sides hold the same set of values
        """
        values = set(value) if is_iterable(value) else {value}
        expecteds = set(expected) if is_iterable(expected) else {expected}
        if len(values) != len(expecteds):
            return False
        if isinstance(next(iter(values)), babelfish.Language):
            # pylint: disable=no-member
            expecteds = {
                babelfish.Language.fromguessit(expected)
                for expected in expecteds
            }
        elif isinstance(next(iter(values)), babelfish.Country):
            # pylint: disable=no-member
            expecteds = {
                babelfish.Country.fromguessit(expected)
                for expected in expecteds
            }
        return values == expecteds

    def check_expected(self, result, expected, entry):
        """
        Sort every expected key into valid / different / missing and every
        unexpected result key into extra.

        :param result: guessit result
        :param expected: expected values; the ``options`` key and None values
            are ignored
        :param entry: entry result to update
        """
        if expected:
            for expected_key, expected_value in expected.items():
                if expected_key and expected_key != 'options' and expected_value is not None:
                    negates_key, _, result_key = self.parse_token_options(
                        expected_key)
                    if result_key in result.keys():
                        # The result is indexed by the key stripped of its
                        # option markers; the raw expected key would raise
                        # KeyError for prefixed keys.
                        if not self.is_same(result[result_key],
                                            expected_value):
                            if negates_key:
                                entry.valid.append(
                                    (expected_key, expected_value))
                            else:
                                entry.different.append(
                                    (expected_key, expected_value,
                                     result[result_key]))
                        else:
                            if negates_key:
                                entry.different.append(
                                    (expected_key, expected_value,
                                     result[result_key]))
                            else:
                                entry.valid.append(
                                    (expected_key, expected_value))
                    elif not negates_key:
                        entry.missing.append((expected_key, expected_value))

        for result_key, result_value in result.items():
            if result_key not in expected.keys():
                entry.extra.append((result_key, result_value))
Beispiel #8
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Date
"""
from dateutil import parser

from rebulk.remodule import re

# Separator between date components: hyphen, slash, space or dot
_dsep = r'[-/ \.]'
# Same separator set, additionally accepting a literal 'x'
_dsep_bis = r'[-/ \.x]'

# Case-insensitive date patterns; group 1 always holds the whole date text
date_regexps = [
    # 8 then 6 consecutive digits, enclosed by separators
    re.compile(_dsep + r'((\d{8}))' + _dsep, re.IGNORECASE),
    re.compile(_dsep + r'((\d{6}))' + _dsep, re.IGNORECASE),
    # Three separated numeric components, not adjacent to other digits
    re.compile(
        r'(?:^|[^\d])((\d{2})' + _dsep + r'(\d{1,2})' + _dsep
        + r'(\d{1,2}))(?:$|[^\d])',
        re.IGNORECASE),
    re.compile(
        r'(?:^|[^\d])((\d{1,2})' + _dsep + r'(\d{1,2})' + _dsep
        + r'(\d{2}))(?:$|[^\d])',
        re.IGNORECASE),
    re.compile(
        r'(?:^|[^\d])((\d{4})' + _dsep_bis + r'(\d{1,2})' + _dsep
        + r'(\d{1,2}))(?:$|[^\d])',
        re.IGNORECASE),
    re.compile(
        r'(?:^|[^\d])((\d{1,2})' + _dsep + r'(\d{1,2})' + _dsep_bis
        + r'(\d{4}))(?:$|[^\d])',
        re.IGNORECASE),
    # Day with optional ordinal suffix, a 3-10 letter word, 4 digits
    re.compile(
        r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?' + _dsep + r'(?:[a-z]{3,10})'
        + _dsep + r'\d{4}))(?:$|[^\d])',
        re.IGNORECASE),
]
Beispiel #9
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Date
"""
from dateutil import parser

from rebulk.remodule import re

# Separator between date components: hyphen, slash, space or dot
_dsep = r'[-/ \.]'
# Same separator set, additionally accepting a literal 'x'
_dsep_bis = r'[-/ \.x]'

# Every pattern is compiled case-insensitively; group 1 holds the whole date
date_regexps = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        _dsep + r'((\d{8}))' + _dsep,
        _dsep + r'((\d{6}))' + _dsep,
        r'(?:^|[^\d])((\d{2})' + _dsep + r'(\d{1,2})' + _dsep + r'(\d{1,2}))(?:$|[^\d])',
        r'(?:^|[^\d])((\d{1,2})' + _dsep + r'(\d{1,2})' + _dsep + r'(\d{2}))(?:$|[^\d])',
        r'(?:^|[^\d])((\d{4})' + _dsep_bis + r'(\d{1,2})' + _dsep + r'(\d{1,2}))(?:$|[^\d])',
        r'(?:^|[^\d])((\d{1,2})' + _dsep + r'(\d{1,2})' + _dsep_bis + r'(\d{4}))(?:$|[^\d])',
        r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?' + _dsep + r'(?:[a-z]{3,10})' + _dsep
        + r'\d{4}))(?:$|[^\d])',
    )
]


def valid_year(year):
    """
    Tell whether ``year`` lies in the accepted range, 1920 included and
    2030 excluded.
    """
    lowest, limit = 1920, 2030
    return lowest <= year and year < limit


def _is_int(string):
    """
    Check if the input string is an integer
Beispiel #10
0
                 conflict_solver=lambda match, other: match
                 if other.name in ['episode', 'season']
                 else '__default__')

    rebulk.functional(guess_idnumber, name='uuid',
                      conflict_solver=lambda match, other: match
                      if other.name in ['episode', 'season']
                      else '__default__')
    return rebulk


# Character categories
_DIGIT, _LETTER, _OTHER = range(3)

# Candidate id: at least 20 consecutive ASCII letters, digits or hyphens,
# captured as the "uuid" group
_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')


def guess_idnumber(string):
    """
    Guess id number function
    :param string:
    :type string:
    :return:
    :rtype:
    """
    # pylint:disable=invalid-name
    ret = []

    matches = list(_idnum.finditer(string))
    for match in matches:
Beispiel #11
0
            re_ += word
    re_ += ')'
    return re_


# Pattern for numerals written as words, built from the three word lists
word_numeral = __build_word_numeral(
    english_word_numeral_list,
    french_word_numeral_list,
    french_alt_word_numeral_list,
)

# Non-capturing alternation of the digital, roman and word numeral patterns
numeral = '(?:' + '|'.join((digital_numeral, roman_numeral, word_numeral)) + ')'

# Roman symbols with their integer values, in decreasing value order
__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1),
)

# The roman numeral pattern anchored on both ends
__romanNumeralPattern = re.compile('^' + roman_numeral + '$')


def __parse_roman(value):
    """
    convert Roman numeral to integer

    :param value: Value to parse
    :type value: string
    :return:
    :rtype:
    """
    if not __romanNumeralPattern.search(value):
        raise ValueError('Invalid Roman numeral: %s' % value)

    result = 0
Beispiel #12
0
                 conflict_solver=lambda match, other: match
                 if other.name in ['episode', 'season']
                 else '__default__')

    rebulk.functional(guess_idnumber, name='uuid',
                      conflict_solver=lambda match, other: match
                      if other.name in ['episode', 'season']
                      else '__default__')
    return rebulk


# Character categories
_DIGIT, _LETTER, _OTHER = 0, 1, 2

# Candidate id number: a run of 20 or more ASCII alphanumerics or hyphens,
# exposed through the named group "uuid"
_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')


def guess_idnumber(string):
    """
    Guess id number function
    :param string:
    :type string:
    :return:
    :rtype:
    """
    # pylint:disable=invalid-name
    ret = []

    matches = list(_idnum.finditer(string))
    for match in matches:
Beispiel #13
0
class TestYml(object):
    """
    Run tests from yaml files, one generated test case per yaml entry.

    Multiple input strings having same expected results can be chained.
    An input string or an expected key may start with option markers
    (see ``options_re``): ``-`` negates the check and ``+`` requires the
    matches to span the whole input string.
    """

    # Optional leading run of spaces, '+' and '-' markers, then the payload
    options_re = re.compile(r'^([ +-]+)(.*)')

    def _get_unique_id(self, collection, base_id):
        """
        Return ``base_id``, or ``base_id`` with the first free ``-N`` suffix
        (N starting at 2) when it is already in ``collection``.
        """
        candidate = base_id
        counter = 2
        while candidate in collection:
            candidate = base_id + "-" + str(counter)
            counter += 1
        return candidate

    def pytest_generate_tests(self, metafunc):
        """
        Parametrize ``yml_test_case`` with one ``(filename, string, expected)``
        tuple per entry of every yaml file.
        """
        if 'yml_test_case' not in metafunc.fixturenames:
            return

        cases = []
        case_ids = []
        seen_ids = set()

        for filename, _ in zip(*files_and_ids()):
            yaml_path = os.path.join(__location__, filename)
            with open(yaml_path, 'r', encoding='utf-8') as infile:
                data = yaml.load(infile, OrderedDictYAMLLoader)

            # Chained inputs: an entry without expected values inherits those
            # of the next entry that defines some, hence the reversed walk.
            inherited = None
            for string, expected in reversed(list(data.items())):
                if expected is None:
                    data[string] = inherited
                else:
                    inherited = expected

            default = data.pop('__default__', None)

            for string, expected in data.items():
                TestYml.set_default(expected, default)
                string = TestYml.fix_encoding(string)
                cases.append((filename, string, expected))

                case_id = self._get_unique_id(
                    seen_ids, '[' + filename + '] ' + str(string))
                seen_ids.add(case_id)
                case_ids.append(case_id)

        metafunc.parametrize('yml_test_case', cases, ids=case_ids)

    @staticmethod
    def set_default(expected, default):
        """
        Fill ``expected`` with the entries of ``default`` it does not define.
        """
        if not default:
            return
        for key, value in default.items():
            if key not in expected:
                expected[key] = value

    @classmethod
    def fix_encoding(cls, string):
        """
        Return ``string`` unchanged when it is a str, its str() form otherwise.
        """
        return string if isinstance(string, str) else str(string)

    def test_entry(self, yml_test_case):
        """
        Check one generated case and fail when its result is in error.
        """
        filename, string, expected = yml_test_case
        outcome = self.check_data(filename, string, expected)
        assert not outcome.error

    def check_data(self, filename, string, expected):
        """
        Check one input string, log the outcome and return the entry result.
        """
        entry = self.check(string, expected)
        if entry.ok:
            logger.debug('[%s] %s', filename, entry)
        elif entry.warning:
            logger.warning('[%s] %s', filename, entry)
        elif entry.error:
            logger.error('[%s] %s', filename, entry)
            for detail in entry.details:
                logger.error('[%s] %s', filename, ' ' * 4 + detail)
        return entry

    def check(self, string, expected):
        """
        Run guessit on one input and compare the result with ``expected``.

        The ``options`` key of ``expected``, when present, is passed to
        guessit (parsed first when it is not a dict).
        """
        negates, global_, string = self.parse_token_options(string)

        options = expected.get('options')
        if options is None:
            options = {}
        elif not isinstance(options, dict):
            options = parse_options(options)

        try:
            result = guessit(string, options)
        except Exception as exc:
            logger.error('[%s] Exception: %s', string, exc)
            raise exc

        entry = EntryResult(string, negates)
        if global_:
            self.check_global(string, result, entry)
        self.check_expected(result, expected, entry)
        return entry

    def parse_token_options(self, string):
        """
        Split leading option markers from a string.

        :return: ``(negates, global_, string)`` where ``negates`` is True when
            a '-' marker is present, ``global_`` when a '+' marker is present,
            and ``string`` is the text following the markers
        """
        found = self.options_re.search(string)
        if not found:
            return False, False, string
        markers, remainder = found.groups()
        return '-' in markers, '+' in markers, remainder

    def check_global(self, string, result, entry):
        """
        Record in ``entry.others`` when the matches do not span the whole string.
        """
        spans = [
            found.span
            for group in result.matches.values()
            for found in group
        ]
        if not spans:
            return
        start = min(span[0] for span in spans)
        end = max(span[1] for span in spans)
        if end - start < len(string):
            entry.others.append("Match is not global")

    def is_same(self, value, expected):
        """
        Compare a result value with an expected value, ignoring order.

        Expected values are converted with ``fromguessit`` when the result
        holds babelfish Language or Country objects.
        """
        values = set(value) if is_iterable(value) else {value}
        expecteds = set(expected) if is_iterable(expected) else {expected}
        if len(values) != len(expecteds):
            return False

        sample = next(iter(values))
        if isinstance(sample, babelfish.Language):
            # pylint: disable=no-member
            expecteds = {
                babelfish.Language.fromguessit(item) for item in expecteds
            }
        elif isinstance(sample, babelfish.Country):
            # pylint: disable=no-member
            expecteds = {
                babelfish.Country.fromguessit(item) for item in expecteds
            }
        return values == expecteds

    def check_expected(self, result, expected, entry):
        """
        Sort every expected key into valid / different / missing and every
        unexpected result key into extra.

        The ``options`` key and None values of ``expected`` are ignored.
        """
        if expected:
            for expected_key, expected_value in expected.items():
                if not expected_key or expected_key == 'options' or expected_value is None:
                    continue

                negates_key, _, result_key = self.parse_token_options(
                    expected_key)
                if result_key not in result.keys():
                    if not negates_key:
                        entry.missing.append((expected_key, expected_value))
                    continue

                actual = result[result_key]
                same = self.is_same(actual, expected_value)
                # A negated key is valid when the values differ, a regular
                # key when they are the same.
                if same != negates_key:
                    entry.valid.append((expected_key, expected_value))
                else:
                    entry.different.append(
                        (expected_key, expected_value, actual))

        for result_key, result_value in result.items():
            if result_key not in expected.keys():
                entry.extra.append((result_key, result_value))
Beispiel #14
0
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

# The roman numeral pattern anchored on both ends
__romanNumeralPattern = re.compile(''.join(('^', roman_numeral, '$')))


def __parse_roman(value):
    """
    convert Roman numeral to integer

    :param value: Value to parse
    :type value: string
    :return:
    :rtype:
    """
    if not __romanNumeralPattern.search(value):
        raise ValueError('Invalid Roman numeral: %s' % value)

    result = 0