Exemple #1
0
 def __init__(self, *a, **kw):
     """Initialise the locale and precompute extra currency patterns.

     Registers ``MONEY_AMOUNT_FORMAT`` under the ``'amount_only'`` key of
     ``currency_formats`` and builds ``currency_delta_pattern``: a variant
     of the standard currency pattern whose positive half carries an
     explicit plus sign.
     """
     super(Locale, self).__init__(*a, **kw)
     self.currency_formats['amount_only'] = MONEY_AMOUNT_FORMAT
     standard = self.currency_formats['standard'].pattern
     minus = self.number_symbols.get('minusSign', '-')
     plus = self.number_symbols.get('plusSign', '+')
     if ';' not in standard:
         # Only one sub-pattern is given: prefix it with each sign.
         combined = '{0}{2};{1}{2}'.format(plus, minus, standard)
     else:
         positive, negative = standard.split(';')
         # Sanity checks on the locale data: the negative half is expected
         # to be the longer one and to contain the minus sign.
         assert len(negative) > len(positive)
         assert minus in negative
         # The positive half is derived from the negative one by swapping
         # the sign, so both halves share the same layout.
         combined = '%s;%s' % (negative.replace(minus, plus), negative)
     self.currency_delta_pattern = parse_pattern(combined)
Exemple #2
0
def test_parse_pattern_negative():
    """Check the (positive, negative) prefix/suffix pairs of parsed patterns."""
    cases = [
        # No negative format specified
        (u'¤#,##0.00', (u'¤', u'-¤'), (u'', u'')),
        # Negative format is specified
        (u'¤#,##0.00;(¤#,##0.00)', (u'¤', u'(¤'), (u'', u')')),
        # Negative sign is a suffix
        (u'¤ #,##0.00;¤ #,##0.00-', (u'¤ ', u'¤ '), (u'', u'-')),
    ]
    for text, expected_prefix, expected_suffix in cases:
        parsed = numbers.parse_pattern(text)
        assert parsed.prefix == expected_prefix
        assert parsed.suffix == expected_suffix
Exemple #3
0
def format_scientific_field(spec, prec, number, locale):
    """Format *number* in scientific notation.

    *prec* is the number of optional fraction digits; when it is ``None``
    ``SCIENTIFIC_DECIMAL_DIGITS`` is used. The result is lower-cased when
    *spec* is lower-case.
    """
    if prec is None:
        digits = SCIENTIFIC_DECIMAL_DIGITS
    else:
        digits = int(prec)
    template = u'0.%sE+000' % (u'#' * digits)
    pattern = parse_pattern(template)
    separator = get_decimal_symbol(locale)
    rendered = pattern.apply(number, locale).replace(u'.', separator)
    if spec.islower():
        return rendered.lower()
    return rendered
Exemple #4
0
def format_scientific_field(spec, prec, number, locale):
    """Format *number* in scientific notation.

    *prec* is the number of optional fraction digits; when it is ``None``
    ``SCIENTIFIC_DECIMAL_DIGITS`` is used. The result is lower-cased when
    *spec* is lower-case.
    """
    if prec is None:
        digits = SCIENTIFIC_DECIMAL_DIGITS
    else:
        digits = int(prec)
    template = u'0.%sE+000' % (u'#' * digits)
    pattern = parse_pattern(template)
    separator = get_decimal_symbol(locale)
    rendered = pattern.apply(number, locale).replace(u'.', separator)
    if spec.islower():
        return rendered.lower()
    return rendered
Exemple #5
0
def load_i18n(project_root, tell_sentry):
    """Fill ``LOCALES`` from the ``.po`` files in ``<project_root>/i18n/core``.

    Any exception raised while handling a file is passed to *tell_sentry*
    and the loop moves on to the next file.
    """
    # Load the locales
    locale_dir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    for filename in os.listdir(locale_dir):
        try:
            pieces = filename.split(".")
            # Only names of the exact form "<lang>.po" are considered.
            if len(pieces) != 2 or pieces[1] != "po":
                continue
            lang = pieces[0]
            with open(os.path.join(locale_dir, filename)) as po_file:
                loc = locales[lang.lower()] = Locale(lang)
                catalog = loc.catalog = read_po(po_file)
                catalog.plural_func = get_function_from_rule(catalog.plural_expr)
                # Fall back to the untranslated tables when a name is missing.
                try:
                    loc.countries = make_sorted_dict(COUNTRIES, loc.territories)
                except KeyError:
                    loc.countries = COUNTRIES
                try:
                    loc.languages_2 = make_sorted_dict(LANGUAGES_2, loc.languages)
                except KeyError:
                    loc.languages_2 = LANGUAGES_2
        except Exception as exc:
            tell_sentry(exc, {})

    # Add aliases
    for code, loc in list(locales.items()):
        locales.setdefault(ALIASES.get(code, code), loc)
        locales.setdefault(ALIASES_R.get(code, code), loc)
    # Also expose each locale under the part of its key before the first '_'.
    for code, loc in list(locales.items()):
        locales.setdefault(code.split('_', 1)[0], loc)

    # Patch the locales to look less formal
    french = locales['fr']
    french.currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    french.currency_symbols['USD'] = '$'
Exemple #6
0
def parse_currency_formats(data, tree):
    """Collect currency patterns from *tree* into ``data['currency_formats']``.

    Keys are the ``type`` attribute of each ``currencyFormat`` element,
    suffixed with ``:<length type>`` when the enclosing
    ``currencyFormatLength`` has a ``type``. Values are parsed patterns,
    or ``Alias`` objects for ``alias`` children.
    """
    currency_formats = data.setdefault('currency_formats', {})
    for formats_elem in tree.findall('.//currencyFormats'):
        # TODO: Support other number systems
        if _should_skip_number_elem(data, formats_elem):
            continue

        for length_elem in formats_elem.findall('./currencyFormatLength'):
            length_type = length_elem.attrib.get('type')
            for elem in length_elem.findall('currencyFormat'):
                key = elem.attrib.get('type')
                if length_type:
                    # Handle `<currencyFormatLength type="short">`, etc.
                    # TODO(3.x): use nested dicts instead of colon-separated madness
                    key = '%s:%s' % (key, length_type)
                if _should_skip_elem(elem, key, currency_formats):
                    continue
                for node in elem.iter():
                    tag = node.tag
                    if tag == 'alias':
                        target = _translate_alias(
                            ['currency_formats', elem.attrib['type']],
                            node.attrib['path'])
                        currency_formats[key] = Alias(target)
                    elif tag == 'pattern':
                        currency_formats[key] = numbers.parse_pattern(
                            text_type(node.text))
Exemple #7
0
def parse_decimal_formats(data, tree):
    """Collect decimal patterns from *tree* into *data*.

    Patterns without a ``type`` attribute go to ``data['decimal_formats']``
    keyed by length type. Patterns with a ``type`` are compact formats and
    go to ``data['compact_decimal_formats']``.
    """
    decimal_formats = data.setdefault('decimal_formats', {})
    for formats_elem in tree.findall('.//decimalFormats'):
        # TODO: Support other number systems
        if _should_skip_number_elem(data, formats_elem):
            continue
        for length_elem in formats_elem.findall('./decimalFormatLength'):
            length_type = length_elem.attrib.get('type')
            if _should_skip_elem(length_elem, length_type, decimal_formats):
                continue
            if length_elem.findall('./alias'):
                # TODO map the alias to its target
                continue
            for pattern_el in length_elem.findall('./decimalFormat/pattern'):
                pattern_type = pattern_el.attrib.get('type')
                pattern = numbers.parse_pattern(text_type(pattern_el.text))
                if not pattern_type:
                    # Regular decimal format.
                    decimal_formats[length_type] = pattern
                    continue

                # This is a compact decimal format, see:
                # https://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats
                #
                # These are mapped into a `compact_decimal_formats` dictionary
                # with the format {length: {count: {multiplier: pattern}}}.
                #
                # TODO: Add support for formatting them.
                compact = data.setdefault('compact_decimal_formats', {})
                by_length = compact.setdefault(length_type, {})
                by_count = by_length.setdefault(pattern_el.attrib['count'], {})
                by_count[pattern_type] = pattern
Exemple #8
0
def test_parse_pattern_negative():
    """Check the (positive, negative) prefix/suffix pairs of parsed patterns."""
    cases = [
        # No negative format specified
        (u'¤#,##0.00', (u'¤', u'-¤'), (u'', u'')),
        # Negative format is specified
        (u'¤#,##0.00;(¤#,##0.00)', (u'¤', u'(¤'), (u'', u')')),
        # Negative sign is a suffix
        (u'¤ #,##0.00;¤ #,##0.00-', (u'¤ ', u'¤ '), (u'', u'-')),
    ]
    for text, expected_prefix, expected_suffix in cases:
        parsed = numbers.parse_pattern(text)
        assert parsed.prefix == expected_prefix
        assert parsed.suffix == expected_suffix
Exemple #9
0
def load_i18n(project_root, tell_sentry):
    """Fill ``LOCALES`` from the ``.po`` files in ``<project_root>/i18n/core``.

    Any exception raised while handling a file is passed to *tell_sentry*
    and the loop moves on to the next file.
    """
    # Load the locales
    locale_dir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    for filename in os.listdir(locale_dir):
        try:
            pieces = filename.split(".")
            # Only names of the exact form "<lang>.po" are considered.
            if len(pieces) != 2 or pieces[1] != "po":
                continue
            lang = pieces[0]
            with open(os.path.join(locale_dir, filename)) as po_file:
                loc = locales[lang.lower()] = Locale(lang)
                catalog = loc.catalog = read_po(po_file)
                catalog.plural_func = get_function_from_rule(catalog.plural_expr)
                # Fall back to the untranslated tables when a name is missing.
                try:
                    loc.countries = make_sorted_dict(COUNTRIES, loc.territories)
                except KeyError:
                    loc.countries = COUNTRIES
                try:
                    loc.languages_2 = make_sorted_dict(LANGUAGES_2, loc.languages)
                except KeyError:
                    loc.languages_2 = LANGUAGES_2
        except Exception as exc:
            tell_sentry(exc, {})

    # Add aliases
    for code, loc in list(locales.items()):
        locales.setdefault(ALIASES.get(code, code), loc)
        locales.setdefault(ALIASES_R.get(code, code), loc)
    # Also expose each locale under the part of its key before the first '_'.
    for code, loc in list(locales.items()):
        locales.setdefault(code.split('_', 1)[0], loc)

    # Patch the locales to look less formal
    french = locales['fr']
    french.currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    french.currency_symbols['USD'] = '$'
Exemple #10
0
def parse_currency_formats(data, tree):
    """Collect currency patterns from *tree* into ``data['currency_formats']``.

    :param data: dict receiving the results; the ``'currency_formats'``
        entry is created if missing and updated in place.
    :param tree: XML tree/element searched for ``currencyFormats`` elements.

    Keys are the ``type`` attribute of each ``currencyFormat`` element,
    suffixed with ``:<length type>`` when the enclosing
    ``currencyFormatLength`` has a ``type``. Values are parsed patterns,
    or ``Alias`` objects for ``alias`` children.
    """
    currency_formats = data.setdefault('currency_formats', {})
    for currency_format in tree.findall('.//currencyFormats'):
        if _should_skip_number_elem(data, currency_format):  # TODO: Support other number systems
            continue

        for length_elem in currency_format.findall('./currencyFormatLength'):
            curr_length_type = length_elem.attrib.get('type')
            for elem in length_elem.findall('currencyFormat'):
                type = elem.attrib.get('type')
                if curr_length_type:
                    # Handle `<currencyFormatLength type="short">`, etc.
                    # TODO(3.x): use nested dicts instead of colon-separated madness
                    type = '%s:%s' % (type, curr_length_type)
                if _should_skip_elem(elem, type, currency_formats):
                    continue
                # Element.getiterator() was removed in Python 3.9; iter()
                # walks the same elements (elem itself, then descendants).
                for child in elem.iter():
                    if child.tag == 'alias':
                        currency_formats[type] = Alias(
                            _translate_alias(['currency_formats', elem.attrib['type']],
                                             child.attrib['path'])
                        )
                    elif child.tag == 'pattern':
                        pattern = text_type(child.text)
                        currency_formats[type] = numbers.parse_pattern(pattern)
Exemple #11
0
def parse_decimal_formats(data, tree):
    """Collect decimal patterns from *tree* into *data*.

    Patterns without a ``type`` attribute go to ``data['decimal_formats']``
    keyed by length type. Patterns with a ``type`` are compact formats and
    go to ``data['compact_decimal_formats']``.
    """
    decimal_formats = data.setdefault('decimal_formats', {})
    for formats_elem in tree.findall('.//decimalFormats'):
        # TODO: Support other number systems
        if _should_skip_number_elem(data, formats_elem):
            continue
        for length_elem in formats_elem.findall('./decimalFormatLength'):
            length_type = length_elem.attrib.get('type')
            if _should_skip_elem(length_elem, length_type, decimal_formats):
                continue
            if length_elem.findall('./alias'):
                # TODO map the alias to its target
                continue
            for pattern_el in length_elem.findall('./decimalFormat/pattern'):
                pattern_type = pattern_el.attrib.get('type')
                pattern = numbers.parse_pattern(text_type(pattern_el.text))
                if not pattern_type:
                    # Regular decimal format.
                    decimal_formats[length_type] = pattern
                    continue

                # This is a compact decimal format, see:
                # https://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats
                #
                # These are mapped into a `compact_decimal_formats` dictionary
                # with the format {length: {count: {multiplier: pattern}}}.
                #
                # TODO: Add support for formatting them.
                compact = data.setdefault('compact_decimal_formats', {})
                by_length = compact.setdefault(length_type, {})
                by_count = by_length.setdefault(pattern_el.attrib['count'], {})
                by_count[pattern_type] = pattern
Exemple #12
0
def test_numberpattern_repr():
    """repr() outputs the pattern string"""

    # Comparing against repr() of the source string (rather than a literal)
    # keeps the check valid on both Python 2 and 3, which repr strings
    # differently.
    pattern_text = u'¤#,##0.00;(¤#,##0.00)'
    parsed = numbers.parse_pattern(pattern_text)
    expected_fragment = repr(pattern_text)
    assert expected_fragment in repr(parsed)
Exemple #13
0
def parse_percent_formats(data, tree):
    """Store one parsed percent pattern per ``percentFormatLength`` element.

    Results go to ``data['percent_formats']``, keyed by the element's
    ``type`` attribute (``None`` when absent).
    """
    percent_formats = data.setdefault('percent_formats', {})
    for length_elem in tree.findall('.//percentFormats/percentFormatLength'):
        length_type = length_elem.attrib.get('type')
        if not _should_skip_elem(length_elem, length_type, percent_formats):
            # NOTE(review): findtext() yields None when no pattern child
            # exists, which text_type() turns into the text 'None'.
            raw = length_elem.findtext('percentFormat/pattern')
            percent_formats[length_type] = numbers.parse_pattern(text_type(raw))
Exemple #14
0
def test_numberpattern_repr():
    """repr() outputs the pattern string"""

    # Comparing against repr() of the source string (rather than a literal)
    # keeps the check valid on both Python 2 and 3, which repr strings
    # differently.
    pattern_text = u'¤#,##0.00;(¤#,##0.00)'
    parsed = numbers.parse_pattern(pattern_text)
    expected_fragment = repr(pattern_text)
    assert expected_fragment in repr(parsed)
Exemple #15
0
def test_parse_pattern():
    """Check pattern preservation and instance identity of parse_pattern."""

    # Original pattern is preserved
    for text in (u'¤#,##0.00', u'¤#,##0.00;(¤#,##0.00)'):
        assert numbers.parse_pattern(text).pattern == text

    # Given a NumberPattern object, we don't return a new instance.
    # However, we don't cache NumberPattern objects, so calling
    # parse_pattern with the same format string will create new
    # instances
    first = numbers.parse_pattern(u'¤ #,##0.00')
    second = numbers.parse_pattern(u'¤ #,##0.00')
    assert first is not second
    assert first is numbers.parse_pattern(first)
Exemple #16
0
def parse_percent_formats(data, tree):
    """Store one parsed percent pattern per ``percentFormatLength`` element.

    Results go to ``data['percent_formats']``, keyed by the element's
    ``type`` attribute (``None`` when absent).
    """
    percent_formats = data.setdefault('percent_formats', {})
    for length_elem in tree.findall('.//percentFormats/percentFormatLength'):
        length_type = length_elem.attrib.get('type')
        if not _should_skip_elem(length_elem, length_type, percent_formats):
            # NOTE(review): findtext() yields None when no pattern child
            # exists, which text_type() turns into the text 'None'.
            raw = length_elem.findtext('percentFormat/pattern')
            percent_formats[length_type] = numbers.parse_pattern(text_type(raw))
Exemple #17
0
def test_parse_pattern():
    """Check pattern preservation and instance identity of parse_pattern."""

    # Original pattern is preserved
    for text in (u'¤#,##0.00', u'¤#,##0.00;(¤#,##0.00)'):
        assert numbers.parse_pattern(text).pattern == text

    # Given a NumberPattern object, we don't return a new instance.
    # However, we don't cache NumberPattern objects, so calling
    # parse_pattern with the same format string will create new
    # instances
    first = numbers.parse_pattern(u'¤ #,##0.00')
    second = numbers.parse_pattern(u'¤ #,##0.00')
    assert first is not second
    assert first is numbers.parse_pattern(first)
Exemple #18
0
def format_float_field(__, prec, number, locale):
    """Formats a fixed-point field.

    With ``prec`` of ``None`` up to ``NUMBER_DECIMAL_DIGITS`` optional
    fraction digits are used; otherwise exactly ``int(prec)`` digits.
    """
    if prec is None:
        fraction = u'#' * NUMBER_DECIMAL_DIGITS
    else:
        fraction = u'0' * int(prec)
    return parse_pattern(u'0.' + fraction).apply(number, locale)
Exemple #19
0
def format_float_field(__, prec, number, locale):
    """Formats a fixed-point field.

    With ``prec`` of ``None`` up to ``NUMBER_DECIMAL_DIGITS`` optional
    fraction digits are used; otherwise exactly ``int(prec)`` digits.
    """
    if prec is None:
        fraction = u'#' * NUMBER_DECIMAL_DIGITS
    else:
        fraction = u'0' * int(prec)
    return parse_pattern(u'0.' + fraction).apply(number, locale)
Exemple #20
0
def format_number(value, digits=None):
    """Format *value* for the current babel locale.

    *digits* may be ``None`` (plain ``format_decimal``), a single value
    used as both minimum and maximum fraction digits, or a
    ``(min, max)`` tuple.
    """
    locale = get_current_babel_locale()
    if digits is None:
        return format_decimal(value, locale=locale)
    if isinstance(digits, tuple):
        (min_digits, max_digits) = digits
    else:
        min_digits = max_digits = digits
    default_format = locale.decimal_formats.get(None)
    pattern = parse_pattern(default_format)  # type: babel.numbers.NumberPattern
    return pattern.apply(value, locale, force_frac=(min_digits, max_digits))
Exemple #21
0
def parse_decimal_formats(data, tree):
    """Store one parsed decimal pattern per ``decimalFormatLength`` element.

    Results go to ``data['decimal_formats']``, keyed by the element's
    ``type`` attribute. Elements containing an ``alias`` child are skipped.
    """
    decimal_formats = data.setdefault('decimal_formats', {})
    for length_elem in tree.findall('.//decimalFormats/decimalFormatLength'):
        length_type = length_elem.attrib.get('type')
        if (_should_skip_elem(length_elem, length_type, decimal_formats)
                or length_elem.findall('./alias')):
            # TODO map the alias to its target
            continue
        raw = length_elem.findtext('./decimalFormat/pattern')
        decimal_formats[length_type] = numbers.parse_pattern(text_type(raw))
Exemple #22
0
def parse_scientific_formats(data, tree):
    """Store one parsed pattern per ``scientificFormatLength`` element.

    Results go to ``data['scientific_formats']``, keyed by the element's
    ``type`` attribute (``None`` when absent).
    """
    scientific_formats = data.setdefault('scientific_formats', {})
    for formats_elem in tree.findall('.//scientificFormats'):
        # TODO: Support other number systems
        if _should_skip_number_elem(data, formats_elem):
            continue
        for length_elem in formats_elem.findall('./scientificFormatLength'):
            length_type = length_elem.attrib.get('type')
            if _should_skip_elem(length_elem, length_type, scientific_formats):
                continue
            raw = length_elem.findtext('scientificFormat/pattern')
            scientific_formats[length_type] = numbers.parse_pattern(text_type(raw))
Exemple #23
0
def parse_decimal_formats(data, tree):
    """Store one parsed decimal pattern per ``decimalFormatLength`` element.

    Results go to ``data['decimal_formats']``, keyed by the element's
    ``type`` attribute. Elements containing an ``alias`` child are skipped.
    """
    decimal_formats = data.setdefault('decimal_formats', {})
    for length_elem in tree.findall('.//decimalFormats/decimalFormatLength'):
        length_type = length_elem.attrib.get('type')
        if (_should_skip_elem(length_elem, length_type, decimal_formats)
                or length_elem.findall('./alias')):
            # TODO map the alias to its target
            continue
        raw = length_elem.findtext('./decimalFormat/pattern')
        decimal_formats[length_type] = numbers.parse_pattern(text_type(raw))
Exemple #24
0
def format_field(spec, arg, value, locale):
    """Format *value* with the number pattern ``spec`` (+ ``arg`` if given).

    Falls back to ``str(value)`` when *spec* is empty or *value* is not a
    ``Number``. If the combined spec is not a valid pattern, the spec text
    itself is returned.
    """
    if not (spec and isinstance(value, Number)):
        return str(value)
    if arg:
        spec += arg
    try:
        pattern = parse_pattern(spec)
    except ValueError:
        return spec
    return pattern.apply(value, locale)
Exemple #25
0
def format_field(spec, arg, value, locale):
    """Format *value* with the number pattern ``spec`` (+ ``arg`` if given).

    Falls back to ``str(value)`` when *spec* is empty or *value* is not a
    ``Number``. If the combined spec is not a valid pattern, the spec text
    itself is returned.
    """
    if not (spec and isinstance(value, Number)):
        return str(value)
    if arg:
        spec += arg
    try:
        pattern = parse_pattern(spec)
    except ValueError:
        return spec
    return pattern.apply(value, locale)
Exemple #26
0
def parse_scientific_formats(data, tree):
    """Store one parsed pattern per ``scientificFormatLength`` element.

    Results go to ``data['scientific_formats']``, keyed by the element's
    ``type`` attribute (``None`` when absent).
    """
    scientific_formats = data.setdefault('scientific_formats', {})
    for formats_elem in tree.findall('.//scientificFormats'):
        # TODO: Support other number systems
        if _should_skip_number_elem(data, formats_elem):
            continue
        for length_elem in formats_elem.findall('./scientificFormatLength'):
            length_type = length_elem.attrib.get('type')
            if _should_skip_elem(length_elem, length_type, scientific_formats):
                continue
            raw = length_elem.findtext('scientificFormat/pattern')
            scientific_formats[length_type] = numbers.parse_pattern(text_type(raw))
Exemple #27
0
    def processFormats(self, locale, formats):
        '''
        Validate the supplied formats and fill in the missing ones, returning the complete
        mapping used by conversion. The 'formats' dictionary is updated in place.
        '''
        assert isinstance(formats, dict), 'Invalid formats %s' % formats
        assert isinstance(locale, Locale), 'Invalid locale %s' % locale

        # Validation only: the parsed results are discarded.
        for clsTyp, pattern in formats.items():
            try:
                if clsTyp in (Number, Percentage):
                    bn.parse_pattern(pattern)
                elif pattern not in self.formats[clsTyp]:
                    bd.parse_pattern(pattern)
            except Exception as exc:
                raise FormatError('invalid %s format \'%s\' because: %s' % (clsTyp.__name__, pattern, str(exc)))

        # Numeric defaults come from the locale.
        if Number not in formats:
            formats[Number] = locale.decimal_formats.get(None).pattern
        if Percentage not in formats:
            formats[Percentage] = locale.percent_formats.get(None).pattern

        # Remaining defaults come from this object's own table.
        for clsTyp, default in self.defaults.items():
            formats.setdefault(clsTyp, default)

        return formats
    def processFormats(self, locale, formats):
        '''
        Validate the supplied formats and fill in the missing ones, returning the complete
        mapping used by conversion. The 'formats' dictionary is updated in place.
        '''
        assert isinstance(formats, dict), 'Invalid formats %s' % formats
        assert isinstance(locale, Locale), 'Invalid locale %s' % locale

        # Validation only: the parsed results are discarded.
        for clsTyp, pattern in formats.items():
            try:
                if clsTyp in (Number, Percentage):
                    bn.parse_pattern(pattern)
                elif pattern not in self.formats[clsTyp]:
                    bd.parse_pattern(pattern)
            except Exception as exc:
                raise FormatError('invalid %s format \'%s\' because: %s' % (clsTyp.__name__, pattern, str(exc)))

        # Numeric defaults come from the locale.
        if Number not in formats:
            formats[Number] = locale.decimal_formats.get(None).pattern
        if Percentage not in formats:
            formats[Percentage] = locale.percent_formats.get(None).pattern

        # Remaining defaults come from this object's own table.
        for clsTyp, default in self.defaults.items():
            formats.setdefault(clsTyp, default)

        return formats
Exemple #29
0
def parse_percent_formats(data, tree):
    """Store one parsed percent pattern per ``percentFormatLength`` element.

    Results go to ``data['percent_formats']``, keyed by the element's
    ``type`` attribute (``None`` when absent).
    """
    percent_formats = data.setdefault('percent_formats', {})

    for formats_elem in tree.findall('.//percentFormats'):
        # TODO: Support other number systems
        if _should_skip_number_elem(data, formats_elem):
            continue
        for length_elem in formats_elem.findall('.//percentFormatLength'):
            length_type = length_elem.attrib.get('type')
            if _should_skip_elem(length_elem, length_type, percent_formats):
                continue
            raw = length_elem.findtext('percentFormat/pattern')
            percent_formats[length_type] = numbers.parse_pattern(str(raw))
Exemple #30
0
def load_i18n(project_root, tell_sentry):
    """Fill ``i18n.LOCALES`` from the ``.po`` files in ``<project_root>/i18n/core``.

    Also installs the default English locale as ``i18n.LOCALE_EN``.
    Exceptions raised while handling a file are passed to *tell_sentry*.
    """
    def by_unaccented_name(item):
        # Sort (code, name) pairs by the name with accents stripped.
        return strip_accents(item[1])

    # Load the locales
    locale_dir = os.path.join(project_root, 'i18n', 'core')
    locales = i18n.LOCALES
    for filename in os.listdir(locale_dir):
        try:
            pieces = filename.split(".")
            # Only names of the exact form "<lang>.po" are considered.
            if len(pieces) != 2 or pieces[1] != "po":
                continue
            lang = pieces[0]
            with open(os.path.join(locale_dir, filename)) as po_file:
                loc = locales[lang.lower()] = Locale(lang)
                catalog = loc.catalog = read_po(po_file)
                catalog.plural_func = get_function_from_rule(catalog.plural_expr)
                try:
                    loc.countries_map = {
                        code: loc.territories[code]
                        for code in COUNTRIES_MAP
                    }
                    loc.countries = sorted(
                        loc.countries_map.items(), key=by_unaccented_name)
                except KeyError:
                    # A territory name is missing: use the untranslated tables.
                    loc.countries_map = COUNTRIES_MAP
                    loc.countries = COUNTRIES
        except Exception as exc:
            tell_sentry(exc)

    # Add the default English locale
    english = i18n.LOCALE_EN = locales['en'] = Locale('en')
    english.catalog = Catalog('en')
    english.catalog.plural_func = lambda n: n != 1
    english.countries = COUNTRIES
    english.countries_map = COUNTRIES_MAP

    # Add aliases
    for code, loc in list(locales.items()):
        locales.setdefault(ALIASES.get(code, code), loc)
        locales.setdefault(ALIASES_R.get(code, code), loc)
    # Also expose each locale under the part of its key before the first '_'.
    for code, loc in list(locales.items()):
        locales.setdefault(code.split('_', 1)[0], loc)

    # Patch the locales to look less formal
    french = locales['fr']
    french.currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    french.currency_symbols['USD'] = '$'
Exemple #31
0
def parse_currency_formats(data, tree):
    """Collect currency patterns from *tree* into ``data['currency_formats']``.

    :param data: dict receiving the results; the ``'currency_formats'``
        entry is created if missing and updated in place.
    :param tree: XML tree/element searched for ``currencyFormatLength``
        elements under ``currencyFormats``.

    Keys are the ``type`` attribute of each ``currencyFormat`` element,
    suffixed with ``:<length type>`` when the enclosing
    ``currencyFormatLength`` has a ``type``. Values are parsed patterns,
    or ``Alias`` objects for ``alias`` children.
    """
    currency_formats = data.setdefault('currency_formats', {})
    for length_elem in tree.findall('.//currencyFormats/currencyFormatLength'):
        curr_length_type = length_elem.attrib.get('type')
        for elem in length_elem.findall('currencyFormat'):
            type = elem.attrib.get('type')
            if curr_length_type:
                # Handle `<currencyFormatLength type="short">`, etc.
                type = '%s:%s' % (type, curr_length_type)
            if _should_skip_elem(elem, type, currency_formats):
                continue
            # Element.getiterator() was removed in Python 3.9; iter()
            # walks the same elements (elem itself, then descendants).
            for child in elem.iter():
                if child.tag == 'alias':
                    currency_formats[type] = Alias(
                        _translate_alias(
                            ['currency_formats', elem.attrib['type']],
                            child.attrib['path']))
                elif child.tag == 'pattern':
                    pattern = text_type(child.text)
                    currency_formats[type] = numbers.parse_pattern(pattern)
Exemple #32
0
def parse_currency_formats(data, tree):
    """Collect currency patterns from *tree* into ``data['currency_formats']``.

    :param data: dict receiving the results; the ``'currency_formats'``
        entry is created if missing and updated in place.
    :param tree: XML tree/element searched for ``currencyFormatLength``
        elements under ``currencyFormats``.

    Keys are the ``type`` attribute of each ``currencyFormat`` element,
    suffixed with ``:<length type>`` when the enclosing
    ``currencyFormatLength`` has a ``type``. Values are parsed patterns,
    or ``Alias`` objects for ``alias`` children.
    """
    currency_formats = data.setdefault('currency_formats', {})
    for length_elem in tree.findall('.//currencyFormats/currencyFormatLength'):
        curr_length_type = length_elem.attrib.get('type')
        for elem in length_elem.findall('currencyFormat'):
            type = elem.attrib.get('type')
            if curr_length_type:
                # Handle `<currencyFormatLength type="short">`, etc.
                type = '%s:%s' % (type, curr_length_type)
            if _should_skip_elem(elem, type, currency_formats):
                continue
            # Element.getiterator() was removed in Python 3.9; iter()
            # walks the same elements (elem itself, then descendants).
            for child in elem.iter():
                if child.tag == 'alias':
                    currency_formats[type] = Alias(
                        _translate_alias(['currency_formats', elem.attrib['type']],
                                         child.attrib['path'])
                    )
                elif child.tag == 'pattern':
                    pattern = text_type(child.text)
                    currency_formats[type] = numbers.parse_pattern(pattern)
Exemple #33
0
def load_i18n(website):
    """Build ``website.locales`` from the ``.po`` files in ``i18n/core``.

    Also installs the default English locale as ``website.locale_en``.
    Exceptions raised while handling a file go to ``website.tell_sentry``.
    """
    def by_unaccented_name(item):
        # Sort (code, name) pairs by the name with accents stripped.
        return strip_accents(item[1])

    # Load the locales
    locale_dir = os.path.join(website.project_root, 'i18n', 'core')
    locales = website.locales = {}
    for filename in os.listdir(locale_dir):
        try:
            pieces = filename.split(".")
            # Only names of the exact form "<lang>.po" are considered.
            if len(pieces) != 2 or pieces[1] != "po":
                continue
            lang = pieces[0]
            with open(os.path.join(locale_dir, filename)) as po_file:
                loc = locales[lang.lower()] = Locale(lang)
                catalog = loc.catalog = read_po(po_file)
                catalog.plural_func = get_function_from_rule(catalog.plural_expr)
                try:
                    loc.countries_map = {
                        code: loc.territories[code] for code in COUNTRIES_MAP
                    }
                    loc.countries = sorted(
                        loc.countries_map.items(), key=by_unaccented_name)
                except KeyError:
                    # A territory name is missing: use the untranslated tables.
                    loc.countries_map = COUNTRIES_MAP
                    loc.countries = COUNTRIES
        except Exception as exc:
            website.tell_sentry(exc)

    # Add the default English locale
    english = website.locale_en = locales['en'] = Locale('en')
    english.catalog = Catalog('en')
    english.catalog.plural_func = lambda n: n != 1
    english.countries = COUNTRIES
    english.countries_map = COUNTRIES_MAP

    # Add aliases
    for code, loc in list(locales.items()):
        locales.setdefault(ALIASES.get(code, code), loc)
        locales.setdefault(ALIASES_R.get(code, code), loc)
    # Also expose each locale under the part of its key before the first '_'.
    for code, loc in list(locales.items()):
        locales.setdefault(code.split('_', 1)[0], loc)

    # Patch the locales to look less formal
    french = locales['fr']
    french.currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    french.currency_symbols['USD'] = '$'
Exemple #34
0
        return number
    if isinstance(number, int):
        return FluentInt(number, **kwargs)
    elif isinstance(number, float):
        return FluentFloat(number, **kwargs)
    elif isinstance(number, Decimal):
        return FluentDecimal(number, **kwargs)
    elif isinstance(number, FluentNone):
        return number
    else:
        raise TypeError(
            "Can't use fluent_number with object {0} for type {1}".format(
                number, type(number)))


# Pre-parsed number pattern whose text ("#0") contains no grouping separator.
_UNGROUPED_PATTERN = parse_pattern("#0")


def clone_pattern(pattern):
    """Return a new ``NumberPattern`` built from the fields of *pattern*.

    The field values themselves are passed through as-is, not copied.
    """
    field_names = (
        'pattern', 'prefix', 'suffix', 'grouping',
        'int_prec', 'frac_prec', 'exp_prec', 'exp_plus',
    )
    return NumberPattern(*[getattr(pattern, name) for name in field_names])


@attr.s
class DateFormatOptions(object):
    # Parameters.
    # See https://projectfluent.org/fluent/guide/functions.html#datetime

    # Developer only
    timeZone = attr.ib(default=None)
Exemple #35
0
from unicodedata import combining, normalize

import babel.core
from babel.dates import format_date, format_datetime, format_time, format_timedelta
from babel.messages.pofile import Catalog
from babel.numbers import parse_pattern
from markupsafe import Markup
from pando.utils import utcnow

from ..constants import CURRENCIES, D_MAX
from ..exceptions import AmbiguousNumber, InvalidNumber
from ..website import website
from .currencies import Money, MoneyBasket


# Pre-parsed pattern: grouped integer part with exactly two fraction digits
# and no currency sign placeholder.
MONEY_AMOUNT_FORMAT = parse_pattern('#,##0.00')
# Set holding only the string '0' (its use is outside this excerpt).
ONLY_ZERO = {'0'}


def no_escape(s):
    """Identity function: return *s* unchanged."""
    return s


def LegacyMoney(o):
    """Return *o* as-is if it is a Money or MoneyBasket, else wrap it as EUR Money."""
    if isinstance(o, (Money, MoneyBasket)):
        return o
    return Money(o, 'EUR')


# Named tuple with two fields: `value` and `wrapper`.
Wrap = namedtuple('Wrap', 'value wrapper')


# Markup template that places its argument inside a <b> element.
BOLD = Markup('<b>%s</b>')
Exemple #36
0
def test_parse_static_pattern():
    """A pattern made only of literal text still parses to a truthy object."""
    static_text = 'Kun'  # in the So locale in CLDR 30
    parsed = numbers.parse_pattern(static_text)
    assert parsed
Exemple #37
0
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Load translations and localized markdown documents.

    Reads every ``<lang>.po`` file in ``<project_root>/i18n/core`` into
    ``LOCALES``, computes a completion ratio per locale, builds the list
    used by the language switcher, registers locale aliases and renders
    the ``*.md`` files found under ``<project_root>/i18n``.

    :param canonical_host: host name appended to ``<language>.`` when
        building the per-language URLs.
    :param canonical_scheme: URL scheme used for those URLs.
    :param project_root: directory containing the ``i18n`` folder.
    :param tell_sentry: callable receiving exceptions raised while a
        ``.po`` file is being loaded.
    :returns: dict with the keys ``'docs'``, ``'lang_list'`` and ``'locales'``.
    """
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            # Only names of the exact form "<lang>.po" are considered.
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                c.plural_func = get_function_from_rule(c.plural_expr)
                # Fall back to the untranslated tables when a name is missing.
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e, {}, allow_reraise=True)

    # Prepare a unique and sorted list for use in the language switcher
    # `percent` scores a list of message strings: each non-empty entry counts
    # as 1, a non-empty tuple entry is scored recursively, and the total is
    # divided by the number of entries (empty ones included).
    percent = lambda l: sum(
        (percent(s) if isinstance(s, tuple) else 1) for s in l if s) / len(l)
    for l in locales.values():
        if l.language == 'en':
            # English is treated as fully complete without inspecting a catalog.
            l.completion = 1
            continue
        l.completion = percent([m.string for m in l.catalog if m.id])
    # URL template of the form "<scheme>://<language>.<host>".
    loc_url = canonical_scheme + '://%s.' + canonical_host
    # Tuples of (completion, language code, title-cased name, URL), ordered by
    # descending completion and then language code; locales with a falsy
    # completion are left out.
    lang_list = sorted(
        ((l.completion, l.language, l.language_name.title(),
          loc_url % l.language)
         for l in set(locales.values()) if l.completion),
        key=lambda t: (-t[0], t[1]),
    )

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    # Also expose each locale under the part of its key before the first '_'.
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        # The document name is the containing directory's name; the language
        # is the file name without its last three characters (".md").
        doc = os.path.basename(d)
        lang = b[:-3]
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
            if md.startswith('# '):
                # Drop the leading title line, then prepend "##" to every
                # remaining heading marker.
                md = '\n'.join(md.split('\n')[1:]).strip()
                md = heading_re.sub(r'##\1', md)
            docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales}
Exemple #38
0
def test_parse_pattern():
    """Check the ``suffix`` pair parsed from two two-part currency patterns."""
    # Negative sub-pattern wrapped in parentheses.
    parenthesised = numbers.parse_pattern(u'¤#,##0.00;(¤#,##0.00)')
    assert parenthesised.suffix == (u'', u')')

    # Negative sub-pattern ending in a minus sign.
    trailing_minus = numbers.parse_pattern(u'¤ #,##0.00;¤ #,##0.00-')
    assert trailing_minus.suffix == (u'', u'-')
Exemple #39
0
                    l.countries = COUNTRIES
    return langs


# Load the locales
# load_langs() is called with the "i18n" directory name; its result is used
# below as a mapping from language code to locale object.
LOCALES = load_langs("i18n")

# Add the default English locale
# English is registered by hand: it gets a freshly created catalog, a plural
# rule that treats every count other than 1 as plural, and the untranslated
# country tables.
LOCALE_EN = LOCALES['en'] = Locale('en')
LOCALE_EN.catalog = Catalog('en')
LOCALE_EN.catalog.plural_func = lambda n: n != 1
LOCALE_EN.countries = COUNTRIES
LOCALE_EN.countries_map = COUNTRIES_MAP

# Patch the locales to look less formal
# '\xa4' is the currency sign placeholder and '\u202f' a narrow no-break
# space. The '.##' fraction part presumably makes decimals optional -- TODO
# confirm against the number pattern parser.
# NOTE(review): the two LOCALES['fr'] lookups assume a French locale was
# loaded above -- confirm.
LOCALE_EN.currency_formats[None] = parse_pattern('\xa4#,##0.##')
LOCALES['fr'].currency_formats[None] = parse_pattern('#,##0.##\u202f\xa4')
LOCALES['fr'].currency_symbols['USD'] = '$'


def get_locale_for_request(request):
    """Choose a locale for *request* based on its ``Accept-Language`` header.

    The header is split on commas and each entry is cut at its first ``;``.
    Entries are tried in header order: each is passed through
    ``regularize_locale`` and looked up in ``LOCALES``.  The first truthy
    hit is returned; ``LOCALE_EN`` is returned when nothing matches or the
    header is absent.
    """
    header = request.headers.get("Accept-Language", "")
    for entry in header.split(","):
        tag = entry.split(";", 1)[0]
        candidate = LOCALES.get(regularize_locale(tag))
        if candidate:
            return candidate
    return LOCALE_EN

Exemple #40
0
 def _format_currency(cls, number):
     """Format *number* as an amount of the currency ``cls.CODE``.

     The locale is parsed from ``cls.LOCALE``.  The pattern comes from
     ``cls.FORMAT`` when that is truthy, otherwise from the locale's
     ``currency_formats`` entry stored under the key ``None``.  The
     pattern's ``frac_prec`` is then set to ``(2, cls.DECIMAL_PLACES)``
     before it is applied.
     """
     locale = Locale.parse(cls.LOCALE)

     pattern_source = cls.FORMAT
     if not pattern_source:
         pattern_source = locale.currency_formats.get(None)

     pattern = parse_pattern(pattern_source)
     # NOTE(review): if parse_pattern hands back the object it was given,
     # this assignment also changes the pattern stored on the locale --
     # confirm that is acceptable.
     pattern.frac_prec = (2, cls.DECIMAL_PLACES)
     return pattern.apply(number, locale, currency=cls.CODE)
Exemple #41
0
def main():
    """Convert a CLDR source tree into pickled locale data files.

    Expects exactly one command-line argument: the path to a CLDR directory
    containing ``supplemental/`` and ``main/``.  Two kinds of output are
    written below ``<script dir>/../babel``:

    * ``global.dat`` -- time zone / metazone tables taken from the
      supplemental files;
    * ``localedata/<stem>.dat`` -- one file per ``main/<stem>.xml``, holding
      display names, week data, time zone names, Gregorian calendar data,
      number formats, currencies and unit patterns.

    ``root.xml`` is processed first and the remaining files in order of
    increasing file name length.  All pickles use protocol 2.

    Elements carrying a ``draft`` or ``alt`` attribute are generally only
    used when no value has been recorded yet for the same key.
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    _options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..',
                           'babel')

    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))

    # Import global data from the supplemental files
    global_data = {}

    territory_zones = global_data.setdefault('territory_zones', {})
    zone_aliases = global_data.setdefault('zone_aliases', {})
    zone_territories = global_data.setdefault('zone_territories', {})
    for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
        tzid = elem.attrib['type']
        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
        zone_territories[tzid] = elem.attrib['territory']
        if 'aliases' in elem.attrib:
            for alias in elem.attrib['aliases'].split():
                zone_aliases[alias] = tzid

    # Import Metazone mapping
    meta_zones = global_data.setdefault('meta_zones', {})
    tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml'))
    for elem in tzsup.findall('.//timezone'):
        for child in elem.findall('usesMetazone'):
            if 'to' not in child.attrib:  # FIXME: support old mappings
                meta_zones[elem.attrib['type']] = child.attrib['mzone']

    # The context manager closes the file even if pickling fails.
    with open(os.path.join(destdir, 'global.dat'), 'wb') as outfile:
        pickle.dump(global_data, outfile, 2)

    # build a territory containment mapping for inheritance
    region_groups = {}
    for elem in sup.findall('.//territoryContainment/group'):
        region_groups[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment: map each territory to the set of groups
    # that contain it, inheriting the containers already known for a group.
    territory_containment = {}
    for group, territory_list in sorted(region_groups.items()):
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set())
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # prepare the per-locale plural rules definitions
    plural_rules = {}
    prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml'))
    for elem in prsup.findall('.//plurals/pluralRules'):
        rules = [(rule.attrib['count'], text_type(rule.text))
                 for rule in elem.findall('pluralRule')]
        pr = PluralRule(rules)
        for locale in elem.attrib['locales'].split():
            plural_rules[locale] = pr

    # root.xml goes first, then the other files from shortest to longest name.
    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    # The week data element is the same for every locale file.
    supelem = sup.find('.//weekData')

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        sys.stderr.write('Processing input file %r\n' % filename)
        tree = parse(os.path.join(srcdir, 'main', filename))
        data = {}

        language = None
        elem = tree.find('.//identity/language')
        if elem is not None:
            language = elem.attrib['type']
        sys.stderr.write('  Language:  %r\n' % language)

        elem = tree.find('.//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001'  # world
        sys.stderr.write('  Territory: %r\n' % territory)
        regions = territory_containment.get(territory, [])
        sys.stderr.write('  Regions:    %r\n' % regions)

        # plural rules; the world territory is left out of the locale id
        locale_id = '_'.join([
            part for part in (language,
                              territory if territory != '001' else None)
            if part
        ])
        if locale_id in plural_rules:
            data['plural_form'] = plural_rules[locale_id]

        # <localeDisplayNames>

        for key, path in (
            ('territories', './/territories/territory'),
            ('languages', './/languages/language'),
            ('variants', './/variants/variant'),
            ('scripts', './/scripts/script'),
        ):
            names = data.setdefault(key, {})
            for elem in tree.findall(path):
                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                        and elem.attrib['type'] in names:
                    continue
                names[elem.attrib['type']] = _text(elem)

        # <dates>

        week_data = data.setdefault('week_data', {})

        def applies_here(week_elem):
            # True when the element lists this locale's territory or one of
            # the regions containing it.
            listed = week_elem.attrib['territories'].split()
            return territory in listed or any(r in listed for r in regions)

        for elem in supelem.findall('minDays'):
            if applies_here(elem):
                week_data['min_days'] = int(elem.attrib['count'])

        for elem in supelem.findall('firstDay'):
            if 'alt' not in elem.attrib:  # ignore alternatives
                if applies_here(elem):
                    week_data['first_day'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendStart'):
            if applies_here(elem):
                week_data['weekend_start'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendEnd'):
            if applies_here(elem):
                week_data['weekend_end'] = weekdays[elem.attrib['day']]

        # Only the first non-draft, non-alt element of each kind is used, and
        # its '{n}' placeholders are turned into %-style ones.
        zone_formats = data.setdefault('zone_formats', {})
        for elem in tree.findall('.//timeZoneNames/gmtFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/regionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['region'] = text_type(elem.text).replace(
                    '{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback'] = text_type(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break

        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('.//timeZoneNames/zone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            time_zones[elem.attrib['type']] = info

        meta_zones = data.setdefault('meta_zones', {})
        for elem in tree.findall('.//timeZoneNames/metazone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            info['common'] = elem.findtext('commonlyUsed') == 'true'
            meta_zones[elem.attrib['type']] = info

        for calendar in tree.findall('.//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            # Element.iter() replaces getiterator(), which was removed from
            # xml.etree in Python 3.9; both walk the element and all of its
            # descendants in document order.

            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = months.setdefault(ctxt_type, {})
                for width in ctxt.findall('monthWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.iter():
                        if elem.tag == 'month':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib.get('type'))] = text_type(
                                elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['months', ctxt_type, width_type],
                                    elem.attrib['path']))

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = days.setdefault(ctxt_type, {})
                for width in ctxt.findall('dayWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.iter():
                        if elem.tag == 'day':
                            dtype = weekdays[elem.attrib['type']]
                            # Same rule as months/quarters/eras: a draft or
                            # alternate name never replaces a recorded one.
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and dtype in widths:
                                continue
                            widths[dtype] = text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['days', ctxt_type, width_type],
                                    elem.attrib['path']))

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = quarters.setdefault(ctxt_type, {})
                for width in ctxt.findall('quarterWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.iter():
                        if elem.tag == 'quarter':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib['type'])] = text_type(
                                elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['quarters', ctxt_type, width_type],
                                    elem.attrib['path']))

            eras = data.setdefault('eras', {})
            for width in calendar.findall('eras/*'):
                width_type = NAME_MAP[width.tag]
                widths = eras.setdefault(width_type, {})
                for elem in width.iter():
                    if elem.tag == 'era':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and int(elem.attrib['type']) in widths:
                            continue
                        widths[int(elem.attrib.get('type'))] = text_type(
                            elem.text)
                    elif elem.tag == 'alias':
                        eras[width_type] = Alias(
                            _translate_alias(['eras', width_type],
                                             elem.attrib['path']))

            # AM/PM
            periods = data.setdefault('periods', {})
            for period_tag in ('am', 'pm'):
                for elem in calendar.findall(period_tag):
                    if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                            and elem.tag in periods:
                        continue
                    periods[elem.tag] = text_type(elem.text)

            date_formats = data.setdefault('date_formats', {})
            for formats_elem in calendar.findall('dateFormats'):
                for elem in formats_elem.iter():
                    if elem.tag == 'dateFormatLength':
                        # 'alt' is checked as well, matching the time and
                        # datetime formats below.
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in date_formats:
                            continue
                        try:
                            date_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(
                                    elem.findtext('dateFormat/pattern')))
                        except ValueError as exc:
                            sys.stderr.write('ERROR: %s\n' % exc)
                    elif elem.tag == 'alias':
                        # NOTE(review): this only rebinds the local name;
                        # data['date_formats'] keeps the original dict.
                        # Confirm whether the alias was meant to be stored.
                        date_formats = Alias(
                            _translate_alias(['date_formats'],
                                             elem.attrib['path']))

            time_formats = data.setdefault('time_formats', {})
            for formats_elem in calendar.findall('timeFormats'):
                for elem in formats_elem.iter():
                    if elem.tag == 'timeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in time_formats:
                            continue
                        try:
                            time_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(
                                    elem.findtext('timeFormat/pattern')))
                        except ValueError as exc:
                            sys.stderr.write('ERROR: %s\n' % exc)
                    elif elem.tag == 'alias':
                        # NOTE(review): local rebinding only, see the
                        # date_formats alias branch.
                        time_formats = Alias(
                            _translate_alias(['time_formats'],
                                             elem.attrib['path']))

            datetime_formats = data.setdefault('datetime_formats', {})
            for formats_elem in calendar.findall('dateTimeFormats'):
                for elem in formats_elem.iter():
                    if elem.tag == 'dateTimeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in datetime_formats:
                            continue
                        try:
                            # Stored as plain text, not as a parsed pattern.
                            datetime_formats[elem.attrib.get('type')] = \
                                text_type(elem.findtext(
                                    'dateTimeFormat/pattern'))
                        except ValueError as exc:
                            sys.stderr.write('ERROR: %s\n' % exc)
                    elif elem.tag == 'alias':
                        # NOTE(review): local rebinding only, see the
                        # date_formats alias branch.
                        datetime_formats = Alias(
                            _translate_alias(['datetime_formats'],
                                             elem.attrib['path']))

        # <numbers>

        # Draft/alt symbols are always skipped, even when nothing has been
        # recorded yet for the tag.
        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('.//numbers/symbols/*'):
            if 'draft' in elem.attrib or 'alt' in elem.attrib:
                continue
            number_symbols[elem.tag] = text_type(elem.text)

        for key, length_path, pattern_path in (
            ('decimal_formats', './/decimalFormats/decimalFormatLength',
             'decimalFormat/pattern'),
            ('scientific_formats',
             './/scientificFormats/scientificFormatLength',
             'scientificFormat/pattern'),
            ('currency_formats', './/currencyFormats/currencyFormatLength',
             'currencyFormat/pattern'),
            ('percent_formats', './/percentFormats/percentFormatLength',
             'percentFormat/pattern'),
        ):
            number_formats = data.setdefault(key, {})
            for elem in tree.findall(length_path):
                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                        and elem.attrib.get('type') in number_formats:
                    continue
                pattern = text_type(elem.findtext(pattern_path))
                number_formats[elem.attrib.get('type')] = \
                    numbers.parse_pattern(pattern)

        currency_names = data.setdefault('currency_names', {})
        currency_symbols = data.setdefault('currency_symbols', {})
        for elem in tree.findall('.//currencies/currency'):
            code = elem.attrib['type']
            # TODO: support plural rules for currency name selection
            for name in elem.findall('displayName'):
                if ('draft' in name.attrib or 'count' in name.attrib) \
                        and code in currency_names:
                    continue
                currency_names[code] = text_type(name.text)
            # TODO: support choice patterns for currency symbol selection
            symbol = elem.find('symbol')
            if symbol is not None and 'draft' not in symbol.attrib \
                    and 'choice' not in symbol.attrib:
                currency_symbols[code] = text_type(symbol.text)

        # <units>

        unit_patterns = data.setdefault('unit_patterns', {})
        for elem in tree.findall('.//units/unit'):
            unit_pattern = unit_patterns.setdefault(elem.attrib['type'], {})
            for pattern in elem.findall('unitPattern'):
                unit_pattern[pattern.attrib['count']] = text_type(pattern.text)

        locale_path = os.path.join(destdir, 'localedata', stem + '.dat')
        with open(locale_path, 'wb') as outfile:
            pickle.dump(data, outfile, 2)
Exemple #42
0
def test_parse_pattern():
    """Check the ``suffix`` pair parsed from two two-part currency patterns."""
    cases = (
        # negative sub-pattern wrapped in parentheses
        (u'¤#,##0.00;(¤#,##0.00)', (u'', u')')),
        # negative sub-pattern ending in a minus sign
        (u'¤ #,##0.00;¤ #,##0.00-', (u'', u'-')),
    )
    for pattern, expected_suffix in cases:
        assert numbers.parse_pattern(pattern).suffix == expected_suffix
Exemple #43
0
                    l.countries = COUNTRIES
    return langs


# Load the locales
# load_langs() is called with the "i18n" directory name; its result is used
# below as a mapping from language code to locale object.
LOCALES = load_langs("i18n")

# Add the default English locale
# English is registered by hand: it gets a freshly created catalog, a plural
# rule that treats every count other than 1 as plural, and the untranslated
# country tables.
LOCALE_EN = LOCALES['en'] = Locale('en')
LOCALE_EN.catalog = Catalog('en')
LOCALE_EN.catalog.plural_func = lambda n: n != 1
LOCALE_EN.countries = COUNTRIES
LOCALE_EN.countries_map = COUNTRIES_MAP

# Patch the locales to look less formal
# The French pattern puts the currency sign placeholder ('\xa4') after the
# amount, separated by a narrow no-break space ('\u202f').
# NOTE(review): both lookups assume a French locale was loaded above -- confirm.
LOCALES['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
LOCALES['fr'].currency_symbols['USD'] = '$'


def get_locale_for_request(request):
    """Return the locale matching the request's ``Accept-Language`` header.

    Each comma-separated header entry is cut at its first ``;``, passed
    through ``regularize_locale`` and looked up in ``LOCALES``, in header
    order.  The first truthy lookup result wins; ``LOCALE_EN`` is the
    fallback when the header is missing or nothing matches.
    """
    raw_header = request.headers.get("Accept-Language", "")
    tags = (part.split(";", 1)[0] for part in raw_header.split(","))
    # Generators keep the lookups lazy: nothing past the first match is
    # regularized or looked up.
    lookups = (LOCALES.get(regularize_locale(tag)) for tag in tags)
    return next((found for found in lookups if found), LOCALE_EN)


def format_currency_with_options(number,
Exemple #44
0
def main():
    parser = OptionParser(usage='%prog path/to/cldr')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel')

    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))

    # Import global data from the supplemental files
    global_data = {}

    territory_zones = global_data.setdefault('territory_zones', {})
    zone_aliases = global_data.setdefault('zone_aliases', {})
    zone_territories = global_data.setdefault('zone_territories', {})
    for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
        tzid = elem.attrib['type']
        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
        zone_territories[tzid] = elem.attrib['territory']
        if 'aliases' in elem.attrib:
            for alias in elem.attrib['aliases'].split():
                zone_aliases[alias] = tzid

    # Import Metazone mapping
    meta_zones = global_data.setdefault('meta_zones', {})
    tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml'))
    for elem in tzsup.findall('.//timezone'):
        for child in elem.findall('usesMetazone'):
            if 'to' not in child.attrib: # FIXME: support old mappings
                meta_zones[elem.attrib['type']] = child.attrib['mzone']

    outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
    try:
        pickle.dump(global_data, outfile, 2)
    finally:
        outfile.close()

    # build a territory containment mapping for inheritance
    regions = {}
    for elem in sup.findall('.//territoryContainment/group'):
        regions[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment
    territory_containment = {}
    region_items = sorted(regions.items())
    for group, territory_list in region_items:
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set([]))
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # prepare the per-locale plural rules definitions
    plural_rules = {}
    prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml'))
    for elem in prsup.findall('.//plurals/pluralRules'):
        rules = []
        for rule in elem.findall('pluralRule'):
            rules.append((rule.attrib['count'], text_type(rule.text)))
        pr = PluralRule(rules)
        for locale in elem.attrib['locales'].split():
            plural_rules[locale] = pr

    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=lambda a: len(a))
    filenames.insert(0, 'root.xml')

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        sys.stderr.write('Processing input file %r\n' % filename)
        tree = parse(os.path.join(srcdir, 'main', filename))
        data = {}

        language = None
        elem = tree.find('.//identity/language')
        if elem is not None:
            language = elem.attrib['type']
        sys.stderr.write('  Language:  %r\n' % language)

        territory = None
        elem = tree.find('.//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001' # world
        sys.stderr.write('  Territory: %r\n' % territory)
        regions = territory_containment.get(territory, [])
        sys.stderr.write('  Regions:    %r\n' % regions)

        # plural rules
        locale_id = '_'.join([_f for _f in [
            language,
            territory != '001' and territory or None
        ] if _f])
        if locale_id in plural_rules:
            data['plural_form'] = plural_rules[locale_id]

        # <localeDisplayNames>

        territories = data.setdefault('territories', {})
        for elem in tree.findall('.//territories/territory'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in territories:
                continue
            territories[elem.attrib['type']] = _text(elem)

        languages = data.setdefault('languages', {})
        for elem in tree.findall('.//languages/language'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in languages:
                continue
            languages[elem.attrib['type']] = _text(elem)

        variants = data.setdefault('variants', {})
        for elem in tree.findall('.//variants/variant'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in variants:
                continue
            variants[elem.attrib['type']] = _text(elem)

        scripts = data.setdefault('scripts', {})
        for elem in tree.findall('.//scripts/script'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in scripts:
                continue
            scripts[elem.attrib['type']] = _text(elem)

        # <dates>

        week_data = data.setdefault('week_data', {})
        supelem = sup.find('.//weekData')

        for elem in supelem.findall('minDays'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data['min_days'] = int(elem.attrib['count'])

        for elem in supelem.findall('firstDay'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data['first_day'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendStart'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data['weekend_start'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendEnd'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data['weekend_end'] = weekdays[elem.attrib['day']]

        zone_formats = data.setdefault('zone_formats', {})
        for elem in tree.findall('.//timeZoneNames/gmtFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/regionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['region'] = text_type(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback'] = text_type(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break

        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('.//timeZoneNames/zone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            time_zones[elem.attrib['type']] = info

        meta_zones = data.setdefault('meta_zones', {})
        for elem in tree.findall('.//timeZoneNames/metazone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            info['common'] = elem.findtext('commonlyUsed') == 'true'
            meta_zones[elem.attrib['type']] = info

        for calendar in tree.findall('.//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = months.setdefault(ctxt_type, {})
                for width in ctxt.findall('monthWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'month':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib.get('type'))] = text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(['months', ctxt_type, width_type],
                                                 elem.attrib['path'])
                            )

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = days.setdefault(ctxt_type, {})
                for width in ctxt.findall('dayWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'day':
                            dtype = weekdays[elem.attrib['type']]
                            if ('draft' in elem.attrib or 'alt' not in elem.attrib) \
                                    and dtype in widths:
                                continue
                            widths[dtype] = text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(['days', ctxt_type, width_type],
                                                 elem.attrib['path'])
                            )

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = quarters.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('quarterWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'quarter':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib['type'])] = text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(['quarters', ctxt_type, width_type],
                                                 elem.attrib['path'])
                            )

            eras = data.setdefault('eras', {})
            for width in calendar.findall('eras/*'):
                width_type = NAME_MAP[width.tag]
                widths = eras.setdefault(width_type, {})
                for elem in width.getiterator():
                    if elem.tag == 'era':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and int(elem.attrib['type']) in widths:
                            continue
                        widths[int(elem.attrib.get('type'))] = text_type(elem.text)
                    elif elem.tag == 'alias':
                        eras[width_type] = Alias(
                            _translate_alias(['eras', width_type],
                                             elem.attrib['path'])
                        )

            # AM/PM
            periods = data.setdefault('periods', {})
            for elem in calendar.findall('am'):
                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                        and elem.tag in periods:
                    continue
                periods[elem.tag] = text_type(elem.text)
            for elem in calendar.findall('pm'):
                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                        and elem.tag in periods:
                    continue
                periods[elem.tag] = text_type(elem.text)

            date_formats = data.setdefault('date_formats', {})
            for format in calendar.findall('dateFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'dateFormatLength':
                        if 'draft' in elem.attrib and \
                                elem.attrib.get('type') in date_formats:
                            continue
                        try:
                            date_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(elem.findtext('dateFormat/pattern')))
                        except ValueError:
                            sys.stderr.write('ERROR: %s\n' % sys.exc_info()[1])
                    elif elem.tag == 'alias':
                        date_formats = Alias(_translate_alias(
                            ['date_formats'], elem.attrib['path'])
                        )

            time_formats = data.setdefault('time_formats', {})
            for format in calendar.findall('timeFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'timeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in time_formats:
                            continue
                        try:
                            time_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(elem.findtext('timeFormat/pattern')))
                        except ValueError:
                            sys.stderr.write('ERROR: %s\n' % sys.exc_info()[1])
                    elif elem.tag == 'alias':
                        time_formats = Alias(_translate_alias(
                            ['time_formats'], elem.attrib['path'])
                        )

            datetime_formats = data.setdefault('datetime_formats', {})
            for format in calendar.findall('dateTimeFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'dateTimeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in datetime_formats:
                            continue
                        try:
                            datetime_formats[elem.attrib.get('type')] = \
                                text_type(elem.findtext('dateTimeFormat/pattern'))
                        except ValueError:
                            sys.stderr.write('ERROR: %s\n' % sys.exc_info()[1])
                    elif elem.tag == 'alias':
                        datetime_formats = Alias(_translate_alias(
                            ['datetime_formats'], elem.attrib['path'])
                        )

        # <numbers>

        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('.//numbers/symbols/*'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib):
                continue
            number_symbols[elem.tag] = text_type(elem.text)

        decimal_formats = data.setdefault('decimal_formats', {})
        for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in decimal_formats:
                continue
            pattern = text_type(elem.findtext('decimalFormat/pattern'))
            decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

        scientific_formats = data.setdefault('scientific_formats', {})
        for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in scientific_formats:
                continue
            pattern = text_type(elem.findtext('scientificFormat/pattern'))
            scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

        currency_formats = data.setdefault('currency_formats', {})
        for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in currency_formats:
                continue
            pattern = text_type(elem.findtext('currencyFormat/pattern'))
            currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

        percent_formats = data.setdefault('percent_formats', {})
        for elem in tree.findall('.//percentFormats/percentFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in percent_formats:
                continue
            pattern = text_type(elem.findtext('percentFormat/pattern'))
            percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

        currency_names = data.setdefault('currency_names', {})
        currency_symbols = data.setdefault('currency_symbols', {})
        for elem in tree.findall('.//currencies/currency'):
            code = elem.attrib['type']
            # TODO: support plural rules for currency name selection
            for name in elem.findall('displayName'):
                if ('draft' in name.attrib or 'count' in name.attrib) \
                        and code in currency_names:
                    continue
                currency_names[code] = text_type(name.text)
            # TODO: support choice patterns for currency symbol selection
            symbol = elem.find('symbol')
            if symbol is not None and 'draft' not in symbol.attrib \
                    and 'choice' not in symbol.attrib:
                currency_symbols[code] = text_type(symbol.text)

        # <units>

        unit_patterns = data.setdefault('unit_patterns', {})
        for elem in tree.findall('.//units/unit'):
            unit_type = elem.attrib['type']
            unit_pattern = unit_patterns.setdefault(unit_type, {})
            for pattern in elem.findall('unitPattern'):
                unit_patterns[unit_type][pattern.attrib['count']] = \
                        text_type(pattern.text)

        dest = os.path.join(destdir, 'localedata', stem + '.dat')
        if not os.path.exists(os.path.dirname(dest)):
            os.mkdir(os.path.dirname(dest))

        outfile = open(dest, 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()
Exemple #45
0
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Load the translation catalogs and markdown docs used for i18n.

    Every ``<lang>.po`` file found in ``<project_root>/i18n/core`` is turned
    into a ``Locale`` registered in the module-level ``LOCALES`` mapping under
    the lowercased file stem. Locales without any usable translation are
    removed again, aliases are added, and the ``*.md`` files found below
    ``<project_root>/i18n`` are rendered.

    :param canonical_host: ``host`` or ``host:port`` string; used to build the
        per-locale URLs and to check which locale subdomains resolve.
    :param canonical_scheme: URL scheme; also used to look up the default
        port (via ``socket.getservbyname``) when the host carries no port.
    :param project_root: directory containing the ``i18n`` folder.
    :param tell_sentry: callable invoked as ``tell_sentry(exc, {})`` when
        loading a ``.po`` file fails; loading then continues with the next
        file.
    :return: dict with the keys ``'docs'``, ``'lang_list'``, ``'locales'``
        and ``'subdomains'``.
    """
    # Load the locales
    locale_dir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for filename in os.listdir(locale_dir):
        try:
            parts = filename.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(locale_dir, filename), 'rb') as f:
                loc = locales[lang.lower()] = Locale(lang)
                catalog = loc.catalog = read_po(f)
                share_source_strings(catalog, source_strings)
                catalog.plural_func = get_function_from_rule(catalog.plural_expr)
                replace_unused_singulars(catalog)
                try:
                    loc.countries = make_sorted_dict(COUNTRIES, loc.territories)
                except KeyError:
                    loc.countries = COUNTRIES
                try:
                    loc.languages_2 = make_sorted_dict(LANGUAGES_2, loc.languages)
                except KeyError:
                    loc.languages_2 = LANGUAGES_2
        except Exception as e:
            # Best effort: one broken catalog must not prevent the others
            # from loading, so report it and carry on.
            tell_sentry(e, {})
    del source_strings

    def percent(strings, total):
        # A tuple of strings (plural forms) counts as the fraction of its
        # non-empty members; any other non-empty string counts as 1.
        return sum(
            (percent(s, len(s)) if isinstance(s, tuple) else 1)
            for s in strings if s
        ) / total

    # Prepare a unique and sorted list for use in the language switcher
    for key, loc in list(locales.items()):
        if loc.language == 'en':
            loc.completion = 1
            continue
        loc.completion = percent(
            [m.string for m in loc.catalog if m.id and not m.fuzzy],
            len(loc.catalog),
        )
        if loc.completion == 0:
            # Delete by the key the locale is actually stored under: it is
            # the lowercased file stem (e.g. 'pt_br'), which differs from
            # `loc.language` ('pt') whenever the stem has a territory/script.
            del locales[key]
    loc_url = canonical_scheme+'://%s.'+canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    port = int(port) if port else socket.getservbyname(canonical_scheme, 'tcp')
    # Only keep the subdomains for which `resolve` reports success
    subdomains = {
        loc.subdomain: loc_url % loc.subdomain for loc in locales.values()
        if resolve(loc.subdomain + '.' + domain, port)
    }
    # Most complete languages first, ties broken by language code
    lang_list = sorted(
        (
            (loc.completion, loc.language, loc.language_name.title(), loc_url % loc.subdomain)
            for loc in set(locales.values()) if loc.completion > 0.5
        ),
        key=lambda t: (-t[0], t[1]),
    )

    # Add year-less date format
    # The regex strips a leading or trailing run of 'y' together with the
    # adjacent non-letter separator characters.
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for loc in locales.values():
        short_format = loc.date_formats['short'].pattern
        assert short_format[0] == 'y' or short_format[-1] == 'y', (loc.language, short_format)
        loc.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases
    # `setdefault` never overrides a locale that is already registered.
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        # Also register each locale under the part of its key before the
        # first underscore.
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats['standard'] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currencies['USD'] = 'dollar états-unien'

    # Load the markdown files
    # Each file is stored as docs[<parent directory name>][<file name minus
    # its last three characters>].
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
            if md.startswith('# '):
                # Drop the first line (the title) and push every remaining
                # heading two levels down.
                md = '\n'.join(md.split('\n')[1:]).strip()
                md = heading_re.sub(r'##\1', md)
            docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales, 'subdomains': subdomains}
Exemple #46
0
        # <numbers>

        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('.//numbers/symbols/*'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib):
                continue
            number_symbols[elem.tag] = unicode(elem.text)

        decimal_formats = data.setdefault('decimal_formats', {})
        for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in decimal_formats:
                continue
            pattern = unicode(elem.findtext('decimalFormat/pattern'))
            decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(
                pattern)

        scientific_formats = data.setdefault('scientific_formats', {})
        for elem in tree.findall(
                './/scientificFormats/scientificFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in scientific_formats:
                continue
            pattern = unicode(elem.findtext('scientificFormat/pattern'))
            scientific_formats[elem.attrib.get(
                'type')] = numbers.parse_pattern(pattern)

        currency_formats = data.setdefault('currency_formats', {})
        for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in currency_formats:
Exemple #47
0
                    l.countries = COUNTRIES
    return langs


# Load the locales
# NOTE: "i18n/core" is a relative path, so it is resolved against the
# current working directory of the process when this module is imported.
LOCALES = load_langs("i18n/core")

# Add the default English locale
# English gets a freshly constructed catalog instead of one read from a file.
# NOTE(review): presumably because the source strings are already English --
# confirm.
LOCALE_EN = LOCALES['en'] = Locale('en')
LOCALE_EN.catalog = Catalog('en')
# Plural form is selected for every count except exactly 1
LOCALE_EN.catalog.plural_func = lambda n: n != 1
LOCALE_EN.countries = COUNTRIES
LOCALE_EN.countries_map = COUNTRIES_MAP

# Patch the locales to look less formal
# The pattern places the currency placeholder (\xa4) after the amount,
# separated by \u202f (NARROW NO-BREAK SPACE) -- assuming these literals are
# unicode strings.
# NOTE(review): looks like this fails at import time if no 'fr' locale was
# loaded by load_langs -- confirm that a French catalog always exists.
LOCALES['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
LOCALES['fr'].currency_symbols['USD'] = '$'


def get_locale_for_request(request):
    """Pick the locale to use for *request* from its Accept-Language header.

    The header is split on commas and the entries are tried in the order
    they appear; for each one, anything from the first ``;`` onwards is
    dropped and the rest is passed through ``regularize_locale`` before
    being looked up in ``LOCALES``. The first truthy hit is returned;
    ``LOCALE_EN`` is the fallback when the header is missing or nothing
    matches.
    """
    header = request.headers.get("Accept-Language", "")
    for entry in header.split(","):
        # Keep only the language tag, not the ";q=..." style parameters
        tag = regularize_locale(entry.split(";", 1)[0])
        match = LOCALES.get(tag)
        if match:
            return match
    return LOCALE_EN


def format_currency_with_options(number, currency, locale=LOCALE_EN, trailing_zeroes=True):
Exemple #48
0
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Populate ``LOCALES`` from the ``.po`` files and render the i18n docs.

    :param canonical_host: host name appended to every locale URL.
    :param canonical_scheme: URL scheme prepended to every locale URL.
    :param project_root: directory containing the ``i18n`` folder.
    :param tell_sentry: callable invoked as
        ``tell_sentry(exc, {}, allow_reraise=True)`` when loading a ``.po``
        file raises.
    :return: dict with the keys ``'docs'``, ``'lang_list'`` and ``'locales'``.
    """
    core_dir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES

    # One Locale per "<lang>.po" file, registered under the lowercased stem
    for entry in os.listdir(core_dir):
        try:
            pieces = entry.split(".")
            if len(pieces) != 2 or pieces[1] != "po":
                continue
            lang = pieces[0]
            with open(os.path.join(core_dir, entry)) as po_file:
                loc = Locale(lang)
                locales[lang.lower()] = loc
                catalog = read_po(po_file)
                loc.catalog = catalog
                catalog.plural_func = get_function_from_rule(catalog.plural_expr)
                # Fall back to the unsorted defaults when a name is missing
                try:
                    loc.countries = make_sorted_dict(COUNTRIES, loc.territories)
                except KeyError:
                    loc.countries = COUNTRIES
                try:
                    loc.languages_2 = make_sorted_dict(LANGUAGES_2, loc.languages)
                except KeyError:
                    loc.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e, {}, allow_reraise=True)

    # Completion = share of catalog messages that have a non-empty string
    for loc in locales.values():
        messages = list(loc.catalog)
        translated = sum(1 for msg in messages if msg.string)
        loc.completion = translated / len(messages)

    # Rows for the language switcher: most complete first, then by code
    loc_url = canonical_scheme+'://%s.'+canonical_host
    switcher_rows = (
        (loc.completion, loc.language, loc.language_name.title(), loc_url % loc.language)
        for loc in set(locales.values())
    )
    lang_list = sorted(switcher_rows, key=lambda row: (-row[0], row[1]))

    # Aliases: setdefault leaves already-registered locales untouched
    for code, loc in list(locales.items()):
        locales.setdefault(ALIASES.get(code, code), loc)
        locales.setdefault(ALIASES_R.get(code, code), loc)
    for code, loc in list(locales.items()):
        base_code = code.split('_', 1)[0]
        locales.setdefault(base_code, loc)

    # Make the French locale look less formal
    french = locales['fr']
    french.currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    french.currency_symbols['USD'] = '$'

    # Render the markdown docs into docs[<directory name>][<file stem>]
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    md_root = os.path.join(project_root, 'i18n')
    for path in find_files(md_root, '*.md'):
        parent, base = os.path.split(path)
        doc_name = os.path.basename(parent)
        lang = base[:-3]
        with open(path, 'rb') as md_file:
            text = md_file.read().decode('utf8')
            if text.startswith('# '):
                # Drop the title line and demote the remaining headings
                text = text.partition('\n')[2].strip()
                text = heading_re.sub(r'##\1', text)
            per_doc = docs.setdefault(doc_name, {})
            per_doc[lang] = markdown.render(text)

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales}
Exemple #49
0
def main():
    parser = OptionParser(usage='%prog path/to/cldr')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel')

    sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml')
    bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml'))
    sup_windows_zones = parse(os.path.join(srcdir, 'supplemental',
                                           'windowsZones.xml'))
    sup_metadata = parse(os.path.join(srcdir, 'supplemental',
                                      'supplementalMetadata.xml'))
    sup_likely = parse(os.path.join(srcdir, 'supplemental',
                                    'likelySubtags.xml'))
    sup = parse(sup_filename)

    # Import global data from the supplemental files
    global_path = os.path.join(destdir, 'global.dat')
    global_data = {}
    if need_conversion(global_path, global_data, sup_filename):
        territory_zones = global_data.setdefault('territory_zones', {})
        zone_aliases = global_data.setdefault('zone_aliases', {})
        zone_territories = global_data.setdefault('zone_territories', {})
        win_mapping = global_data.setdefault('windows_zone_mapping', {})
        language_aliases = global_data.setdefault('language_aliases', {})
        territory_aliases = global_data.setdefault('territory_aliases', {})
        script_aliases = global_data.setdefault('script_aliases', {})
        variant_aliases = global_data.setdefault('variant_aliases', {})
        likely_subtags = global_data.setdefault('likely_subtags', {})
        territory_currencies = global_data.setdefault('territory_currencies', {})
        parent_exceptions = global_data.setdefault('parent_exceptions', {})

        # create auxiliary zone->territory map from the windows zones (we don't set
        # the 'zones_territories' map directly here, because there are some zones
        # aliases listed and we defer the decision of which ones to choose to the
        # 'bcp47' data
        _zone_territory_map = {}
        for map_zone in sup_windows_zones.findall(
                './/windowsZones/mapTimezones/mapZone'):
            if map_zone.attrib.get('territory') == '001':
                win_mapping[map_zone.attrib['other']] = \
                    map_zone.attrib['type'].split()[0]
            for tzid in text_type(map_zone.attrib['type']).split():
                _zone_territory_map[tzid] = \
                    text_type(map_zone.attrib['territory'])

        for key_elem in bcp47_timezone.findall('.//keyword/key'):
            if key_elem.attrib['name'] == 'tz':
                for elem in key_elem.findall('type'):
                    if 'deprecated' not in elem.attrib:
                        aliases = text_type(elem.attrib['alias']).split()
                        tzid = aliases.pop(0)
                        territory = _zone_territory_map.get(tzid, '001')
                        territory_zones.setdefault(territory, []).append(tzid)
                        zone_territories[tzid] = territory
                        for alias in aliases:
                            zone_aliases[alias] = tzid
                break

        # Import Metazone mapping
        meta_zones = global_data.setdefault('meta_zones', {})
        tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
        for elem in tzsup.findall('.//timezone'):
            for child in elem.findall('usesMetazone'):
                if 'to' not in child.attrib: # FIXME: support old mappings
                    meta_zones[elem.attrib['type']] = child.attrib['mzone']

        # Language aliases
        for alias in sup_metadata.findall('.//alias/languageAlias'):
            # We don't have a use for those at the moment.  They don't
            # pass our parser anyways.
            if '_' in alias.attrib['type']:
                continue
            language_aliases[alias.attrib['type']] = alias.attrib['replacement']

        # Territory aliases
        for alias in sup_metadata.findall('.//alias/territoryAlias'):
            territory_aliases[alias.attrib['type']] = \
                alias.attrib['replacement'].split()

        # Script aliases
        for alias in sup_metadata.findall('.//alias/scriptAlias'):
            script_aliases[alias.attrib['type']] = alias.attrib['replacement']

        # Variant aliases
        for alias in sup_metadata.findall('.//alias/variantAlias'):
            repl = alias.attrib.get('replacement')
            if repl:
                variant_aliases[alias.attrib['type']] = repl

        # Likely subtags
        for likely_subtag in sup_likely.findall('.//likelySubtags/likelySubtag'):
            likely_subtags[likely_subtag.attrib['from']] = \
                likely_subtag.attrib['to']

        # Currencies in territories
        for region in sup.findall('.//currencyData/region'):
            region_code = region.attrib['iso3166']
            region_currencies = []
            for currency in region.findall('./currency'):
                cur_start = _parse_currency_date(currency.attrib.get('from'))
                cur_end = _parse_currency_date(currency.attrib.get('to'))
                region_currencies.append((currency.attrib['iso4217'],
                                          cur_start, cur_end,
                                          currency.attrib.get(
                                              'tender', 'true') == 'true'))
            region_currencies.sort(key=_currency_sort_key)
            territory_currencies[region_code] = region_currencies

        # Explicit parent locales
        for paternity in sup.findall('.//parentLocales/parentLocale'):
            parent = paternity.attrib['parent']
            for child in paternity.attrib['locales'].split():
                parent_exceptions[child] = parent

        outfile = open(global_path, 'wb')
        try:
            pickle.dump(global_data, outfile, 2)
        finally:
            outfile.close()

    # build a territory containment mapping for inheritance
    regions = {}
    for elem in sup.findall('.//territoryContainment/group'):
        regions[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment
    territory_containment = {}
    region_items = sorted(regions.items())
    for group, territory_list in region_items:
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set([]))
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # prepare the per-locale plural rules definitions
    plural_rules = {}
    prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml'))
    for elem in prsup.findall('.//plurals/pluralRules'):
        rules = []
        for rule in elem.findall('pluralRule'):
            rules.append((rule.attrib['count'], text_type(rule.text)))
        pr = PluralRule(rules)
        for locale in elem.attrib['locales'].split():
            plural_rules[locale] = pr

    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        full_filename = os.path.join(srcdir, 'main', filename)
        data_filename = os.path.join(destdir, 'localedata', stem + '.dat')

        data = {}
        if not need_conversion(data_filename, data, full_filename):
            continue

        tree = parse(full_filename)

        language = None
        elem = tree.find('.//identity/language')
        if elem is not None:
            language = elem.attrib['type']

        territory = None
        elem = tree.find('.//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001' # world
        regions = territory_containment.get(territory, [])

        log('Processing %s (Language = %s; Territory = %s)',
            filename, language, territory)

        # plural rules
        locale_id = '_'.join(filter(None, [
            language,
            territory != '001' and territory or None
        ]))
        if locale_id in plural_rules:
            data['plural_form'] = plural_rules[locale_id]

        # <localeDisplayNames>

        territories = data.setdefault('territories', {})
        for elem in tree.findall('.//territories/territory'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in territories:
                continue
            territories[elem.attrib['type']] = _text(elem)

        languages = data.setdefault('languages', {})
        for elem in tree.findall('.//languages/language'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in languages:
                continue
            languages[elem.attrib['type']] = _text(elem)

        variants = data.setdefault('variants', {})
        for elem in tree.findall('.//variants/variant'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in variants:
                continue
            variants[elem.attrib['type']] = _text(elem)

        scripts = data.setdefault('scripts', {})
        for elem in tree.findall('.//scripts/script'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in scripts:
                continue
            scripts[elem.attrib['type']] = _text(elem)

        # <dates>

        week_data = data.setdefault('week_data', {})
        supelem = sup.find('.//weekData')

        for elem in supelem.findall('minDays'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data['min_days'] = int(elem.attrib['count'])

        for elem in supelem.findall('firstDay'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data['first_day'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendStart'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data['weekend_start'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendEnd'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data['weekend_end'] = weekdays[elem.attrib['day']]

        zone_formats = data.setdefault('zone_formats', {})
        for elem in tree.findall('.//timeZoneNames/gmtFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/regionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['region'] = text_type(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback'] = text_type(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback_region'] = text_type(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break

        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('.//timeZoneNames/zone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            time_zones[elem.attrib['type']] = info

        meta_zones = data.setdefault('meta_zones', {})
        for elem in tree.findall('.//timeZoneNames/metazone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            meta_zones[elem.attrib['type']] = info

        for calendar in tree.findall('.//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = months.setdefault(ctxt_type, {})
                for width in ctxt.findall('monthWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'month':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib.get('type'))] = \
                                text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(['months', ctxt_type, width_type],
                                                 elem.attrib['path'])
                            )

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = days.setdefault(ctxt_type, {})
                for width in ctxt.findall('dayWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'day':
                            dtype = weekdays[elem.attrib['type']]
                            if ('draft' in elem.attrib or
                                'alt' not in elem.attrib) \
                                    and dtype in widths:
                                continue
                            widths[dtype] = text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(['days', ctxt_type, width_type],
                                                 elem.attrib['path'])
                            )

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = quarters.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('quarterWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'quarter':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib['type'])] = text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(['quarters', ctxt_type,
                                                  width_type],
                                                 elem.attrib['path']))

            eras = data.setdefault('eras', {})
            for width in calendar.findall('eras/*'):
                width_type = NAME_MAP[width.tag]
                widths = eras.setdefault(width_type, {})
                for elem in width.getiterator():
                    if elem.tag == 'era':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and int(elem.attrib['type']) in widths:
                            continue
                        widths[int(elem.attrib.get('type'))] = text_type(elem.text)
                    elif elem.tag == 'alias':
                        eras[width_type] = Alias(
                            _translate_alias(['eras', width_type],
                                             elem.attrib['path'])
                        )

            # AM/PM
            periods = data.setdefault('periods', {})
            for day_period_width in calendar.findall(
                    'dayPeriods/dayPeriodContext/dayPeriodWidth'):
                if day_period_width.attrib['type'] == 'wide':
                    for day_period in day_period_width.findall('dayPeriod'):
                        if 'alt' not in day_period.attrib:
                            periods[day_period.attrib['type']] = text_type(
                                day_period.text)

            date_formats = data.setdefault('date_formats', {})
            for format in calendar.findall('dateFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'dateFormatLength':
                        if 'draft' in elem.attrib and \
                                elem.attrib.get('type') in date_formats:
                            continue
                        try:
                            date_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(
                                    elem.findtext('dateFormat/pattern')))
                        except ValueError as e:
                            error(e)
                    elif elem.tag == 'alias':
                        date_formats = Alias(_translate_alias(
                            ['date_formats'], elem.attrib['path'])
                        )

            time_formats = data.setdefault('time_formats', {})
            for format in calendar.findall('timeFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'timeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in time_formats:
                            continue
                        try:
                            time_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(
                                    elem.findtext('timeFormat/pattern')))
                        except ValueError as e:
                            error(e)
                    elif elem.tag == 'alias':
                        time_formats = Alias(_translate_alias(
                            ['time_formats'], elem.attrib['path'])
                        )

            datetime_formats = data.setdefault('datetime_formats', {})
            for format in calendar.findall('dateTimeFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'dateTimeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in datetime_formats:
                            continue
                        try:
                            datetime_formats[elem.attrib.get('type')] = \
                                text_type(elem.findtext('dateTimeFormat/pattern'))
                        except ValueError as e:
                            error(e)
                    elif elem.tag == 'alias':
                        datetime_formats = Alias(_translate_alias(
                            ['datetime_formats'], elem.attrib['path'])
                        )

        # <numbers>

        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('.//numbers/symbols/*'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib):
                continue
            number_symbols[elem.tag] = text_type(elem.text)

        decimal_formats = data.setdefault('decimal_formats', {})
        for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in decimal_formats:
                continue
            if elem.findall('./alias'):
                # TODO map the alias to its target
                continue
            pattern = text_type(elem.findtext('./decimalFormat/pattern'))
            decimal_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        scientific_formats = data.setdefault('scientific_formats', {})
        for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in scientific_formats:
                continue
            pattern = text_type(elem.findtext('scientificFormat/pattern'))
            scientific_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        currency_formats = data.setdefault('currency_formats', {})
        for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in currency_formats:
                continue
            pattern = text_type(elem.findtext('currencyFormat/pattern'))
            currency_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        percent_formats = data.setdefault('percent_formats', {})
        for elem in tree.findall('.//percentFormats/percentFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in percent_formats:
                continue
            pattern = text_type(elem.findtext('percentFormat/pattern'))
            percent_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        currency_names = data.setdefault('currency_names', {})
        currency_names_plural = data.setdefault('currency_names_plural', {})
        currency_symbols = data.setdefault('currency_symbols', {})
        for elem in tree.findall('.//currencies/currency'):
            code = elem.attrib['type']
            for name in elem.findall('displayName'):
                if ('draft' in name.attrib) and code in currency_names:
                    continue
                if 'count' in name.attrib:
                    currency_names_plural.setdefault(code, {})[
                        name.attrib['count']] = text_type(name.text)
                else:
                    currency_names[code] = text_type(name.text)
            # TODO: support choice patterns for currency symbol selection
            symbol = elem.find('symbol')
            if symbol is not None and 'draft' not in symbol.attrib \
                    and 'choice' not in symbol.attrib:
                currency_symbols[code] = text_type(symbol.text)

        # <units>

        unit_patterns = data.setdefault('unit_patterns', {})
        for elem in tree.findall('.//units/unitLength'):
            unit_length_type = elem.attrib['type']
            for unit in elem.findall('unit'):
                unit_type = unit.attrib['type']
                for pattern in unit.findall('unitPattern'):
                    box = unit_type
                    box += ':' + unit_length_type
                    unit_patterns.setdefault(box, {})[pattern.attrib['count']] = \
                        text_type(pattern.text)

        date_fields = data.setdefault('date_fields', {})
        for elem in tree.findall('.//dates/fields/field'):
            field_type = elem.attrib['type']
            date_fields.setdefault(field_type, {})
            for rel_time in elem.findall('relativeTime'):
                rel_time_type = rel_time.attrib['type']
                for pattern in rel_time.findall('relativeTimePattern'):
                    date_fields[field_type].setdefault(rel_time_type, {})\
                        [pattern.attrib['count']] = text_type(pattern.text)

        outfile = open(data_filename, 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()
Exemple #50
0
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Load the translation catalogs, locale tweaks and localized docs.

    The shared ``LOCALES`` mapping is filled in place: one ``Locale`` per
    ``<lang>.po`` file found in ``<project_root>/i18n/core``, plus alias
    keys pointing at the same objects.

    :param canonical_host: host name, optionally followed by ``:port``
    :param canonical_scheme: URL scheme; also used to look up the TCP port
                             when ``canonical_host`` does not carry one
    :param project_root: directory that contains the ``i18n`` folder
    :param tell_sentry: callable invoked as ``tell_sentry(exc, {})`` when
                        loading one of the ``.po`` files fails
    :return: a dict with the keys ``docs``, ``lang_list``, ``locales`` and
             ``subdomains``
    """
    locales = LOCALES
    core_dir = os.path.join(project_root, 'i18n', 'core')

    # Load the locales, one catalog per `<lang>.po` file. A failure on one
    # file is reported through `tell_sentry` and the loop moves on.
    shared_strings = {}
    for po_name in os.listdir(core_dir):
        try:
            lang, _, extension = po_name.partition('.')
            if extension != 'po':
                # Not of the form `<lang>.po` (no dot, several dots, or a
                # different extension).
                continue
            with open(os.path.join(core_dir, po_name)) as po_file:
                loc = Locale(lang)
                locales[lang.lower()] = loc
                catalog = read_po(po_file)
                loc.catalog = catalog
                share_source_strings(catalog, shared_strings)
                catalog.plural_func = get_function_from_rule(
                    catalog.plural_expr)
                replace_unused_singulars(catalog)
                # Fall back to the unsorted defaults when the locale data
                # lacks one of the needed keys.
                try:
                    loc.countries = make_sorted_dict(
                        COUNTRIES, loc.territories)
                except KeyError:
                    loc.countries = COUNTRIES
                try:
                    loc.languages_2 = make_sorted_dict(
                        LANGUAGES_2, loc.languages)
                except KeyError:
                    loc.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e, {})
    del shared_strings

    def translated_share(strings, total):
        # Every non-empty string counts for 1; a non-empty tuple (several
        # forms for one message) counts for the share of its own members
        # that are non-empty. The sum is then divided by `total`.
        done = 0
        for item in strings:
            if not item:
                continue
            if isinstance(item, tuple):
                done += translated_share(item, len(item))
            else:
                done += 1
        return done / total

    # Prepare a unique and sorted list for use in the language switcher
    for loc in list(locales.values()):
        if loc.language == 'en':
            loc.completion = 1
        else:
            translated = [
                m.string for m in loc.catalog if m.id and not m.fuzzy
            ]
            loc.completion = translated_share(translated, len(loc.catalog))
            if loc.completion == 0:
                # Nothing translated at all: drop the locale.
                del locales[loc.language]

    loc_url = canonical_scheme + '://%s.' + canonical_host
    host_parts = canonical_host.split(':') + [None]
    domain, port = host_parts[:2]
    if port:
        port = int(port)
    else:
        port = socket.getservbyname(canonical_scheme, 'tcp')

    # Only advertise the subdomains for which `resolve` gives a truthy
    # answer.
    subdomains = {}
    for key in locales:
        if resolve(key + '.' + domain, port):
            subdomains[key] = loc_url % key

    # Most complete first, ties broken by language code.
    lang_list = [
        (loc.completion, loc.language, loc.language_name.title(),
         loc_url % loc.language)
        for loc in set(locales.values())
        if loc.completion > 0.5
    ]
    lang_list.sort(key=lambda entry: (-entry[0], entry[1]))

    # Add year-less date format
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for loc in locales.values():
        short_format = loc.date_formats['short'].pattern
        assert short_format[0] == 'y' or short_format[-1] == 'y', (
            loc.language, short_format)
        loc.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases; `setdefault` never overrides a key that already exists.
    for key, loc in list(locales.items()):
        locales.setdefault(ALIASES.get(key, key), loc)
        locales.setdefault(ALIASES_R.get(key, key), loc)
    for key, loc in list(locales.items()):
        # Also register the part of the key before the first underscore.
        locales.setdefault(key.partition('_')[0], loc)

    # Patch the locales to look less formal
    informal_pattern = parse_pattern('#,##0.00\u202f\xa4')
    french = locales['fr']
    french.currency_formats['standard'] = informal_pattern
    french.currency_symbols['USD'] = '$'
    french.currencies['USD'] = 'dollar états-unien'

    # Load the markdown files: the parent directory names the document and
    # the file name minus its last three characters names the language.
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for md_path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        parent_dir, file_name = os.path.split(md_path)
        doc_name = os.path.basename(parent_dir)
        lang = file_name[:-3]
        with open(md_path, 'rb') as md_file:
            text = md_file.read().decode('utf8')
            if text.startswith('# '):
                # Drop the first line (the top-level title) and prefix
                # every remaining heading with two more `#`.
                text = text.partition('\n')[2].strip()
                text = heading_re.sub(r'##\1', text)
            docs.setdefault(doc_name, {})[lang] = markdown.render(text)

    return {
        'docs': docs,
        'lang_list': lang_list,
        'locales': locales,
        'subdomains': subdomains
    }
Exemple #51
0
def main():
    parser = OptionParser(usage='%prog path/to/cldr')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..',
                           'babel')

    sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml')
    bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml'))
    sup_windows_zones = parse(
        os.path.join(srcdir, 'supplemental', 'windowsZones.xml'))
    sup_metadata = parse(
        os.path.join(srcdir, 'supplemental', 'supplementalMetadata.xml'))
    sup_likely = parse(
        os.path.join(srcdir, 'supplemental', 'likelySubtags.xml'))
    sup = parse(sup_filename)

    # Import global data from the supplemental files
    global_path = os.path.join(destdir, 'global.dat')
    global_data = {}
    if need_conversion(global_path, global_data, sup_filename):
        territory_zones = global_data.setdefault('territory_zones', {})
        zone_aliases = global_data.setdefault('zone_aliases', {})
        zone_territories = global_data.setdefault('zone_territories', {})
        win_mapping = global_data.setdefault('windows_zone_mapping', {})
        language_aliases = global_data.setdefault('language_aliases', {})
        territory_aliases = global_data.setdefault('territory_aliases', {})
        script_aliases = global_data.setdefault('script_aliases', {})
        variant_aliases = global_data.setdefault('variant_aliases', {})
        likely_subtags = global_data.setdefault('likely_subtags', {})
        territory_currencies = global_data.setdefault('territory_currencies',
                                                      {})

        # create auxiliary zone->territory map from the windows zones (we don't set
        # the 'zones_territories' map directly here, because there are some zones
        # aliases listed and we defer the decision of which ones to choose to the
        # 'bcp47' data
        _zone_territory_map = {}
        for map_zone in sup_windows_zones.findall(
                './/windowsZones/mapTimezones/mapZone'):
            if map_zone.attrib.get('territory') == '001':
                win_mapping[map_zone.attrib['other']] = \
                    map_zone.attrib['type'].split()[0]
            for tzid in text_type(map_zone.attrib['type']).split():
                _zone_territory_map[tzid] = \
                    text_type(map_zone.attrib['territory'])

        for key_elem in bcp47_timezone.findall('.//keyword/key'):
            if key_elem.attrib['name'] == 'tz':
                for elem in key_elem.findall('type'):
                    aliases = text_type(elem.attrib['alias']).split()
                    tzid = aliases.pop(0)
                    territory = _zone_territory_map.get(tzid, '001')
                    territory_zones.setdefault(territory, []).append(tzid)
                    zone_territories[tzid] = territory
                    for alias in aliases:
                        zone_aliases[alias] = tzid
                break

        # Import Metazone mapping
        meta_zones = global_data.setdefault('meta_zones', {})
        tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
        for elem in tzsup.findall('.//timezone'):
            for child in elem.findall('usesMetazone'):
                if 'to' not in child.attrib:  # FIXME: support old mappings
                    meta_zones[elem.attrib['type']] = child.attrib['mzone']

        # Language aliases
        for alias in sup_metadata.findall('.//alias/languageAlias'):
            # We don't have a use for those at the moment.  They don't
            # pass our parser anyways.
            if '-' in alias.attrib['type']:
                continue
            language_aliases[
                alias.attrib['type']] = alias.attrib['replacement']

        # Territory aliases
        for alias in sup_metadata.findall('.//alias/territoryAlias'):
            territory_aliases[alias.attrib['type']] = \
                alias.attrib['replacement'].split()

        # Script aliases
        for alias in sup_metadata.findall('.//alias/scriptAlias'):
            script_aliases[alias.attrib['type']] = alias.attrib['replacement']

        # Variant aliases
        for alias in sup_metadata.findall('.//alias/variantAlias'):
            repl = alias.attrib.get('replacement')
            if repl:
                variant_aliases[alias.attrib['type']] = repl

        # Likely subtags
        for likely_subtag in sup_likely.findall(
                './/likelySubtags/likelySubtag'):
            likely_subtags[likely_subtag.attrib['from']] = \
                likely_subtag.attrib['to']

        # Currencies in territories
        for region in sup.findall('.//currencyData/region'):
            region_code = region.attrib['iso3166']
            region_currencies = []
            for currency in region.findall('./currency'):
                cur_start = _parse_currency_date(currency.attrib.get('from'))
                cur_end = _parse_currency_date(currency.attrib.get('to'))
                region_currencies.append(
                    (currency.attrib['iso4217'], cur_start, cur_end,
                     currency.attrib.get('tender', 'true') == 'true'))
            region_currencies.sort(key=_currency_sort_key)
            territory_currencies[region_code] = region_currencies

        outfile = open(global_path, 'wb')
        try:
            pickle.dump(global_data, outfile, 2)
        finally:
            outfile.close()

    # build a territory containment mapping for inheritance
    regions = {}
    for elem in sup.findall('.//territoryContainment/group'):
        regions[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment
    territory_containment = {}
    region_items = sorted(regions.items())
    for group, territory_list in region_items:
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set([]))
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # prepare the per-locale plural rules definitions
    plural_rules = {}
    prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml'))
    for elem in prsup.findall('.//plurals/pluralRules'):
        rules = []
        for rule in elem.findall('pluralRule'):
            rules.append((rule.attrib['count'], text_type(rule.text)))
        pr = PluralRule(rules)
        for locale in elem.attrib['locales'].split():
            plural_rules[locale] = pr

    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        full_filename = os.path.join(srcdir, 'main', filename)
        data_filename = os.path.join(destdir, 'localedata', stem + '.dat')

        data = {}
        if not need_conversion(data_filename, data, full_filename):
            continue

        tree = parse(full_filename)

        language = None
        elem = tree.find('.//identity/language')
        if elem is not None:
            language = elem.attrib['type']

        territory = None
        elem = tree.find('.//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001'  # world
        regions = territory_containment.get(territory, [])

        log('Processing %s (Language = %s; Territory = %s)', filename,
            language, territory)

        # plural rules
        locale_id = '_'.join(
            filter(None, [language, territory != '001' and territory or None]))
        if locale_id in plural_rules:
            data['plural_form'] = plural_rules[locale_id]

        # <localeDisplayNames>

        territories = data.setdefault('territories', {})
        for elem in tree.findall('.//territories/territory'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in territories:
                continue
            territories[elem.attrib['type']] = _text(elem)

        languages = data.setdefault('languages', {})
        for elem in tree.findall('.//languages/language'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in languages:
                continue
            languages[elem.attrib['type']] = _text(elem)

        variants = data.setdefault('variants', {})
        for elem in tree.findall('.//variants/variant'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in variants:
                continue
            variants[elem.attrib['type']] = _text(elem)

        scripts = data.setdefault('scripts', {})
        for elem in tree.findall('.//scripts/script'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in scripts:
                continue
            scripts[elem.attrib['type']] = _text(elem)

        # <dates>

        week_data = data.setdefault('week_data', {})
        supelem = sup.find('.//weekData')

        for elem in supelem.findall('minDays'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['min_days'] = int(elem.attrib['count'])

        for elem in supelem.findall('firstDay'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['first_day'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendStart'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['weekend_start'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendEnd'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['weekend_end'] = weekdays[elem.attrib['day']]

        zone_formats = data.setdefault('zone_formats', {})
        for elem in tree.findall('.//timeZoneNames/gmtFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/regionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['region'] = text_type(elem.text).replace(
                    '{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback'] = text_type(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback_region'] = text_type(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break

        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('.//timeZoneNames/zone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            time_zones[elem.attrib['type']] = info

        meta_zones = data.setdefault('meta_zones', {})
        for elem in tree.findall('.//timeZoneNames/metazone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            meta_zones[elem.attrib['type']] = info

        for calendar in tree.findall('.//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = months.setdefault(ctxt_type, {})
                for width in ctxt.findall('monthWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'month':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib.get('type'))] = \
                                text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['months', ctxt_type, width_type],
                                    elem.attrib['path']))

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = days.setdefault(ctxt_type, {})
                for width in ctxt.findall('dayWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'day':
                            dtype = weekdays[elem.attrib['type']]
                            if ('draft' in elem.attrib or
                                'alt' not in elem.attrib) \
                                    and dtype in widths:
                                continue
                            widths[dtype] = text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['days', ctxt_type, width_type],
                                    elem.attrib['path']))

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = quarters.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('quarterWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'quarter':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib['type'])] = text_type(
                                elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['quarters', ctxt_type, width_type],
                                    elem.attrib['path']))

            eras = data.setdefault('eras', {})
            for width in calendar.findall('eras/*'):
                width_type = NAME_MAP[width.tag]
                widths = eras.setdefault(width_type, {})
                for elem in width.getiterator():
                    if elem.tag == 'era':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and int(elem.attrib['type']) in widths:
                            continue
                        widths[int(elem.attrib.get('type'))] = text_type(
                            elem.text)
                    elif elem.tag == 'alias':
                        eras[width_type] = Alias(
                            _translate_alias(['eras', width_type],
                                             elem.attrib['path']))

            # AM/PM
            periods = data.setdefault('periods', {})
            for day_period_width in calendar.findall(
                    'dayPeriods/dayPeriodContext/dayPeriodWidth'):
                if day_period_width.attrib['type'] == 'wide':
                    for day_period in day_period_width.findall('dayPeriod'):
                        if 'alt' not in day_period.attrib:
                            periods[day_period.attrib['type']] = text_type(
                                day_period.text)

            date_formats = data.setdefault('date_formats', {})
            for format in calendar.findall('dateFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'dateFormatLength':
                        if 'draft' in elem.attrib and \
                                elem.attrib.get('type') in date_formats:
                            continue
                        try:
                            date_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(
                                    elem.findtext('dateFormat/pattern')))
                        except ValueError as e:
                            error(e)
                    elif elem.tag == 'alias':
                        date_formats = Alias(
                            _translate_alias(['date_formats'],
                                             elem.attrib['path']))

            time_formats = data.setdefault('time_formats', {})
            for format in calendar.findall('timeFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'timeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in time_formats:
                            continue
                        try:
                            time_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(
                                    elem.findtext('timeFormat/pattern')))
                        except ValueError as e:
                            error(e)
                    elif elem.tag == 'alias':
                        time_formats = Alias(
                            _translate_alias(['time_formats'],
                                             elem.attrib['path']))

            datetime_formats = data.setdefault('datetime_formats', {})
            for format in calendar.findall('dateTimeFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'dateTimeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in datetime_formats:
                            continue
                        try:
                            datetime_formats[elem.attrib.get('type')] = \
                                text_type(elem.findtext('dateTimeFormat/pattern'))
                        except ValueError as e:
                            error(e)
                    elif elem.tag == 'alias':
                        datetime_formats = Alias(
                            _translate_alias(['datetime_formats'],
                                             elem.attrib['path']))

        # <numbers>

        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('.//numbers/symbols/*'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib):
                continue
            number_symbols[elem.tag] = text_type(elem.text)

        decimal_formats = data.setdefault('decimal_formats', {})
        for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in decimal_formats:
                continue
            if elem.findall('./alias'):
                # TODO map the alias to its target
                continue
            pattern = text_type(elem.findtext('./decimalFormat/pattern'))
            decimal_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        scientific_formats = data.setdefault('scientific_formats', {})
        for elem in tree.findall(
                './/scientificFormats/scientificFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in scientific_formats:
                continue
            pattern = text_type(elem.findtext('scientificFormat/pattern'))
            scientific_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        currency_formats = data.setdefault('currency_formats', {})
        for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in currency_formats:
                continue
            pattern = text_type(elem.findtext('currencyFormat/pattern'))
            currency_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        percent_formats = data.setdefault('percent_formats', {})
        for elem in tree.findall('.//percentFormats/percentFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in percent_formats:
                continue
            pattern = text_type(elem.findtext('percentFormat/pattern'))
            percent_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        currency_names = data.setdefault('currency_names', {})
        currency_names_plural = data.setdefault('currency_names_plural', {})
        currency_symbols = data.setdefault('currency_symbols', {})
        for elem in tree.findall('.//currencies/currency'):
            code = elem.attrib['type']
            for name in elem.findall('displayName'):
                if ('draft' in name.attrib) and code in currency_names:
                    continue
                if 'count' in name.attrib:
                    currency_names_plural.setdefault(
                        code, {})[name.attrib['count']] = text_type(name.text)
                else:
                    currency_names[code] = text_type(name.text)
            # TODO: support choice patterns for currency symbol selection
            symbol = elem.find('symbol')
            if symbol is not None and 'draft' not in symbol.attrib \
                    and 'choice' not in symbol.attrib:
                currency_symbols[code] = text_type(symbol.text)

        # <units>

        unit_patterns = data.setdefault('unit_patterns', {})
        for elem in tree.findall('.//units/unit'):
            unit_type = elem.attrib['type']
            for pattern in elem.findall('unitPattern'):
                box = unit_type
                if 'alt' in pattern.attrib:
                    box += ':' + pattern.attrib['alt']
                unit_patterns.setdefault(box, {})[pattern.attrib['count']] = \
                    text_type(pattern.text)

        outfile = open(data_filename, 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()
Exemple #52
0
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Populate the locale registry and collect the i18n resources.

    The module-level ``LOCALES`` mapping is filled in place (it is the same
    object as the ``'locales'`` entry of the returned dict).

    :param canonical_host: host name, optionally followed by ``:port``
    :param canonical_scheme: URL scheme; also used to look up the default
        port when ``canonical_host`` carries none
    :param project_root: directory that contains the ``i18n`` folder
    :param tell_sentry: callable invoked with any exception raised while
        loading a single ``.po`` file (loading then moves on to the next file)
    :returns: a dict with the keys ``'docs'``, ``'lang_list'``, ``'locales'``
        and ``'subdomains'``
    """
    def compute_percentage(it, total):
        # Every truthy string counts for 1; a tuple counts for the fraction
        # of its own items that are truthy (computed recursively).
        translated = 0
        for s in it:
            if not s:
                continue
            if isinstance(s, tuple):
                translated += compute_percentage(s, len(s))
            else:
                translated += 1
        return translated / total

    # --- Base locales: one "<lang>.po" file each in i18n/core ---
    core_dir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for filename in os.listdir(core_dir):
        try:
            parts = filename.split(".")
            if len(parts) != 2 or parts[1] != "po":
                continue
            lang = parts[0]
            with open(os.path.join(core_dir, filename), 'rb') as po_file:
                loc = Locale(lang)
                catalog = read_po(po_file)
                loc.catalog = catalog
                share_source_strings(catalog, source_strings)
                catalog.plural_func = get_function_from_rule(
                    catalog.plural_expr)
                replace_unused_singulars(catalog)
                loc.completion = compute_percentage(
                    (msg.string for msg in catalog
                     if msg.id and not msg.fuzzy),
                    len(catalog))
                # A locale whose completion is zero is not registered
                if loc.completion == 0:
                    continue
                locales[lang.lower()] = loc
                try:
                    loc.countries = make_sorted_dict(
                        COUNTRIES, loc.territories)
                except KeyError:
                    loc.countries = COUNTRIES
                try:
                    loc.languages_2 = make_sorted_dict(
                        LANGUAGES_2, loc.languages)
                except KeyError:
                    loc.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e)
    del source_strings

    # --- Variants: reuse the catalog of the locale they derive from ---
    for loc_id in babel.localedata.locale_identifiers():
        if loc_id in locales:
            continue
        # Everything before the last underscore identifies the base locale
        prefix, underscore, _suffix = loc_id.rpartition('_')
        if not underscore:
            continue
        base = locales.get(prefix)
        if not base:
            continue
        variant = Locale.parse(loc_id)
        locales[loc_id.lower()] = variant
        variant.catalog = base.catalog
        variant.completion = base.completion
        variant.countries = base.countries
        variant.languages_2 = base.languages_2

    # --- Drop the Babel data that is no longer needed ---
    # We load a lot of data to populate the LANGUAGE_NAMES dict, we don't want
    # to keep it all in RAM.
    live_data_ids = {id(loc._data._data) for loc in locales.values()}
    babel_cache = babel.localedata._cache
    for cache_key, cached in list(babel_cache.items()):
        if id(cached) in live_data_ids:
            continue
        del babel_cache[cache_key]

    # --- Data for the language switcher ---
    loc_url = canonical_scheme + '://%s.' + canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    if port:
        port = int(port)
    else:
        port = socket.getservbyname(canonical_scheme, 'tcp')
    subdomains = {}
    for loc in locales.values():
        if loc.territory:
            continue
        if not resolve(loc.subdomain + '.' + domain, port):
            continue
        subdomains[loc.subdomain] = loc_url % loc.subdomain

    def by_completion_then_code(entry):
        # Highest completion first, then by language code
        return (-entry[0], entry[1])

    lang_list = sorted(
        (
            (loc.completion, loc.language, loc.language_name.title(),
             loc_url % loc.subdomain)
            for loc in set(locales.values())
            if not loc.territory and loc.completion > 0.5
        ),
        key=by_completion_then_code,
    )

    # --- Year-less variant of the short date format ---
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for loc in locales.values():
        short_format = loc.date_formats['short'].pattern
        # The regex only strips a year located at either end of the pattern
        assert 'y' in (short_format[0], short_format[-1]), (
            loc.language, short_format)
        loc.date_formats['short_yearless'] = year_re.sub('', short_format)

    # --- Aliases (existing keys are never overwritten) ---
    for key, loc in list(locales.items()):
        locales.setdefault(ALIASES.get(key, key), loc)
        locales.setdefault(ALIASES_R.get(key, key), loc)
    for key, loc in list(locales.items()):
        locales.setdefault(key.partition('_')[0], loc)

    # --- Universal strings ---
    # These strings don't need to be translated, but they have to be in the
    # catalogs so that they're counted as translated.
    for loc in locales.values():
        loc.catalog.add("PayPal", "PayPal")

    # --- Patch the locales to look less formal ---
    informal_pattern = parse_pattern('#,##0.00\u202f\xa4')
    french = locales['fr']
    french.currency_formats['standard'] = informal_pattern
    french.currencies['USD'] = 'dollar états-unien'

    # --- Markdown documents, keyed by parent directory name then language ---
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        parent_dir, basename = os.path.split(path)
        doc = os.path.basename(parent_dir)
        doc_lang = basename[:-3]  # strip the ".md" extension
        with open(path, 'rb') as md_file:
            md = md_file.read().decode('utf8')
        if md.startswith('# '):
            # Drop the first line (the title) and push the remaining
            # headings two levels down
            md = md.partition('\n')[2].strip()
            md = heading_re.sub(r'##\1', md)
        translations = docs.setdefault(doc, {})
        translations[doc_lang] = markdown.render(md)

    return {
        'docs': docs,
        'lang_list': lang_list,
        'locales': locales,
        'subdomains': subdomains
    }
Exemple #53
0
def main():
    parser = OptionParser(usage="%prog path/to/cldr")
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), "..", "babel")

    sup_filename = os.path.join(srcdir, "supplemental", "supplementalData.xml")
    bcp47_timezone = parse(os.path.join(srcdir, "bcp47", "timezone.xml"))
    sup_windows_zones = parse(os.path.join(srcdir, "supplemental", "windowsZones.xml"))
    sup_metadata = parse(os.path.join(srcdir, "supplemental", "supplementalMetadata.xml"))
    sup_likely = parse(os.path.join(srcdir, "supplemental", "likelySubtags.xml"))
    sup = parse(sup_filename)

    # Import global data from the supplemental files
    global_path = os.path.join(destdir, "global.dat")
    global_data = {}
    if need_conversion(global_path, global_data, sup_filename):
        territory_zones = global_data.setdefault("territory_zones", {})
        zone_aliases = global_data.setdefault("zone_aliases", {})
        zone_territories = global_data.setdefault("zone_territories", {})
        win_mapping = global_data.setdefault("windows_zone_mapping", {})
        language_aliases = global_data.setdefault("language_aliases", {})
        territory_aliases = global_data.setdefault("territory_aliases", {})
        script_aliases = global_data.setdefault("script_aliases", {})
        variant_aliases = global_data.setdefault("variant_aliases", {})
        likely_subtags = global_data.setdefault("likely_subtags", {})
        territory_currencies = global_data.setdefault("territory_currencies", {})
        parent_exceptions = global_data.setdefault("parent_exceptions", {})
        currency_fractions = global_data.setdefault("currency_fractions", {})

        # create auxiliary zone->territory map from the windows zones (we don't set
        # the 'zones_territories' map directly here, because there are some zones
        # aliases listed and we defer the decision of which ones to choose to the
        # 'bcp47' data
        _zone_territory_map = {}
        for map_zone in sup_windows_zones.findall(".//windowsZones/mapTimezones/mapZone"):
            if map_zone.attrib.get("territory") == "001":
                win_mapping[map_zone.attrib["other"]] = map_zone.attrib["type"].split()[0]
            for tzid in text_type(map_zone.attrib["type"]).split():
                _zone_territory_map[tzid] = text_type(map_zone.attrib["territory"])

        for key_elem in bcp47_timezone.findall(".//keyword/key"):
            if key_elem.attrib["name"] == "tz":
                for elem in key_elem.findall("type"):
                    if "deprecated" not in elem.attrib:
                        aliases = text_type(elem.attrib["alias"]).split()
                        tzid = aliases.pop(0)
                        territory = _zone_territory_map.get(tzid, "001")
                        territory_zones.setdefault(territory, []).append(tzid)
                        zone_territories[tzid] = territory
                        for alias in aliases:
                            zone_aliases[alias] = tzid
                break

        # Import Metazone mapping
        meta_zones = global_data.setdefault("meta_zones", {})
        tzsup = parse(os.path.join(srcdir, "supplemental", "metaZones.xml"))
        for elem in tzsup.findall(".//timezone"):
            for child in elem.findall("usesMetazone"):
                if "to" not in child.attrib:  # FIXME: support old mappings
                    meta_zones[elem.attrib["type"]] = child.attrib["mzone"]

        # Language aliases
        for alias in sup_metadata.findall(".//alias/languageAlias"):
            # We don't have a use for those at the moment.  They don't
            # pass our parser anyways.
            if "_" in alias.attrib["type"]:
                continue
            language_aliases[alias.attrib["type"]] = alias.attrib["replacement"]

        # Territory aliases
        for alias in sup_metadata.findall(".//alias/territoryAlias"):
            territory_aliases[alias.attrib["type"]] = alias.attrib["replacement"].split()

        # Script aliases
        for alias in sup_metadata.findall(".//alias/scriptAlias"):
            script_aliases[alias.attrib["type"]] = alias.attrib["replacement"]

        # Variant aliases
        for alias in sup_metadata.findall(".//alias/variantAlias"):
            repl = alias.attrib.get("replacement")
            if repl:
                variant_aliases[alias.attrib["type"]] = repl

        # Likely subtags
        for likely_subtag in sup_likely.findall(".//likelySubtags/likelySubtag"):
            likely_subtags[likely_subtag.attrib["from"]] = likely_subtag.attrib["to"]

        # Currencies in territories
        for region in sup.findall(".//currencyData/region"):
            region_code = region.attrib["iso3166"]
            region_currencies = []
            for currency in region.findall("./currency"):
                cur_start = _parse_currency_date(currency.attrib.get("from"))
                cur_end = _parse_currency_date(currency.attrib.get("to"))
                region_currencies.append(
                    (currency.attrib["iso4217"], cur_start, cur_end, currency.attrib.get("tender", "true") == "true")
                )
            region_currencies.sort(key=_currency_sort_key)
            territory_currencies[region_code] = region_currencies

        # Explicit parent locales
        for paternity in sup.findall(".//parentLocales/parentLocale"):
            parent = paternity.attrib["parent"]
            for child in paternity.attrib["locales"].split():
                parent_exceptions[child] = parent

        # Currency decimal and rounding digits
        for fraction in sup.findall(".//currencyData/fractions/info"):
            cur_code = fraction.attrib["iso4217"]
            cur_digits = int(fraction.attrib["digits"])
            cur_rounding = int(fraction.attrib["rounding"])
            cur_cdigits = int(fraction.attrib.get("cashDigits", cur_digits))
            cur_crounding = int(fraction.attrib.get("cashRounding", cur_rounding))
            currency_fractions[cur_code] = (cur_digits, cur_rounding, cur_cdigits, cur_crounding)

        outfile = open(global_path, "wb")
        try:
            pickle.dump(global_data, outfile, 2)
        finally:
            outfile.close()

    # build a territory containment mapping for inheritance
    regions = {}
    for elem in sup.findall(".//territoryContainment/group"):
        regions[elem.attrib["type"]] = elem.attrib["contains"].split()

    # Resolve territory containment
    territory_containment = {}
    region_items = sorted(regions.items())
    for group, territory_list in region_items:
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set([]))
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # prepare the per-locale plural rules definitions
    plural_rules = {}
    prsup = parse(os.path.join(srcdir, "supplemental", "plurals.xml"))
    for elem in prsup.findall(".//plurals/pluralRules"):
        rules = []
        for rule in elem.findall("pluralRule"):
            rules.append((rule.attrib["count"], text_type(rule.text)))
        pr = PluralRule(rules)
        for locale in elem.attrib["locales"].split():
            plural_rules[locale] = pr

    filenames = os.listdir(os.path.join(srcdir, "main"))
    filenames.remove("root.xml")
    filenames.sort(key=len)
    filenames.insert(0, "root.xml")

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != ".xml":
            continue

        full_filename = os.path.join(srcdir, "main", filename)
        data_filename = os.path.join(destdir, "locale-data", stem + ".dat")

        data = {}
        if not need_conversion(data_filename, data, full_filename):
            continue

        tree = parse(full_filename)

        language = None
        elem = tree.find(".//identity/language")
        if elem is not None:
            language = elem.attrib["type"]

        territory = None
        elem = tree.find(".//identity/territory")
        if elem is not None:
            territory = elem.attrib["type"]
        else:
            territory = "001"  # world
        regions = territory_containment.get(territory, [])

        log("Processing %s (Language = %s; Territory = %s)", filename, language, territory)

        # plural rules
        locale_id = "_".join(filter(None, [language, territory != "001" and territory or None]))
        if locale_id in plural_rules:
            data["plural_form"] = plural_rules[locale_id]

        # <localeDisplayNames>

        territories = data.setdefault("territories", {})
        for elem in tree.findall(".//territories/territory"):
            if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib["type"] in territories:
                continue
            territories[elem.attrib["type"]] = _text(elem)

        languages = data.setdefault("languages", {})
        for elem in tree.findall(".//languages/language"):
            if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib["type"] in languages:
                continue
            languages[elem.attrib["type"]] = _text(elem)

        variants = data.setdefault("variants", {})
        for elem in tree.findall(".//variants/variant"):
            if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib["type"] in variants:
                continue
            variants[elem.attrib["type"]] = _text(elem)

        scripts = data.setdefault("scripts", {})
        for elem in tree.findall(".//scripts/script"):
            if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib["type"] in scripts:
                continue
            scripts[elem.attrib["type"]] = _text(elem)

        # <dates>

        week_data = data.setdefault("week_data", {})
        supelem = sup.find(".//weekData")

        for elem in supelem.findall("minDays"):
            territories = elem.attrib["territories"].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data["min_days"] = int(elem.attrib["count"])

        for elem in supelem.findall("firstDay"):
            territories = elem.attrib["territories"].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data["first_day"] = weekdays[elem.attrib["day"]]

        for elem in supelem.findall("weekendStart"):
            territories = elem.attrib["territories"].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data["weekend_start"] = weekdays[elem.attrib["day"]]

        for elem in supelem.findall("weekendEnd"):
            territories = elem.attrib["territories"].split()
            if territory in territories or any([r in territories for r in regions]):
                week_data["weekend_end"] = weekdays[elem.attrib["day"]]

        zone_formats = data.setdefault("zone_formats", {})
        for elem in tree.findall(".//timeZoneNames/gmtFormat"):
            if "draft" not in elem.attrib and "alt" not in elem.attrib:
                zone_formats["gmt"] = text_type(elem.text).replace("{0}", "%s")
                break
        for elem in tree.findall(".//timeZoneNames/regionFormat"):
            if "draft" not in elem.attrib and "alt" not in elem.attrib:
                zone_formats["region"] = text_type(elem.text).replace("{0}", "%s")
                break
        for elem in tree.findall(".//timeZoneNames/fallbackFormat"):
            if "draft" not in elem.attrib and "alt" not in elem.attrib:
                zone_formats["fallback"] = text_type(elem.text).replace("{0}", "%(0)s").replace("{1}", "%(1)s")
                break
        for elem in tree.findall(".//timeZoneNames/fallbackRegionFormat"):
            if "draft" not in elem.attrib and "alt" not in elem.attrib:
                zone_formats["fallback_region"] = text_type(elem.text).replace("{0}", "%(0)s").replace("{1}", "%(1)s")
                break

        time_zones = data.setdefault("time_zones", {})
        for elem in tree.findall(".//timeZoneNames/zone"):
            info = {}
            city = elem.findtext("exemplarCity")
            if city:
                info["city"] = text_type(city)
            for child in elem.findall("long/*"):
                info.setdefault("long", {})[child.tag] = text_type(child.text)
            for child in elem.findall("short/*"):
                info.setdefault("short", {})[child.tag] = text_type(child.text)
            time_zones[elem.attrib["type"]] = info

        meta_zones = data.setdefault("meta_zones", {})
        for elem in tree.findall(".//timeZoneNames/metazone"):
            info = {}
            city = elem.findtext("exemplarCity")
            if city:
                info["city"] = text_type(city)
            for child in elem.findall("long/*"):
                info.setdefault("long", {})[child.tag] = text_type(child.text)
            for child in elem.findall("short/*"):
                info.setdefault("short", {})[child.tag] = text_type(child.text)
            meta_zones[elem.attrib["type"]] = info

        for calendar in tree.findall(".//calendars/calendar"):
            if calendar.attrib["type"] != "gregorian":
                # TODO: support other calendar types
                continue

            months = data.setdefault("months", {})
            for ctxt in calendar.findall("months/monthContext"):
                ctxt_type = ctxt.attrib["type"]
                ctxts = months.setdefault(ctxt_type, {})
                for width in ctxt.findall("monthWidth"):
                    width_type = width.attrib["type"]
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == "month":
                            if ("draft" in elem.attrib or "alt" in elem.attrib) and int(elem.attrib["type"]) in widths:
                                continue
                            widths[int(elem.attrib.get("type"))] = text_type(elem.text)
                        elif elem.tag == "alias":
                            ctxts[width_type] = Alias(
                                _translate_alias(["months", ctxt_type, width_type], elem.attrib["path"])
                            )

            days = data.setdefault("days", {})
            for ctxt in calendar.findall("days/dayContext"):
                ctxt_type = ctxt.attrib["type"]
                ctxts = days.setdefault(ctxt_type, {})
                for width in ctxt.findall("dayWidth"):
                    width_type = width.attrib["type"]
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == "day":
                            dtype = weekdays[elem.attrib["type"]]
                            if ("draft" in elem.attrib or "alt" not in elem.attrib) and dtype in widths:
                                continue
                            widths[dtype] = text_type(elem.text)
                        elif elem.tag == "alias":
                            ctxts[width_type] = Alias(
                                _translate_alias(["days", ctxt_type, width_type], elem.attrib["path"])
                            )

            quarters = data.setdefault("quarters", {})
            for ctxt in calendar.findall("quarters/quarterContext"):
                ctxt_type = ctxt.attrib["type"]
                ctxts = quarters.setdefault(ctxt.attrib["type"], {})
                for width in ctxt.findall("quarterWidth"):
                    width_type = width.attrib["type"]
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == "quarter":
                            if ("draft" in elem.attrib or "alt" in elem.attrib) and int(elem.attrib["type"]) in widths:
                                continue
                            widths[int(elem.attrib["type"])] = text_type(elem.text)
                        elif elem.tag == "alias":
                            ctxts[width_type] = Alias(
                                _translate_alias(["quarters", ctxt_type, width_type], elem.attrib["path"])
                            )

            eras = data.setdefault("eras", {})
            for width in calendar.findall("eras/*"):
                width_type = NAME_MAP[width.tag]
                widths = eras.setdefault(width_type, {})
                for elem in width.getiterator():
                    if elem.tag == "era":
                        if ("draft" in elem.attrib or "alt" in elem.attrib) and int(elem.attrib["type"]) in widths:
                            continue
                        widths[int(elem.attrib.get("type"))] = text_type(elem.text)
                    elif elem.tag == "alias":
                        eras[width_type] = Alias(_translate_alias(["eras", width_type], elem.attrib["path"]))

            # AM/PM
            periods = data.setdefault("periods", {})
            for day_period_width in calendar.findall("dayPeriods/dayPeriodContext/dayPeriodWidth"):
                if day_period_width.attrib["type"] == "wide":
                    for day_period in day_period_width.findall("dayPeriod"):
                        if "alt" not in day_period.attrib:
                            periods[day_period.attrib["type"]] = text_type(day_period.text)

            date_formats = data.setdefault("date_formats", {})
            for format in calendar.findall("dateFormats"):
                for elem in format.getiterator():
                    if elem.tag == "dateFormatLength":
                        if "draft" in elem.attrib and elem.attrib.get("type") in date_formats:
                            continue
                        try:
                            date_formats[elem.attrib.get("type")] = dates.parse_pattern(
                                text_type(elem.findtext("dateFormat/pattern"))
                            )
                        except ValueError as e:
                            error(e)
                    elif elem.tag == "alias":
                        date_formats = Alias(_translate_alias(["date_formats"], elem.attrib["path"]))

            time_formats = data.setdefault("time_formats", {})
            for format in calendar.findall("timeFormats"):
                for elem in format.getiterator():
                    if elem.tag == "timeFormatLength":
                        if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in time_formats:
                            continue
                        try:
                            time_formats[elem.attrib.get("type")] = dates.parse_pattern(
                                text_type(elem.findtext("timeFormat/pattern"))
                            )
                        except ValueError as e:
                            error(e)
                    elif elem.tag == "alias":
                        time_formats = Alias(_translate_alias(["time_formats"], elem.attrib["path"]))

            datetime_formats = data.setdefault("datetime_formats", {})
            for format in calendar.findall("dateTimeFormats"):
                for elem in format.getiterator():
                    if elem.tag == "dateTimeFormatLength":
                        if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get(
                            "type"
                        ) in datetime_formats:
                            continue
                        try:
                            datetime_formats[elem.attrib.get("type")] = text_type(
                                elem.findtext("dateTimeFormat/pattern")
                            )
                        except ValueError as e:
                            error(e)
                    elif elem.tag == "alias":
                        datetime_formats = Alias(_translate_alias(["datetime_formats"], elem.attrib["path"]))

        # <numbers>

        number_symbols = data.setdefault("number_symbols", {})
        for elem in tree.findall(".//numbers/symbols/*"):
            if "draft" in elem.attrib or "alt" in elem.attrib:
                continue
            number_symbols[elem.tag] = text_type(elem.text)

        decimal_formats = data.setdefault("decimal_formats", {})
        for elem in tree.findall(".//decimalFormats/decimalFormatLength"):
            if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in decimal_formats:
                continue
            if elem.findall("./alias"):
                # TODO map the alias to its target
                continue
            pattern = text_type(elem.findtext("./decimalFormat/pattern"))
            decimal_formats[elem.attrib.get("type")] = numbers.parse_pattern(pattern)

        scientific_formats = data.setdefault("scientific_formats", {})
        for elem in tree.findall(".//scientificFormats/scientificFormatLength"):
            if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in scientific_formats:
                continue
            pattern = text_type(elem.findtext("scientificFormat/pattern"))
            scientific_formats[elem.attrib.get("type")] = numbers.parse_pattern(pattern)

        currency_formats = data.setdefault("currency_formats", {})
        for elem in tree.findall(".//currencyFormats/currencyFormatLength/currencyFormat"):
            if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in currency_formats:
                continue
            for child in elem.getiterator():
                if child.tag == "alias":
                    currency_formats[elem.attrib.get("type")] = Alias(
                        _translate_alias(["currency_formats", elem.attrib["type"]], child.attrib["path"])
                    )
                elif child.tag == "pattern":
                    pattern = text_type(child.text)
                    currency_formats[elem.attrib.get("type")] = numbers.parse_pattern(pattern)

        percent_formats = data.setdefault("percent_formats", {})
        for elem in tree.findall(".//percentFormats/percentFormatLength"):
            if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in percent_formats:
                continue
            pattern = text_type(elem.findtext("percentFormat/pattern"))
            percent_formats[elem.attrib.get("type")] = numbers.parse_pattern(pattern)

        currency_names = data.setdefault("currency_names", {})
        currency_names_plural = data.setdefault("currency_names_plural", {})
        currency_symbols = data.setdefault("currency_symbols", {})
        for elem in tree.findall(".//currencies/currency"):
            code = elem.attrib["type"]
            for name in elem.findall("displayName"):
                if ("draft" in name.attrib) and code in currency_names:
                    continue
                if "count" in name.attrib:
                    currency_names_plural.setdefault(code, {})[name.attrib["count"]] = text_type(name.text)
                else:
                    currency_names[code] = text_type(name.text)
            # TODO: support choice patterns for currency symbol selection
            symbol = elem.find("symbol")
            if symbol is not None and "draft" not in symbol.attrib and "choice" not in symbol.attrib:
                currency_symbols[code] = text_type(symbol.text)

        # <units>

        unit_patterns = data.setdefault("unit_patterns", {})
        for elem in tree.findall(".//units/unitLength"):
            unit_length_type = elem.attrib["type"]
            for unit in elem.findall("unit"):
                unit_type = unit.attrib["type"]
                for pattern in unit.findall("unitPattern"):
                    box = unit_type
                    box += ":" + unit_length_type
                    unit_patterns.setdefault(box, {})[pattern.attrib["count"]] = text_type(pattern.text)

        date_fields = data.setdefault("date_fields", {})
        for elem in tree.findall(".//dates/fields/field"):
            field_type = elem.attrib["type"]
            date_fields.setdefault(field_type, {})
            for rel_time in elem.findall("relativeTime"):
                rel_time_type = rel_time.attrib["type"]
                for pattern in rel_time.findall("relativeTimePattern"):
                    date_fields[field_type].setdefault(rel_time_type, {})[pattern.attrib["count"]] = text_type(
                        pattern.text
                    )

        outfile = open(data_filename, "wb")
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()
Exemple #54
0
def test_parse_static_pattern():
    """Check that a pattern made only of literal text parses to a truthy result."""
    # 'Kun' is a pattern found in the So locale in CLDR 30.
    parsed = numbers.parse_pattern('Kun')
    assert parsed
Exemple #55
0
        # <numbers>

        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('.//numbers/symbols/*'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib):
                continue
            number_symbols[elem.tag] = unicode(elem.text)

        decimal_formats = data.setdefault('decimal_formats', {})
        for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in decimal_formats:
                continue
            pattern = unicode(elem.findtext('decimalFormat/pattern'))
            decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

        scientific_formats = data.setdefault('scientific_formats', {})
        for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in scientific_formats:
                continue
            pattern = unicode(elem.findtext('scientificFormat/pattern'))
            scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

        currency_formats = data.setdefault('currency_formats', {})
        for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in currency_formats:
                continue
            pattern = unicode(elem.findtext('currencyFormat/pattern'))