def __init__(self, *a, **kw):
    super(Locale, self).__init__(*a, **kw)
    self.currency_formats['amount_only'] = MONEY_AMOUNT_FORMAT
    delta_p = self.currency_formats['standard'].pattern
    minus_sign = self.number_symbols.get('minusSign', '-')
    plus_sign = self.number_symbols.get('plusSign', '+')
    if ';' in delta_p:
        pos, neg = delta_p.split(';')
        assert len(neg) > len(pos)
        assert minus_sign in neg
        pos = neg.replace(minus_sign, plus_sign)
        self.currency_delta_pattern = parse_pattern('%s;%s' % (pos, neg))
    else:
        self.currency_delta_pattern = parse_pattern('{0}{2};{1}{2}'.format(
            plus_sign, minus_sign, delta_p
        ))
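# Illustrative sketch (not part of the snippet above): a currency "delta"
# pattern built with an explicit positive and negative sub-pattern renders
# signed amounts. The pattern, locale and values here are assumptions.
from decimal import Decimal
from babel import Locale
from babel.numbers import parse_pattern

delta = parse_pattern(u'+¤#,##0.00;-¤#,##0.00')
locale = Locale.parse('en_US')
print(delta.apply(Decimal('1.50'), locale, currency='USD'))   # +$1.50
print(delta.apply(Decimal('-1.50'), locale, currency='USD'))  # -$1.50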
def test_parse_pattern_negative():
    # No negative format specified
    np = numbers.parse_pattern(u'¤#,##0.00')
    assert np.prefix == (u'¤', u'-¤')
    assert np.suffix == (u'', u'')

    # Negative format is specified
    np = numbers.parse_pattern(u'¤#,##0.00;(¤#,##0.00)')
    assert np.prefix == (u'¤', u'(¤')
    assert np.suffix == (u'', u')')

    # Negative sign is a suffix
    np = numbers.parse_pattern(u'¤ #,##0.00;¤ #,##0.00-')
    assert np.prefix == (u'¤ ', u'¤ ')
    assert np.suffix == (u'', u'-')
def format_scientific_field(spec, prec, number, locale):
    prec = SCIENTIFIC_DECIMAL_DIGITS if prec is None else int(prec)
    format_ = u'0.%sE+000' % (u'#' * prec)
    pattern = parse_pattern(format_)
    decimal_symbol = get_decimal_symbol(locale)
    string = pattern.apply(number, locale).replace(u'.', decimal_symbol)
    return string.lower() if spec.islower() else string
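# Minimal sketch of the scientific pattern built above; the value and
# locale are illustrative assumptions, not part of the original snippet.
from babel import Locale
from babel.numbers import parse_pattern

pattern = parse_pattern(u'0.###E+000')
print(pattern.apply(1234, Locale.parse('en_US')))  # e.g. '1.234E+003'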
def load_i18n(project_root, tell_sentry):
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                c.plural_func = get_function_from_rule(c.plural_expr)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e, {})

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'
def parse_currency_formats(data, tree):
    currency_formats = data.setdefault('currency_formats', {})
    for currency_format in tree.findall('.//currencyFormats'):
        if _should_skip_number_elem(data, currency_format):  # TODO: Support other number systems
            continue

        for length_elem in currency_format.findall('./currencyFormatLength'):
            curr_length_type = length_elem.attrib.get('type')
            for elem in length_elem.findall('currencyFormat'):
                type = elem.attrib.get('type')
                if curr_length_type:
                    # Handle `<currencyFormatLength type="short">`, etc.
                    # TODO(3.x): use nested dicts instead of colon-separated madness
                    type = '%s:%s' % (type, curr_length_type)
                if _should_skip_elem(elem, type, currency_formats):
                    continue
                for child in elem.iter():
                    if child.tag == 'alias':
                        currency_formats[type] = Alias(_translate_alias(
                            ['currency_formats', elem.attrib['type']],
                            child.attrib['path']))
                    elif child.tag == 'pattern':
                        pattern = text_type(child.text)
                        currency_formats[type] = numbers.parse_pattern(pattern)
def parse_decimal_formats(data, tree):
    decimal_formats = data.setdefault('decimal_formats', {})
    for df_elem in tree.findall('.//decimalFormats'):
        if _should_skip_number_elem(data, df_elem):  # TODO: Support other number systems
            continue

        for elem in df_elem.findall('./decimalFormatLength'):
            length_type = elem.attrib.get('type')
            if _should_skip_elem(elem, length_type, decimal_formats):
                continue
            if elem.findall('./alias'):
                # TODO map the alias to its target
                continue
            for pattern_el in elem.findall('./decimalFormat/pattern'):
                pattern_type = pattern_el.attrib.get('type')
                pattern = numbers.parse_pattern(text_type(pattern_el.text))
                if pattern_type:
                    # This is a compact decimal format, see:
                    # https://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats
                    # These are mapped into a `compact_decimal_formats` dictionary
                    # with the format {length: {count: {multiplier: pattern}}}.
                    # TODO: Add support for formatting them.
                    compact_decimal_formats = data.setdefault('compact_decimal_formats', {})
                    length_map = compact_decimal_formats.setdefault(length_type, {})
                    length_count_map = length_map.setdefault(pattern_el.attrib['count'], {})
                    length_count_map[pattern_type] = pattern
                else:
                    # Regular decimal format.
                    decimal_formats[length_type] = pattern
def parse_currency_formats(data, tree):
    currency_formats = data.setdefault('currency_formats', {})
    for currency_format in tree.findall('.//currencyFormats'):
        if _should_skip_number_elem(data, currency_format):  # TODO: Support other number systems
            continue

        for length_elem in currency_format.findall('./currencyFormatLength'):
            curr_length_type = length_elem.attrib.get('type')
            for elem in length_elem.findall('currencyFormat'):
                type = elem.attrib.get('type')
                if curr_length_type:
                    # Handle `<currencyFormatLength type="short">`, etc.
                    # TODO(3.x): use nested dicts instead of colon-separated madness
                    type = '%s:%s' % (type, curr_length_type)
                if _should_skip_elem(elem, type, currency_formats):
                    continue
                for child in elem.getiterator():
                    if child.tag == 'alias':
                        currency_formats[type] = Alias(
                            _translate_alias(['currency_formats', elem.attrib['type']],
                                             child.attrib['path'])
                        )
                    elif child.tag == 'pattern':
                        pattern = text_type(child.text)
                        currency_formats[type] = numbers.parse_pattern(pattern)
def parse_decimal_formats(data, tree):
    decimal_formats = data.setdefault('decimal_formats', {})
    for df_elem in tree.findall('.//decimalFormats'):
        if _should_skip_number_elem(data, df_elem):  # TODO: Support other number systems
            continue

        for elem in df_elem.findall('./decimalFormatLength'):
            length_type = elem.attrib.get('type')
            if _should_skip_elem(elem, length_type, decimal_formats):
                continue
            if elem.findall('./alias'):
                # TODO map the alias to its target
                continue
            for pattern_el in elem.findall('./decimalFormat/pattern'):
                pattern_type = pattern_el.attrib.get('type')
                pattern = numbers.parse_pattern(text_type(pattern_el.text))
                if pattern_type:
                    # This is a compact decimal format, see:
                    # https://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats
                    # These are mapped into a `compact_decimal_formats` dictionary
                    # with the format {length: {count: {multiplier: pattern}}}.
                    # TODO: Add support for formatting them.
                    compact_decimal_formats = data.setdefault('compact_decimal_formats', {})
                    length_map = compact_decimal_formats.setdefault(length_type, {})
                    length_count_map = length_map.setdefault(pattern_el.attrib['count'], {})
                    length_count_map[pattern_type] = pattern
                else:
                    # Regular decimal format.
                    decimal_formats[length_type] = pattern
def test_numberpattern_repr():
    """repr() outputs the pattern string"""
    # This implementation looks a bit funny, but that's cause strings are
    # repr'd differently in Python 2 vs 3 and this test runs under both.
    format = u'¤#,##0.00;(¤#,##0.00)'
    np = numbers.parse_pattern(format)
    assert repr(format) in repr(np)
def parse_percent_formats(data, tree):
    percent_formats = data.setdefault('percent_formats', {})
    for elem in tree.findall('.//percentFormats/percentFormatLength'):
        type = elem.attrib.get('type')
        if _should_skip_elem(elem, type, percent_formats):
            continue
        pattern = text_type(elem.findtext('percentFormat/pattern'))
        percent_formats[type] = numbers.parse_pattern(pattern)
def test_parse_pattern():
    # Original pattern is preserved
    np = numbers.parse_pattern(u'¤#,##0.00')
    assert np.pattern == u'¤#,##0.00'

    np = numbers.parse_pattern(u'¤#,##0.00;(¤#,##0.00)')
    assert np.pattern == u'¤#,##0.00;(¤#,##0.00)'

    # Given a NumberPattern object, we don't return a new instance.
    # However, we don't cache NumberPattern objects, so calling
    # parse_pattern with the same format string will create new
    # instances
    np1 = numbers.parse_pattern(u'¤ #,##0.00')
    np2 = numbers.parse_pattern(u'¤ #,##0.00')
    assert np1 is not np2
    assert np1 is numbers.parse_pattern(np1)
def format_float_field(__, prec, number, locale):
    """Formats a fixed-point field."""
    format_ = u'0.'
    if prec is None:
        format_ += u'#' * NUMBER_DECIMAL_DIGITS
    else:
        format_ += u'0' * int(prec)
    pattern = parse_pattern(format_)
    return pattern.apply(number, locale)
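# Sketch of the two fixed-point pattern shapes built above ('0.000…' forces
# trailing zeros, '0.###…' drops them); values and locale are assumptions.
from babel import Locale
from babel.numbers import parse_pattern

en = Locale.parse('en_US')
print(parse_pattern(u'0.00').apply(3.14159, en))    # 3.14
print(parse_pattern(u'0.######').apply(3.14, en))   # 3.14 (no padding zeros)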
def format_number(value, digits=None):
    locale = get_current_babel_locale()
    if digits is None:
        return format_decimal(value, locale=locale)
    (min_digits, max_digits) = (
        digits if isinstance(digits, tuple) else (digits, digits))
    format = locale.decimal_formats.get(None)
    pattern = parse_pattern(format)  # type: babel.numbers.NumberPattern
    return pattern.apply(value, locale, force_frac=(min_digits, max_digits))
def parse_decimal_formats(data, tree):
    decimal_formats = data.setdefault('decimal_formats', {})
    for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
        type = elem.attrib.get('type')
        if _should_skip_elem(elem, type, decimal_formats):
            continue
        if elem.findall('./alias'):
            # TODO map the alias to its target
            continue
        pattern = text_type(elem.findtext('./decimalFormat/pattern'))
        decimal_formats[type] = numbers.parse_pattern(pattern)
def parse_scientific_formats(data, tree):
    scientific_formats = data.setdefault('scientific_formats', {})
    for sf_elem in tree.findall('.//scientificFormats'):
        if _should_skip_number_elem(data, sf_elem):  # TODO: Support other number systems
            continue

        for elem in sf_elem.findall('./scientificFormatLength'):
            type = elem.attrib.get('type')
            if _should_skip_elem(elem, type, scientific_formats):
                continue
            pattern = text_type(elem.findtext('scientificFormat/pattern'))
            scientific_formats[type] = numbers.parse_pattern(pattern)
def format_field(spec, arg, value, locale):
    if spec and isinstance(value, Number):
        if arg:
            spec += arg
        try:
            pattern = parse_pattern(spec)
        except ValueError:
            return spec
        else:
            return pattern.apply(value, locale)
    return str(value)
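# Hypothetical call sketch for the helper above, assuming it lives in a
# module with these imports (Number from the stdlib `numbers` module,
# parse_pattern from Babel); spec, values and locale are illustrative.
from numbers import Number
from babel import Locale
from babel.numbers import parse_pattern

print(format_field(u'#,##0.00', None, 1234.5, Locale.parse('en_US')))  # 1,234.50
print(format_field(u'', None, 'not a number', Locale.parse('en_US')))  # not a number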
def parse_scientific_formats(data, tree):
    scientific_formats = data.setdefault('scientific_formats', {})
    for sf_elem in tree.findall('.//scientificFormats'):
        if _should_skip_number_elem(data, sf_elem):  # TODO: Support other number systems
            continue

        for elem in sf_elem.findall('./scientificFormatLength'):
            type = elem.attrib.get('type')
            if _should_skip_elem(elem, type, scientific_formats):
                continue
            pattern = text_type(elem.findtext('scientificFormat/pattern'))
            scientific_formats[type] = numbers.parse_pattern(pattern)
def processFormats(self, locale, formats):
    '''
    Process the formats to a complete list of formats that will be used by conversion.
    '''
    assert isinstance(formats, dict), 'Invalid formats %s' % formats
    assert isinstance(locale, Locale), 'Invalid locale %s' % locale

    for clsTyp, format in formats.items():
        # In here we just check that the format is valid.
        try:
            if clsTyp in (Number, Percentage):
                bn.parse_pattern(format)
            elif format not in self.formats[clsTyp]:
                bd.parse_pattern(format)
        except Exception as e:
            raise FormatError('invalid %s format \'%s\' because: %s'
                              % (clsTyp.__name__, format, str(e)))

    if Number not in formats:
        formats[Number] = locale.decimal_formats.get(None).pattern
    if Percentage not in formats:
        formats[Percentage] = locale.percent_formats.get(None).pattern
    for clsTyp, default in self.defaults.items():
        if clsTyp not in formats:
            formats[clsTyp] = default
    return formats
def parse_percent_formats(data, tree):
    percent_formats = data.setdefault('percent_formats', {})
    for pf_elem in tree.findall('.//percentFormats'):
        if _should_skip_number_elem(data, pf_elem):  # TODO: Support other number systems
            continue

        for elem in pf_elem.findall('.//percentFormatLength'):
            type = elem.attrib.get('type')
            if _should_skip_elem(elem, type, percent_formats):
                continue
            pattern = str(elem.findtext('percentFormat/pattern'))
            percent_formats[type] = numbers.parse_pattern(pattern)
def load_i18n(project_root, tell_sentry):
    # Load the locales
    key = lambda t: strip_accents(t[1])
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = i18n.LOCALES
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                c.plural_func = get_function_from_rule(c.plural_expr)
                try:
                    l.countries_map = {k: l.territories[k] for k in COUNTRIES_MAP}
                    l.countries = sorted(l.countries_map.items(), key=key)
                except KeyError:
                    l.countries_map = COUNTRIES_MAP
                    l.countries = COUNTRIES
        except Exception as e:
            tell_sentry(e)

    # Add the default English locale
    locale_en = i18n.LOCALE_EN = locales['en'] = Locale('en')
    locale_en.catalog = Catalog('en')
    locale_en.catalog.plural_func = lambda n: n != 1
    locale_en.countries = COUNTRIES
    locale_en.countries_map = COUNTRIES_MAP

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'
def parse_currency_formats(data, tree):
    currency_formats = data.setdefault('currency_formats', {})
    for length_elem in tree.findall('.//currencyFormats/currencyFormatLength'):
        curr_length_type = length_elem.attrib.get('type')
        for elem in length_elem.findall('currencyFormat'):
            type = elem.attrib.get('type')
            if curr_length_type:
                # Handle `<currencyFormatLength type="short">`, etc.
                type = '%s:%s' % (type, curr_length_type)
            if _should_skip_elem(elem, type, currency_formats):
                continue
            for child in elem.getiterator():
                if child.tag == 'alias':
                    currency_formats[type] = Alias(_translate_alias(
                        ['currency_formats', elem.attrib['type']],
                        child.attrib['path']))
                elif child.tag == 'pattern':
                    pattern = text_type(child.text)
                    currency_formats[type] = numbers.parse_pattern(pattern)
def parse_currency_formats(data, tree):
    currency_formats = data.setdefault('currency_formats', {})
    for length_elem in tree.findall('.//currencyFormats/currencyFormatLength'):
        curr_length_type = length_elem.attrib.get('type')
        for elem in length_elem.findall('currencyFormat'):
            type = elem.attrib.get('type')
            if curr_length_type:
                # Handle `<currencyFormatLength type="short">`, etc.
                type = '%s:%s' % (type, curr_length_type)
            if _should_skip_elem(elem, type, currency_formats):
                continue
            for child in elem.getiterator():
                if child.tag == 'alias':
                    currency_formats[type] = Alias(
                        _translate_alias(['currency_formats', elem.attrib['type']],
                                         child.attrib['path'])
                    )
                elif child.tag == 'pattern':
                    pattern = text_type(child.text)
                    currency_formats[type] = numbers.parse_pattern(pattern)
def load_i18n(website):
    # Load the locales
    key = lambda t: strip_accents(t[1])
    localeDir = os.path.join(website.project_root, 'i18n', 'core')
    locales = website.locales = {}
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                c.plural_func = get_function_from_rule(c.plural_expr)
                try:
                    l.countries_map = {k: l.territories[k] for k in COUNTRIES_MAP}
                    l.countries = sorted(l.countries_map.items(), key=key)
                except KeyError:
                    l.countries_map = COUNTRIES_MAP
                    l.countries = COUNTRIES
        except Exception as e:
            website.tell_sentry(e)

    # Add the default English locale
    locale_en = website.locale_en = locales['en'] = Locale('en')
    locale_en.catalog = Catalog('en')
    locale_en.catalog.plural_func = lambda n: n != 1
    locale_en.countries = COUNTRIES
    locale_en.countries_map = COUNTRIES_MAP

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'
    return number
    if isinstance(number, int):
        return FluentInt(number, **kwargs)
    elif isinstance(number, float):
        return FluentFloat(number, **kwargs)
    elif isinstance(number, Decimal):
        return FluentDecimal(number, **kwargs)
    elif isinstance(number, FluentNone):
        return number
    else:
        raise TypeError("Can't use fluent_number with object {0} for type {1}"
                        .format(number, type(number)))


_UNGROUPED_PATTERN = parse_pattern("#0")


def clone_pattern(pattern):
    return NumberPattern(pattern.pattern,
                         pattern.prefix,
                         pattern.suffix,
                         pattern.grouping,
                         pattern.int_prec,
                         pattern.frac_prec,
                         pattern.exp_prec,
                         pattern.exp_plus)


@attr.s
class DateFormatOptions(object):
    # Parameters.
    # See https://projectfluent.org/fluent/guide/functions.html#datetime

    # Developer only
    timeZone = attr.ib(default=None)
from collections import namedtuple
from unicodedata import combining, normalize

import babel.core
from babel.dates import format_date, format_datetime, format_time, format_timedelta
from babel.messages.pofile import Catalog
from babel.numbers import parse_pattern
from markupsafe import Markup
from pando.utils import utcnow

from ..constants import CURRENCIES, D_MAX
from ..exceptions import AmbiguousNumber, InvalidNumber
from ..website import website
from .currencies import Money, MoneyBasket


MONEY_AMOUNT_FORMAT = parse_pattern('#,##0.00')
ONLY_ZERO = {'0'}


def no_escape(s):
    return s


def LegacyMoney(o):
    return o if isinstance(o, (Money, MoneyBasket)) else Money(o, 'EUR')


Wrap = namedtuple('Wrap', 'value wrapper')

BOLD = Markup('<b>%s</b>')
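# Minimal sketch of what the module-level MONEY_AMOUNT_FORMAT pattern above
# produces; the locales and amount are illustrative assumptions.
from decimal import Decimal
from babel import Locale

print(MONEY_AMOUNT_FORMAT.apply(Decimal('1234.5'), Locale.parse('en_US')))  # 1,234.50
print(MONEY_AMOUNT_FORMAT.apply(Decimal('1234.5'), Locale.parse('de_DE')))  # 1.234,50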
def test_parse_static_pattern():
    assert numbers.parse_pattern('Kun')  # in the So locale in CLDR 30
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                c.plural_func = get_function_from_rule(c.plural_expr)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e, {}, allow_reraise=True)

    # Prepare a unique and sorted list for use in the language switcher
    percent = lambda l: sum((percent(s) if isinstance(s, tuple) else 1) for s in l if s) / len(l)
    for l in locales.values():
        if l.language == 'en':
            l.completion = 1
            continue
        l.completion = percent([m.string for m in l.catalog if m.id])
    loc_url = canonical_scheme + '://%s.' + canonical_host
    lang_list = sorted(
        (
            (l.completion, l.language, l.language_name.title(), loc_url % l.language)
            for l in set(locales.values()) if l.completion
        ),
        key=lambda t: (-t[0], t[1]),
    )

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
            if md.startswith('# '):
                md = '\n'.join(md.split('\n')[1:]).strip()
            md = heading_re.sub(r'##\1', md)
            docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales}
def test_parse_pattern():
    assert numbers.parse_pattern(u'¤#,##0.00;(¤#,##0.00)').suffix == (u'', u')')
    assert numbers.parse_pattern(u'¤ #,##0.00;¤ #,##0.00-').suffix == (u'', u'-')
        l.countries = COUNTRIES
    return langs


# Load the locales
LOCALES = load_langs("i18n")

# Add the default English locale
LOCALE_EN = LOCALES['en'] = Locale('en')
LOCALE_EN.catalog = Catalog('en')
LOCALE_EN.catalog.plural_func = lambda n: n != 1
LOCALE_EN.countries = COUNTRIES
LOCALE_EN.countries_map = COUNTRIES_MAP

# Patch the locales to look less formal
LOCALE_EN.currency_formats[None] = parse_pattern('\xa4#,##0.##')
LOCALES['fr'].currency_formats[None] = parse_pattern('#,##0.##\u202f\xa4')
LOCALES['fr'].currency_symbols['USD'] = '$'


def get_locale_for_request(request):
    accept_lang = request.headers.get("Accept-Language", "")
    languages = (lang.split(";", 1)[0] for lang in accept_lang.split(","))
    for lang in languages:
        lang = regularize_locale(lang)
        loc = LOCALES.get(lang)
        if loc:
            return loc
    return LOCALE_EN
def _format_currency(cls, number):
    locale = Locale.parse(cls.LOCALE)
    format = cls.FORMAT or locale.currency_formats.get(None)
    pattern = parse_pattern(format)
    pattern.frac_prec = (2, cls.DECIMAL_PLACES)
    return pattern.apply(number, locale, currency=cls.CODE)
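# Sketch of the frac_prec override used above, shown on a plain decimal
# pattern; the bounds, value and locale are illustrative assumptions.
from decimal import Decimal
from babel import Locale
from babel.numbers import parse_pattern

pattern = parse_pattern(u'#,##0.00')
pattern.frac_prec = (2, 6)  # at least 2, at most 6 fractional digits
print(pattern.apply(Decimal('1234.56789'), Locale.parse('en_US')))  # e.g. 1,234.56789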
def main(): parser = OptionParser(usage='%prog path/to/cldr') options, args = parser.parse_args() if len(args) != 1: parser.error('incorrect number of arguments') srcdir = args[0] destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..', 'babel') sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) # Import global data from the supplemental files global_data = {} territory_zones = global_data.setdefault('territory_zones', {}) zone_aliases = global_data.setdefault('zone_aliases', {}) zone_territories = global_data.setdefault('zone_territories', {}) for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'): tzid = elem.attrib['type'] territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) zone_territories[tzid] = elem.attrib['territory'] if 'aliases' in elem.attrib: for alias in elem.attrib['aliases'].split(): zone_aliases[alias] = tzid # Import Metazone mapping meta_zones = global_data.setdefault('meta_zones', {}) tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) for elem in tzsup.findall('.//timezone'): for child in elem.findall('usesMetazone'): if 'to' not in child.attrib: # FIXME: support old mappings meta_zones[elem.attrib['type']] = child.attrib['mzone'] outfile = open(os.path.join(destdir, 'global.dat'), 'wb') try: pickle.dump(global_data, outfile, 2) finally: outfile.close() # build a territory containment mapping for inheritance regions = {} for elem in sup.findall('.//territoryContainment/group'): regions[elem.attrib['type']] = elem.attrib['contains'].split() # Resolve territory containment territory_containment = {} region_items = sorted(regions.items()) for group, territory_list in region_items: for territory in territory_list: containers = territory_containment.setdefault(territory, set([])) if group in territory_containment: containers |= territory_containment[group] containers.add(group) # prepare the per-locale plural rules definitions plural_rules = {} prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml')) for elem in prsup.findall('.//plurals/pluralRules'): rules = [] for rule in elem.findall('pluralRule'): rules.append((rule.attrib['count'], text_type(rule.text))) pr = PluralRule(rules) for locale in elem.attrib['locales'].split(): plural_rules[locale] = pr filenames = os.listdir(os.path.join(srcdir, 'main')) filenames.remove('root.xml') filenames.sort(key=lambda a: len(a)) filenames.insert(0, 'root.xml') for filename in filenames: stem, ext = os.path.splitext(filename) if ext != '.xml': continue sys.stderr.write('Processing input file %r\n' % filename) tree = parse(os.path.join(srcdir, 'main', filename)) data = {} language = None elem = tree.find('.//identity/language') if elem is not None: language = elem.attrib['type'] sys.stderr.write(' Language: %r\n' % language) territory = None elem = tree.find('.//identity/territory') if elem is not None: territory = elem.attrib['type'] else: territory = '001' # world sys.stderr.write(' Territory: %r\n' % territory) regions = territory_containment.get(territory, []) sys.stderr.write(' Regions: %r\n' % regions) # plural rules locale_id = '_'.join([ _f for _f in [language, territory != '001' and territory or None] if _f ]) if locale_id in plural_rules: data['plural_form'] = plural_rules[locale_id] # <localeDisplayNames> territories = data.setdefault('territories', {}) for elem in tree.findall('.//territories/territory'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in territories: continue 
territories[elem.attrib['type']] = _text(elem) languages = data.setdefault('languages', {}) for elem in tree.findall('.//languages/language'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in languages: continue languages[elem.attrib['type']] = _text(elem) variants = data.setdefault('variants', {}) for elem in tree.findall('.//variants/variant'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in variants: continue variants[elem.attrib['type']] = _text(elem) scripts = data.setdefault('scripts', {}) for elem in tree.findall('.//scripts/script'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in scripts: continue scripts[elem.attrib['type']] = _text(elem) # <dates> week_data = data.setdefault('week_data', {}) supelem = sup.find('.//weekData') for elem in supelem.findall('minDays'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['min_days'] = int(elem.attrib['count']) for elem in supelem.findall('firstDay'): if 'alt' not in elem.attrib: # ignore alternatives territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['first_day'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendStart'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['weekend_start'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendEnd'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['weekend_end'] = weekdays[elem.attrib['day']] zone_formats = data.setdefault('zone_formats', {}) for elem in tree.findall('.//timeZoneNames/gmtFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s') break for elem in tree.findall('.//timeZoneNames/regionFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['region'] = text_type(elem.text).replace( '{0}', '%s') break for elem in tree.findall('.//timeZoneNames/fallbackFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['fallback'] = text_type(elem.text) \ .replace('{0}', '%(0)s').replace('{1}', '%(1)s') break time_zones = data.setdefault('time_zones', {}) for elem in tree.findall('.//timeZoneNames/zone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = text_type(city) for child in elem.findall('long/*'): info.setdefault('long', {})[child.tag] = text_type(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = text_type(child.text) time_zones[elem.attrib['type']] = info meta_zones = data.setdefault('meta_zones', {}) for elem in tree.findall('.//timeZoneNames/metazone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = text_type(city) for child in elem.findall('long/*'): info.setdefault('long', {})[child.tag] = text_type(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = text_type(child.text) info['common'] = elem.findtext('commonlyUsed') == 'true' meta_zones[elem.attrib['type']] = info for calendar in tree.findall('.//calendars/calendar'): if calendar.attrib['type'] != 'gregorian': # TODO: support other calendar types continue months = data.setdefault('months', {}) for ctxt in 
calendar.findall('months/monthContext'): ctxt_type = ctxt.attrib['type'] ctxts = months.setdefault(ctxt_type, {}) for width in ctxt.findall('monthWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'month': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = text_type( elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias( ['months', ctxt_type, width_type], elem.attrib['path'])) days = data.setdefault('days', {}) for ctxt in calendar.findall('days/dayContext'): ctxt_type = ctxt.attrib['type'] ctxts = days.setdefault(ctxt_type, {}) for width in ctxt.findall('dayWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'day': dtype = weekdays[elem.attrib['type']] if ('draft' in elem.attrib or 'alt' not in elem.attrib) \ and dtype in widths: continue widths[dtype] = text_type(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias( ['days', ctxt_type, width_type], elem.attrib['path'])) quarters = data.setdefault('quarters', {}) for ctxt in calendar.findall('quarters/quarterContext'): ctxt_type = ctxt.attrib['type'] ctxts = quarters.setdefault(ctxt.attrib['type'], {}) for width in ctxt.findall('quarterWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'quarter': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib['type'])] = text_type( elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias( ['quarters', ctxt_type, width_type], elem.attrib['path'])) eras = data.setdefault('eras', {}) for width in calendar.findall('eras/*'): width_type = NAME_MAP[width.tag] widths = eras.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'era': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = text_type( elem.text) elif elem.tag == 'alias': eras[width_type] = Alias( _translate_alias(['eras', width_type], elem.attrib['path'])) # AM/PM periods = data.setdefault('periods', {}) for elem in calendar.findall('am'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.tag in periods: continue periods[elem.tag] = text_type(elem.text) for elem in calendar.findall('pm'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.tag in periods: continue periods[elem.tag] = text_type(elem.text) date_formats = data.setdefault('date_formats', {}) for format in calendar.findall('dateFormats'): for elem in format.getiterator(): if elem.tag == 'dateFormatLength': if 'draft' in elem.attrib and \ elem.attrib.get('type') in date_formats: continue try: date_formats[elem.attrib.get('type')] = \ dates.parse_pattern(text_type(elem.findtext('dateFormat/pattern'))) except ValueError: sys.stderr.write('ERROR: %s\n' % sys.exc_info()[1]) elif elem.tag == 'alias': date_formats = Alias( _translate_alias(['date_formats'], elem.attrib['path'])) time_formats = data.setdefault('time_formats', {}) for format in calendar.findall('timeFormats'): for elem in format.getiterator(): if elem.tag == 'timeFormatLength': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in time_formats: continue try: 
time_formats[elem.attrib.get('type')] = \ dates.parse_pattern(text_type(elem.findtext('timeFormat/pattern'))) except ValueError: sys.stderr.write('ERROR: %s\n' % sys.exc_info()[1]) elif elem.tag == 'alias': time_formats = Alias( _translate_alias(['time_formats'], elem.attrib['path'])) datetime_formats = data.setdefault('datetime_formats', {}) for format in calendar.findall('dateTimeFormats'): for elem in format.getiterator(): if elem.tag == 'dateTimeFormatLength': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in datetime_formats: continue try: datetime_formats[elem.attrib.get('type')] = \ text_type(elem.findtext('dateTimeFormat/pattern')) except ValueError: sys.stderr.write('ERROR: %s\n' % sys.exc_info()[1]) elif elem.tag == 'alias': datetime_formats = Alias( _translate_alias(['datetime_formats'], elem.attrib['path'])) # <numbers> number_symbols = data.setdefault('number_symbols', {}) for elem in tree.findall('.//numbers/symbols/*'): if ('draft' in elem.attrib or 'alt' in elem.attrib): continue number_symbols[elem.tag] = text_type(elem.text) decimal_formats = data.setdefault('decimal_formats', {}) for elem in tree.findall('.//decimalFormats/decimalFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in decimal_formats: continue pattern = text_type(elem.findtext('decimalFormat/pattern')) decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern( pattern) scientific_formats = data.setdefault('scientific_formats', {}) for elem in tree.findall( './/scientificFormats/scientificFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in scientific_formats: continue pattern = text_type(elem.findtext('scientificFormat/pattern')) scientific_formats[elem.attrib.get( 'type')] = numbers.parse_pattern(pattern) currency_formats = data.setdefault('currency_formats', {}) for elem in tree.findall('.//currencyFormats/currencyFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in currency_formats: continue pattern = text_type(elem.findtext('currencyFormat/pattern')) currency_formats[elem.attrib.get('type')] = numbers.parse_pattern( pattern) percent_formats = data.setdefault('percent_formats', {}) for elem in tree.findall('.//percentFormats/percentFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in percent_formats: continue pattern = text_type(elem.findtext('percentFormat/pattern')) percent_formats[elem.attrib.get('type')] = numbers.parse_pattern( pattern) currency_names = data.setdefault('currency_names', {}) currency_symbols = data.setdefault('currency_symbols', {}) for elem in tree.findall('.//currencies/currency'): code = elem.attrib['type'] # TODO: support plural rules for currency name selection for name in elem.findall('displayName'): if ('draft' in name.attrib or 'count' in name.attrib) \ and code in currency_names: continue currency_names[code] = text_type(name.text) # TODO: support choice patterns for currency symbol selection symbol = elem.find('symbol') if symbol is not None and 'draft' not in symbol.attrib \ and 'choice' not in symbol.attrib: currency_symbols[code] = text_type(symbol.text) # <units> unit_patterns = data.setdefault('unit_patterns', {}) for elem in tree.findall('.//units/unit'): unit_type = elem.attrib['type'] unit_pattern = unit_patterns.setdefault(unit_type, {}) for pattern in elem.findall('unitPattern'): unit_patterns[unit_type][pattern.attrib['count']] = \ 
text_type(pattern.text) outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb') try: pickle.dump(data, outfile, 2) finally: outfile.close()
        l.countries = COUNTRIES
    return langs


# Load the locales
LOCALES = load_langs("i18n")

# Add the default English locale
LOCALE_EN = LOCALES['en'] = Locale('en')
LOCALE_EN.catalog = Catalog('en')
LOCALE_EN.catalog.plural_func = lambda n: n != 1
LOCALE_EN.countries = COUNTRIES
LOCALE_EN.countries_map = COUNTRIES_MAP

# Patch the locales to look less formal
LOCALES['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
LOCALES['fr'].currency_symbols['USD'] = '$'


def get_locale_for_request(request):
    accept_lang = request.headers.get("Accept-Language", "")
    languages = (lang.split(";", 1)[0] for lang in accept_lang.split(","))
    for lang in languages:
        lang = regularize_locale(lang)
        loc = LOCALES.get(lang)
        if loc:
            return loc
    return LOCALE_EN


def format_currency_with_options(number,
def main(): parser = OptionParser(usage='%prog path/to/cldr') options, args = parser.parse_args() if len(args) != 1: parser.error('incorrect number of arguments') srcdir = args[0] destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..', 'babel') sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) # Import global data from the supplemental files global_data = {} territory_zones = global_data.setdefault('territory_zones', {}) zone_aliases = global_data.setdefault('zone_aliases', {}) zone_territories = global_data.setdefault('zone_territories', {}) for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'): tzid = elem.attrib['type'] territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) zone_territories[tzid] = elem.attrib['territory'] if 'aliases' in elem.attrib: for alias in elem.attrib['aliases'].split(): zone_aliases[alias] = tzid # Import Metazone mapping meta_zones = global_data.setdefault('meta_zones', {}) tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) for elem in tzsup.findall('.//timezone'): for child in elem.findall('usesMetazone'): if 'to' not in child.attrib: # FIXME: support old mappings meta_zones[elem.attrib['type']] = child.attrib['mzone'] outfile = open(os.path.join(destdir, 'global.dat'), 'wb') try: pickle.dump(global_data, outfile, 2) finally: outfile.close() # build a territory containment mapping for inheritance regions = {} for elem in sup.findall('.//territoryContainment/group'): regions[elem.attrib['type']] = elem.attrib['contains'].split() # Resolve territory containment territory_containment = {} region_items = sorted(regions.items()) for group, territory_list in region_items: for territory in territory_list: containers = territory_containment.setdefault(territory, set([])) if group in territory_containment: containers |= territory_containment[group] containers.add(group) # prepare the per-locale plural rules definitions plural_rules = {} prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml')) for elem in prsup.findall('.//plurals/pluralRules'): rules = [] for rule in elem.findall('pluralRule'): rules.append((rule.attrib['count'], text_type(rule.text))) pr = PluralRule(rules) for locale in elem.attrib['locales'].split(): plural_rules[locale] = pr filenames = os.listdir(os.path.join(srcdir, 'main')) filenames.remove('root.xml') filenames.sort(key=lambda a: len(a)) filenames.insert(0, 'root.xml') for filename in filenames: stem, ext = os.path.splitext(filename) if ext != '.xml': continue sys.stderr.write('Processing input file %r\n' % filename) tree = parse(os.path.join(srcdir, 'main', filename)) data = {} language = None elem = tree.find('.//identity/language') if elem is not None: language = elem.attrib['type'] sys.stderr.write(' Language: %r\n' % language) territory = None elem = tree.find('.//identity/territory') if elem is not None: territory = elem.attrib['type'] else: territory = '001' # world sys.stderr.write(' Territory: %r\n' % territory) regions = territory_containment.get(territory, []) sys.stderr.write(' Regions: %r\n' % regions) # plural rules locale_id = '_'.join([_f for _f in [ language, territory != '001' and territory or None ] if _f]) if locale_id in plural_rules: data['plural_form'] = plural_rules[locale_id] # <localeDisplayNames> territories = data.setdefault('territories', {}) for elem in tree.findall('.//territories/territory'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in territories: continue 
territories[elem.attrib['type']] = _text(elem) languages = data.setdefault('languages', {}) for elem in tree.findall('.//languages/language'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in languages: continue languages[elem.attrib['type']] = _text(elem) variants = data.setdefault('variants', {}) for elem in tree.findall('.//variants/variant'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in variants: continue variants[elem.attrib['type']] = _text(elem) scripts = data.setdefault('scripts', {}) for elem in tree.findall('.//scripts/script'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in scripts: continue scripts[elem.attrib['type']] = _text(elem) # <dates> week_data = data.setdefault('week_data', {}) supelem = sup.find('.//weekData') for elem in supelem.findall('minDays'): territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['min_days'] = int(elem.attrib['count']) for elem in supelem.findall('firstDay'): territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['first_day'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendStart'): territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['weekend_start'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendEnd'): territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['weekend_end'] = weekdays[elem.attrib['day']] zone_formats = data.setdefault('zone_formats', {}) for elem in tree.findall('.//timeZoneNames/gmtFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s') break for elem in tree.findall('.//timeZoneNames/regionFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['region'] = text_type(elem.text).replace('{0}', '%s') break for elem in tree.findall('.//timeZoneNames/fallbackFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['fallback'] = text_type(elem.text) \ .replace('{0}', '%(0)s').replace('{1}', '%(1)s') break time_zones = data.setdefault('time_zones', {}) for elem in tree.findall('.//timeZoneNames/zone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = text_type(city) for child in elem.findall('long/*'): info.setdefault('long', {})[child.tag] = text_type(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = text_type(child.text) time_zones[elem.attrib['type']] = info meta_zones = data.setdefault('meta_zones', {}) for elem in tree.findall('.//timeZoneNames/metazone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = text_type(city) for child in elem.findall('long/*'): info.setdefault('long', {})[child.tag] = text_type(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = text_type(child.text) info['common'] = elem.findtext('commonlyUsed') == 'true' meta_zones[elem.attrib['type']] = info for calendar in tree.findall('.//calendars/calendar'): if calendar.attrib['type'] != 'gregorian': # TODO: support other calendar types continue months = data.setdefault('months', {}) for ctxt in calendar.findall('months/monthContext'): ctxt_type = ctxt.attrib['type'] 
ctxts = months.setdefault(ctxt_type, {}) for width in ctxt.findall('monthWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'month': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = text_type(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias(['months', ctxt_type, width_type], elem.attrib['path']) ) days = data.setdefault('days', {}) for ctxt in calendar.findall('days/dayContext'): ctxt_type = ctxt.attrib['type'] ctxts = days.setdefault(ctxt_type, {}) for width in ctxt.findall('dayWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'day': dtype = weekdays[elem.attrib['type']] if ('draft' in elem.attrib or 'alt' not in elem.attrib) \ and dtype in widths: continue widths[dtype] = text_type(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias(['days', ctxt_type, width_type], elem.attrib['path']) ) quarters = data.setdefault('quarters', {}) for ctxt in calendar.findall('quarters/quarterContext'): ctxt_type = ctxt.attrib['type'] ctxts = quarters.setdefault(ctxt.attrib['type'], {}) for width in ctxt.findall('quarterWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'quarter': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib['type'])] = text_type(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias(['quarters', ctxt_type, width_type], elem.attrib['path']) ) eras = data.setdefault('eras', {}) for width in calendar.findall('eras/*'): width_type = NAME_MAP[width.tag] widths = eras.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'era': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = text_type(elem.text) elif elem.tag == 'alias': eras[width_type] = Alias( _translate_alias(['eras', width_type], elem.attrib['path']) ) # AM/PM periods = data.setdefault('periods', {}) for elem in calendar.findall('am'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.tag in periods: continue periods[elem.tag] = text_type(elem.text) for elem in calendar.findall('pm'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.tag in periods: continue periods[elem.tag] = text_type(elem.text) date_formats = data.setdefault('date_formats', {}) for format in calendar.findall('dateFormats'): for elem in format.getiterator(): if elem.tag == 'dateFormatLength': if 'draft' in elem.attrib and \ elem.attrib.get('type') in date_formats: continue try: date_formats[elem.attrib.get('type')] = \ dates.parse_pattern(text_type(elem.findtext('dateFormat/pattern'))) except ValueError: sys.stderr.write('ERROR: %s\n' % sys.exc_info()[1]) elif elem.tag == 'alias': date_formats = Alias(_translate_alias( ['date_formats'], elem.attrib['path']) ) time_formats = data.setdefault('time_formats', {}) for format in calendar.findall('timeFormats'): for elem in format.getiterator(): if elem.tag == 'timeFormatLength': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in time_formats: continue try: time_formats[elem.attrib.get('type')] = \ 
dates.parse_pattern(text_type(elem.findtext('timeFormat/pattern'))) except ValueError: sys.stderr.write('ERROR: %s\n' % sys.exc_info()[1]) elif elem.tag == 'alias': time_formats = Alias(_translate_alias( ['time_formats'], elem.attrib['path']) ) datetime_formats = data.setdefault('datetime_formats', {}) for format in calendar.findall('dateTimeFormats'): for elem in format.getiterator(): if elem.tag == 'dateTimeFormatLength': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in datetime_formats: continue try: datetime_formats[elem.attrib.get('type')] = \ text_type(elem.findtext('dateTimeFormat/pattern')) except ValueError: sys.stderr.write('ERROR: %s\n' % sys.exc_info()[1]) elif elem.tag == 'alias': datetime_formats = Alias(_translate_alias( ['datetime_formats'], elem.attrib['path']) ) # <numbers> number_symbols = data.setdefault('number_symbols', {}) for elem in tree.findall('.//numbers/symbols/*'): if ('draft' in elem.attrib or 'alt' in elem.attrib): continue number_symbols[elem.tag] = text_type(elem.text) decimal_formats = data.setdefault('decimal_formats', {}) for elem in tree.findall('.//decimalFormats/decimalFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in decimal_formats: continue pattern = text_type(elem.findtext('decimalFormat/pattern')) decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) scientific_formats = data.setdefault('scientific_formats', {}) for elem in tree.findall('.//scientificFormats/scientificFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in scientific_formats: continue pattern = text_type(elem.findtext('scientificFormat/pattern')) scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) currency_formats = data.setdefault('currency_formats', {}) for elem in tree.findall('.//currencyFormats/currencyFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in currency_formats: continue pattern = text_type(elem.findtext('currencyFormat/pattern')) currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) percent_formats = data.setdefault('percent_formats', {}) for elem in tree.findall('.//percentFormats/percentFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in percent_formats: continue pattern = text_type(elem.findtext('percentFormat/pattern')) percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) currency_names = data.setdefault('currency_names', {}) currency_symbols = data.setdefault('currency_symbols', {}) for elem in tree.findall('.//currencies/currency'): code = elem.attrib['type'] # TODO: support plural rules for currency name selection for name in elem.findall('displayName'): if ('draft' in name.attrib or 'count' in name.attrib) \ and code in currency_names: continue currency_names[code] = text_type(name.text) # TODO: support choice patterns for currency symbol selection symbol = elem.find('symbol') if symbol is not None and 'draft' not in symbol.attrib \ and 'choice' not in symbol.attrib: currency_symbols[code] = text_type(symbol.text) # <units> unit_patterns = data.setdefault('unit_patterns', {}) for elem in tree.findall('.//units/unit'): unit_type = elem.attrib['type'] unit_pattern = unit_patterns.setdefault(unit_type, {}) for pattern in elem.findall('unitPattern'): unit_patterns[unit_type][pattern.attrib['count']] = \ text_type(pattern.text) dest = 
os.path.join(destdir, 'localedata', stem + '.dat') if not os.path.exists(os.path.dirname(dest)): os.mkdir(os.path.dirname(dest)) outfile = open(dest, 'wb') try: pickle.dump(data, outfile, 2) finally: outfile.close()
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file), 'rb') as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                share_source_strings(c, source_strings)
                c.plural_func = get_function_from_rule(c.plural_expr)
                replace_unused_singulars(c)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e, {})
    del source_strings

    # Prepare a unique and sorted list for use in the language switcher
    percent = lambda l, total: sum((percent(s, len(s)) if isinstance(s, tuple) else 1) for s in l if s) / total
    for l in list(locales.values()):
        if l.language == 'en':
            l.completion = 1
            continue
        l.completion = percent([m.string for m in l.catalog if m.id and not m.fuzzy], len(l.catalog))
        if l.completion == 0:
            del locales[l.language]
    loc_url = canonical_scheme + '://%s.' + canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    port = int(port) if port else socket.getservbyname(canonical_scheme, 'tcp')
    subdomains = {
        l.subdomain: loc_url % l.subdomain for l in locales.values()
        if resolve(l.subdomain + '.' + domain, port)
    }
    lang_list = sorted(
        (
            (l.completion, l.language, l.language_name.title(), loc_url % l.subdomain)
            for l in set(locales.values()) if l.completion > 0.5
        ),
        key=lambda t: (-t[0], t[1]),
    )

    # Add year-less date format
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for l in locales.values():
        short_format = l.date_formats['short'].pattern
        assert short_format[0] == 'y' or short_format[-1] == 'y', (l.language, short_format)
        l.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats['standard'] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currencies['USD'] = 'dollar états-unien'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
            if md.startswith('# '):
                md = '\n'.join(md.split('\n')[1:]).strip()
            md = heading_re.sub(r'##\1', md)
            docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales, 'subdomains': subdomains}
    # <numbers>
    number_symbols = data.setdefault('number_symbols', {})
    for elem in tree.findall('.//numbers/symbols/*'):
        if ('draft' in elem.attrib or 'alt' in elem.attrib):
            continue
        number_symbols[elem.tag] = unicode(elem.text)

    decimal_formats = data.setdefault('decimal_formats', {})
    for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                and elem.attrib.get('type') in decimal_formats:
            continue
        pattern = unicode(elem.findtext('decimalFormat/pattern'))
        decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

    scientific_formats = data.setdefault('scientific_formats', {})
    for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                and elem.attrib.get('type') in scientific_formats:
            continue
        pattern = unicode(elem.findtext('scientificFormat/pattern'))
        scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

    currency_formats = data.setdefault('currency_formats', {})
    for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                and elem.attrib.get('type') in currency_formats:
        l.countries = COUNTRIES
    return langs


# Load the locales
LOCALES = load_langs("i18n/core")

# Add the default English locale
LOCALE_EN = LOCALES['en'] = Locale('en')
LOCALE_EN.catalog = Catalog('en')
LOCALE_EN.catalog.plural_func = lambda n: n != 1
LOCALE_EN.countries = COUNTRIES
LOCALE_EN.countries_map = COUNTRIES_MAP

# Patch the locales to look less formal
LOCALES['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
LOCALES['fr'].currency_symbols['USD'] = '$'


def get_locale_for_request(request):
    accept_lang = request.headers.get("Accept-Language", "")
    languages = (lang.split(";", 1)[0] for lang in accept_lang.split(","))
    for lang in languages:
        lang = regularize_locale(lang)
        loc = LOCALES.get(lang)
        if loc:
            return loc
    return LOCALE_EN


def format_currency_with_options(number, currency, locale=LOCALE_EN, trailing_zeroes=True):
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                c.plural_func = get_function_from_rule(c.plural_expr)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e, {}, allow_reraise=True)

    # Prepare a unique and sorted list for use in the language switcher
    for l in locales.values():
        strings = [m.string for m in l.catalog]
        l.completion = sum(1 for s in strings if s) / len(strings)
    loc_url = canonical_scheme + '://%s.' + canonical_host
    lang_list = sorted(
        (
            (l.completion, l.language, l.language_name.title(), loc_url % l.language)
            for l in set(locales.values())
        ),
        key=lambda t: (-t[0], t[1]),
    )

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
            if md.startswith('# '):
                md = '\n'.join(md.split('\n')[1:]).strip()
            md = heading_re.sub(r'##\1', md)
            docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales}
def main(): parser = OptionParser(usage='%prog path/to/cldr') options, args = parser.parse_args() if len(args) != 1: parser.error('incorrect number of arguments') srcdir = args[0] destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..', 'babel') sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml') bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml')) sup_windows_zones = parse(os.path.join(srcdir, 'supplemental', 'windowsZones.xml')) sup_metadata = parse(os.path.join(srcdir, 'supplemental', 'supplementalMetadata.xml')) sup_likely = parse(os.path.join(srcdir, 'supplemental', 'likelySubtags.xml')) sup = parse(sup_filename) # Import global data from the supplemental files global_path = os.path.join(destdir, 'global.dat') global_data = {} if need_conversion(global_path, global_data, sup_filename): territory_zones = global_data.setdefault('territory_zones', {}) zone_aliases = global_data.setdefault('zone_aliases', {}) zone_territories = global_data.setdefault('zone_territories', {}) win_mapping = global_data.setdefault('windows_zone_mapping', {}) language_aliases = global_data.setdefault('language_aliases', {}) territory_aliases = global_data.setdefault('territory_aliases', {}) script_aliases = global_data.setdefault('script_aliases', {}) variant_aliases = global_data.setdefault('variant_aliases', {}) likely_subtags = global_data.setdefault('likely_subtags', {}) territory_currencies = global_data.setdefault('territory_currencies', {}) parent_exceptions = global_data.setdefault('parent_exceptions', {}) # create auxiliary zone->territory map from the windows zones (we don't set # the 'zones_territories' map directly here, because there are some zones # aliases listed and we defer the decision of which ones to choose to the # 'bcp47' data _zone_territory_map = {} for map_zone in sup_windows_zones.findall( './/windowsZones/mapTimezones/mapZone'): if map_zone.attrib.get('territory') == '001': win_mapping[map_zone.attrib['other']] = \ map_zone.attrib['type'].split()[0] for tzid in text_type(map_zone.attrib['type']).split(): _zone_territory_map[tzid] = \ text_type(map_zone.attrib['territory']) for key_elem in bcp47_timezone.findall('.//keyword/key'): if key_elem.attrib['name'] == 'tz': for elem in key_elem.findall('type'): if 'deprecated' not in elem.attrib: aliases = text_type(elem.attrib['alias']).split() tzid = aliases.pop(0) territory = _zone_territory_map.get(tzid, '001') territory_zones.setdefault(territory, []).append(tzid) zone_territories[tzid] = territory for alias in aliases: zone_aliases[alias] = tzid break # Import Metazone mapping meta_zones = global_data.setdefault('meta_zones', {}) tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml')) for elem in tzsup.findall('.//timezone'): for child in elem.findall('usesMetazone'): if 'to' not in child.attrib: # FIXME: support old mappings meta_zones[elem.attrib['type']] = child.attrib['mzone'] # Language aliases for alias in sup_metadata.findall('.//alias/languageAlias'): # We don't have a use for those at the moment. They don't # pass our parser anyways. 
if '_' in alias.attrib['type']: continue language_aliases[alias.attrib['type']] = alias.attrib['replacement'] # Territory aliases for alias in sup_metadata.findall('.//alias/territoryAlias'): territory_aliases[alias.attrib['type']] = \ alias.attrib['replacement'].split() # Script aliases for alias in sup_metadata.findall('.//alias/scriptAlias'): script_aliases[alias.attrib['type']] = alias.attrib['replacement'] # Variant aliases for alias in sup_metadata.findall('.//alias/variantAlias'): repl = alias.attrib.get('replacement') if repl: variant_aliases[alias.attrib['type']] = repl # Likely subtags for likely_subtag in sup_likely.findall('.//likelySubtags/likelySubtag'): likely_subtags[likely_subtag.attrib['from']] = \ likely_subtag.attrib['to'] # Currencies in territories for region in sup.findall('.//currencyData/region'): region_code = region.attrib['iso3166'] region_currencies = [] for currency in region.findall('./currency'): cur_start = _parse_currency_date(currency.attrib.get('from')) cur_end = _parse_currency_date(currency.attrib.get('to')) region_currencies.append((currency.attrib['iso4217'], cur_start, cur_end, currency.attrib.get( 'tender', 'true') == 'true')) region_currencies.sort(key=_currency_sort_key) territory_currencies[region_code] = region_currencies # Explicit parent locales for paternity in sup.findall('.//parentLocales/parentLocale'): parent = paternity.attrib['parent'] for child in paternity.attrib['locales'].split(): parent_exceptions[child] = parent outfile = open(global_path, 'wb') try: pickle.dump(global_data, outfile, 2) finally: outfile.close() # build a territory containment mapping for inheritance regions = {} for elem in sup.findall('.//territoryContainment/group'): regions[elem.attrib['type']] = elem.attrib['contains'].split() # Resolve territory containment territory_containment = {} region_items = sorted(regions.items()) for group, territory_list in region_items: for territory in territory_list: containers = territory_containment.setdefault(territory, set([])) if group in territory_containment: containers |= territory_containment[group] containers.add(group) # prepare the per-locale plural rules definitions plural_rules = {} prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml')) for elem in prsup.findall('.//plurals/pluralRules'): rules = [] for rule in elem.findall('pluralRule'): rules.append((rule.attrib['count'], text_type(rule.text))) pr = PluralRule(rules) for locale in elem.attrib['locales'].split(): plural_rules[locale] = pr filenames = os.listdir(os.path.join(srcdir, 'main')) filenames.remove('root.xml') filenames.sort(key=len) filenames.insert(0, 'root.xml') for filename in filenames: stem, ext = os.path.splitext(filename) if ext != '.xml': continue full_filename = os.path.join(srcdir, 'main', filename) data_filename = os.path.join(destdir, 'localedata', stem + '.dat') data = {} if not need_conversion(data_filename, data, full_filename): continue tree = parse(full_filename) language = None elem = tree.find('.//identity/language') if elem is not None: language = elem.attrib['type'] territory = None elem = tree.find('.//identity/territory') if elem is not None: territory = elem.attrib['type'] else: territory = '001' # world regions = territory_containment.get(territory, []) log('Processing %s (Language = %s; Territory = %s)', filename, language, territory) # plural rules locale_id = '_'.join(filter(None, [ language, territory != '001' and territory or None ])) if locale_id in plural_rules: data['plural_form'] = plural_rules[locale_id] # 
<localeDisplayNames> territories = data.setdefault('territories', {}) for elem in tree.findall('.//territories/territory'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in territories: continue territories[elem.attrib['type']] = _text(elem) languages = data.setdefault('languages', {}) for elem in tree.findall('.//languages/language'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in languages: continue languages[elem.attrib['type']] = _text(elem) variants = data.setdefault('variants', {}) for elem in tree.findall('.//variants/variant'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in variants: continue variants[elem.attrib['type']] = _text(elem) scripts = data.setdefault('scripts', {}) for elem in tree.findall('.//scripts/script'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in scripts: continue scripts[elem.attrib['type']] = _text(elem) # <dates> week_data = data.setdefault('week_data', {}) supelem = sup.find('.//weekData') for elem in supelem.findall('minDays'): territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['min_days'] = int(elem.attrib['count']) for elem in supelem.findall('firstDay'): territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['first_day'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendStart'): territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['weekend_start'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendEnd'): territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['weekend_end'] = weekdays[elem.attrib['day']] zone_formats = data.setdefault('zone_formats', {}) for elem in tree.findall('.//timeZoneNames/gmtFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s') break for elem in tree.findall('.//timeZoneNames/regionFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['region'] = text_type(elem.text).replace('{0}', '%s') break for elem in tree.findall('.//timeZoneNames/fallbackFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['fallback'] = text_type(elem.text) \ .replace('{0}', '%(0)s').replace('{1}', '%(1)s') break for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['fallback_region'] = text_type(elem.text) \ .replace('{0}', '%(0)s').replace('{1}', '%(1)s') break time_zones = data.setdefault('time_zones', {}) for elem in tree.findall('.//timeZoneNames/zone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = text_type(city) for child in elem.findall('long/*'): info.setdefault('long', {})[child.tag] = text_type(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = text_type(child.text) time_zones[elem.attrib['type']] = info meta_zones = data.setdefault('meta_zones', {}) for elem in tree.findall('.//timeZoneNames/metazone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = text_type(city) for child in elem.findall('long/*'): info.setdefault('long', {})[child.tag] = text_type(child.text) 
for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = text_type(child.text) meta_zones[elem.attrib['type']] = info for calendar in tree.findall('.//calendars/calendar'): if calendar.attrib['type'] != 'gregorian': # TODO: support other calendar types continue months = data.setdefault('months', {}) for ctxt in calendar.findall('months/monthContext'): ctxt_type = ctxt.attrib['type'] ctxts = months.setdefault(ctxt_type, {}) for width in ctxt.findall('monthWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'month': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = \ text_type(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias(['months', ctxt_type, width_type], elem.attrib['path']) ) days = data.setdefault('days', {}) for ctxt in calendar.findall('days/dayContext'): ctxt_type = ctxt.attrib['type'] ctxts = days.setdefault(ctxt_type, {}) for width in ctxt.findall('dayWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'day': dtype = weekdays[elem.attrib['type']] if ('draft' in elem.attrib or 'alt' not in elem.attrib) \ and dtype in widths: continue widths[dtype] = text_type(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias(['days', ctxt_type, width_type], elem.attrib['path']) ) quarters = data.setdefault('quarters', {}) for ctxt in calendar.findall('quarters/quarterContext'): ctxt_type = ctxt.attrib['type'] ctxts = quarters.setdefault(ctxt.attrib['type'], {}) for width in ctxt.findall('quarterWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'quarter': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib['type'])] = text_type(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias(['quarters', ctxt_type, width_type], elem.attrib['path'])) eras = data.setdefault('eras', {}) for width in calendar.findall('eras/*'): width_type = NAME_MAP[width.tag] widths = eras.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'era': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = text_type(elem.text) elif elem.tag == 'alias': eras[width_type] = Alias( _translate_alias(['eras', width_type], elem.attrib['path']) ) # AM/PM periods = data.setdefault('periods', {}) for day_period_width in calendar.findall( 'dayPeriods/dayPeriodContext/dayPeriodWidth'): if day_period_width.attrib['type'] == 'wide': for day_period in day_period_width.findall('dayPeriod'): if 'alt' not in day_period.attrib: periods[day_period.attrib['type']] = text_type( day_period.text) date_formats = data.setdefault('date_formats', {}) for format in calendar.findall('dateFormats'): for elem in format.getiterator(): if elem.tag == 'dateFormatLength': if 'draft' in elem.attrib and \ elem.attrib.get('type') in date_formats: continue try: date_formats[elem.attrib.get('type')] = \ dates.parse_pattern(text_type( elem.findtext('dateFormat/pattern'))) except ValueError as e: error(e) elif elem.tag == 'alias': date_formats = Alias(_translate_alias( ['date_formats'], elem.attrib['path']) ) time_formats = 
data.setdefault('time_formats', {}) for format in calendar.findall('timeFormats'): for elem in format.getiterator(): if elem.tag == 'timeFormatLength': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in time_formats: continue try: time_formats[elem.attrib.get('type')] = \ dates.parse_pattern(text_type( elem.findtext('timeFormat/pattern'))) except ValueError as e: error(e) elif elem.tag == 'alias': time_formats = Alias(_translate_alias( ['time_formats'], elem.attrib['path']) ) datetime_formats = data.setdefault('datetime_formats', {}) for format in calendar.findall('dateTimeFormats'): for elem in format.getiterator(): if elem.tag == 'dateTimeFormatLength': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in datetime_formats: continue try: datetime_formats[elem.attrib.get('type')] = \ text_type(elem.findtext('dateTimeFormat/pattern')) except ValueError as e: error(e) elif elem.tag == 'alias': datetime_formats = Alias(_translate_alias( ['datetime_formats'], elem.attrib['path']) ) # <numbers> number_symbols = data.setdefault('number_symbols', {}) for elem in tree.findall('.//numbers/symbols/*'): if ('draft' in elem.attrib or 'alt' in elem.attrib): continue number_symbols[elem.tag] = text_type(elem.text) decimal_formats = data.setdefault('decimal_formats', {}) for elem in tree.findall('.//decimalFormats/decimalFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in decimal_formats: continue if elem.findall('./alias'): # TODO map the alias to its target continue pattern = text_type(elem.findtext('./decimalFormat/pattern')) decimal_formats[elem.attrib.get('type')] = \ numbers.parse_pattern(pattern) scientific_formats = data.setdefault('scientific_formats', {}) for elem in tree.findall('.//scientificFormats/scientificFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in scientific_formats: continue pattern = text_type(elem.findtext('scientificFormat/pattern')) scientific_formats[elem.attrib.get('type')] = \ numbers.parse_pattern(pattern) currency_formats = data.setdefault('currency_formats', {}) for elem in tree.findall('.//currencyFormats/currencyFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in currency_formats: continue pattern = text_type(elem.findtext('currencyFormat/pattern')) currency_formats[elem.attrib.get('type')] = \ numbers.parse_pattern(pattern) percent_formats = data.setdefault('percent_formats', {}) for elem in tree.findall('.//percentFormats/percentFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in percent_formats: continue pattern = text_type(elem.findtext('percentFormat/pattern')) percent_formats[elem.attrib.get('type')] = \ numbers.parse_pattern(pattern) currency_names = data.setdefault('currency_names', {}) currency_names_plural = data.setdefault('currency_names_plural', {}) currency_symbols = data.setdefault('currency_symbols', {}) for elem in tree.findall('.//currencies/currency'): code = elem.attrib['type'] for name in elem.findall('displayName'): if ('draft' in name.attrib) and code in currency_names: continue if 'count' in name.attrib: currency_names_plural.setdefault(code, {})[ name.attrib['count']] = text_type(name.text) else: currency_names[code] = text_type(name.text) # TODO: support choice patterns for currency symbol selection symbol = elem.find('symbol') if symbol is not None and 'draft' not in symbol.attrib \ 
and 'choice' not in symbol.attrib: currency_symbols[code] = text_type(symbol.text) # <units> unit_patterns = data.setdefault('unit_patterns', {}) for elem in tree.findall('.//units/unitLength'): unit_length_type = elem.attrib['type'] for unit in elem.findall('unit'): unit_type = unit.attrib['type'] for pattern in unit.findall('unitPattern'): box = unit_type box += ':' + unit_length_type unit_patterns.setdefault(box, {})[pattern.attrib['count']] = \ text_type(pattern.text) date_fields = data.setdefault('date_fields', {}) for elem in tree.findall('.//dates/fields/field'): field_type = elem.attrib['type'] date_fields.setdefault(field_type, {}) for rel_time in elem.findall('relativeTime'): rel_time_type = rel_time.attrib['type'] for pattern in rel_time.findall('relativeTimePattern'): date_fields[field_type].setdefault(rel_time_type, {})\ [pattern.attrib['count']] = text_type(pattern.text) outfile = open(data_filename, 'wb') try: pickle.dump(data, outfile, 2) finally: outfile.close()
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                share_source_strings(c, source_strings)
                c.plural_func = get_function_from_rule(c.plural_expr)
                replace_unused_singulars(c)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e, {})
    del source_strings

    # Prepare a unique and sorted list for use in the language switcher
    percent = lambda l, total: sum(
        (percent(s, len(s)) if isinstance(s, tuple) else 1) for s in l if s
    ) / total
    for l in list(locales.values()):
        if l.language == 'en':
            l.completion = 1
            continue
        l.completion = percent(
            [m.string for m in l.catalog if m.id and not m.fuzzy], len(l.catalog))
        if l.completion == 0:
            del locales[l.language]
    loc_url = canonical_scheme + '://%s.' + canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    port = int(port) if port else socket.getservbyname(canonical_scheme, 'tcp')
    subdomains = {
        k: loc_url % k for k in locales if resolve(k + '.' + domain, port)
    }
    lang_list = sorted(
        ((l.completion, l.language, l.language_name.title(), loc_url % l.language)
         for l in set(locales.values()) if l.completion > 0.5),
        key=lambda t: (-t[0], t[1]),
    )

    # Add year-less date format
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for l in locales.values():
        short_format = l.date_formats['short'].pattern
        assert short_format[0] == 'y' or short_format[-1] == 'y', (
            l.language, short_format)
        l.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats['standard'] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'
    locales['fr'].currencies['USD'] = 'dollar états-unien'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
            if md.startswith('# '):
                md = '\n'.join(md.split('\n')[1:]).strip()
                md = heading_re.sub(r'##\1', md)
        docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {
        'docs': docs, 'lang_list': lang_list, 'locales': locales,
        'subdomains': subdomains
    }
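# The subdomains map above calls a resolve() helper that isn't shown in this
# excerpt. A plausible sketch, assuming it only checks that a language
# subdomain actually resolves before advertising it; the name and behaviour
# are assumptions, not the real implementation.
import socket

def resolve_sketch(hostname, port):
    """Return True if `hostname` resolves for `port`, else False (sketch)."""
    try:
        socket.getaddrinfo(hostname, port)
    except socket.gaierror:
        return False
    return True

# e.g. resolve_sketch('fr.example.net', 443) -> True only if DNS resolves it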
def main(): parser = OptionParser(usage='%prog path/to/cldr') options, args = parser.parse_args() if len(args) != 1: parser.error('incorrect number of arguments') srcdir = args[0] destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..', 'babel') sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml') bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml')) sup_windows_zones = parse( os.path.join(srcdir, 'supplemental', 'windowsZones.xml')) sup_metadata = parse( os.path.join(srcdir, 'supplemental', 'supplementalMetadata.xml')) sup_likely = parse( os.path.join(srcdir, 'supplemental', 'likelySubtags.xml')) sup = parse(sup_filename) # Import global data from the supplemental files global_path = os.path.join(destdir, 'global.dat') global_data = {} if need_conversion(global_path, global_data, sup_filename): territory_zones = global_data.setdefault('territory_zones', {}) zone_aliases = global_data.setdefault('zone_aliases', {}) zone_territories = global_data.setdefault('zone_territories', {}) win_mapping = global_data.setdefault('windows_zone_mapping', {}) language_aliases = global_data.setdefault('language_aliases', {}) territory_aliases = global_data.setdefault('territory_aliases', {}) script_aliases = global_data.setdefault('script_aliases', {}) variant_aliases = global_data.setdefault('variant_aliases', {}) likely_subtags = global_data.setdefault('likely_subtags', {}) territory_currencies = global_data.setdefault('territory_currencies', {}) # create auxiliary zone->territory map from the windows zones (we don't set # the 'zones_territories' map directly here, because there are some zones # aliases listed and we defer the decision of which ones to choose to the # 'bcp47' data _zone_territory_map = {} for map_zone in sup_windows_zones.findall( './/windowsZones/mapTimezones/mapZone'): if map_zone.attrib.get('territory') == '001': win_mapping[map_zone.attrib['other']] = \ map_zone.attrib['type'].split()[0] for tzid in text_type(map_zone.attrib['type']).split(): _zone_territory_map[tzid] = \ text_type(map_zone.attrib['territory']) for key_elem in bcp47_timezone.findall('.//keyword/key'): if key_elem.attrib['name'] == 'tz': for elem in key_elem.findall('type'): aliases = text_type(elem.attrib['alias']).split() tzid = aliases.pop(0) territory = _zone_territory_map.get(tzid, '001') territory_zones.setdefault(territory, []).append(tzid) zone_territories[tzid] = territory for alias in aliases: zone_aliases[alias] = tzid break # Import Metazone mapping meta_zones = global_data.setdefault('meta_zones', {}) tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml')) for elem in tzsup.findall('.//timezone'): for child in elem.findall('usesMetazone'): if 'to' not in child.attrib: # FIXME: support old mappings meta_zones[elem.attrib['type']] = child.attrib['mzone'] # Language aliases for alias in sup_metadata.findall('.//alias/languageAlias'): # We don't have a use for those at the moment. They don't # pass our parser anyways. 
if '-' in alias.attrib['type']: continue language_aliases[ alias.attrib['type']] = alias.attrib['replacement'] # Territory aliases for alias in sup_metadata.findall('.//alias/territoryAlias'): territory_aliases[alias.attrib['type']] = \ alias.attrib['replacement'].split() # Script aliases for alias in sup_metadata.findall('.//alias/scriptAlias'): script_aliases[alias.attrib['type']] = alias.attrib['replacement'] # Variant aliases for alias in sup_metadata.findall('.//alias/variantAlias'): repl = alias.attrib.get('replacement') if repl: variant_aliases[alias.attrib['type']] = repl # Likely subtags for likely_subtag in sup_likely.findall( './/likelySubtags/likelySubtag'): likely_subtags[likely_subtag.attrib['from']] = \ likely_subtag.attrib['to'] # Currencies in territories for region in sup.findall('.//currencyData/region'): region_code = region.attrib['iso3166'] region_currencies = [] for currency in region.findall('./currency'): cur_start = _parse_currency_date(currency.attrib.get('from')) cur_end = _parse_currency_date(currency.attrib.get('to')) region_currencies.append( (currency.attrib['iso4217'], cur_start, cur_end, currency.attrib.get('tender', 'true') == 'true')) region_currencies.sort(key=_currency_sort_key) territory_currencies[region_code] = region_currencies outfile = open(global_path, 'wb') try: pickle.dump(global_data, outfile, 2) finally: outfile.close() # build a territory containment mapping for inheritance regions = {} for elem in sup.findall('.//territoryContainment/group'): regions[elem.attrib['type']] = elem.attrib['contains'].split() # Resolve territory containment territory_containment = {} region_items = sorted(regions.items()) for group, territory_list in region_items: for territory in territory_list: containers = territory_containment.setdefault(territory, set([])) if group in territory_containment: containers |= territory_containment[group] containers.add(group) # prepare the per-locale plural rules definitions plural_rules = {} prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml')) for elem in prsup.findall('.//plurals/pluralRules'): rules = [] for rule in elem.findall('pluralRule'): rules.append((rule.attrib['count'], text_type(rule.text))) pr = PluralRule(rules) for locale in elem.attrib['locales'].split(): plural_rules[locale] = pr filenames = os.listdir(os.path.join(srcdir, 'main')) filenames.remove('root.xml') filenames.sort(key=len) filenames.insert(0, 'root.xml') for filename in filenames: stem, ext = os.path.splitext(filename) if ext != '.xml': continue full_filename = os.path.join(srcdir, 'main', filename) data_filename = os.path.join(destdir, 'localedata', stem + '.dat') data = {} if not need_conversion(data_filename, data, full_filename): continue tree = parse(full_filename) language = None elem = tree.find('.//identity/language') if elem is not None: language = elem.attrib['type'] territory = None elem = tree.find('.//identity/territory') if elem is not None: territory = elem.attrib['type'] else: territory = '001' # world regions = territory_containment.get(territory, []) log('Processing %s (Language = %s; Territory = %s)', filename, language, territory) # plural rules locale_id = '_'.join( filter(None, [language, territory != '001' and territory or None])) if locale_id in plural_rules: data['plural_form'] = plural_rules[locale_id] # <localeDisplayNames> territories = data.setdefault('territories', {}) for elem in tree.findall('.//territories/territory'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in 
territories: continue territories[elem.attrib['type']] = _text(elem) languages = data.setdefault('languages', {}) for elem in tree.findall('.//languages/language'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in languages: continue languages[elem.attrib['type']] = _text(elem) variants = data.setdefault('variants', {}) for elem in tree.findall('.//variants/variant'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in variants: continue variants[elem.attrib['type']] = _text(elem) scripts = data.setdefault('scripts', {}) for elem in tree.findall('.//scripts/script'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in scripts: continue scripts[elem.attrib['type']] = _text(elem) # <dates> week_data = data.setdefault('week_data', {}) supelem = sup.find('.//weekData') for elem in supelem.findall('minDays'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['min_days'] = int(elem.attrib['count']) for elem in supelem.findall('firstDay'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['first_day'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendStart'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['weekend_start'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendEnd'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['weekend_end'] = weekdays[elem.attrib['day']] zone_formats = data.setdefault('zone_formats', {}) for elem in tree.findall('.//timeZoneNames/gmtFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s') break for elem in tree.findall('.//timeZoneNames/regionFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['region'] = text_type(elem.text).replace( '{0}', '%s') break for elem in tree.findall('.//timeZoneNames/fallbackFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['fallback'] = text_type(elem.text) \ .replace('{0}', '%(0)s').replace('{1}', '%(1)s') break for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['fallback_region'] = text_type(elem.text) \ .replace('{0}', '%(0)s').replace('{1}', '%(1)s') break time_zones = data.setdefault('time_zones', {}) for elem in tree.findall('.//timeZoneNames/zone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = text_type(city) for child in elem.findall('long/*'): info.setdefault('long', {})[child.tag] = text_type(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = text_type(child.text) time_zones[elem.attrib['type']] = info meta_zones = data.setdefault('meta_zones', {}) for elem in tree.findall('.//timeZoneNames/metazone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = text_type(city) for child in elem.findall('long/*'): info.setdefault('long', {})[child.tag] = text_type(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = text_type(child.text) meta_zones[elem.attrib['type']] = info for calendar in tree.findall('.//calendars/calendar'): if 
calendar.attrib['type'] != 'gregorian': # TODO: support other calendar types continue months = data.setdefault('months', {}) for ctxt in calendar.findall('months/monthContext'): ctxt_type = ctxt.attrib['type'] ctxts = months.setdefault(ctxt_type, {}) for width in ctxt.findall('monthWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'month': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = \ text_type(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias( ['months', ctxt_type, width_type], elem.attrib['path'])) days = data.setdefault('days', {}) for ctxt in calendar.findall('days/dayContext'): ctxt_type = ctxt.attrib['type'] ctxts = days.setdefault(ctxt_type, {}) for width in ctxt.findall('dayWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'day': dtype = weekdays[elem.attrib['type']] if ('draft' in elem.attrib or 'alt' not in elem.attrib) \ and dtype in widths: continue widths[dtype] = text_type(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias( ['days', ctxt_type, width_type], elem.attrib['path'])) quarters = data.setdefault('quarters', {}) for ctxt in calendar.findall('quarters/quarterContext'): ctxt_type = ctxt.attrib['type'] ctxts = quarters.setdefault(ctxt.attrib['type'], {}) for width in ctxt.findall('quarterWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'quarter': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib['type'])] = text_type( elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias( ['quarters', ctxt_type, width_type], elem.attrib['path'])) eras = data.setdefault('eras', {}) for width in calendar.findall('eras/*'): width_type = NAME_MAP[width.tag] widths = eras.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'era': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = text_type( elem.text) elif elem.tag == 'alias': eras[width_type] = Alias( _translate_alias(['eras', width_type], elem.attrib['path'])) # AM/PM periods = data.setdefault('periods', {}) for day_period_width in calendar.findall( 'dayPeriods/dayPeriodContext/dayPeriodWidth'): if day_period_width.attrib['type'] == 'wide': for day_period in day_period_width.findall('dayPeriod'): if 'alt' not in day_period.attrib: periods[day_period.attrib['type']] = text_type( day_period.text) date_formats = data.setdefault('date_formats', {}) for format in calendar.findall('dateFormats'): for elem in format.getiterator(): if elem.tag == 'dateFormatLength': if 'draft' in elem.attrib and \ elem.attrib.get('type') in date_formats: continue try: date_formats[elem.attrib.get('type')] = \ dates.parse_pattern(text_type( elem.findtext('dateFormat/pattern'))) except ValueError as e: error(e) elif elem.tag == 'alias': date_formats = Alias( _translate_alias(['date_formats'], elem.attrib['path'])) time_formats = data.setdefault('time_formats', {}) for format in calendar.findall('timeFormats'): for elem in format.getiterator(): if elem.tag == 'timeFormatLength': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and 
elem.attrib.get('type') in time_formats: continue try: time_formats[elem.attrib.get('type')] = \ dates.parse_pattern(text_type( elem.findtext('timeFormat/pattern'))) except ValueError as e: error(e) elif elem.tag == 'alias': time_formats = Alias( _translate_alias(['time_formats'], elem.attrib['path'])) datetime_formats = data.setdefault('datetime_formats', {}) for format in calendar.findall('dateTimeFormats'): for elem in format.getiterator(): if elem.tag == 'dateTimeFormatLength': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in datetime_formats: continue try: datetime_formats[elem.attrib.get('type')] = \ text_type(elem.findtext('dateTimeFormat/pattern')) except ValueError as e: error(e) elif elem.tag == 'alias': datetime_formats = Alias( _translate_alias(['datetime_formats'], elem.attrib['path'])) # <numbers> number_symbols = data.setdefault('number_symbols', {}) for elem in tree.findall('.//numbers/symbols/*'): if ('draft' in elem.attrib or 'alt' in elem.attrib): continue number_symbols[elem.tag] = text_type(elem.text) decimal_formats = data.setdefault('decimal_formats', {}) for elem in tree.findall('.//decimalFormats/decimalFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in decimal_formats: continue if elem.findall('./alias'): # TODO map the alias to its target continue pattern = text_type(elem.findtext('./decimalFormat/pattern')) decimal_formats[elem.attrib.get('type')] = \ numbers.parse_pattern(pattern) scientific_formats = data.setdefault('scientific_formats', {}) for elem in tree.findall( './/scientificFormats/scientificFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in scientific_formats: continue pattern = text_type(elem.findtext('scientificFormat/pattern')) scientific_formats[elem.attrib.get('type')] = \ numbers.parse_pattern(pattern) currency_formats = data.setdefault('currency_formats', {}) for elem in tree.findall('.//currencyFormats/currencyFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in currency_formats: continue pattern = text_type(elem.findtext('currencyFormat/pattern')) currency_formats[elem.attrib.get('type')] = \ numbers.parse_pattern(pattern) percent_formats = data.setdefault('percent_formats', {}) for elem in tree.findall('.//percentFormats/percentFormatLength'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in percent_formats: continue pattern = text_type(elem.findtext('percentFormat/pattern')) percent_formats[elem.attrib.get('type')] = \ numbers.parse_pattern(pattern) currency_names = data.setdefault('currency_names', {}) currency_names_plural = data.setdefault('currency_names_plural', {}) currency_symbols = data.setdefault('currency_symbols', {}) for elem in tree.findall('.//currencies/currency'): code = elem.attrib['type'] for name in elem.findall('displayName'): if ('draft' in name.attrib) and code in currency_names: continue if 'count' in name.attrib: currency_names_plural.setdefault( code, {})[name.attrib['count']] = text_type(name.text) else: currency_names[code] = text_type(name.text) # TODO: support choice patterns for currency symbol selection symbol = elem.find('symbol') if symbol is not None and 'draft' not in symbol.attrib \ and 'choice' not in symbol.attrib: currency_symbols[code] = text_type(symbol.text) # <units> unit_patterns = data.setdefault('unit_patterns', {}) for elem in tree.findall('.//units/unit'): unit_type = 
elem.attrib['type'] for pattern in elem.findall('unitPattern'): box = unit_type if 'alt' in pattern.attrib: box += ':' + pattern.attrib['alt'] unit_patterns.setdefault(box, {})[pattern.attrib['count']] = \ text_type(pattern.text) outfile = open(data_filename, 'wb') try: pickle.dump(data, outfile, 2) finally: outfile.close()
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):

    def compute_percentage(it, total):
        return sum(
            (compute_percentage(s, len(s)) if isinstance(s, tuple) else 1)
            for s in it if s
        ) / total

    # Load the base locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file), 'rb') as f:
                l = Locale(lang)
                c = l.catalog = read_po(f)
                share_source_strings(c, source_strings)
                c.plural_func = get_function_from_rule(c.plural_expr)
                replace_unused_singulars(c)
                l.completion = compute_percentage(
                    (m.string for m in c if m.id and not m.fuzzy), len(c))
                if l.completion == 0:
                    continue
                else:
                    locales[lang.lower()] = l
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e)
    del source_strings

    # Load the variants
    for loc_id in babel.localedata.locale_identifiers():
        if loc_id in locales:
            continue
        i = loc_id.rfind('_')
        if i == -1:
            continue
        base = locales.get(loc_id[:i])
        if base:
            l = locales[loc_id.lower()] = Locale.parse(loc_id)
            l.catalog = base.catalog
            l.completion = base.completion
            l.countries = base.countries
            l.languages_2 = base.languages_2

    # Unload the Babel data that we no longer need
    # We load a lot of data to populate the LANGUAGE_NAMES dict, we don't want
    # to keep it all in RAM.
    used_data_dict_addresses = set(id(l._data._data) for l in locales.values())
    for key, data_dict in list(babel.localedata._cache.items()):
        if id(data_dict) not in used_data_dict_addresses:
            del babel.localedata._cache[key]

    # Prepare a unique and sorted list for use in the language switcher
    loc_url = canonical_scheme + '://%s.' + canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    port = int(port) if port else socket.getservbyname(canonical_scheme, 'tcp')
    subdomains = {
        l.subdomain: loc_url % l.subdomain for l in locales.values()
        if not l.territory and resolve(l.subdomain + '.' + domain, port)
    }
    lang_list = sorted(
        ((l.completion, l.language, l.language_name.title(), loc_url % l.subdomain)
         for l in set(locales.values())
         if not l.territory and l.completion > 0.5),
        key=lambda t: (-t[0], t[1]),
    )

    # Add year-less date format
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for l in locales.values():
        short_format = l.date_formats['short'].pattern
        assert short_format[0] == 'y' or short_format[-1] == 'y', (
            l.language, short_format)
        l.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Add universal strings
    # These strings don't need to be translated, but they have to be in the
    # catalogs so that they're counted as translated.
    for l in locales.values():
        l.catalog.add("PayPal", "PayPal")

    # Patch the locales to look less formal
    locales['fr'].currency_formats['standard'] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currencies['USD'] = 'dollar états-unien'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
            if md.startswith('# '):
                md = '\n'.join(md.split('\n')[1:]).strip()
                md = heading_re.sub(r'##\1', md)
        docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {
        'docs': docs, 'lang_list': lang_list, 'locales': locales,
        'subdomains': subdomains
    }
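# Quick check of the year-stripping regex used above: it removes a leading or
# trailing year field (plus its separator) from a locale's short date pattern.
# The sample patterns below are illustrative, not taken from CLDR data.
import re
year_re_check = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
assert year_re_check.sub('', 'dd/MM/y') == 'dd/MM'
assert year_re_check.sub('', 'y-MM-dd') == 'MM-dd'
assert year_re_check.sub('', 'd.M.yyyy') == 'd.M'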
def main(): parser = OptionParser(usage="%prog path/to/cldr") options, args = parser.parse_args() if len(args) != 1: parser.error("incorrect number of arguments") srcdir = args[0] destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), "..", "babel") sup_filename = os.path.join(srcdir, "supplemental", "supplementalData.xml") bcp47_timezone = parse(os.path.join(srcdir, "bcp47", "timezone.xml")) sup_windows_zones = parse(os.path.join(srcdir, "supplemental", "windowsZones.xml")) sup_metadata = parse(os.path.join(srcdir, "supplemental", "supplementalMetadata.xml")) sup_likely = parse(os.path.join(srcdir, "supplemental", "likelySubtags.xml")) sup = parse(sup_filename) # Import global data from the supplemental files global_path = os.path.join(destdir, "global.dat") global_data = {} if need_conversion(global_path, global_data, sup_filename): territory_zones = global_data.setdefault("territory_zones", {}) zone_aliases = global_data.setdefault("zone_aliases", {}) zone_territories = global_data.setdefault("zone_territories", {}) win_mapping = global_data.setdefault("windows_zone_mapping", {}) language_aliases = global_data.setdefault("language_aliases", {}) territory_aliases = global_data.setdefault("territory_aliases", {}) script_aliases = global_data.setdefault("script_aliases", {}) variant_aliases = global_data.setdefault("variant_aliases", {}) likely_subtags = global_data.setdefault("likely_subtags", {}) territory_currencies = global_data.setdefault("territory_currencies", {}) parent_exceptions = global_data.setdefault("parent_exceptions", {}) currency_fractions = global_data.setdefault("currency_fractions", {}) # create auxiliary zone->territory map from the windows zones (we don't set # the 'zones_territories' map directly here, because there are some zones # aliases listed and we defer the decision of which ones to choose to the # 'bcp47' data _zone_territory_map = {} for map_zone in sup_windows_zones.findall(".//windowsZones/mapTimezones/mapZone"): if map_zone.attrib.get("territory") == "001": win_mapping[map_zone.attrib["other"]] = map_zone.attrib["type"].split()[0] for tzid in text_type(map_zone.attrib["type"]).split(): _zone_territory_map[tzid] = text_type(map_zone.attrib["territory"]) for key_elem in bcp47_timezone.findall(".//keyword/key"): if key_elem.attrib["name"] == "tz": for elem in key_elem.findall("type"): if "deprecated" not in elem.attrib: aliases = text_type(elem.attrib["alias"]).split() tzid = aliases.pop(0) territory = _zone_territory_map.get(tzid, "001") territory_zones.setdefault(territory, []).append(tzid) zone_territories[tzid] = territory for alias in aliases: zone_aliases[alias] = tzid break # Import Metazone mapping meta_zones = global_data.setdefault("meta_zones", {}) tzsup = parse(os.path.join(srcdir, "supplemental", "metaZones.xml")) for elem in tzsup.findall(".//timezone"): for child in elem.findall("usesMetazone"): if "to" not in child.attrib: # FIXME: support old mappings meta_zones[elem.attrib["type"]] = child.attrib["mzone"] # Language aliases for alias in sup_metadata.findall(".//alias/languageAlias"): # We don't have a use for those at the moment. They don't # pass our parser anyways. 
if "_" in alias.attrib["type"]: continue language_aliases[alias.attrib["type"]] = alias.attrib["replacement"] # Territory aliases for alias in sup_metadata.findall(".//alias/territoryAlias"): territory_aliases[alias.attrib["type"]] = alias.attrib["replacement"].split() # Script aliases for alias in sup_metadata.findall(".//alias/scriptAlias"): script_aliases[alias.attrib["type"]] = alias.attrib["replacement"] # Variant aliases for alias in sup_metadata.findall(".//alias/variantAlias"): repl = alias.attrib.get("replacement") if repl: variant_aliases[alias.attrib["type"]] = repl # Likely subtags for likely_subtag in sup_likely.findall(".//likelySubtags/likelySubtag"): likely_subtags[likely_subtag.attrib["from"]] = likely_subtag.attrib["to"] # Currencies in territories for region in sup.findall(".//currencyData/region"): region_code = region.attrib["iso3166"] region_currencies = [] for currency in region.findall("./currency"): cur_start = _parse_currency_date(currency.attrib.get("from")) cur_end = _parse_currency_date(currency.attrib.get("to")) region_currencies.append( (currency.attrib["iso4217"], cur_start, cur_end, currency.attrib.get("tender", "true") == "true") ) region_currencies.sort(key=_currency_sort_key) territory_currencies[region_code] = region_currencies # Explicit parent locales for paternity in sup.findall(".//parentLocales/parentLocale"): parent = paternity.attrib["parent"] for child in paternity.attrib["locales"].split(): parent_exceptions[child] = parent # Currency decimal and rounding digits for fraction in sup.findall(".//currencyData/fractions/info"): cur_code = fraction.attrib["iso4217"] cur_digits = int(fraction.attrib["digits"]) cur_rounding = int(fraction.attrib["rounding"]) cur_cdigits = int(fraction.attrib.get("cashDigits", cur_digits)) cur_crounding = int(fraction.attrib.get("cashRounding", cur_rounding)) currency_fractions[cur_code] = (cur_digits, cur_rounding, cur_cdigits, cur_crounding) outfile = open(global_path, "wb") try: pickle.dump(global_data, outfile, 2) finally: outfile.close() # build a territory containment mapping for inheritance regions = {} for elem in sup.findall(".//territoryContainment/group"): regions[elem.attrib["type"]] = elem.attrib["contains"].split() # Resolve territory containment territory_containment = {} region_items = sorted(regions.items()) for group, territory_list in region_items: for territory in territory_list: containers = territory_containment.setdefault(territory, set([])) if group in territory_containment: containers |= territory_containment[group] containers.add(group) # prepare the per-locale plural rules definitions plural_rules = {} prsup = parse(os.path.join(srcdir, "supplemental", "plurals.xml")) for elem in prsup.findall(".//plurals/pluralRules"): rules = [] for rule in elem.findall("pluralRule"): rules.append((rule.attrib["count"], text_type(rule.text))) pr = PluralRule(rules) for locale in elem.attrib["locales"].split(): plural_rules[locale] = pr filenames = os.listdir(os.path.join(srcdir, "main")) filenames.remove("root.xml") filenames.sort(key=len) filenames.insert(0, "root.xml") for filename in filenames: stem, ext = os.path.splitext(filename) if ext != ".xml": continue full_filename = os.path.join(srcdir, "main", filename) data_filename = os.path.join(destdir, "locale-data", stem + ".dat") data = {} if not need_conversion(data_filename, data, full_filename): continue tree = parse(full_filename) language = None elem = tree.find(".//identity/language") if elem is not None: language = elem.attrib["type"] territory = 
None elem = tree.find(".//identity/territory") if elem is not None: territory = elem.attrib["type"] else: territory = "001" # world regions = territory_containment.get(territory, []) log("Processing %s (Language = %s; Territory = %s)", filename, language, territory) # plural rules locale_id = "_".join(filter(None, [language, territory != "001" and territory or None])) if locale_id in plural_rules: data["plural_form"] = plural_rules[locale_id] # <localeDisplayNames> territories = data.setdefault("territories", {}) for elem in tree.findall(".//territories/territory"): if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib["type"] in territories: continue territories[elem.attrib["type"]] = _text(elem) languages = data.setdefault("languages", {}) for elem in tree.findall(".//languages/language"): if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib["type"] in languages: continue languages[elem.attrib["type"]] = _text(elem) variants = data.setdefault("variants", {}) for elem in tree.findall(".//variants/variant"): if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib["type"] in variants: continue variants[elem.attrib["type"]] = _text(elem) scripts = data.setdefault("scripts", {}) for elem in tree.findall(".//scripts/script"): if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib["type"] in scripts: continue scripts[elem.attrib["type"]] = _text(elem) # <dates> week_data = data.setdefault("week_data", {}) supelem = sup.find(".//weekData") for elem in supelem.findall("minDays"): territories = elem.attrib["territories"].split() if territory in territories or any([r in territories for r in regions]): week_data["min_days"] = int(elem.attrib["count"]) for elem in supelem.findall("firstDay"): territories = elem.attrib["territories"].split() if territory in territories or any([r in territories for r in regions]): week_data["first_day"] = weekdays[elem.attrib["day"]] for elem in supelem.findall("weekendStart"): territories = elem.attrib["territories"].split() if territory in territories or any([r in territories for r in regions]): week_data["weekend_start"] = weekdays[elem.attrib["day"]] for elem in supelem.findall("weekendEnd"): territories = elem.attrib["territories"].split() if territory in territories or any([r in territories for r in regions]): week_data["weekend_end"] = weekdays[elem.attrib["day"]] zone_formats = data.setdefault("zone_formats", {}) for elem in tree.findall(".//timeZoneNames/gmtFormat"): if "draft" not in elem.attrib and "alt" not in elem.attrib: zone_formats["gmt"] = text_type(elem.text).replace("{0}", "%s") break for elem in tree.findall(".//timeZoneNames/regionFormat"): if "draft" not in elem.attrib and "alt" not in elem.attrib: zone_formats["region"] = text_type(elem.text).replace("{0}", "%s") break for elem in tree.findall(".//timeZoneNames/fallbackFormat"): if "draft" not in elem.attrib and "alt" not in elem.attrib: zone_formats["fallback"] = text_type(elem.text).replace("{0}", "%(0)s").replace("{1}", "%(1)s") break for elem in tree.findall(".//timeZoneNames/fallbackRegionFormat"): if "draft" not in elem.attrib and "alt" not in elem.attrib: zone_formats["fallback_region"] = text_type(elem.text).replace("{0}", "%(0)s").replace("{1}", "%(1)s") break time_zones = data.setdefault("time_zones", {}) for elem in tree.findall(".//timeZoneNames/zone"): info = {} city = elem.findtext("exemplarCity") if city: info["city"] = text_type(city) for child in elem.findall("long/*"): info.setdefault("long", {})[child.tag] = 
text_type(child.text) for child in elem.findall("short/*"): info.setdefault("short", {})[child.tag] = text_type(child.text) time_zones[elem.attrib["type"]] = info meta_zones = data.setdefault("meta_zones", {}) for elem in tree.findall(".//timeZoneNames/metazone"): info = {} city = elem.findtext("exemplarCity") if city: info["city"] = text_type(city) for child in elem.findall("long/*"): info.setdefault("long", {})[child.tag] = text_type(child.text) for child in elem.findall("short/*"): info.setdefault("short", {})[child.tag] = text_type(child.text) meta_zones[elem.attrib["type"]] = info for calendar in tree.findall(".//calendars/calendar"): if calendar.attrib["type"] != "gregorian": # TODO: support other calendar types continue months = data.setdefault("months", {}) for ctxt in calendar.findall("months/monthContext"): ctxt_type = ctxt.attrib["type"] ctxts = months.setdefault(ctxt_type, {}) for width in ctxt.findall("monthWidth"): width_type = width.attrib["type"] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == "month": if ("draft" in elem.attrib or "alt" in elem.attrib) and int(elem.attrib["type"]) in widths: continue widths[int(elem.attrib.get("type"))] = text_type(elem.text) elif elem.tag == "alias": ctxts[width_type] = Alias( _translate_alias(["months", ctxt_type, width_type], elem.attrib["path"]) ) days = data.setdefault("days", {}) for ctxt in calendar.findall("days/dayContext"): ctxt_type = ctxt.attrib["type"] ctxts = days.setdefault(ctxt_type, {}) for width in ctxt.findall("dayWidth"): width_type = width.attrib["type"] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == "day": dtype = weekdays[elem.attrib["type"]] if ("draft" in elem.attrib or "alt" not in elem.attrib) and dtype in widths: continue widths[dtype] = text_type(elem.text) elif elem.tag == "alias": ctxts[width_type] = Alias( _translate_alias(["days", ctxt_type, width_type], elem.attrib["path"]) ) quarters = data.setdefault("quarters", {}) for ctxt in calendar.findall("quarters/quarterContext"): ctxt_type = ctxt.attrib["type"] ctxts = quarters.setdefault(ctxt.attrib["type"], {}) for width in ctxt.findall("quarterWidth"): width_type = width.attrib["type"] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == "quarter": if ("draft" in elem.attrib or "alt" in elem.attrib) and int(elem.attrib["type"]) in widths: continue widths[int(elem.attrib["type"])] = text_type(elem.text) elif elem.tag == "alias": ctxts[width_type] = Alias( _translate_alias(["quarters", ctxt_type, width_type], elem.attrib["path"]) ) eras = data.setdefault("eras", {}) for width in calendar.findall("eras/*"): width_type = NAME_MAP[width.tag] widths = eras.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == "era": if ("draft" in elem.attrib or "alt" in elem.attrib) and int(elem.attrib["type"]) in widths: continue widths[int(elem.attrib.get("type"))] = text_type(elem.text) elif elem.tag == "alias": eras[width_type] = Alias(_translate_alias(["eras", width_type], elem.attrib["path"])) # AM/PM periods = data.setdefault("periods", {}) for day_period_width in calendar.findall("dayPeriods/dayPeriodContext/dayPeriodWidth"): if day_period_width.attrib["type"] == "wide": for day_period in day_period_width.findall("dayPeriod"): if "alt" not in day_period.attrib: periods[day_period.attrib["type"]] = text_type(day_period.text) date_formats = data.setdefault("date_formats", {}) for format in calendar.findall("dateFormats"): for elem in 
format.getiterator(): if elem.tag == "dateFormatLength": if "draft" in elem.attrib and elem.attrib.get("type") in date_formats: continue try: date_formats[elem.attrib.get("type")] = dates.parse_pattern( text_type(elem.findtext("dateFormat/pattern")) ) except ValueError as e: error(e) elif elem.tag == "alias": date_formats = Alias(_translate_alias(["date_formats"], elem.attrib["path"])) time_formats = data.setdefault("time_formats", {}) for format in calendar.findall("timeFormats"): for elem in format.getiterator(): if elem.tag == "timeFormatLength": if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in time_formats: continue try: time_formats[elem.attrib.get("type")] = dates.parse_pattern( text_type(elem.findtext("timeFormat/pattern")) ) except ValueError as e: error(e) elif elem.tag == "alias": time_formats = Alias(_translate_alias(["time_formats"], elem.attrib["path"])) datetime_formats = data.setdefault("datetime_formats", {}) for format in calendar.findall("dateTimeFormats"): for elem in format.getiterator(): if elem.tag == "dateTimeFormatLength": if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get( "type" ) in datetime_formats: continue try: datetime_formats[elem.attrib.get("type")] = text_type( elem.findtext("dateTimeFormat/pattern") ) except ValueError as e: error(e) elif elem.tag == "alias": datetime_formats = Alias(_translate_alias(["datetime_formats"], elem.attrib["path"])) # <numbers> number_symbols = data.setdefault("number_symbols", {}) for elem in tree.findall(".//numbers/symbols/*"): if "draft" in elem.attrib or "alt" in elem.attrib: continue number_symbols[elem.tag] = text_type(elem.text) decimal_formats = data.setdefault("decimal_formats", {}) for elem in tree.findall(".//decimalFormats/decimalFormatLength"): if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in decimal_formats: continue if elem.findall("./alias"): # TODO map the alias to its target continue pattern = text_type(elem.findtext("./decimalFormat/pattern")) decimal_formats[elem.attrib.get("type")] = numbers.parse_pattern(pattern) scientific_formats = data.setdefault("scientific_formats", {}) for elem in tree.findall(".//scientificFormats/scientificFormatLength"): if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in scientific_formats: continue pattern = text_type(elem.findtext("scientificFormat/pattern")) scientific_formats[elem.attrib.get("type")] = numbers.parse_pattern(pattern) currency_formats = data.setdefault("currency_formats", {}) for elem in tree.findall(".//currencyFormats/currencyFormatLength/currencyFormat"): if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in currency_formats: continue for child in elem.getiterator(): if child.tag == "alias": currency_formats[elem.attrib.get("type")] = Alias( _translate_alias(["currency_formats", elem.attrib["type"]], child.attrib["path"]) ) elif child.tag == "pattern": pattern = text_type(child.text) currency_formats[elem.attrib.get("type")] = numbers.parse_pattern(pattern) percent_formats = data.setdefault("percent_formats", {}) for elem in tree.findall(".//percentFormats/percentFormatLength"): if ("draft" in elem.attrib or "alt" in elem.attrib) and elem.attrib.get("type") in percent_formats: continue pattern = text_type(elem.findtext("percentFormat/pattern")) percent_formats[elem.attrib.get("type")] = numbers.parse_pattern(pattern) currency_names = data.setdefault("currency_names", {}) currency_names_plural = 
data.setdefault("currency_names_plural", {}) currency_symbols = data.setdefault("currency_symbols", {}) for elem in tree.findall(".//currencies/currency"): code = elem.attrib["type"] for name in elem.findall("displayName"): if ("draft" in name.attrib) and code in currency_names: continue if "count" in name.attrib: currency_names_plural.setdefault(code, {})[name.attrib["count"]] = text_type(name.text) else: currency_names[code] = text_type(name.text) # TODO: support choice patterns for currency symbol selection symbol = elem.find("symbol") if symbol is not None and "draft" not in symbol.attrib and "choice" not in symbol.attrib: currency_symbols[code] = text_type(symbol.text) # <units> unit_patterns = data.setdefault("unit_patterns", {}) for elem in tree.findall(".//units/unitLength"): unit_length_type = elem.attrib["type"] for unit in elem.findall("unit"): unit_type = unit.attrib["type"] for pattern in unit.findall("unitPattern"): box = unit_type box += ":" + unit_length_type unit_patterns.setdefault(box, {})[pattern.attrib["count"]] = text_type(pattern.text) date_fields = data.setdefault("date_fields", {}) for elem in tree.findall(".//dates/fields/field"): field_type = elem.attrib["type"] date_fields.setdefault(field_type, {}) for rel_time in elem.findall("relativeTime"): rel_time_type = rel_time.attrib["type"] for pattern in rel_time.findall("relativeTimePattern"): date_fields[field_type].setdefault(rel_time_type, {})[pattern.attrib["count"]] = text_type( pattern.text ) outfile = open(data_filename, "wb") try: pickle.dump(data, outfile, 2) finally: outfile.close()
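# The import script above ends by pickling each locale's data dict with
# protocol 2 into a .dat file. A minimal sketch of that round-trip; the path
# and contents here are made up for illustration.
import os
import pickle
import tempfile

sample_data = {'number_symbols': {'decimal': '.', 'group': ','}}
sample_path = os.path.join(tempfile.mkdtemp(), 'xx.dat')
with open(sample_path, 'wb') as outfile:
    pickle.dump(sample_data, outfile, 2)
with open(sample_path, 'rb') as infile:
    assert pickle.load(infile) == sample_data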
# <numbers>
number_symbols = data.setdefault('number_symbols', {})
for elem in tree.findall('.//numbers/symbols/*'):
    if ('draft' in elem.attrib or 'alt' in elem.attrib):
        continue
    number_symbols[elem.tag] = unicode(elem.text)

decimal_formats = data.setdefault('decimal_formats', {})
for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
    if ('draft' in elem.attrib or 'alt' in elem.attrib) \
            and elem.attrib.get('type') in decimal_formats:
        continue
    pattern = unicode(elem.findtext('decimalFormat/pattern'))
    decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

scientific_formats = data.setdefault('scientific_formats', {})
for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
    if ('draft' in elem.attrib or 'alt' in elem.attrib) \
            and elem.attrib.get('type') in scientific_formats:
        continue
    pattern = unicode(elem.findtext('scientificFormat/pattern'))
    scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

currency_formats = data.setdefault('currency_formats', {})
for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
    if ('draft' in elem.attrib or 'alt' in elem.attrib) \
            and elem.attrib.get('type') in currency_formats:
        continue
    pattern = unicode(elem.findtext('currencyFormat/pattern'))