Beispiel #1
0
def get_plural_form_key(count, language=DEFAULT_LANGUAGE_CODE):
    mappings = {
        "en": {
            "one": "n is 1"
        },
        "cy": {
            "zero": "n is 0",
            "one": "n is 1",
            "two": "n is 2",
            "few": "n is 3",
            "many": "n is 6",
        },
        "ga": {
            "one": "n is 1",
            "two": "n is 2",
            "few": "n in 3..6",
            "many": "n in 7..10",
        },
        "eo": {
            "one": "n is 1"
        },
    }

    plural_rule = PluralRule(mappings[language])

    return plural_rule(count)
Beispiel #2
0
def _extract_plural_rules(file_path):
    rule_dict = {}
    prsup = parse(file_path)
    for elem in prsup.findall('.//plurals/pluralRules'):
        rules = []
        for rule in elem.findall('pluralRule'):
            rules.append((rule.attrib['count'], text_type(rule.text)))
        pr = PluralRule(rules)
        for locale in elem.attrib['locales'].split():
            rule_dict[locale] = pr
    return rule_dict
Beispiel #3
0
def plural_to_gettext(rule):
    """This is a copy of the code of ``babel.plural.to_gettext``.

    We need to use a custom version, because the original only returns
    a full plural_forms string, which the Babel catalog object does not
    allow us to assign to anything. Instead, we need the expr and the
    plural count separately. See http://babel.edgewall.org/ticket/291.
    """
    from babel.plural import (PluralRule, _fallback_tag, _plural_tags,
                              _GettextCompiler)
    rule = PluralRule.parse(rule)

    used_tags = rule.tags | set([_fallback_tag])
    _compile = _GettextCompiler().compile
    _get_index = [tag for tag in _plural_tags if tag in used_tags].index

    expr = ['(']
    for tag, ast in rule.abstract:
        expr.append('%s ? %d : ' % (_compile(ast), _get_index(tag)))
    expr.append('%d)' % _get_index(_fallback_tag))
    return len(used_tags), ''.join(expr)
Beispiel #4
0
def plural_to_gettext(rule):
    """This is a copy of the code of ``babel.plural.to_gettext``.

    We need to use a custom version, because the original only returns
    a full plural_forms string, which the Babel catalog object does not
    allow us to assign to anything. Instead, we need the expr and the
    plural count separately. See http://babel.edgewall.org/ticket/291.
    """
    from babel.plural import PluralRule, _fallback_tag, _plural_tags, _GettextCompiler

    rule = PluralRule.parse(rule)

    used_tags = rule.tags | set([_fallback_tag])
    _compile = _GettextCompiler().compile
    _get_index = [tag for tag in _plural_tags if tag in used_tags].index

    expr = ["("]
    for tag, ast in rule.abstract:
        expr.append("%s ? %d : " % (_compile(ast), _get_index(tag)))
    expr.append("%d)" % _get_index(_fallback_tag))
    return len(used_tags), "".join(expr)
Beispiel #5
0
    def plurality(self, n: Union[int, float]):
        warnings.warn(
            "Locale.plurality is deprecated and will be removed in the future. You should"
            " switch to ICULocale if you require plurality handling.",
            DeprecationWarning,
            stacklevel=3,
        )

        locale = self.babel(self.name)
        if locale not in _plural_cache:
            if "plural_rules" not in _plural_cache:
                with open(
                        Path(__file__).parent.parent / "data" /
                        "plurals.yml") as f:
                    # the warning from yaml.load doesn't apply here
                    with warnings.catch_warnings(record=True):
                        _plural_cache["plural_rules"] = yaml.safe_load(f)
            rules = _plural_cache["plural_rules"]
            _plural_cache[locale] = PluralRule(
                rules.get(closest_locale(locale, rules.keys()) or "en"))

        return _plural_cache[locale](n)
Beispiel #6
0
    :copyright: (c) 2013 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""

import os

from babel import localedata
from babel._compat import pickle, string_types
from babel.plural import PluralRule

__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
           'parse_locale']


_global_data = None
_default_plural_rule = PluralRule({})


def _raise_no_data_error():
    raise RuntimeError('The babel data files are not available. '
                       'This usually happens because you are using '
                       'a source checkout from Babel and you did '
                       'not build the data files.  Just make sure '
                       'to run "python setup.py import_cldr" before '
                       'installing the library.')


def get_global(key):
    """Return the dictionary for the given key in the global data.

    The global data is stored in the ``babel/global.dat`` file and contains
Beispiel #7
0
def main():
    parser = OptionParser(usage='%prog path/to/cldr')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..',
                           'babel')

    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))

    # Import global data from the supplemental files
    global_data = {}

    territory_zones = global_data.setdefault('territory_zones', {})
    zone_aliases = global_data.setdefault('zone_aliases', {})
    zone_territories = global_data.setdefault('zone_territories', {})
    for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
        tzid = elem.attrib['type']
        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
        zone_territories[tzid] = elem.attrib['territory']
        if 'aliases' in elem.attrib:
            for alias in elem.attrib['aliases'].split():
                zone_aliases[alias] = tzid

    # Import Metazone mapping
    meta_zones = global_data.setdefault('meta_zones', {})
    tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml'))
    for elem in tzsup.findall('.//timezone'):
        for child in elem.findall('usesMetazone'):
            if 'to' not in child.attrib:  # FIXME: support old mappings
                meta_zones[elem.attrib['type']] = child.attrib['mzone']

    outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
    try:
        pickle.dump(global_data, outfile, 2)
    finally:
        outfile.close()

    # build a territory containment mapping for inheritance
    regions = {}
    for elem in sup.findall('.//territoryContainment/group'):
        regions[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment
    territory_containment = {}
    region_items = regions.items()
    region_items.sort()
    for group, territory_list in region_items:
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set([]))
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # prepare the per-locale plural rules definitions
    plural_rules = {}
    prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml'))
    for elem in prsup.findall('.//plurals/pluralRules'):
        rules = []
        for rule in elem.findall('pluralRule'):
            rules.append((rule.attrib['count'], unicode(rule.text)))
        pr = PluralRule(rules)
        for locale in elem.attrib['locales'].split():
            plural_rules[locale] = pr

    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(lambda a, b: len(a) - len(b))
    filenames.insert(0, 'root.xml')

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        print >> sys.stderr, 'Processing input file %r' % filename
        tree = parse(os.path.join(srcdir, 'main', filename))
        data = {}

        language = None
        elem = tree.find('.//identity/language')
        if elem is not None:
            language = elem.attrib['type']
        print >> sys.stderr, '  Language:  %r' % language

        territory = None
        elem = tree.find('.//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001'  # world
        print >> sys.stderr, '  Territory: %r' % territory
        regions = territory_containment.get(territory, [])
        print >> sys.stderr, '  Regions:    %r' % regions

        # plural rules
        locale_id = '_'.join(
            filter(None, [language, territory != '001' and territory or None]))
        if locale_id in plural_rules:
            data['plural_form'] = plural_rules[locale_id]

        # <localeDisplayNames>

        territories = data.setdefault('territories', {})
        for elem in tree.findall('.//territories/territory'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in territories:
                continue
            territories[elem.attrib['type']] = _text(elem)

        languages = data.setdefault('languages', {})
        for elem in tree.findall('.//languages/language'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in languages:
                continue
            languages[elem.attrib['type']] = _text(elem)

        variants = data.setdefault('variants', {})
        for elem in tree.findall('.//variants/variant'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in variants:
                continue
            variants[elem.attrib['type']] = _text(elem)

        scripts = data.setdefault('scripts', {})
        for elem in tree.findall('.//scripts/script'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in scripts:
                continue
            scripts[elem.attrib['type']] = _text(elem)

        # <dates>

        week_data = data.setdefault('week_data', {})
        supelem = sup.find('.//weekData')

        for elem in supelem.findall('minDays'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['min_days'] = int(elem.attrib['count'])

        for elem in supelem.findall('firstDay'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['first_day'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendStart'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['weekend_start'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendEnd'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['weekend_end'] = weekdays[elem.attrib['day']]

        zone_formats = data.setdefault('zone_formats', {})
        for elem in tree.findall('.//timeZoneNames/gmtFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/regionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['region'] = unicode(elem.text).replace(
                    '{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback'] = unicode(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break

        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('.//timeZoneNames/zone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = unicode(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = unicode(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = unicode(child.text)
            time_zones[elem.attrib['type']] = info

        meta_zones = data.setdefault('meta_zones', {})
        for elem in tree.findall('.//timeZoneNames/metazone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = unicode(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = unicode(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = unicode(child.text)
            info['common'] = elem.findtext('commonlyUsed') == 'true'
            meta_zones[elem.attrib['type']] = info

        for calendar in tree.findall('.//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = months.setdefault(ctxt_type, {})
                for width in ctxt.findall('monthWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'month':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib.get('type'))] = unicode(
                                elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['months', ctxt_type, width_type],
                                    elem.attrib['path']))

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = days.setdefault(ctxt_type, {})
                for width in ctxt.findall('dayWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'day':
                            dtype = weekdays[elem.attrib['type']]
                            if ('draft' in elem.attrib or 'alt' not in elem.attrib) \
                                    and dtype in widths:
                                continue
                            widths[dtype] = unicode(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['days', ctxt_type, width_type],
                                    elem.attrib['path']))

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = quarters.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('quarterWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'quarter':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib['type'])] = unicode(
                                elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['quarters', ctxt_type, width_type],
                                    elem.attrib['path']))

            eras = data.setdefault('eras', {})
            for width in calendar.findall('eras/*'):
                width_type = NAME_MAP[width.tag]
                widths = eras.setdefault(width_type, {})
                for elem in width.getiterator():
                    if elem.tag == 'era':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and int(elem.attrib['type']) in widths:
                            continue
                        widths[int(elem.attrib.get('type'))] = unicode(
                            elem.text)
                    elif elem.tag == 'alias':
                        eras[width_type] = Alias(
                            _translate_alias(['eras', width_type],
                                             elem.attrib['path']))

            # AM/PM
            periods = data.setdefault('periods', {})
            for elem in calendar.findall('am'):
                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                        and elem.tag in periods:
                    continue
                periods[elem.tag] = unicode(elem.text)
            for elem in calendar.findall('pm'):
                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                        and elem.tag in periods:
                    continue
                periods[elem.tag] = unicode(elem.text)

            date_formats = data.setdefault('date_formats', {})
            for format in calendar.findall('dateFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'dateFormatLength':
                        if 'draft' in elem.attrib and \
                                elem.attrib.get('type') in date_formats:
                            continue
                        try:
                            date_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
                        except ValueError, e:
                            print >> sys.stderr, 'ERROR: %s' % e
                    elif elem.tag == 'alias':
                        date_formats = Alias(
                            _translate_alias(['date_formats'],
                                             elem.attrib['path']))

            time_formats = data.setdefault('time_formats', {})
            for format in calendar.findall('timeFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'timeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in time_formats:
                            continue
                        try:
                            time_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
                        except ValueError, e:
                            print >> sys.stderr, 'ERROR: %s' % e
                    elif elem.tag == 'alias':
                        time_formats = Alias(
                            _translate_alias(['time_formats'],
                                             elem.attrib['path']))
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from babel.plural import PluralRule

import glob
import json
import os.path

app = FastAPI()

app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

default_fallback = 'en'
languages = {}
plural_rule = PluralRule({'one': 'n in 0..1'})

language_list = glob.glob("languages/*.json")
for lang in language_list:
    filename = os.path.basename(lang)
    lang_code, ext = os.path.splitext(filename)

    with open(lang, 'r', encoding='utf8') as file:
        languages[lang_code] = json.load(file)


# custom filters for Jinja2
def plural_formatting(key_value, input, locale):
    key = ''
    for i in languages[locale]:
        if (key_value == languages[locale][i]):
Beispiel #9
0
def main():
    parser = OptionParser(usage='%prog path/to/cldr')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..',
                           'babel')

    sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml')
    bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml'))
    sup_windows_zones = parse(
        os.path.join(srcdir, 'supplemental', 'windowsZones.xml'))
    sup_metadata = parse(
        os.path.join(srcdir, 'supplemental', 'supplementalMetadata.xml'))
    sup_likely = parse(
        os.path.join(srcdir, 'supplemental', 'likelySubtags.xml'))
    sup = parse(sup_filename)

    # Import global data from the supplemental files
    global_path = os.path.join(destdir, 'global.dat')
    global_data = {}
    if need_conversion(global_path, global_data, sup_filename):
        territory_zones = global_data.setdefault('territory_zones', {})
        zone_aliases = global_data.setdefault('zone_aliases', {})
        zone_territories = global_data.setdefault('zone_territories', {})
        win_mapping = global_data.setdefault('windows_zone_mapping', {})
        language_aliases = global_data.setdefault('language_aliases', {})
        territory_aliases = global_data.setdefault('territory_aliases', {})
        script_aliases = global_data.setdefault('script_aliases', {})
        variant_aliases = global_data.setdefault('variant_aliases', {})
        likely_subtags = global_data.setdefault('likely_subtags', {})
        territory_currencies = global_data.setdefault('territory_currencies',
                                                      {})

        # create auxiliary zone->territory map from the windows zones (we don't set
        # the 'zones_territories' map directly here, because there are some zones
        # aliases listed and we defer the decision of which ones to choose to the
        # 'bcp47' data
        _zone_territory_map = {}
        for map_zone in sup_windows_zones.findall(
                './/windowsZones/mapTimezones/mapZone'):
            if map_zone.attrib.get('territory') == '001':
                win_mapping[map_zone.attrib['other']] = \
                    map_zone.attrib['type'].split()[0]
            for tzid in text_type(map_zone.attrib['type']).split():
                _zone_territory_map[tzid] = \
                    text_type(map_zone.attrib['territory'])

        for key_elem in bcp47_timezone.findall('.//keyword/key'):
            if key_elem.attrib['name'] == 'tz':
                for elem in key_elem.findall('type'):
                    aliases = text_type(elem.attrib['alias']).split()
                    tzid = aliases.pop(0)
                    territory = _zone_territory_map.get(tzid, '001')
                    territory_zones.setdefault(territory, []).append(tzid)
                    zone_territories[tzid] = territory
                    for alias in aliases:
                        zone_aliases[alias] = tzid
                break

        # Import Metazone mapping
        meta_zones = global_data.setdefault('meta_zones', {})
        tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
        for elem in tzsup.findall('.//timezone'):
            for child in elem.findall('usesMetazone'):
                if 'to' not in child.attrib:  # FIXME: support old mappings
                    meta_zones[elem.attrib['type']] = child.attrib['mzone']

        # Language aliases
        for alias in sup_metadata.findall('.//alias/languageAlias'):
            # We don't have a use for those at the moment.  They don't
            # pass our parser anyways.
            if '-' in alias.attrib['type']:
                continue
            language_aliases[
                alias.attrib['type']] = alias.attrib['replacement']

        # Territory aliases
        for alias in sup_metadata.findall('.//alias/territoryAlias'):
            territory_aliases[alias.attrib['type']] = \
                alias.attrib['replacement'].split()

        # Script aliases
        for alias in sup_metadata.findall('.//alias/scriptAlias'):
            script_aliases[alias.attrib['type']] = alias.attrib['replacement']

        # Variant aliases
        for alias in sup_metadata.findall('.//alias/variantAlias'):
            repl = alias.attrib.get('replacement')
            if repl:
                variant_aliases[alias.attrib['type']] = repl

        # Likely subtags
        for likely_subtag in sup_likely.findall(
                './/likelySubtags/likelySubtag'):
            likely_subtags[likely_subtag.attrib['from']] = \
                likely_subtag.attrib['to']

        # Currencies in territories
        for region in sup.findall('.//currencyData/region'):
            region_code = region.attrib['iso3166']
            region_currencies = []
            for currency in region.findall('./currency'):
                cur_start = _parse_currency_date(currency.attrib.get('from'))
                cur_end = _parse_currency_date(currency.attrib.get('to'))
                region_currencies.append(
                    (currency.attrib['iso4217'], cur_start, cur_end,
                     currency.attrib.get('tender', 'true') == 'true'))
            region_currencies.sort(key=_currency_sort_key)
            territory_currencies[region_code] = region_currencies

        outfile = open(global_path, 'wb')
        try:
            pickle.dump(global_data, outfile, 2)
        finally:
            outfile.close()

    # build a territory containment mapping for inheritance
    regions = {}
    for elem in sup.findall('.//territoryContainment/group'):
        regions[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment
    territory_containment = {}
    region_items = sorted(regions.items())
    for group, territory_list in region_items:
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set([]))
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # prepare the per-locale plural rules definitions
    plural_rules = {}
    prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml'))
    for elem in prsup.findall('.//plurals/pluralRules'):
        rules = []
        for rule in elem.findall('pluralRule'):
            rules.append((rule.attrib['count'], text_type(rule.text)))
        pr = PluralRule(rules)
        for locale in elem.attrib['locales'].split():
            plural_rules[locale] = pr

    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        full_filename = os.path.join(srcdir, 'main', filename)
        data_filename = os.path.join(destdir, 'localedata', stem + '.dat')

        data = {}
        if not need_conversion(data_filename, data, full_filename):
            continue

        tree = parse(full_filename)

        language = None
        elem = tree.find('.//identity/language')
        if elem is not None:
            language = elem.attrib['type']

        territory = None
        elem = tree.find('.//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001'  # world
        regions = territory_containment.get(territory, [])

        log('Processing %s (Language = %s; Territory = %s)', filename,
            language, territory)

        # plural rules
        locale_id = '_'.join(
            filter(None, [language, territory != '001' and territory or None]))
        if locale_id in plural_rules:
            data['plural_form'] = plural_rules[locale_id]

        # <localeDisplayNames>

        territories = data.setdefault('territories', {})
        for elem in tree.findall('.//territories/territory'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in territories:
                continue
            territories[elem.attrib['type']] = _text(elem)

        languages = data.setdefault('languages', {})
        for elem in tree.findall('.//languages/language'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in languages:
                continue
            languages[elem.attrib['type']] = _text(elem)

        variants = data.setdefault('variants', {})
        for elem in tree.findall('.//variants/variant'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in variants:
                continue
            variants[elem.attrib['type']] = _text(elem)

        scripts = data.setdefault('scripts', {})
        for elem in tree.findall('.//scripts/script'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib['type'] in scripts:
                continue
            scripts[elem.attrib['type']] = _text(elem)

        # <dates>

        week_data = data.setdefault('week_data', {})
        supelem = sup.find('.//weekData')

        for elem in supelem.findall('minDays'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['min_days'] = int(elem.attrib['count'])

        for elem in supelem.findall('firstDay'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['first_day'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendStart'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['weekend_start'] = weekdays[elem.attrib['day']]

        for elem in supelem.findall('weekendEnd'):
            territories = elem.attrib['territories'].split()
            if territory in territories or any(
                [r in territories for r in regions]):
                week_data['weekend_end'] = weekdays[elem.attrib['day']]

        zone_formats = data.setdefault('zone_formats', {})
        for elem in tree.findall('.//timeZoneNames/gmtFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/regionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['region'] = text_type(elem.text).replace(
                    '{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback'] = text_type(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback_region'] = text_type(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break

        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('.//timeZoneNames/zone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            time_zones[elem.attrib['type']] = info

        meta_zones = data.setdefault('meta_zones', {})
        for elem in tree.findall('.//timeZoneNames/metazone'):
            info = {}
            city = elem.findtext('exemplarCity')
            if city:
                info['city'] = text_type(city)
            for child in elem.findall('long/*'):
                info.setdefault('long', {})[child.tag] = text_type(child.text)
            for child in elem.findall('short/*'):
                info.setdefault('short', {})[child.tag] = text_type(child.text)
            meta_zones[elem.attrib['type']] = info

        for calendar in tree.findall('.//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = months.setdefault(ctxt_type, {})
                for width in ctxt.findall('monthWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'month':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib.get('type'))] = \
                                text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['months', ctxt_type, width_type],
                                    elem.attrib['path']))

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = days.setdefault(ctxt_type, {})
                for width in ctxt.findall('dayWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'day':
                            dtype = weekdays[elem.attrib['type']]
                            if ('draft' in elem.attrib or
                                'alt' not in elem.attrib) \
                                    and dtype in widths:
                                continue
                            widths[dtype] = text_type(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['days', ctxt_type, width_type],
                                    elem.attrib['path']))

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = quarters.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('quarterWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'quarter':
                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib['type'])] = text_type(
                                elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['quarters', ctxt_type, width_type],
                                    elem.attrib['path']))

            eras = data.setdefault('eras', {})
            for width in calendar.findall('eras/*'):
                width_type = NAME_MAP[width.tag]
                widths = eras.setdefault(width_type, {})
                for elem in width.getiterator():
                    if elem.tag == 'era':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and int(elem.attrib['type']) in widths:
                            continue
                        widths[int(elem.attrib.get('type'))] = text_type(
                            elem.text)
                    elif elem.tag == 'alias':
                        eras[width_type] = Alias(
                            _translate_alias(['eras', width_type],
                                             elem.attrib['path']))

            # AM/PM
            periods = data.setdefault('periods', {})
            for day_period_width in calendar.findall(
                    'dayPeriods/dayPeriodContext/dayPeriodWidth'):
                if day_period_width.attrib['type'] == 'wide':
                    for day_period in day_period_width.findall('dayPeriod'):
                        if 'alt' not in day_period.attrib:
                            periods[day_period.attrib['type']] = text_type(
                                day_period.text)

            date_formats = data.setdefault('date_formats', {})
            for format in calendar.findall('dateFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'dateFormatLength':
                        if 'draft' in elem.attrib and \
                                elem.attrib.get('type') in date_formats:
                            continue
                        try:
                            date_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(
                                    elem.findtext('dateFormat/pattern')))
                        except ValueError as e:
                            error(e)
                    elif elem.tag == 'alias':
                        date_formats = Alias(
                            _translate_alias(['date_formats'],
                                             elem.attrib['path']))

            time_formats = data.setdefault('time_formats', {})
            for format in calendar.findall('timeFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'timeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in time_formats:
                            continue
                        try:
                            time_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(text_type(
                                    elem.findtext('timeFormat/pattern')))
                        except ValueError as e:
                            error(e)
                    elif elem.tag == 'alias':
                        time_formats = Alias(
                            _translate_alias(['time_formats'],
                                             elem.attrib['path']))

            datetime_formats = data.setdefault('datetime_formats', {})
            for format in calendar.findall('dateTimeFormats'):
                for elem in format.getiterator():
                    if elem.tag == 'dateTimeFormatLength':
                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                                and elem.attrib.get('type') in datetime_formats:
                            continue
                        try:
                            datetime_formats[elem.attrib.get('type')] = \
                                text_type(elem.findtext('dateTimeFormat/pattern'))
                        except ValueError as e:
                            error(e)
                    elif elem.tag == 'alias':
                        datetime_formats = Alias(
                            _translate_alias(['datetime_formats'],
                                             elem.attrib['path']))

        # <numbers>

        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('.//numbers/symbols/*'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib):
                continue
            number_symbols[elem.tag] = text_type(elem.text)

        decimal_formats = data.setdefault('decimal_formats', {})
        for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in decimal_formats:
                continue
            if elem.findall('./alias'):
                # TODO map the alias to its target
                continue
            pattern = text_type(elem.findtext('./decimalFormat/pattern'))
            decimal_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        scientific_formats = data.setdefault('scientific_formats', {})
        for elem in tree.findall(
                './/scientificFormats/scientificFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in scientific_formats:
                continue
            pattern = text_type(elem.findtext('scientificFormat/pattern'))
            scientific_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        currency_formats = data.setdefault('currency_formats', {})
        for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in currency_formats:
                continue
            pattern = text_type(elem.findtext('currencyFormat/pattern'))
            currency_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        percent_formats = data.setdefault('percent_formats', {})
        for elem in tree.findall('.//percentFormats/percentFormatLength'):
            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                    and elem.attrib.get('type') in percent_formats:
                continue
            pattern = text_type(elem.findtext('percentFormat/pattern'))
            percent_formats[elem.attrib.get('type')] = \
                numbers.parse_pattern(pattern)

        currency_names = data.setdefault('currency_names', {})
        currency_names_plural = data.setdefault('currency_names_plural', {})
        currency_symbols = data.setdefault('currency_symbols', {})
        for elem in tree.findall('.//currencies/currency'):
            code = elem.attrib['type']
            for name in elem.findall('displayName'):
                if ('draft' in name.attrib) and code in currency_names:
                    continue
                if 'count' in name.attrib:
                    currency_names_plural.setdefault(
                        code, {})[name.attrib['count']] = text_type(name.text)
                else:
                    currency_names[code] = text_type(name.text)
            # TODO: support choice patterns for currency symbol selection
            symbol = elem.find('symbol')
            if symbol is not None and 'draft' not in symbol.attrib \
                    and 'choice' not in symbol.attrib:
                currency_symbols[code] = text_type(symbol.text)

        # <units>

        unit_patterns = data.setdefault('unit_patterns', {})
        for elem in tree.findall('.//units/unit'):
            unit_type = elem.attrib['type']
            for pattern in elem.findall('unitPattern'):
                box = unit_type
                if 'alt' in pattern.attrib:
                    box += ':' + pattern.attrib['alt']
                unit_patterns.setdefault(box, {})[pattern.attrib['count']] = \
                    text_type(pattern.text)

        outfile = open(data_filename, 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()