Example #1
0
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file), 'rb') as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                share_source_strings(c, source_strings)
                c.plural_func = get_function_from_rule(c.plural_expr)
                replace_unused_singulars(c)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e, {})
    del source_strings

    # Prepare a unique and sorted list for use in the language switcher
    percent = lambda l, total: sum((percent(s, len(s)) if isinstance(s, tuple) else 1) for s in l if s) / total
    for l in list(locales.values()):
        if l.language == 'en':
            l.completion = 1
            continue
        l.completion = percent([m.string for m in l.catalog if m.id and not m.fuzzy], len(l.catalog))
        if l.completion == 0:
            del locales[l.language]
    loc_url = canonical_scheme+'://%s.'+canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    port = int(port) if port else socket.getservbyname(canonical_scheme, 'tcp')
    subdomains = {
        l.subdomain: loc_url % l.subdomain for l in locales.values()
        if resolve(l.subdomain + '.' + domain, port)
    }
    lang_list = sorted(
        (
            (l.completion, l.language, l.language_name.title(), loc_url % l.subdomain)
            for l in set(locales.values()) if l.completion > 0.5
        ),
        key=lambda t: (-t[0], t[1]),
    )

    # Add year-less date format
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for l in locales.values():
        short_format = l.date_formats['short'].pattern
        assert short_format[0] == 'y' or short_format[-1] == 'y', (l.language, short_format)
        l.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats['standard'] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currencies['USD'] = 'dollar états-unien'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
            if md.startswith('# '):
                md = '\n'.join(md.split('\n')[1:]).strip()
                md = heading_re.sub(r'##\1', md)
            docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales, 'subdomains': subdomains}
Example #2
0
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    def compute_percentage(it, total):
        return sum(
            (compute_percentage(s, len(s)) if isinstance(s, tuple) else 1)
            for s in it if s) / total

    # Load the base locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue
            lang = parts[0]
            with open(os.path.join(localeDir, file), 'rb') as f:
                l = Locale(lang)
                c = l.catalog = read_po(f)
                share_source_strings(c, source_strings)
                c.plural_func = get_function_from_rule(c.plural_expr)
                replace_unused_singulars(c)
                l.completion = compute_percentage(
                    (m.string for m in c if m.id and not m.fuzzy), len(c))
                if l.completion == 0:
                    continue
                else:
                    locales[lang.lower()] = l
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            tell_sentry(e)
    del source_strings

    # Load the variants
    for loc_id in babel.localedata.locale_identifiers():
        if loc_id in locales:
            continue
        i = loc_id.rfind('_')
        if i == -1:
            continue
        base = locales.get(loc_id[:i])
        if base:
            l = locales[loc_id.lower()] = Locale.parse(loc_id)
            l.catalog = base.catalog
            l.completion = base.completion
            l.countries = base.countries
            l.languages_2 = base.languages_2

    # Unload the Babel data that we no longer need
    # We load a lot of data to populate the LANGUAGE_NAMES dict, we don't want
    # to keep it all in RAM.
    used_data_dict_addresses = set(id(l._data._data) for l in locales.values())
    for key, data_dict in list(babel.localedata._cache.items()):
        if id(data_dict) not in used_data_dict_addresses:
            del babel.localedata._cache[key]

    # Prepare a unique and sorted list for use in the language switcher
    loc_url = canonical_scheme + '://%s.' + canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    port = int(port) if port else socket.getservbyname(canonical_scheme, 'tcp')
    subdomains = {
        l.subdomain: loc_url % l.subdomain
        for l in locales.values()
        if not l.territory and resolve(l.subdomain + '.' + domain, port)
    }
    lang_list = sorted(
        ((l.completion, l.language, l.language_name.title(),
          loc_url % l.subdomain) for l in set(locales.values())
         if not l.territory and l.completion > 0.5),
        key=lambda t: (-t[0], t[1]),
    )

    # Add year-less date format
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for l in locales.values():
        short_format = l.date_formats['short'].pattern
        assert short_format[0] == 'y' or short_format[-1] == 'y', (
            l.language, short_format)
        l.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Add universal strings
    # These strings don't need to be translated, but they have to be in the catalogs
    # so that they're counted as translated.
    for l in locales.values():
        l.catalog.add("PayPal", "PayPal")

    # Patch the locales to look less formal
    locales['fr'].currency_formats['standard'] = parse_pattern(
        '#,##0.00\u202f\xa4')
    locales['fr'].currencies['USD'] = 'dollar états-unien'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
            if md.startswith('# '):
                md = '\n'.join(md.split('\n')[1:]).strip()
                md = heading_re.sub(r'##\1', md)
            docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {
        'docs': docs,
        'lang_list': lang_list,
        'locales': locales,
        'subdomains': subdomains
    }