예제 #1
0
def read_cldr_name_file(path, langcode, category):
    """
    Build the list of name quads for one language and one CLDR category.

    The raw entries come from `read_cldr_names(path, langcode, category)`
    and are visited in sorted order. Each entry is passed through
    `OVERRIDES` and a series of filters; the survivors are returned as
    `(langcode, subtag, name, priority)` tuples, where `priority` is 1 for
    names that came from an `-alt-*` entry and 3 otherwise.
    """
    quads = []
    entries = read_cldr_names(path, langcode, category)

    for code, label in sorted(entries.items()):
        override_key = (langcode, code)
        if override_key in OVERRIDES:
            label = OVERRIDES[override_key]
            if label is None:
                # An override of None removes the entry entirely.
                continue

        # Skip default entries that map a code to itself, which an
        # inattentive annotator just left there. Naming "zh-Hans" as
        # "zh (Hans)" is equally uninformative, so the normalized forms
        # are compared as well.
        if code == label or normalize_name(label) == normalize_name(code):
            continue

        # The -alt-menu entries are supposed to do things like alphabetize
        # "Mandarin Chinese" under "Chinese, Mandarin". A few languages
        # just put the string "mandarin" there, which seems wrong and
        # messes up our name lookups.
        if code.endswith('-alt-menu') and label == 'mandarin':
            continue

        # CLDR assigns multiple names to one code by adding -alt-* to
        # the end of the code. For example, the English name of 'az' is
        # Azerbaijani, but the English name of 'az-alt-short' is Azeri.
        # Strip that suffix and record the name with priority 1 instead
        # of the regular 3.
        base_code, alt_marker, _ = code.partition('-alt-')
        rank = 1 if alt_marker else 3

        quads.append((langcode, base_code, label, rank))

    return quads
예제 #2
0
def read_cldr_name_file(path, langcode, category):
    """
    Build the list of name quads for one language and one CLDR category.

    The raw entries come from `read_cldr_names(path, langcode, category)`
    and are visited in sorted order. Entries are replaced or dropped
    according to `OVERRIDES`, uninformative names are filtered out, and
    the rest are returned as `(langcode, subtag, name, priority)` tuples.
    `priority` is 1 for names taken from an `-alt-*` entry and 3 otherwise.
    """
    quads = []
    entries = read_cldr_names(path, langcode, category)

    for code, label in sorted(entries.items()):
        override_key = (langcode, code)
        if override_key in OVERRIDES:
            label = OVERRIDES[override_key]
            if label is None:
                # An override of None removes the entry entirely.
                continue

        # Skip default entries that map a code to itself, which a lazy
        # annotator just left there. Giving the name "zh (Hans)" to
        # "zh-Hans" is still lazy, so normalized forms are compared too.
        if code == label or normalize_name(label) == normalize_name(code):
            continue

        # CLDR assigns multiple names to one code by adding -alt-* to
        # the end of the code. For example, the English name of 'az' is
        # Azerbaijani, but the English name of 'az-alt-short' is Azeri.
        # Strip that suffix and record the name with priority 1 instead
        # of the regular 3.
        base_code, alt_marker, _ = code.partition('-alt-')
        rank = 1 if alt_marker else 3

        quads.append((langcode, base_code, label, rank))

    return quads
예제 #3
0
def update_names(names_fwd, names_rev, name_quads):
    """
    Merge a batch of name quads into the forward and reverse name tables.

    Both tables are modified in place; nothing is returned.

    names_fwd  -- mapping from '<referent, lowercased>@<name_language>' to a
                  name. An existing key is never overwritten, so the first
                  name seen for a key is the one that stays.
    names_rev  -- mapping from a language code to a table that maps each
                  normalized name to a list of
                  (name_language, referent, priority) tuples.
    name_quads -- iterable of (name_language, referent, name, priority).
    """
    for name_language, referent, name, priority in name_quads:
        # Keep only the language part of name_language, dropping any
        # territory or script.
        base_language = langcodes.get(name_language).language

        # Each name is indexed under the catch-all 'und' table and under
        # the table of its own base language. If the base language is
        # itself 'und', both lookups return the same table and the entry
        # is appended to it twice.
        tables = [
            names_rev.setdefault('und', {}),
            names_rev.setdefault(base_language, {}),
        ]
        entry = (name_language, referent, priority)
        for table in tables:
            bucket = table.setdefault(normalize_name(name), [])
            bucket.append(entry)

        # Record the forward name only if this key has none yet.
        forward_key = '{}@{}'.format(referent.lower(), name_language)
        names_fwd.setdefault(forward_key, name)