from icu import Locale, LocaleData


def coverage(font, threshold=10):
    # Characters the font can render, per its best cmap subtable.
    cmap = set(chr(c) for c in font.getBestCmap())
    languages = set()
    scripts = set()
    partial = {}
    for locale in Locale.getAvailableLocales():
        data = LocaleData(locale)
        exemplar = set("".join(data.getExemplarSet()))
        if not cmap.isdisjoint(exemplar):
            locale = Locale(locale)
            locale.addLikelySubtags()
            diff = exemplar - cmap
            if not diff:
                # Every exemplar character is covered.
                scripts.add(locale.getDisplayScript())
                languages.add(locale.getDisplayLanguage())
            elif len(diff) <= threshold:
                # Nearly covered: record the few missing characters.
                partial[locale.getDisplayLanguage()] = diff
    return scripts, languages, partial
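A minimal usage sketch for the function above, assuming fontTools and PyICU are installed; the font path is illustrative:

# Usage sketch (assumes fontTools and PyICU are installed; the font path
# below is illustrative).
from fontTools.ttLib import TTFont

font = TTFont("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf")
scripts, languages, partial = coverage(font)
print("Fully covered scripts:", sorted(scripts))
print("Fully covered languages:", sorted(languages))
for language, missing in sorted(partial.items()):
    print("%s: missing %d characters" % (language, len(missing)))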
HAVE_ICU = False
try:
    from icu import Locale, Collator
    HAVE_ICU = True
except ImportError:
    try:
        from PyICU import Locale, Collator
        HAVE_ICU = True
    except ImportError as err:
        # No logger, save the warning message for later.
        _icu_err = ("ICU not loaded because %s. Localization will be impaired. "
                    "Use your package manager to install PyICU" % str(err))

ICU_LOCALES = None
if HAVE_ICU:
    ICU_LOCALES = Locale.getAvailableLocales()

# Map of languages for converting to Microsoft locales and naming
# locales for display to the user. It's important to add to this list
# when a new translation is added. Note the dummy _(): That's just to
# get xgettext to include the string in gramps.pot; actual translation
# is done in _get_language_string() below.
# (The gramps officially-supported language list is ALL_LINGUAS in setup.py)
_ = lambda x: x
_LOCALE_NAMES = {
    'ar': ('Arabic_Saudi Arabia', '1256', _("Arabic")),
    'bg': ('Bulgarian_Bulgaria', '1251', _("Bulgarian")),
    'br': (None, None, _("Breton")),  # Windows has no translation for Breton
    'ca': ('Catalan_Spain', '1252', _("Catalan")),
    'cs': ('Czech_Czech Republic', '1250', _("Czech")),
    'da': ('Danish_Denmark', '1252', _("Danish")),
    # ... (the table continues in the original source)
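The excerpt imports Collator but the visible portion never uses it; a minimal sketch of the locale-aware sorting ICU collators are typically used for (the word list is illustrative):

from icu import Collator, Locale

# 'ch' is a distinct letter in Czech and sorts after 'h'.
collator = Collator.createInstance(Locale('cs_CZ'))
words = ['cukr', 'chléb', 'citron']
print(sorted(words, key=collator.getSortKey))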
# -*- coding: utf-8 -*-
"""
"""
import os
import json

from icu import Locale

BASE_PATH = os.path.dirname(os.path.abspath(__file__))

locales = []
for locale in Locale.getAvailableLocales().values():
    # Show each locale's name in that locale's own language.
    locales.append({'locale': locale.getName(),
                    'name': locale.getDisplayName(locale)})

# Use a context manager so the file is flushed and closed.
with open(os.path.join(BASE_PATH, 'locales.json'), 'w') as out:
    json.dump(locales, out)
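A quick read-back of the generated file, as a minimal check using the same paths as above:

# Minimal check: reload the file written above and show a few entries.
with open(os.path.join(BASE_PATH, 'locales.json')) as f:
    for entry in json.load(f)[:5]:
        print(entry['locale'], '->', entry['name'])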
# LOG.setLevel(logging.DEBUG)
HAVE_ICU = False
try:
    from icu import Locale, Collator
    HAVE_ICU = True
except ImportError:
    try:
        from PyICU import Locale, Collator
        HAVE_ICU = True
    except ImportError as err:
        # No logger, save the warning message for later.
        _icu_err = ("ICU not loaded because %s. Localization will be impaired. "
                    "Use your package manager to install PyICU" % str(err))

ICU_LOCALES = None
if HAVE_ICU:
    ICU_LOCALES = Locale.getAvailableLocales()

# Map of languages for converting to Microsoft locales and naming
# locales for display to the user. It's important to add to this list
# when a new translation is added. Note the dummy _(): That's just to
# get xgettext to include the string in wearnow.pot; actual translation
# is done in _get_language_string() below.
# (The wearnow officially-supported language list is ALL_LINGUAS in setup.py)
_ = lambda x: x
_LOCALE_NAMES = {
    'ar': ('Arabic_Saudi Arabia', '1256', _("Arabic")),
    'bg': ('Bulgarian_Bulgaria', '1251', _("Bulgarian")),
    'br': (None, None, _("Breton")),  # Windows has no translation for Breton
    'ca': ('Catalan_Spain', '1252', _("Catalan")),
    'cs': ('Czech_Czech Republic', '1250', _("Czech")),
    'da': ('Danish_Denmark', '1252', _("Danish")),
    # ... (the table continues in the original source)
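A hypothetical helper showing how the _LOCALE_NAMES table above is typically consulted; win_locale_for is illustrative and not part of the original module:

def win_locale_for(code):
    # Hypothetical lookup; returns the Microsoft locale name, or None
    # when Windows has no matching translation (e.g. Breton).
    entry = _LOCALE_NAMES.get(code)
    if entry is None:
        return None
    ms_name, codepage, display = entry
    return ms_name

print(win_locale_for('cs'))  # 'Czech_Czech Republic'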
# Excerpt: get_sys_fonts, unicode_blocks, get_unicode_tables_by_font and
# get_locale_chars are defined elsewhere in the original script.
import sys
from itertools import chain

import numpy as np
from icu import Locale
from scipy.sparse import lil_matrix
from tqdm import tqdm

fonts = get_sys_fonts()
font_names = np.array(list(fonts.keys()))
print("Found %d system fonts" % len(fonts), file=sys.stderr)

# Make a huge sparse binary matrix that gives the availability of glyphs
# for each char in each font.
glyph_avail = lil_matrix(
    (max(b['stop'] for b in unicode_blocks.values()), len(font_names)),
    dtype=np.uint8)
for i, (name, font) in tqdm(enumerate(fonts.items()), total=len(fonts),
                            desc="Checking glyph availability"):
    chars = get_unicode_tables_by_font(font)
    glyph_avail[chars, i] = 1

# Package all locales
locales = [(k, v) for k, v in Locale.getAvailableLocales().items() if k in [
    'en', 'te', 'th', 'vi', 'ar', 'he', 'km', 'ta', 'gu', 'bn', 'ml',
    'el', 'ru', 'ko', 'zh', 'ja']]
for code, locale in tqdm(locales, desc="Packaging locales"):
    chars = get_locale_chars(code, unicode=True)
    name = locale.getDisplayName().encode('ascii', 'ignore').decode('ascii')
    # Unicode code of each char
    char_codes = np.sort(list(chain(*chars.values())))
    # Where each code appears in the resulting matrix
    row_by_code = dict(zip(char_codes, range(len(char_codes))))
    # Convert the char dict to use matrix indices
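The sparse-matrix pattern above in miniature, with illustrative sizes (three fonts, code points up to 0x80):

import numpy as np
from scipy.sparse import lil_matrix

# Toy glyph-availability matrix: rows are code points, columns are fonts.
avail = lil_matrix((0x80, 3), dtype=np.uint8)
avail[[0x41, 0x42, 0x61], 0] = 1        # font 0 covers 'A', 'B', 'a'
print(avail[0x41, 0], avail[0x43, 0])   # -> 1 0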