def _create_lang_script_to_names(lang_script_data): """Generate a map from lang-script to English (and possibly native) names. Whether the script is included in the name depends on the number of used and unused scripts. If there's one used script, that script is omitted. Else if there's no used script and one unused script, that script is omitted. Else the script is included. If there's no English name for the lang_script, it is excluded.""" lang_to_names = {} for lang in lang_script_data: used, unused = lang_script_data[lang] if len(used) == 1: exclude_script = iter(used).next() elif not used and len(unused) == 1: exclude_script = iter(unused).next() else: exclude_script = '' for script in (used | unused): lang_script = lang + '-' + script target = lang if script == exclude_script else lang_script # special case, not generally useful if target.startswith('und-'): en_name = cldr_data.get_english_script_name(target[4:]) + ' script' else: en_name = cldr_data.get_english_language_name(target) if not en_name: print '!No english name for %s' % lang_script continue native_name = cldr_data.get_native_language_name(lang_script, exclude_script) if native_name == en_name: native_name = None lang_to_names[lang_script] = [en_name, native_name] if native_name else [en_name] return lang_to_names
def generate_table(filename):
    """Write an HTML table of per-script language samples to filename."""
    with codecs.open(filename, "w", "utf-8") as f:
        script_to_samples = _get_script_to_samples()

        f.write(_HTML_HEADER)
        f.write("<table>\n")
        f.write("<tr><th>Script<br/>BCP<th>name<th>type<th>text\n")
        for script, samples in sorted(script_to_samples.items()):
            script_en = cldr_data.get_english_script_name(script)
            f.write("<tr><th colspan=4>%s\n" % script_en)
            for bcp, sample_type, sample_text in samples:
                try:
                    lsrv = cldr_data.loc_tag_to_lsrv(bcp)
                    # Drop the script subtag; rows are already grouped by script.
                    lsrv = (lsrv[0], None, lsrv[2], lsrv[3])
                    bcp_no_script = cldr_data.lsrv_to_loc_tag(lsrv)
                    bcp_en = cldr_data.get_english_language_name(bcp_no_script)
                    if not bcp_en:
                        bcp_en = "No name"
                    if bcp_en == "Unknown Language" and sample_type == "chars":
                        bcp_en = "(characters)"
                except Exception:
                    print("could not get english name for %s" % bcp)
                    # Fix: bcp_no_script was unbound here when loc_tag_to_lsrv
                    # raised, causing a NameError below; fall back to the raw
                    # tag so the row still renders.
                    bcp_no_script = bcp
                    bcp_en = bcp
                cols = ["<tr>"]
                cols.append(bcp_no_script)
                cols.append(bcp_en)
                cols.append(sample_type)
                cols.append(sample_text)
                f.write("<td>".join(cols) + "\n")
            # Blank separator row between script groups.
            f.write("<tr><td colspan=4> \n")
        f.write("</table>\n")
        f.write(_HTML_FOOTER + "\n")
def generate_table(filename): with codecs.open(filename, 'w', 'utf-8') as f: script_to_samples = _get_script_to_samples() print >> f, _HTML_HEADER print >> f, '<table>' print >> f, '<tr><th>Script<br/>BCP<th>name<th>type<th>text' for script, samples in sorted(script_to_samples.iteritems()): script_en = cldr_data.get_english_script_name(script) print >> f, '<tr><th colspan=4>%s' % script_en for bcp, sample_type, sample_text in samples: try: lsrv = cldr_data.loc_tag_to_lsrv(bcp) lsrv = (lsrv[0], None, lsrv[2], lsrv[3]) bcp_no_script = cldr_data.lsrv_to_loc_tag(lsrv) bcp_en = cldr_data.get_english_language_name(bcp_no_script) if not bcp_en: bcp_en = 'No name' if bcp_en == 'Unknown Language' and sample_type == 'chars': bcp_en = '(characters)' except: print 'could not get english name for %s' % bcp bcp_en = bcp cols = ['<tr>'] cols.append(bcp_no_script) cols.append(bcp_en) cols.append(sample_type) cols.append(sample_text) print >> f, '<td>'.join(cols) print >> f, '<tr><td colspan=4> ' print >> f, '</table>' print >> f, _HTML_FOOTER
def lang_key(lang_tag): name = cldr_data.get_english_language_name(lang_tag) if not name: print 'no name for %s' % lang_tag return None if name.endswith (' script)'): ix = name.rfind('(') - 1 script_sfx = ' ' + name[ix + 2: len(name) - 8] name = name[:ix] else: script_sfx = '' key = name for prefix in ['Ancient', 'Central', 'Eastern', 'Lower', 'Middle', 'North', 'Northern', 'Old', 'Southern', 'Southwestern', 'Upper', 'West', 'Western']: if name.startswith(prefix + ' '): key = name[len(prefix) + 1:] + ' ' + name[:len(prefix)] break for cluster in ['Arabic', 'French', 'Chinese', 'English', 'German', 'Hindi', 'Malay', 'Nahuatl', 'Tamazight', 'Thai']: if name.find(cluster) != -1: key = cluster + '-' + name break return key + script_sfx
def lang_key(lang_tag): name = cldr_data.get_english_language_name(lang_tag) if not name: print 'no name for %s' % lang_tag return None if name.endswith(' script)'): ix = name.rfind('(') - 1 script_sfx = ' ' + name[ix + 2:len(name) - 8] name = name[:ix] else: script_sfx = '' key = name for prefix in [ 'Ancient', 'Central', 'Eastern', 'Lower', 'Middle', 'North', 'Northern', 'Old', 'Southern', 'Southwestern', 'Upper', 'West', 'Western' ]: if name.startswith(prefix + ' '): key = name[len(prefix) + 1:] + ' ' + name[:len(prefix)] break for cluster in [ 'Arabic', 'French', 'Chinese', 'English', 'German', 'Hindi', 'Malay', 'Nahuatl', 'Tamazight', 'Thai' ]: if name.find(cluster) != -1: key = cluster + '-' + name break return key + script_sfx
def _create_lang_script_to_names(lang_script_data):
    """Generate a map from lang-script to English (and possibly native) names.

    Whether the script is included in the name depends on the number of used
    and unused scripts.  If there's one used script, that script is omitted.
    Else if there's no used script and one unused script, that script is
    omitted.  Else the script is included.  If there's no English name for
    the lang_script, it is excluded.
    """
    lang_to_names = {}
    for lang in lang_script_data:
        used, unused = lang_script_data[lang]
        # Determine which script (if any) is redundant in the display name.
        # next(iter(s)) replaces the Py2-only iter(s).next() idiom.
        if len(used) == 1:
            exclude_script = next(iter(used))
        elif not used and len(unused) == 1:
            exclude_script = next(iter(unused))
        else:
            exclude_script = ''

        for script in (used | unused):
            lang_script = lang + '-' + script
            target = lang if script == exclude_script else lang_script
            # special case, not generally useful
            if target.startswith('und-'):
                en_name = cldr_data.get_english_script_name(
                    target[4:]) + ' script'
            else:
                en_name = cldr_data.get_english_language_name(target)
            if not en_name:
                # Easier than patching the cldr_data, not sure I want to go there.
                if lang_script == 'tlh-Piqd':
                    en_name = u'Klingon'
                else:
                    _log('No english name for %s' % lang_script)
                    continue
            native_name = cldr_data.get_native_language_name(
                lang_script, exclude_script)
            if native_name == en_name:
                native_name = None

            lang_to_names[lang_script] = ([en_name, native_name]
                                          if native_name else [en_name])

    return lang_to_names
def build_data_json(self, families, family_zip_info, universal_zip_info,
                    family_id_to_lang_tags, family_id_to_regions,
                    lang_tag_to_family_ids, region_to_family_ids):
  """Assemble and write the 'data' json file.

  Builds OrderedDicts (so the emitted json key order is stable) describing
  each family's name/sizes/counts, the language and region indexes, and the
  universal package sizes, then writes the result via self.write_json.
  """
  data_obj = collections.OrderedDict()
  families_obj = collections.OrderedDict()
  # Sort families by English name, except 'Noto Sans' and 'Noto Serif' come first
  family_ids = [family_id for family_id in families
                if family_id != 'sans' and family_id != 'serif']
  family_ids = sorted(family_ids, key=lambda f: families[f].name)
  sorted_ids = ['sans', 'serif']
  sorted_ids.extend(family_ids)
  for k in sorted_ids:
    family = families[k]
    family_obj = {}
    family_obj['name'] = family.name

    # zip info tuple is (name, hinted_size, unhinted_size); name unused here.
    name, hinted_size, unhinted_size = family_zip_info[k]
    pkg_obj = collections.OrderedDict()
    if hinted_size:
      pkg_obj['hinted'] = hinted_size
    if unhinted_size:
      pkg_obj['unhinted'] = unhinted_size
    family_obj['pkgSize'] = pkg_obj

    # A family has hinted members, unhinted members, or both; count whichever
    # member list is populated.
    family_obj['fonts'] = len(family.hinted_members or family.unhinted_members)
    family_obj['langs'] = len(family_id_to_lang_tags[k])
    family_obj['regions'] = len(family_id_to_regions[k])

    families_obj[k] = family_obj
  data_obj['family'] = families_obj

  data_obj['familyOrder'] = sorted_ids

  langs_obj = collections.OrderedDict()
  # Dont list 'und-' lang tags, these are for default samples and not listed in the UI
  lang_tags = [lang for lang in lang_tag_to_family_ids
               if not lang.startswith('und-')]

  lang_tags = sorted(lang_tags,
                     key=lambda l: cldr_data.get_english_language_name(l))
  for lang in lang_tags:
    lang_obj = collections.OrderedDict()
    english_name = cldr_data.get_english_language_name(lang)
    lang_obj['name'] = english_name
    lang_obj['families'] = sorted(lang_tag_to_family_ids[lang])
    native_name = cldr_data.get_native_language_name(lang)
    if native_name and native_name != english_name:
      # Native name is exposed as a search keyword, not a display name.
      lang_obj['keywords'] = [native_name]
    langs_obj[lang] = lang_obj
  data_obj['lang'] = langs_obj

  regions_obj = collections.OrderedDict()
  for region in sorted(region_to_family_ids,
                       key=lambda r: cldr_data.get_english_region_name(r)):
    region_obj = collections.OrderedDict()
    region_obj['families'] = sorted(region_to_family_ids[region])
    # The English region name is a keyword too, for search.
    region_obj['keywords'] = [cldr_data.get_english_region_name(region)]
    regions_obj[region] = region_obj
  data_obj['region'] = regions_obj

  pkg_obj = collections.OrderedDict()
  pkg_obj['hinted'] = universal_zip_info[1]
  pkg_obj['unhinted'] = universal_zip_info[2]
  data_obj['pkgSize'] = pkg_obj

  self.write_json(data_obj, 'data')
def generate(self):
  """Drive the full website-data generation pipeline.

  Collects fonts, families, language, and region data; drops languages with
  no English name; removes language samples the fonts cannot display; then
  emits zip, css, json, and image outputs (each stage can be disabled by the
  corresponding self.no_* flag).
  """
  if self.clean:
    self.clean_target_dir()

  if not self.no_build:
    self.ensure_target_dirs_exist()

  # debug/print
  # ['families', 'script_to_family_ids', 'used_lang_data',
  #  'family_id_to_lang_tags', 'family_id_to_default_lang_tag']
  debug = frozenset([])

  fonts = get_noto_fonts()
  families = get_families(fonts)
  if 'families' in debug:
    print '\nfamilies'
    for family_id, family in sorted(families.iteritems()):
      print family_id, family.rep_member.script

  script_to_family_ids = get_script_to_family_ids(families)
  if 'script_to_family_ids' in debug:
    print '\nscript to family ids'
    for script, family_ids in sorted(script_to_family_ids.iteritems()):
      print script, family_ids

  supported_scripts = set(script_to_family_ids.keys())
  used_lang_data = get_used_lang_data(supported_scripts)
  if 'used_lang_data' in debug:
    print '\nused lang data'
    for lang, data in sorted(used_lang_data.iteritems()):
      used = ', '.join(data[0])
      unused = ', '.join(data[1])
      if unused:
        unused = '(' + unused + ')'
        if used:
          unused = ' ' + unused
      print '%s: %s%s' % (lang, used, unused)

  # Drop languages for which CLDR has no English display name.
  langs_to_delete = []
  for lang in used_lang_data.keys():
    if not cldr_data.get_english_language_name(lang):
      langs_to_delete.append(lang)
  if langs_to_delete:
    print 'deleting languages with no english name: %s' % langs_to_delete
    for lang in langs_to_delete:
      del used_lang_data[lang]

  lang_tag_to_family_ids = get_lang_tag_to_family_ids(used_lang_data,
                                                      script_to_family_ids)

  region_to_family_ids = get_region_to_family_ids(script_to_family_ids)

  family_id_to_lang_tags = get_family_id_to_lang_tags(lang_tag_to_family_ids,
                                                      families)
  if 'family_id_to_lang_tags' in debug:
    print '\nfamily id to lang tags'
    for family_id, lang_tags in sorted(family_id_to_lang_tags.iteritems()):
      print '%s: %s' % (family_id, ','.join(sorted(lang_tags)))

  family_id_to_regions = get_family_id_to_regions(region_to_family_ids,
                                                  families)

  family_id_to_default_lang_tag = get_family_id_to_default_lang_tag(
      family_id_to_lang_tags)
  if 'family_id_to_default_lang_tag' in debug:
    print '\nfamily id to default lang tag'
    for family_id, lang_tag in family_id_to_default_lang_tag.iteritems():
      print family_id, lang_tag

  used_lang_tags = get_used_lang_tags(
      lang_tag_to_family_ids.keys(), family_id_to_default_lang_tag.values())
  lang_tag_to_sample_data = get_lang_tag_to_sample_data(used_lang_tags)

  # find the samples that can't be displayed.
  tested_keys = set()
  failed_keys = set()
  family_langs_to_remove = {}
  for lang_tag in sorted(lang_tag_to_sample_data):
    sample_info = lang_tag_to_sample_data[lang_tag]
    sample = sample_info[1]
    sample_key = sample_info[3]
    for family_id in sorted(lang_tag_to_family_ids[lang_tag]):
      # One test per (sample text, family) pair; reuse prior pass/fail.
      full_key = sample_key + '-' + family_id
      if full_key in tested_keys:
        if full_key in failed_keys:
          print 'failed sample %s lang %s' % (full_key, lang_tag)
          if family_id not in family_langs_to_remove:
            family_langs_to_remove[family_id] = set()
          family_langs_to_remove[family_id].add(lang_tag)
        continue

      failed_cps = set()
      tested_keys.add(full_key)
      charset = families[family_id].charset
      for cp in sample:
        # Skip common punctuation codepoints the fonts need not cover.
        if ord(cp) in [0xa, 0x28, 0x29, 0x2c, 0x2d, 0x2e, 0x3b, 0x5b, 0x5d,
                       0x2010]:
          continue
        if ord(cp) not in charset:
          failed_cps.add(ord(cp))
      if failed_cps:
        print 'sample %s cannot be displayed in %s (lang %s):\n %s' % (
            sample_key, family_id, lang_tag,
            '\n '.join('%04x (%s)' % (cp, unichr(cp))
                       for cp in sorted(failed_cps)))
        failed_keys.add(full_key)
        if family_id not in family_langs_to_remove:
          family_langs_to_remove[family_id] = set()
        family_langs_to_remove[family_id].add(lang_tag)

  # Remove undisplayable languages from the family/lang maps.
  for family_id in sorted(family_langs_to_remove):
    langs_to_remove = family_langs_to_remove[family_id]
    print 'remove from %s: %s' % (family_id, ','.join(sorted(langs_to_remove)))
    family_id_to_lang_tags[family_id] -= langs_to_remove
    default_lang_tag = family_id_to_default_lang_tag[family_id]
    if default_lang_tag in langs_to_remove:
      print '!removing default lang tag %s for family %s' % (
          default_lang_tag, family_id)
    for lang in langs_to_remove:
      lang_tag_to_family_ids[lang] -= set([family_id])

  region_data = get_region_lat_lng_data(region_to_family_ids.keys())

  lang_tag_sort_order = get_lang_tag_sort_order(lang_tag_to_family_ids.keys())

  if self.no_build:
    print 'skipping build output'
    return

  # build outputs
  if self.no_zips:
    print 'skipping zip output'
  else:
    family_zip_info = self.build_zips(families)
    universal_zip_info = self.build_universal_zips(families)

    # build outputs not used by the json but linked to from the web page
    self.build_ttc_zips()

  if self.no_css:
    print 'skipping css output'
  else:
    family_css_info = self.build_css(families)

  # NOTE(review): json data depends on zip and css info computed above, so
  # skipping either of those also skips the data output.
  if self.no_data or self.no_zips or self.no_css:
    reason = '' if self.no_data else 'no zips' if self.no_zips else 'no css'
    print 'skipping data output%s' % reason
  else:
    self.build_data_json(families, family_zip_info, universal_zip_info,
                         family_id_to_lang_tags, family_id_to_regions,
                         lang_tag_to_family_ids, region_to_family_ids)
    self.build_families_json(families, family_id_to_lang_tags,
                             family_id_to_default_lang_tag,
                             family_id_to_regions, family_css_info,
                             lang_tag_sort_order)
    self.build_misc_json(lang_tag_to_sample_data, region_data)

  if self.no_images:
    print 'skipping image output'
  else:
    self.build_images(families, family_id_to_lang_tags,
                      family_id_to_default_lang_tag, lang_tag_to_sample_data)
def generate(self):
    """Drive the full website-data generation pipeline.

    Collects fonts, families, language, and region data; drops languages
    with no English name; removes language samples the fonts cannot display;
    then emits zip, css, json, and image outputs (each stage can be disabled
    by the corresponding self.no_* flag).
    """
    if self.clean:
        self.clean_target_dir()

    if not self.no_build:
        self.ensure_target_dirs_exist()

    # debug/print
    # ['families', 'script_to_family_ids', 'used_lang_data',
    #  'family_id_to_lang_tags', 'family_id_to_default_lang_tag']
    debug = frozenset([])

    fonts = noto_fonts.get_noto_fonts()
    families = noto_fonts.get_families(fonts)
    if 'families' in debug:
        print '\nfamilies'
        for family_id, family in sorted(families.iteritems()):
            print family_id, family.rep_member.script

    script_to_family_ids = get_script_to_family_ids(families)
    if 'script_to_family_ids' in debug:
        print '\nscript to family ids'
        for script, family_ids in sorted(script_to_family_ids.iteritems()):
            print script, family_ids

    supported_scripts = set(script_to_family_ids.keys())
    used_lang_data = get_used_lang_data(supported_scripts)
    if 'used_lang_data' in debug:
        print '\nused lang data'
        for lang, data in sorted(used_lang_data.iteritems()):
            used = ', '.join(data[0])
            unused = ', '.join(data[1])
            if unused:
                unused = '(' + unused + ')'
                if used:
                    unused = ' ' + unused
            print '%s: %s%s' % (lang, used, unused)

    # Drop languages for which CLDR has no English display name.
    langs_to_delete = []
    for lang in used_lang_data.keys():
        if not cldr_data.get_english_language_name(lang):
            langs_to_delete.append(lang)
    if langs_to_delete:
        print 'deleting languages with no english name: %s' % langs_to_delete
        for lang in langs_to_delete:
            del used_lang_data[lang]

    lang_tag_to_family_ids = get_lang_tag_to_family_ids(
        used_lang_data, script_to_family_ids)

    region_to_family_ids = get_region_to_family_ids(script_to_family_ids)

    family_id_to_lang_tags = get_family_id_to_lang_tags(
        lang_tag_to_family_ids, families)
    if 'family_id_to_lang_tags' in debug:
        print '\nfamily id to lang tags'
        for family_id, lang_tags in sorted(
                family_id_to_lang_tags.iteritems()):
            print '%s: %s' % (family_id, ','.join(sorted(lang_tags)))

    family_id_to_regions = get_family_id_to_regions(
        region_to_family_ids, families)

    family_id_to_default_lang_tag = get_family_id_to_default_lang_tag(
        family_id_to_lang_tags)
    if 'family_id_to_default_lang_tag' in debug:
        print '\nfamily id to default lang tag'
        for family_id, lang_tag in family_id_to_default_lang_tag.iteritems(
        ):
            print family_id, lang_tag

    used_lang_tags = get_used_lang_tags(
        lang_tag_to_family_ids.keys(),
        family_id_to_default_lang_tag.values())
    lang_tag_to_sample_data = get_lang_tag_to_sample_data(used_lang_tags)

    # find the samples that can't be displayed.
    tested_keys = set()
    failed_keys = set()
    family_langs_to_remove = {}
    for lang_tag in sorted(lang_tag_to_sample_data):
        sample_info = lang_tag_to_sample_data[lang_tag]
        sample = sample_info[1]
        sample_key = sample_info[3]
        for family_id in sorted(lang_tag_to_family_ids[lang_tag]):
            # One test per (sample text, family) pair; reuse prior pass/fail.
            full_key = sample_key + '-' + family_id
            if full_key in tested_keys:
                if full_key in failed_keys:
                    print 'failed sample %s lang %s' % (full_key, lang_tag)
                    if family_id not in family_langs_to_remove:
                        family_langs_to_remove[family_id] = set()
                    family_langs_to_remove[family_id].add(lang_tag)
                continue

            failed_cps = set()
            tested_keys.add(full_key)
            charset = families[family_id].charset
            for cp in sample:
                # Skip common punctuation codepoints the fonts need not cover.
                if ord(cp) in [
                        0xa, 0x28, 0x29, 0x2c, 0x2d, 0x2e, 0x3b, 0x5b, 0x5d,
                        0x2010
                ]:
                    continue
                if ord(cp) not in charset:
                    failed_cps.add(ord(cp))
            if failed_cps:
                print 'sample %s cannot be displayed in %s (lang %s):\n %s' % (
                    sample_key, family_id, lang_tag, '\n '.join(
                        '%04x (%s)' % (cp, unichr(cp))
                        for cp in sorted(failed_cps)))
                failed_keys.add(full_key)
                if family_id not in family_langs_to_remove:
                    family_langs_to_remove[family_id] = set()
                family_langs_to_remove[family_id].add(lang_tag)

    # Remove undisplayable languages from the family/lang maps.
    for family_id in sorted(family_langs_to_remove):
        langs_to_remove = family_langs_to_remove[family_id]
        print 'remove from %s: %s' % (family_id, ','.join(
            sorted(langs_to_remove)))
        family_id_to_lang_tags[family_id] -= langs_to_remove
        default_lang_tag = family_id_to_default_lang_tag[family_id]
        if default_lang_tag in langs_to_remove:
            print '!removing default lang tag %s for family %s' % (
                default_lang_tag, family_id)
        for lang in langs_to_remove:
            lang_tag_to_family_ids[lang] -= set([family_id])

    region_data = get_region_lat_lng_data(region_to_family_ids.keys())

    lang_tag_sort_order = get_lang_tag_sort_order(
        lang_tag_to_family_ids.keys())

    if self.no_build:
        print 'skipping build output'
        return

    # build outputs
    if self.no_zips:
        print 'skipping zip output'
    else:
        family_zip_info = self.build_zips(families)
        universal_zip_info = self.build_universal_zips(families)

        # build outputs not used by the json but linked to from the web page
        self.build_ttc_zips()

    if self.no_css:
        print 'skipping css output'
    else:
        family_css_info = self.build_css(families)

    # NOTE(review): json data depends on zip and css info computed above, so
    # skipping either of those also skips the data output.
    if self.no_data or self.no_zips or self.no_css:
        reason = '' if self.no_data else 'no zips' if self.no_zips else 'no css'
        print 'skipping data output%s' % reason
    else:
        self.build_data_json(families, family_zip_info, universal_zip_info,
                             family_id_to_lang_tags, family_id_to_regions,
                             lang_tag_to_family_ids, region_to_family_ids)
        self.build_families_json(families, family_id_to_lang_tags,
                                 family_id_to_default_lang_tag,
                                 family_id_to_regions, family_css_info,
                                 lang_tag_sort_order)
        self.build_misc_json(lang_tag_to_sample_data, region_data)

    if self.no_images:
        print 'skipping image output'
    else:
        self.build_images(families, family_id_to_lang_tags,
                          family_id_to_default_lang_tag,
                          lang_tag_to_sample_data)
def build_data_json(self, families, family_zip_info, universal_zip_info,
                    family_id_to_lang_tags, family_id_to_regions,
                    lang_tag_to_family_ids, region_to_family_ids):
    """Assemble and write the 'data' json file.

    Builds OrderedDicts (so the emitted json key order is stable) describing
    each family's name/sizes/counts, the language and region indexes, and
    the universal package sizes, then writes the result via self.write_json.
    """
    data_obj = collections.OrderedDict()
    families_obj = collections.OrderedDict()
    # Sort families by English name, except 'Noto Sans' and 'Noto Serif' come first
    family_ids = [
        family_id for family_id in families
        if family_id != 'sans' and family_id != 'serif'
    ]
    family_ids = sorted(family_ids, key=lambda f: families[f].name)
    sorted_ids = ['sans', 'serif']
    sorted_ids.extend(family_ids)
    for k in sorted_ids:
        family = families[k]
        family_obj = {}
        family_obj['name'] = family.name

        # zip info tuple is (name, hinted_size, unhinted_size); name unused.
        name, hinted_size, unhinted_size = family_zip_info[k]
        pkg_obj = collections.OrderedDict()
        if hinted_size:
            pkg_obj['hinted'] = hinted_size
        if unhinted_size:
            pkg_obj['unhinted'] = unhinted_size
        family_obj['pkgSize'] = pkg_obj

        # A family has hinted members, unhinted members, or both; count
        # whichever member list is populated.
        family_obj['fonts'] = len(family.hinted_members
                                  or family.unhinted_members)
        family_obj['langs'] = len(family_id_to_lang_tags[k])
        family_obj['regions'] = len(family_id_to_regions[k])

        families_obj[k] = family_obj
    data_obj['family'] = families_obj

    data_obj['familyOrder'] = sorted_ids

    langs_obj = collections.OrderedDict()
    # Dont list 'und-' lang tags, these are for default samples and not listed in the UI
    lang_tags = [
        lang for lang in lang_tag_to_family_ids
        if not lang.startswith('und-')
    ]

    lang_tags = sorted(
        lang_tags, key=lambda l: cldr_data.get_english_language_name(l))
    for lang in lang_tags:
        lang_obj = collections.OrderedDict()
        english_name = cldr_data.get_english_language_name(lang)
        lang_obj['name'] = english_name
        lang_obj['families'] = sorted(lang_tag_to_family_ids[lang])
        native_name = cldr_data.get_native_language_name(lang)
        if native_name and native_name != english_name:
            # Native name is exposed as a search keyword, not a display name.
            lang_obj['keywords'] = [native_name]
        langs_obj[lang] = lang_obj
    data_obj['lang'] = langs_obj

    regions_obj = collections.OrderedDict()
    for region in sorted(
            region_to_family_ids,
            key=lambda r: cldr_data.get_english_region_name(r)):
        region_obj = collections.OrderedDict()
        region_obj['families'] = sorted(region_to_family_ids[region])
        # The English region name is a keyword too, for search.
        region_obj['keywords'] = [
            cldr_data.get_english_region_name(region)
        ]
        regions_obj[region] = region_obj
    data_obj['region'] = regions_obj

    pkg_obj = collections.OrderedDict()
    pkg_obj['hinted'] = universal_zip_info[1]
    pkg_obj['unhinted'] = universal_zip_info[2]
    data_obj['pkgSize'] = pkg_obj

    self.write_json(data_obj, 'data')