0x260E, # ☎ BLACK TELEPHONE 0x261D, # ☝ WHITE UP POINTING INDEX 0x263A, # ☺ WHITE SMILING FACE 0x2660, # ♠ BLACK SPADE SUIT 0x2663, # ♣ BLACK CLUB SUIT 0x2665, # ♥ BLACK HEART SUIT 0x2666, # ♦ BLACK DIAMOND SUIT 0x270C, # ✌ VICTORY HAND 0x2744, # ❄ SNOWFLAKE 0x2764, # ❤ HEAVY BLACK HEART } # TV symbols, see https://github.com/googlei18n/noto-fonts/issues/557 TV_SYMBOLS_FOR_SUBSETTED = tool_utils.parse_int_ranges('1f19b-1f1ac 1f23b') EMOJI = unicode_data.get_presentation_default_emoji() | ANDROID_EMOJI def _format_set(char_set, name, filename): lines = ['%s = {' % name] for cp in sorted(char_set): name = unicode_data.name(cp) lines.append(' 0x%04X, # %s %s' % (cp, unichr(cp), name)) lines.append('}\n') with codecs.open(filename, 'w', 'UTF-8') as f: f.write('\n'.join(lines)) print 'wrote', filename def subset_symbols(srcdir, dstdir): """Subset Noto Sans Symbols in a curated way.
0x261D, # ☝ WHITE UP POINTING INDEX 0x263A, # ☺ WHITE SMILING FACE 0x2660, # ♠ BLACK SPADE SUIT 0x2663, # ♣ BLACK CLUB SUIT 0x2665, # ♥ BLACK HEART SUIT 0x2666, # ♦ BLACK DIAMOND SUIT 0x270C, # ✌ VICTORY HAND 0x2744, # ❄ SNOWFLAKE 0x2764, # ❤ HEAVY BLACK HEART } # TV symbols, see https://github.com/googlei18n/noto-fonts/issues/557 TV_SYMBOLS_FOR_SUBSETTED = tool_utils.parse_int_ranges( '1f19b-1f1ac 1f23b') EMOJI = unicode_data.get_presentation_default_emoji() | ANDROID_EMOJI def _format_set(char_set, name, filename): lines = ['%s = {' % name] for cp in sorted(char_set): name = unicode_data.name(cp) lines.append(' 0x%04X, # %s %s' % (cp, unichr(cp), name)) lines.append('}\n') with codecs.open(filename, 'w', 'UTF-8') as f: f.write('\n'.join(lines)) print 'wrote', filename def subset_symbols(srcdir, dstdir): """Subset Noto Sans Symbols in a curated way.
def _subset_symbols():
    """Subset Noto Sans Symbols in a curated way.

    Noto Sans Symbols is now subsetted in a curated way. Changes include:

    * Currency symbols now included in Roboto are removed.

    * All combining marks for symbols (except for combining keycap) are
      added, to combine with other symbols if needed.

    * Characters in symbol blocks that are also covered by Noto CJK fonts
      are added, for better harmony with the rest of the fonts in non-CJK
      settings. The dentistry characters at U+23BE..23CC are not added,
      since they appear to be Japan-only and full-width.

    * Characters that UTR #51 defines as default text are added, although
      they may also exist in the color emoji font, to make sure they get a
      default text style.

    * Characters that UTR #51 defines as default emoji are removed, to make
      sure they don't block the fallback to the color emoji font.

    * A few math symbols that are currently included in Roboto are added,
      to prepare for potentially removing them from Roboto when they are
      lower-quality in Roboto.

    For every 'NotoSansSymbols-*.ttf' found in SRC_DIR two files are
    written to DST_DIR: '<name>-Subsetted.ttf' with the curated coverage
    described above, and '<name>-Subsetted2.ttf' with the secondary
    coverage computed at the end of this function.

    Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts.
    """

    def write_subsets(label, suffix, coverage):
        # Subset each Noto Sans Symbols source font down to `coverage`,
        # naming the output after the source file plus `suffix`.
        pattern = path.join(SRC_DIR, 'NotoSansSymbols-*.ttf')
        for font_file in glob.glob(pattern):
            # Single-argument print() emits the same text under both
            # Python 2 (print statement) and Python 3 (print function).
            print('%s %s' % (label, font_file))
            stem = path.splitext(path.basename(font_file))[0]
            out_file = path.join(DST_DIR, stem + suffix)
            subset.subset_font(font_file, out_file, include=coverage)

    # TODO see if we need to change this subset based on Noto Serif coverage
    # (so the serif fallback chain would support them)

    default_emoji = unicode_data.get_presentation_default_emoji()

    target_coverage = set()
    # Add all characters in BLOCKS_TO_INCLUDE
    for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
        target_coverage.update(range(first, last + 1))

    # Add one-off characters
    target_coverage |= ONE_OFF_ADDITIONS
    # Remove characters preferably coming from Roboto
    target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
    # Remove characters that are supposed to default to emoji
    target_coverage -= default_emoji

    # Remove dentistry symbols, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    target_coverage -= set(range(0x23BE, 0x23CC + 1))

    # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color
    # emoji mechanism to work properly. discard() rather than remove(): if
    # an earlier step already dropped it, the goal is met and there is
    # nothing to fail on.
    target_coverage.discard(0x20E3)

    # Remove symbol characters for Android that belong in subsetted2 but not
    # subsetted.
    target_coverage -= BELONG_IN_SUBSETTED2

    # Add TV symbols
    target_coverage |= TV_SYMBOLS_FOR_SUBSETTED

    write_subsets('main subset', '-Subsetted.ttf', target_coverage)

    # Roozbeh wants a second subset with emoji presentation characters that
    # take text-presentation variation sequences. This will be a fallback
    # after the color emoji.
    target_coverage = set(
        default_emoji & unicode_data.get_unicode_emoji_variants())
    target_coverage |= BELONG_IN_SUBSETTED2

    write_subsets('secondary subset', '-Subsetted2.ttf', target_coverage)
0x2120, # ℠ SERVICE MARK 0x2121, # ℡ TELEPHONE SIGN 0x2122, # ™ TRADE MARK SIGN 0x2123, # ℣ VERSICLE 0x2125, # ℥ OUNCE SIGN 0x2126, # Ω OHM SIGN 0x212A, # K KELVIN SIGN 0x212B, # Å ANGSTROM SIGN 0x212E, # ℮ ESTIMATED SYMBOL 0x2132, # Ⅎ TURNED CAPITAL F 0x213B, # ℻ FACSIMILE SIGN 0x214D, # ⅍ AKTIESELSKAB 0x214F, # ⅏ SYMBOL FOR SAMARITAN SOURCE } DEFAULT_EMOJI = unicode_data.get_presentation_default_emoji() EMOJI_ADDITIONS_FILE = os.path.join( os.path.dirname(__file__), os.path.pardir, os.path.pardir, 'unicode', 'additions', 'emoji-data.txt') # Characters we have decided we are doing as emoji-style in Android, # despite UTR#51's recommendation def get_android_emoji(): """Return additional Android default emojis.""" android_emoji = set() with open(EMOJI_ADDITIONS_FILE) as emoji_additions: data = unicode_data._parse_semicolon_separated_data( emoji_additions.read()) for codepoint, prop in data:
0x2120, # ℠ SERVICE MARK 0x2121, # ℡ TELEPHONE SIGN 0x2122, # ™ TRADE MARK SIGN 0x2123, # ℣ VERSICLE 0x2125, # ℥ OUNCE SIGN 0x2126, # Ω OHM SIGN 0x212A, # K KELVIN SIGN 0x212B, # Å ANGSTROM SIGN 0x212E, # ℮ ESTIMATED SYMBOL 0x2132, # Ⅎ TURNED CAPITAL F 0x213B, # ℻ FACSIMILE SIGN 0x214D, # ⅍ AKTIESELSKAB 0x214F, # ⅏ SYMBOL FOR SAMARITAN SOURCE } DEFAULT_EMOJI = unicode_data.get_presentation_default_emoji() EMOJI_ADDITIONS_FILE = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, 'unicode', 'additions', 'emoji-data.txt') # Characters we have decided we are doing as emoji-style in Android, # despite UTR#51's recommendation def get_android_emoji(): """Return additional Android default emojis.""" android_emoji = set() with open(EMOJI_ADDITIONS_FILE) as emoji_additions: data = unicode_data._parse_semicolon_separated_data( emoji_additions.read()) for codepoint, prop in data:
def _subset_symbols():
    """Subset Noto Sans Symbols in a curated way.

    Noto Sans Symbols is now subsetted in a curated way. Changes include:

    * Currency symbols now included in Roboto are removed.

    * All combining marks for symbols (except for combining keycap) are
      added, to combine with other symbols if needed.

    * Characters in symbol blocks that are also covered by Noto CJK fonts
      are added, for better harmony with the rest of the fonts in non-CJK
      settings. The dentistry characters at U+23BE..23CC are not added,
      since they appear to be Japan-only and full-width.

    * Characters that UTR #51 defines as default text are added, although
      they may also exist in the color emoji font, to make sure they get a
      default text style.

    * Characters that UTR #51 defines as default emoji are removed, to make
      sure they don't block the fallback to the color emoji font.

    * A few math symbols that are currently included in Roboto are added,
      to prepare for potentially removing them from Roboto when they are
      lower-quality in Roboto.

    For every 'NotoSansSymbols-*.ttf' found in SRC_DIR two files are
    written to DST_DIR: '<name>-Subsetted.ttf' with the curated coverage
    described above, and '<name>-Subsetted2.ttf' with the secondary
    coverage computed at the end of this function.

    Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts.
    """

    def write_subsets(label, suffix, coverage):
        # Subset each Noto Sans Symbols source font down to `coverage`,
        # naming the output after the source file plus `suffix`.
        pattern = path.join(SRC_DIR, 'NotoSansSymbols-*.ttf')
        for font_file in glob.glob(pattern):
            # Single-argument print() emits the same text under both
            # Python 2 (print statement) and Python 3 (print function).
            print('%s %s' % (label, font_file))
            stem = path.splitext(path.basename(font_file))[0]
            out_file = path.join(DST_DIR, stem + suffix)
            subset.subset_font(font_file, out_file, include=coverage)

    # TODO see if we need to change this subset based on Noto Serif coverage
    # (so the serif fallback chain would support them)

    default_emoji = unicode_data.get_presentation_default_emoji()

    target_coverage = set()
    # Add all characters in BLOCKS_TO_INCLUDE
    for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
        target_coverage.update(range(first, last + 1))

    # Add one-off characters
    target_coverage |= ONE_OFF_ADDITIONS
    # Remove characters preferably coming from Roboto
    target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
    # Remove characters that are supposed to default to emoji
    target_coverage -= default_emoji

    # Remove dentistry symbols, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    target_coverage -= set(range(0x23BE, 0x23CC + 1))

    # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color
    # emoji mechanism to work properly. discard() rather than remove(): if
    # an earlier step already dropped it, the goal is met and there is
    # nothing to fail on.
    target_coverage.discard(0x20E3)

    # Remove symbol characters for Android that belong in subsetted2 but not
    # subsetted.
    target_coverage -= BELONG_IN_SUBSETTED2

    # Add TV symbols
    target_coverage |= TV_SYMBOLS_FOR_SUBSETTED

    write_subsets('main subset', '-Subsetted.ttf', target_coverage)

    # Roozbeh wants a second subset with emoji presentation characters that
    # take text-presentation variation sequences. This will be a fallback
    # after the color emoji.
    target_coverage = set(
        default_emoji & unicode_data.get_unicode_emoji_variants())
    target_coverage |= BELONG_IN_SUBSETTED2

    write_subsets('secondary subset', '-Subsetted2.ttf', target_coverage)