Example #1
0
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}

# TV symbols, see https://github.com/googlei18n/noto-fonts/issues/557
# NOTE(review): the range string looks like hexadecimal code points
# (1f19b through 1f1ac, plus 1f23b) -- confirm against
# tool_utils.parse_int_ranges.
TV_SYMBOLS_FOR_SUBSETTED = tool_utils.parse_int_ranges('1f19b-1f1ac 1f23b')

# Combination (via ``|``) of what unicode_data reports as presentation-default
# emoji with ANDROID_EMOJI; computed once, when this module is loaded.
EMOJI = unicode_data.get_presentation_default_emoji() | ANDROID_EMOJI


def _format_set(char_set, name, filename):
    """Write a collection of code points to a file as a Python set literal.

    The generated text is a ``<name> = {`` line, then one ``0xXXXX,`` entry
    per code point in ascending order, each followed by a comment holding the
    character itself and the name returned by ``unicode_data.name``, then a
    closing ``}`` line. A short confirmation message is printed afterwards.

    Args:
      char_set: iterable of integer code points.
      name: identifier the literal is assigned to in the generated text.
      filename: path of the file to write; it is opened in 'w' mode with
        UTF-8 encoding.
    """
    lines = ['%s = {' % name]
    for cp in sorted(char_set):
        # Use a separate local so the 'name' parameter is not rebound.
        char_name = unicode_data.name(cp)
        lines.append('    0x%04X,  # %s %s' % (cp, unichr(cp), char_name))
    lines.append('}\n')
    with codecs.open(filename, 'w', 'UTF-8') as f:
        f.write('\n'.join(lines))
    # The single-argument call form prints the same text on Python 2 and 3.
    print('wrote %s' % filename)


def subset_symbols(srcdir, dstdir):
    """Subset Noto Sans Symbols in a curated way.
Example #2
0
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}

# TV symbols, see https://github.com/googlei18n/noto-fonts/issues/557
# NOTE(review): the range string looks like hexadecimal code points
# (1f19b through 1f1ac, plus 1f23b) -- confirm against
# tool_utils.parse_int_ranges.
TV_SYMBOLS_FOR_SUBSETTED = tool_utils.parse_int_ranges(
  '1f19b-1f1ac 1f23b')

# Combination (via ``|``) of what unicode_data reports as presentation-default
# emoji with ANDROID_EMOJI; computed once, when this module is loaded.
EMOJI = unicode_data.get_presentation_default_emoji() | ANDROID_EMOJI

def _format_set(char_set, name, filename):
  """Write a collection of code points to a file as a Python set literal.

  The generated text is a ``<name> = {`` line, then one ``0xXXXX,`` entry
  per code point in ascending order, each followed by a comment holding the
  character itself and the name returned by ``unicode_data.name``, then a
  closing ``}`` line. A short confirmation message is printed afterwards.

  Args:
    char_set: iterable of integer code points.
    name: identifier the literal is assigned to in the generated text.
    filename: path of the file to write; it is opened in 'w' mode with
      UTF-8 encoding.
  """
  lines = ['%s = {' % name]
  for cp in sorted(char_set):
    # Use a separate local so the 'name' parameter is not rebound.
    char_name = unicode_data.name(cp)
    lines.append('    0x%04X,  # %s %s' % (cp, unichr(cp), char_name))
  lines.append('}\n')
  with codecs.open(filename, 'w', 'UTF-8') as f:
    f.write('\n'.join(lines))
  # The single-argument call form prints the same text on Python 2 and 3.
  print('wrote %s' % filename)


def subset_symbols(srcdir, dstdir):
  """Subset Noto Sans Symbols in a curated way.
Example #3
0
def _subset_symbol_fonts(target_coverage, suffix, label):
    """Subset every NotoSansSymbols-*.ttf found in SRC_DIR.

    Each result is written to DST_DIR under the source file's base name with
    its last four characters replaced by suffix. label is printed in front of
    each source path as a progress message.
    """
    for font_file in glob.glob(path.join(SRC_DIR, 'NotoSansSymbols-*.ttf')):
        # The single-argument call form prints the same on Python 2 and 3.
        print('%s %s' % (label, font_file))
        out_file = path.join(DST_DIR, path.basename(font_file)[:-4] + suffix)
        subset.subset_font(font_file, out_file, include=target_coverage)


def _subset_symbols():
    """Subset Noto Sans Symbols in a curated way.

    Noto Sans Symbols is now subsetted in a curated way. Changes include:

    * Currency symbols now included in Roboto are removed.

    * All combining marks for symbols (except for combining keycap) are
      added, to combine with other symbols if needed.

    * Characters in symbol blocks that are also covered by Noto CJK fonts
      are added, for better harmony with the rest of the fonts in non-CJK
      settings. The dentistry characters at U+23BE..23CC are not added,
      since they appear to be Japan-only and full-width.

    * Characters that UTR #51 defines as default text are added, although
      they may also exist in the color emoji font, to make sure they get
      a default text style.

    * Characters that UTR #51 defines as default emoji are removed, to
      make sure they don't block the fallback to the color emoji font.

    * A few math symbols that are currently included in Roboto are added,
      to prepare for potentially removing them from Roboto when they are
      lower-quality in Roboto.

    Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts.

    Two files are written to DST_DIR for every NotoSansSymbols-*.ttf in
    SRC_DIR: a '-Subsetted.ttf' with the main coverage and a
    '-Subsetted2.ttf' with the secondary coverage.

    Raises:
      KeyError: if U+20E3 is not in the computed coverage at the point
        where it is removed.
    """

    # TODO see if we need to change this subset based on Noto Serif coverage
    # (so the serif fallback chain would support them)

    target_coverage = set()
    # Add all characters in BLOCKS_TO_INCLUDE
    for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
        target_coverage.update(range(first, last + 1))

    # Add one-off characters
    target_coverage |= ONE_OFF_ADDITIONS
    # Remove characters preferably coming from Roboto
    target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
    # Remove characters that are supposed to default to emoji. The set is
    # looked up once here and reused for the secondary subset below.
    default_emoji = unicode_data.get_presentation_default_emoji()
    target_coverage -= default_emoji

    # Remove dentistry symbols, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    target_coverage -= set(range(0x23BE, 0x23CC + 1))

    # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
    # mechanism to work properly. remove() (not discard()) is kept so that a
    # coverage that unexpectedly lacks the character fails loudly.
    target_coverage.remove(0x20E3)

    # Remove symbol characters for Android that belong in subsetted2 but not
    # subsetted.
    target_coverage -= BELONG_IN_SUBSETTED2

    # add TV symbols
    target_coverage |= TV_SYMBOLS_FOR_SUBSETTED

    _subset_symbol_fonts(target_coverage, '-Subsetted.ttf', 'main subset')

    # Roozbeh wants a second subset with emoji presentation characters that
    # take text-presentation variation sequences.  This will be a fallback
    # after the color emoji.
    target_coverage = set(default_emoji
                          & unicode_data.get_unicode_emoji_variants())
    target_coverage |= BELONG_IN_SUBSETTED2

    _subset_symbol_fonts(
        target_coverage, '-Subsetted2.ttf', 'secondary subset')
    0x2120, # ℠ SERVICE MARK
    0x2121, # ℡ TELEPHONE SIGN
    0x2122, # ™ TRADE MARK SIGN
    0x2123, # ℣ VERSICLE
    0x2125, # ℥ OUNCE SIGN
    0x2126, # Ω OHM SIGN
    0x212A, # K KELVIN SIGN
    0x212B, # Å ANGSTROM SIGN
    0x212E, # ℮ ESTIMATED SYMBOL
    0x2132, # Ⅎ TURNED CAPITAL F
    0x213B, # ℻ FACSIMILE SIGN
    0x214D, # ⅍ AKTIESELSKAB
    0x214F, # ⅏ SYMBOL FOR SAMARITAN SOURCE
}

# Result of unicode_data.get_presentation_default_emoji(), evaluated once
# when this module is loaded.
DEFAULT_EMOJI = unicode_data.get_presentation_default_emoji()

# Path to emoji-data.txt, built relative to this file's directory: two
# levels up, then unicode/additions/.
EMOJI_ADDITIONS_FILE = os.path.join(
    os.path.dirname(__file__), os.path.pardir, os.path.pardir,
    'unicode', 'additions', 'emoji-data.txt')


# Characters we have decided we are doing as emoji-style in Android,
# despite UTR#51's recommendation
def get_android_emoji():
    """Return additional Android default emojis."""
    android_emoji = set()
    with open(EMOJI_ADDITIONS_FILE) as emoji_additions:
        data = unicode_data._parse_semicolon_separated_data(
            emoji_additions.read())
        for codepoint, prop in data:
Example #5
0
    0x2120,  # ℠ SERVICE MARK
    0x2121,  # ℡ TELEPHONE SIGN
    0x2122,  # ™ TRADE MARK SIGN
    0x2123,  # ℣ VERSICLE
    0x2125,  # ℥ OUNCE SIGN
    0x2126,  # Ω OHM SIGN
    0x212A,  # K KELVIN SIGN
    0x212B,  # Å ANGSTROM SIGN
    0x212E,  # ℮ ESTIMATED SYMBOL
    0x2132,  # Ⅎ TURNED CAPITAL F
    0x213B,  # ℻ FACSIMILE SIGN
    0x214D,  # ⅍ AKTIESELSKAB
    0x214F,  # ⅏ SYMBOL FOR SAMARITAN SOURCE
}

# Result of unicode_data.get_presentation_default_emoji(), evaluated once
# when this module is loaded.
DEFAULT_EMOJI = unicode_data.get_presentation_default_emoji()

# Path to emoji-data.txt, built relative to this file's directory: two
# levels up, then unicode/additions/.
EMOJI_ADDITIONS_FILE = os.path.join(os.path.dirname(__file__), os.path.pardir,
                                    os.path.pardir, 'unicode', 'additions',
                                    'emoji-data.txt')


# Characters we have decided we are doing as emoji-style in Android,
# despite UTR#51's recommendation
def get_android_emoji():
    """Return additional Android default emojis."""
    android_emoji = set()
    with open(EMOJI_ADDITIONS_FILE) as emoji_additions:
        data = unicode_data._parse_semicolon_separated_data(
            emoji_additions.read())
        for codepoint, prop in data:
Example #6
0
def _subset_symbol_fonts(target_coverage, suffix, label):
  """Subset every NotoSansSymbols-*.ttf found in SRC_DIR.

  Each result is written to DST_DIR under the source file's base name with
  its last four characters replaced by suffix. label is printed in front of
  each source path as a progress message.
  """
  for font_file in glob.glob(path.join(SRC_DIR, 'NotoSansSymbols-*.ttf')):
    # The single-argument call form prints the same on Python 2 and 3.
    print('%s %s' % (label, font_file))
    out_file = path.join(DST_DIR, path.basename(font_file)[:-4] + suffix)
    subset.subset_font(font_file, out_file, include=target_coverage)


def _subset_symbols():
  """Subset Noto Sans Symbols in a curated way.

  Noto Sans Symbols is now subsetted in a curated way. Changes include:

  * Currency symbols now included in Roboto are removed.

  * All combining marks for symbols (except for combining keycap) are
    added, to combine with other symbols if needed.

  * Characters in symbol blocks that are also covered by Noto CJK fonts
    are added, for better harmony with the rest of the fonts in non-CJK
    settings. The dentistry characters at U+23BE..23CC are not added,
    since they appear to be Japan-only and full-width.

  * Characters that UTR #51 defines as default text are added, although
    they may also exist in the color emoji font, to make sure they get
    a default text style.

  * Characters that UTR #51 defines as default emoji are removed, to
    make sure they don't block the fallback to the color emoji font.

  * A few math symbols that are currently included in Roboto are added,
    to prepare for potentially removing them from Roboto when they are
    lower-quality in Roboto.

  Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts.

  Two files are written to DST_DIR for every NotoSansSymbols-*.ttf in
  SRC_DIR: a '-Subsetted.ttf' with the main coverage and a
  '-Subsetted2.ttf' with the secondary coverage.

  Raises:
    KeyError: if U+20E3 is not in the computed coverage at the point
      where it is removed.
  """

  # TODO see if we need to change this subset based on Noto Serif coverage
  # (so the serif fallback chain would support them)

  target_coverage = set()
  # Add all characters in BLOCKS_TO_INCLUDE
  for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
    target_coverage.update(range(first, last+1))

  # Add one-off characters
  target_coverage |= ONE_OFF_ADDITIONS
  # Remove characters preferably coming from Roboto
  target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
  # Remove characters that are supposed to default to emoji. The set is
  # looked up once here and reused for the secondary subset below.
  default_emoji = unicode_data.get_presentation_default_emoji()
  target_coverage -= default_emoji

  # Remove dentistry symbols, as their main use appears to be for CJK:
  # http://www.unicode.org/L2/L2000/00098-n2195.pdf
  target_coverage -= set(range(0x23BE, 0x23CC+1))

  # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
  # mechanism to work properly. remove() (not discard()) is kept so that a
  # coverage that unexpectedly lacks the character fails loudly.
  target_coverage.remove(0x20E3)

  # Remove symbol characters for Android that belong in subsetted2 but not
  # subsetted.
  target_coverage -= BELONG_IN_SUBSETTED2

  # add TV symbols
  target_coverage |= TV_SYMBOLS_FOR_SUBSETTED

  _subset_symbol_fonts(target_coverage, '-Subsetted.ttf', 'main subset')

  # Roozbeh wants a second subset with emoji presentation characters that
  # take text-presentation variation sequences.  This will be a fallback
  # after the color emoji.
  target_coverage = set(
      default_emoji &
      unicode_data.get_unicode_emoji_variants())
  target_coverage |= BELONG_IN_SUBSETTED2

  _subset_symbol_fonts(
      target_coverage, '-Subsetted2.ttf', 'secondary subset')