예제 #1
0
def _check_no_alias_sources(sorted_seq_to_filepath):
  """Check that we don't have sequences that we expect to be aliased to
  some other sequence."""
  aliases = add_aliases.read_default_emoji_aliases()
  for seq, fp in sorted_seq_to_filepath.iteritems():
    if seq in aliases:
      print('check no alias sources: aliased sequence %s' % fp)
def _check_no_alias_sources(sorted_seq_to_filepath):
    """Check that we don't have sequences that we expect to be aliased to
  some other sequence."""
    aliases = add_aliases.read_default_emoji_aliases()
    for seq, fp in sorted_seq_to_filepath.iteritems():
        if seq in aliases:
            print('check no alias sources: aliased sequence %s' % fp)
def get_inv_aliases():
  """Return a mapping from target to list of sources for all alias
  targets in either the default alias table or the unknown_flag alias
  table."""

  inv_aliases = collections.defaultdict(list)

  standard_aliases = add_aliases.read_default_emoji_aliases()
  for k, v in standard_aliases.iteritems():
    inv_aliases[v].append(k)

  unknown_flag_aliases = add_aliases.read_emoji_aliases(
      'unknown_flag_aliases.txt')
  for k, v in unknown_flag_aliases.iteritems():
    inv_aliases[v].append(k)

  return inv_aliases
예제 #4
0
def get_inv_aliases():
    """Return a mapping from target to list of sources for all alias
  targets in either the default alias table or the unknown_flag alias
  table."""

    inv_aliases = collections.defaultdict(list)

    standard_aliases = add_aliases.read_default_emoji_aliases()
    for k, v in standard_aliases.iteritems():
        inv_aliases[v].append(k)

    unknown_flag_aliases = add_aliases.read_emoji_aliases(
        'unknown_flag_aliases.txt')
    for k, v in unknown_flag_aliases.iteritems():
        inv_aliases[v].append(k)

    return inv_aliases
예제 #5
0
def _check_coverage(seq_to_filepath, unicode_version):
  """Ensure we have all and only the cps and sequences that we need for the
  font as of this version."""

  age = unicode_version

  non_vs_to_canonical = {}
  for k in seq_to_filepath:
    if EMOJI_VS in k:
      non_vs = unicode_data.strip_emoji_vs(k)
      non_vs_to_canonical[non_vs] = k

  aliases = add_aliases.read_default_emoji_aliases()
  for k, v in sorted(aliases.items()):
    if v not in seq_to_filepath and v not in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s missing target %s' % (alias_str, target_str))
      continue
    if k in seq_to_filepath or k in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s already exists as %s (%s)' % (
          alias_str, target_str, seq_name(v)))
      continue
    filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
    seq_to_filepath[k] = 'alias:' + filename

  # check single emoji, this includes most of the special chars
  emoji = sorted(unicode_data.get_emoji(age=age))
  for cp in emoji:
    if tuple([cp]) not in seq_to_filepath:
      print(
          'coverage: missing single %04x (%s)' % (
              cp, unicode_data.name(cp, '<no name>')))

  # special characters
  # all but combining enclosing keycap are currently marked as emoji
  for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a):
    if cp not in emoji and tuple([cp]) not in seq_to_filepath:
      print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))

  # combining sequences
  comb_seq_to_name = sorted(
      unicode_data.get_emoji_combining_sequences(age=age).iteritems())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
      non_vs_seq = unicode_data.strip_emoji_vs(seq)
      if non_vs_seq not in seq_to_filepath:
        print('coverage: missing combining sequence %s (%s)' %
              (unicode_data.seq_to_string(seq), name))

  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).iteritems())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing flag sequence %s (%s)' %
            (unicode_data.seq_to_string(seq), name))

  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing modifier sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences.  normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)

  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print('coverage: missing (canonical) zwj sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
  if tuple([0xfe82b]) not in seq_to_filepath:
    print('coverage: missing unknown flag PUA fe82b')
예제 #6
0
def _get_canonical_aliases():
  def canon(seq):
    return unicode_data.get_canonical_emoji_sequence(seq) or seq
  aliases = add_aliases.read_default_emoji_aliases()
  return {canon(k): canon(v) for k, v in aliases.iteritems()}
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
  font as of this version."""

    age = unicode_version

    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' %
                  (alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' %
                  (alias_str, target_str, seq_name(v)))
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' %
                  (cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' %
                  (cp, unicode_data.name(cp)))

    # combining sequences
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).iteritems())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' %
                      (unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).iteritems())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences.  normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    for seq, name in sorted(
            unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
    # don't start with our prefix so 'unknown_flag' would be excluded by default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
예제 #8
0
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
  font as of this version."""

    coverage_pass = True
    age = unicode_version

    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(f'coverage: alias {alias_str} missing target {target_str}')
            coverage_pass = False
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(
                f'coverage: alias {alias_str} already exists as {target_str} ({seq_name(v)})'
            )
            coverage_pass = False
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji())
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print(f'coverage: missing single {cp} ({unicode_data.name(cp)})')
            coverage_pass = False

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print(f'coverage: missing special {cp} ({unicode_data.name(cp)})')
            coverage_pass = False

    # combining sequences
    comb_seq_to_name = sorted(unicode_data._emoji_sequence_data.items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print(
                    f'coverage: missing combining sequence {unicode_data.seq_to_string(seq)} ({name})'
                )
                coverage_pass = False

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
    # don't start with our prefix so 'unknown_flag' would be excluded by default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
        coverage_pass = False

    if not coverage_pass:
        exit(
            "Please fix the problems metioned above or run: make BYPASS_SEQUENCE_CHECK='True'"
        )