def _list_details(start_cp, limit_cp, defined_cps, defined_count, details):
  """Print an abbreviated listing of the defined code points in a range.

  Prints the first (details - 1) members of defined_cps found at or after
  start_cp, then the last member found below limit_cp.  A '...' line is
  printed between them when at least one defined code point is left out.

  Args:
    start_cp: first code point to examine.
    limit_cp: exclusive upper bound; the backward scan starts at limit_cp - 1.
    defined_cps: container tested with 'in' for every candidate code point.
    defined_count: bounds the forward scan and is printed as the index of
      the final entry.  NOTE(review): presumably the number of members of
      defined_cps inside [start_cp, limit_cp) -- confirm against the caller.
    details: the listing shows at most this many code points (the '...'
      line is not counted).
  """
  num = 0
  initial_cp = start_cp
  # Forward scan: list defined code points until details - 1 lines have been
  # printed or defined_count entries have been seen.  The scan itself is not
  # bounded by limit_cp; it relies on defined_count to terminate.
  while num < details - 1 and num < defined_count:
    if initial_cp in defined_cps:
      print '%13d %04x %s' % (
          num + 1, initial_cp, unicode_data.name(initial_cp, '(unnamed)'))
      num += 1
    initial_cp += 1
  # Entries remain: find the last defined code point, scanning backward from
  # the limit down to where the forward scan stopped.
  if num < defined_count:
    final_cp = limit_cp - 1
    final_name = None
    while final_cp >= initial_cp:
      if final_cp in defined_cps:
        final_name = unicode_data.name(final_cp, '(unnamed)')
        num += 1
        break
      final_cp -= 1
    # Even after counting the final entry something is unlisted; print the
    # ellipsis only if a defined code point really lies in the gap.
    if final_name and num < defined_count:
      middle_cp = final_cp - 1
      while middle_cp >= initial_cp:
        if middle_cp in defined_cps:
          print '%13s' % '...'
          break
        middle_cp -= 1
    # The final entry is numbered defined_count, not num.
    if final_name:
      print '%13d %04x %s' % (defined_count, final_cp, final_name)
Example #2
0
def report_compare(compare_result, detailed=True):
  """Print, per script, the code points added to or removed from the base.

  Args:
    compare_result: a (compare, base_cmap_data, target_cmap_data) triple,
      where compare maps a script key to an (added, removed) pair.
    detailed: when true, each added/removed code point is also listed on its
      own line with its Unicode name.
  """
  compare, base_cmap_data, target_cmap_data = compare_result
  base_map = cmap_data.create_map_from_table(base_cmap_data.table)
  target_map = cmap_data.create_map_from_table(target_cmap_data.table)

  base_title = title_from_metadata(base_cmap_data.meta)
  target_title = title_from_metadata(target_cmap_data.meta)

  def _show(summary_format, changed):
    # One summary line, then (optionally) one line per code point.
    print(summary_format % (
        len(changed), lint_config.write_int_ranges(changed)))
    if not detailed:
      return
    for cp in sorted(changed):
      print('    %6s %s' % ('%04x' % cp, unicode_data.name(cp, '')))

  print('base: %s' % base_title)
  print('target: %s' % target_title)
  for script in sorted(compare):
    added, removed = compare[script]
    if not (added or removed):
      continue
    print('%s # %s' % (script, base_map[script].name))
    if added:
      _show('  added (%d): %s', added)
    if removed:
      _show('  removed (%d): %s', removed)
def _print_detailed(cps, inverted_target=None):
  """Print one line of Unicode properties per code point, grouped by block.

  Args:
    cps: iterable of code points; they are processed in sorted order.
    inverted_target: optional mapping from code point to a collection of
      script names.  When given and non-empty, each line is suffixed with
      the scripts the code point is in, or ' !missing' if it is absent.
  """
  current_block = None
  for cp in sorted(cps):
    cp_block = unicode_data.block(cp)
    if cp_block != current_block:
      # Emit a heading each time the block changes.
      print('    %s' % cp_block)
      current_block = cp_block

    script = unicode_data.script(cp)
    other_scripts = unicode_data.script_extensions(cp) - set([script])
    ext_text = (
        ' (%s)' % ','.join(sorted(other_scripts)) if other_scripts else '')

    if not inverted_target:
      target_text = ''
    elif cp in inverted_target:
      names = sorted(inverted_target[cp])
      if len(names) <= 3:
        shown = ', '.join(names)
      else:
        # Long lists are cut to the first three plus the last.
        shown = ', '.join(names[:3]) + '... ' + names[-1]
      target_text = ' (in %s)' % shown
    else:
      target_text = ' !missing'

    fields = (
        '%04x' % cp,
        script,
        unicode_data.category(cp),
        unicode_data.age(cp),
        unicode_data.name(cp, ''),
        ext_text,
        target_text)
    print('    %6s %4s %2s %3s %s%s%s' % fields)
def seq_name(seq):
  """Return the name for a code point sequence, or None if it has none.

  A one-element sequence is named via unicode_data.name.  Longer sequences
  are looked up in the emoji sequence tables (keyed with the emoji variation
  selector stripped), first as given and then, if seq contains EMOJI_VS,
  with the selector removed.
  """
  global _namedata

  if not _namedata:
    # Build the lookup tables on first use; the order fixes lookup priority.
    getters = (
        unicode_data.get_emoji_combining_sequences,
        unicode_data.get_emoji_flag_sequences,
        unicode_data.get_emoji_modifier_sequences,
        unicode_data.get_emoji_zwj_sequences,
    )
    _namedata = [
        {unicode_data.strip_emoji_vs(key): value
         for key, value in getter().iteritems()}
        for getter in getters]

  if len(seq) == 1:
    return unicode_data.name(seq[0], None)

  for table in _namedata:
    if seq in table:
      return table[seq]

  if EMOJI_VS not in seq:
    return None

  stripped = unicode_data.strip_emoji_vs(seq)
  for table in _namedata:
    if stripped in table:
      return table[stripped]
  return None
def _char_name(cp):
  """Return a display name for cp, or None for code points left unnamed.

  Regional indicators become 'RIS <letter>'.  Fitzpatrick modifiers,
  U+200D and U+FE0F yield None.  Anything else gets its Unicode name,
  with '<?>' as the fallback.
  """
  if _is_regional_indicator(cp):
    return 'RIS ' + _regional_indicator_letter(cp)
  if _is_fitzpatrick(cp) or cp in (0x200d, 0xfe0f):
    return None
  return unicode_data.name(cp, '<?>')
def _report_info(title, cps_list):
  """Print a titled, sorted list of code point sequences with their names.

  Nothing is printed when cps_list is empty.
  """
  if cps_list:
    print('%s %d:' % (title, len(cps_list)))
    for seq in sorted(cps_list):
      hex_text = '_'.join('%04x' % cp for cp in seq)
      name_text = ','.join(unicode_data.name(cp, '') for cp in seq)
      print('  %s (%s)' % (hex_text, name_text))
Example #7
0
def _format_set(char_set, name, filename):
  """Write char_set to filename as a set literal assigned to name.

  The file is written as UTF-8 with one sorted code point per line, each
  followed by a comment holding the character itself and its Unicode name.
  """
  out_lines = ['%s = {' % name]
  for cp in sorted(char_set):
    # Kept in a separate local so the 'name' parameter is not overwritten.
    cp_name = unicode_data.name(cp)
    out_lines.append('    0x%04X,  # %s %s' % (cp, unichr(cp), cp_name))
  out_lines.append('}\n')
  with codecs.open(filename, 'w', 'UTF-8') as out:
    out.write('\n'.join(out_lines))
  print('wrote %s' % filename)
Example #8
0
def show_cps_by_block(cps):
  """Print the count of cps, then each code point under its block heading."""
  print('%d missing codepoints' % len(cps))
  current_block = None
  for cp in sorted(cps):
    cp_block = unicode_data.block(cp)
    if cp_block != current_block:
      # A '# <block>' heading is printed whenever the block changes.
      current_block = cp_block
      print('# %s' % cp_block)
    hex_text = '%04x' % cp
    print('%5s %s' % (hex_text, unicode_data.name(cp)))
def _print_detailed(cps, inverted_target=None):
  """Print one line per code point, grouped by block, collapsing Zzzz runs.

  Code points whose script is 'Zzzz' are not listed individually.  Each
  consecutive run of them within a block is reported as a single
  '<N undefined>' line.

  Args:
    cps: iterable of code points; they are processed in sorted order.
    inverted_target: optional mapping from code point to a collection of
      script names.  When given and non-empty, each line is suffixed with
      the scripts the code point is in, or ' !missing' if it is absent.
  """
  last_block = None
  # Inclusive bounds of the pending run of Zzzz code points; -1 means none.
  undefined_start = -1
  undefined_end = -1

  def show_undefined(start, end):
    """Print the summary line for the pending run start..end, if any."""
    if start >= 0:
      if end > start:
        # Both bounds are inclusive, so the run holds end - start + 1 code
        # points (the previous 'end - start - 1' under-reported by two).
        print('      %04x-%04x Zzzz <%d undefined>' % (
            start, end, end - start + 1))
      else:
        print('      %04x Zzzz <1 undefined>' % start)

  for cp in sorted(cps):
    block = unicode_data.block(cp)
    # Flush the pending run on a block change or when cp does not directly
    # follow the run, so every reported range is contiguous and in one block.
    if block != last_block or (undefined_end > -1 and cp > undefined_end + 1):
      show_undefined(undefined_start, undefined_end)
      undefined_start, undefined_end = -1, -1
      if block != last_block:
        print('    %s' % block)
        last_block = block
    script = unicode_data.script(cp)
    if script == 'Zzzz':
      # Extend the pending run or start a new one; no detail line is printed.
      if undefined_start >= 0:
        undefined_end = cp
      else:
        undefined_start, undefined_end = cp, cp
      continue

    # A code point with a known script ends any pending run.
    show_undefined(undefined_start, undefined_end)
    undefined_start, undefined_end = -1, -1
    extensions = unicode_data.script_extensions(cp) - set([script])
    if extensions:
      extensions = ' (%s)' % ','.join(sorted(extensions))
    else:
      extensions = ''
    if not inverted_target:
      extra = ''
    elif cp not in inverted_target:
      extra = ' !missing'
    else:
      scripts = sorted(inverted_target[cp])
      if len(scripts) > 3:
        # Long lists are cut to the first three plus the last.
        script_text = ', '.join(scripts[:3]) + '... ' + scripts[-1]
      else:
        script_text = ', '.join(scripts)
      extra = ' (in %s)' % script_text
    print('    %6s %4s %2s %3s %s%s%s' % (
        '%04x' % cp,
        script,
        unicode_data.category(cp),
        unicode_data.age(cp),
        unicode_data.name(cp, ''),
        extensions,
        extra))
  # Report a run that reaches the end of the input.
  show_undefined(undefined_start, undefined_end)
Example #10
0
def _get_name(key_tuple):
  CELL_PREFIX = '<td class="name">'
  if len(key_tuple) != 1:
    name = ''
  else:
    cp = key_tuple[0]
    if cp in unicode_data.proposed_emoji_cps():
      name = '(proposed) ' + unicode_data.proposed_emoji_name(cp)
    else:
      name = unicode_data.name(cp, '(error)')
  return CELL_PREFIX + name
def _flagged_name(cp, flag_sets):
  """Return cp's unicode name, prefixed by the flags that apply to it.

  flag_sets maps a flag name to a (cp set, boolean) tuple.  With True the
  flag applies when cp is in the set, with False when it is not.  Applicable
  flags are listed in sorted order inside parentheses before the name."""
  base_name = unicode_data.name(cp)
  active = [
      flag for flag, spec in sorted(flag_sets.iteritems())
      if (cp in spec[0]) == spec[1]]
  if not active:
    return base_name
  return '(%s) %s' % (', '.join(active),  base_name)
Example #12
0
def _get_char_names(charset):
  name_map = {}
  if charset:
    for cp in charset:
      try:
        name = unicode_data.name(cp)
      except:
        name = None
      if not name or name == '<control>':
        name = '%04x' % cp
      else:
        name = '%04x %s' % (cp, name.lower())
      name_map[name] = cp

  return name_map
def generate_names(srcdir, outfile, force):
    """Write a list describing every kept 'emoji_u*.png' image in srcdir.

    Each row of outfile holds the image file name, the sequence as HTML
    numeric character references, and a name (empty unless the image is a
    single code point or a flag sequence).  Every row, including the last,
    ends with a comma.  Images rejected by keep_sequence are reported on
    stdout afterwards.

    Args:
        srcdir: directory containing the images.
        outfile: path of the file to write; its parent directory is created
            when outfile does not exist yet.
        force: an existing outfile is only overwritten when this is true.
    """
    if not path.isdir(srcdir):
        print("%s is not a directory" % srcdir)
        return

    if not path.exists(outfile):
        out_parent = path.dirname(outfile)
        if out_parent and not os.path.exists(out_parent):
            os.makedirs(out_parent)
    elif not force:
        print("%s already exists" % outfile)
        return
    elif not path.isfile(outfile):
        print("%s is not a file" % outfile)
        return

    records = {}
    rejected = []
    for image_path in glob.glob(path.join(srcdir, "emoji_u*.png")):
        image_name = path.basename(image_path)
        # Drop the 7-character "emoji_u" prefix and the ".png" suffix.
        hex_parts = image_name[7:-4].split("_")
        # The emoji presentation variation selector is omitted on purpose.
        cps = [int(hex_part, 16) for hex_part in hex_parts
               if hex_part != "fe0f"]
        if not keep_sequence(cps):
            rejected.append(cps)
            continue

        entity_text = "".join("&#x%x;" % cp for cp in cps)
        if len(cps) == 1:
            label = unicode_name(cps[0])
        elif is_flag_sequence(cps):
            label = flag_sequence_name(cps)
        else:
            label = ""
        records[tuple(cps)] = (image_name, entity_text, label)

    row_format = '  {"image":"%s", "sequence":"%s", "name":"%s"},\n'
    with open(outfile, "w") as out:
        out.write("[\n")
        for key in sorted(records):
            out.write(row_format % records[key])
        out.write("]\n")
    print("wrote %s" % outfile)

    if not rejected:
        return
    print("skipped %d images:" % len(rejected))
    for cps in sorted(rejected):
        hex_text = "_".join("%04x" % cp for cp in cps)
        name_text = ",".join(unicode_data.name(cp, "") for cp in cps)
        print("  %s (%s)" % (hex_text, name_text))
Example #14
0
    _FITZ_START + 3: '5',
    _FITZ_START + 4: '6'
}


def _fitz_sequence_name(cps):
    """Return the name of the sequence's first code point.

    Only cps[0] contributes to the result.  The variant that also appended
    'Type <_FITZ_NAMES[cps[1]]>' is disabled.
    """
    base_cp = cps[0]
    return _unicode_name(base_cp)


def _is_keycap_sequence(cps):
    return len(cps) == 2 and cps[1] == 0x20e3


# Map from each code point in 0x30..0x39 to its unicode name with the first
# six characters removed.
# NOTE(review): presumably this strips a leading 'DIGIT ' -- confirm against
# the names unicode_data.name returns for these code points.
_KEYCAP_NAMES = {
    cp: unicode_data.name(cp)[6:]
    for cp in range(0x30, 0x30 + 10)
}


def _keycap_sequence_name(cps):
    """Return 'Keycap ' plus the title-cased name of the first code point.

    The name comes from _KEYCAP_NAMES when the code point is listed there,
    otherwise from unicode_data.name.
    """
    base_cp = cps[0]
    # Evaluated unconditionally, exactly like a dict.get default argument.
    fallback = unicode_data.name(base_cp)
    label = _KEYCAP_NAMES.get(base_cp, fallback)
    return 'Keycap ' + label.title()


def _create_extra_sequence_names():
    BOY = 0x1f466
    GIRL = 0x1f467
    MAN = 0x1f468
    WOMAN = 0x1f469
    HEART = 0x2764  # Heavy Black Heart
Example #15
0
def _build_text(name_map, initial_text=""):
    """Interactively build a string by choosing characters by name.

    Reads lines from the prompt until 'quit'.  The words 'help', 'names',
    'dump', 'clear' and 'write' are commands; any other non-empty line is a
    search term matched as a substring against the keys of name_map, and the
    character for the selected key is appended to the text.

    Args:
        name_map: dict from name string to code point.
        initial_text: text to start from.

    Returns:
        The text built so far when the user quits.
    """
    text = initial_text
    print("build text using map of length %d" % len(name_map))
    while True:
        line = input("> ")
        if not line:
            continue
        if line == "quit":
            break
        if line == "help":
            _help()
            continue
        if line == "names":
            print("names:\n  " + "\n  ".join(sorted(name_map.keys())))
            continue
        if line == "dump":
            # Show the text, then one line per character with its name.
            print("dump: '%s'" % text)
            for cp in text:
                print("%06x %s" % (ord(cp), unicode_data.name(ord(cp))))
            continue
        if line == "clear":
            text = ""
            continue
        if line == "write":
            # An empty file name cancels the write.
            line = input("file name> ")
            if line:
                _write_text(line, text)
            continue

        # Not a command: collect every name that contains the typed text.
        matches = []
        for name, cp in sorted(name_map.items()):
            if line in name:
                matches.append(name)
        if not matches:
            print('no match for "%s"' % line)
            continue

        if len(matches) == 1:
            print(matches[0])
            text += unichr(name_map[matches[0]])
            continue

        # if we match a full line, then use that
        if line in matches:
            print(line)
            text += unichr(name_map[line])
            continue

        new_matches = []
        for m in matches:
            if line in m.split(" "):
                new_matches.append(m)

        # if we match a full word, and only one line has this full word, use that
        if len(new_matches) == 1:
            print(new_matches[0])
            text += unichr(name_map[new_matches[0]])
            continue

        # Still ambiguous: list all substring matches and let the user pick
        # one by index.
        select_multiple = True
        while select_multiple:
            print("multiple matches:\n  " +
                  "\n  ".join("[%2d] %s" % (i, n)
                              for i, n in enumerate(matches)))
            # Re-prompt until the reply is 'q' or parses as an integer.
            while True:
                line = input("0-%d or q to skip> " % (len(matches) - 1))
                if line == "q":
                    select_multiple = False
                    break
                try:
                    n = int(line)
                    break
                except ValueError:
                    continue

            if not select_multiple:  # q
                break

            # An out-of-range index shows the list again.
            if n < 0 or n >= len(matches):
                print("%d out of range" % n)
                continue

            text += unichr(name_map[matches[n]])
            select_multiple = False

    print("done.")
    return text
def _check_coverage(seq_to_filepath, unicode_version):
  """Ensure we have all and only the cps and sequences that we need for the
  font as of this version.

  Adds an 'alias:<filename>' entry to seq_to_filepath for every default
  alias whose target has an image, then prints one 'coverage: ...' line for
  each single emoji, special character, combining / flag / modifier / zwj
  sequence and the unknown-flag PUA code point missing from seq_to_filepath.

  Args:
    seq_to_filepath: dict from code point tuple to file path; modified in
      place by the alias step.
    unicode_version: passed as the 'age' argument to the unicode_data emoji
      queries.
  """

  age = unicode_version

  # Map each sequence with the variation selector stripped back to the key
  # actually present in seq_to_filepath.
  non_vs_to_canonical = {}
  for k in seq_to_filepath:
    if EMOJI_VS in k:
      non_vs = unicode_data.strip_emoji_vs(k)
      non_vs_to_canonical[non_vs] = k

  aliases = add_aliases.read_default_emoji_aliases()
  for k, v in sorted(aliases.items()):
    if v not in seq_to_filepath and v not in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s missing target %s' % (alias_str, target_str))
      continue
    if k in seq_to_filepath or k in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s already exists as %s (%s)' % (
          alias_str, target_str, seq_name(v)))
      continue
    filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
    seq_to_filepath[k] = 'alias:' + filename

  # check single emoji, this includes most of the special chars
  emoji = sorted(unicode_data.get_emoji(age=age))
  for cp in emoji:
    if tuple([cp]) not in seq_to_filepath:
      print(
          'coverage: missing single %04x (%s)' % (
              cp, unicode_data.name(cp, '<no name>')))

  # special characters
  # all but combining enclosing keycap are currently marked as emoji
  # list(range(...)) keeps the concatenation valid where range() is lazy.
  special_cps = [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a))
  for cp in special_cps:
    if cp not in emoji and tuple([cp]) not in seq_to_filepath:
      print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))

  # combining sequences
  # .items() (not .iteritems()) matches the alias loop above and works on
  # both Python 2 and Python 3 dicts.
  comb_seq_to_name = sorted(
      unicode_data.get_emoji_combining_sequences(age=age).items())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
      non_vs_seq = unicode_data.strip_emoji_vs(seq)
      if non_vs_seq not in seq_to_filepath:
        print('coverage: missing combining sequence %s (%s)' %
              (unicode_data.seq_to_string(seq), name))

  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).items())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing flag sequence %s (%s)' %
            (unicode_data.seq_to_string(seq), name))

  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).items())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing modifier sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences.  normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)

  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).items()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print('coverage: missing (canonical) zwj sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
  if tuple([0xfe82b]) not in seq_to_filepath:
    print('coverage: missing unknown flag PUA fe82b')
Example #17
0
def _character_name(code):
    """Return the name of code point `code`, or '<Unassigned>' as fallback."""
    char = unichr(code)
    return unicode_data.name(char, '<Unassigned>')
Example #18
0
def _unicode_name(cp):
  """Return cp's unicode name, title-cased, with _NAME_FIXES applied.

  Every key of _NAME_FIXES found in the title-cased name is replaced by its
  value, in the dict's iteration order.
  """
  result = unicode_data.name(cp).title()
  for wrong, right in _NAME_FIXES.iteritems():
    result = result.replace(wrong, right)
  return result
Example #19
0
    _FITZ_START+1: '3',
    _FITZ_START+2: '4',
    _FITZ_START+3: '5',
    _FITZ_START+4: '6'
}

def _fitz_sequence_name(cps):
  """Return the name of the sequence's first code point.

  Only cps[0] contributes to the result.  The variant that also appended
  'Type <_FITZ_NAMES[cps[1]]>' is disabled.
  """
  base_cp = cps[0]
  return _unicode_name(base_cp)


def _is_keycap_sequence(cps):
  return len(cps) == 2 and cps[1] == 0x20e3


# Map from each code point in 0x30..0x39 to its unicode name with the first
# six characters removed.
# NOTE(review): presumably this strips a leading 'DIGIT ' -- confirm against
# the names unicode_data.name returns for these code points.
_KEYCAP_NAMES = {cp: unicode_data.name(cp)[6:] for cp in range(0x30, 0x30+10)}

def _keycap_sequence_name(cps):
    """Return 'Keycap ' plus the name of the sequence's first code point.

    The name comes from _KEYCAP_NAMES when the code point is listed there,
    otherwise from unicode_data.name.
    """
    base_cp = cps[0]
    # Evaluated unconditionally, exactly like a dict.get default argument.
    fallback = unicode_data.name(base_cp)
    label = _KEYCAP_NAMES.get(base_cp, fallback)
    return 'Keycap ' + label


def _create_extra_sequence_names():
  BOY = 0x1f466
  GIRL = 0x1f467
  MAN = 0x1f468
  WOMAN = 0x1f469
  HEART = 0x2764  # Heavy Black Heart
  KISS_MARK = 0x1f48b
  EYE = 0x1f441
  SPEECH = 0x1f5e8
def generate_names(
    src_dir, dst_dir, skip_limit=20, omit_groups=None, pretty_print=False,
    verbose=False):
  """Write data.json, listing the emoji images in src_dir by emoji group.

  Args:
    src_dir: directory holding the 'emoji_u*.png' images.  An error is
      printed to stderr and nothing is written if it is not a directory.
    dst_dir: directory that receives data.json; created if needed.
    skip_limit: when >= 0, an Exception is raised as soon as more than this
      many emoji have no image.  A negative value disables the limit.
    omit_groups: optional collection of emoji group names to leave out.  An
      unrecognized name prints an error to stderr and aborts.
    pretty_print: indent the JSON by 2 instead of using compact separators.
    verbose: print each emoji that has no image, under its group heading.

  Raises:
    Exception: when the number of emoji without an image exceeds skip_limit.
  """
  srcdir = tool_utils.resolve_path(src_dir)
  if not path.isdir(srcdir):
    print('%s is not a directory' % src_dir, file=sys.stderr)
    return

  if omit_groups:
    unknown_groups = set(omit_groups) - set(unicode_data.get_emoji_groups())
    if unknown_groups:
      # Report the unknown names in the order the caller gave them.
      print('did not recognize %d group%s: %s' % (
          len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
          ', '.join('"%s"' % g for g in omit_groups if g in unknown_groups)), file=sys.stderr)
      print('valid groups are:\n  %s' % (
          '\n  '.join(g for g in unicode_data.get_emoji_groups())), file=sys.stderr)
      return
    print('omitting %d group%s: %s' % (
        len(omit_groups), '' if len(omit_groups) == 1 else 's',
        ', '.join('"%s"' % g for g in omit_groups)))
  else:
    # might be None
    print('keeping all groups')
    omit_groups = []

  # make sure the destination exists
  dstdir = tool_utils.ensure_dir_exists(
      tool_utils.resolve_path(dst_dir))

  # _get_image_data returns canonical cp sequences
  print('src dir:', srcdir)
  seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
  print('seq to file has %d sequences' % len(seq_to_file))

  # Aliases add non-gendered versions using gendered images for the most part.
  # But when we display the images, we don't distinguish genders in the
  # naming, we rely on the images-- so these look redundant. So we
  # intentionally don't generate images for these.
  # However, the alias file also includes the flag aliases, which we do want,
  # and it also fails to exclude the unknown flag pua (since it doesn't
  # map to anything), so we need to adjust for this.
  canonical_aliases = generate_emoji_html._get_canonical_aliases()

  aliases = set([
      cps for cps in canonical_aliases.keys()
      if not unicode_data.is_regional_indicator_seq(cps)])
  aliases.add((0xfe82b,))  # unknown flag PUA
  excluded = aliases | generate_emoji_html._get_canonical_excluded()

  # The flag aliases have distinct names, so we _do_ want to show them
  # multiple times.
  to_add = {}
  for seq in canonical_aliases:
    if unicode_data.is_regional_indicator_seq(seq):
      replace_seq = canonical_aliases[seq]
      if seq in seq_to_file:
        print('warning, alias %s has file %s' % (
            unicode_data.regional_indicator_seq_to_string(seq),
            seq_to_file[seq]))
        continue
      # Reuse the alias target's image, if the target has one.
      replace_file = seq_to_file.get(replace_seq)
      if replace_file:
        to_add[seq] = replace_file
  seq_to_file.update(to_add)

  data = []
  last_skipped_group = None
  # skipcount accumulates across all groups, not per group.
  skipcount = 0
  for group in unicode_data.get_emoji_groups():
    if group in omit_groups:
      continue
    name_data = []
    for seq in unicode_data.get_emoji_in_group(group):
      if seq in excluded:
        continue
      seq_file = seq_to_file.get(seq, None)
      if seq_file is None:
        skipcount += 1
        if verbose:
          if group != last_skipped_group:
            print('group %s' % group)
            last_skipped_group = group
          print('  %s (%s)' % (
              unicode_data.seq_to_string(seq),
              ', '.join(unicode_data.name(cp, 'x') for cp in seq)))
        if skip_limit >= 0 and skipcount > skip_limit:
          raise Exception('skipped too many items')
      else:
        name_data.append(_name_data(seq, seq_file))
    data.append({'category': group, 'emojis': name_data})

  outfile = path.join(dstdir, 'data.json')
  with open(outfile, 'w') as f:
    indent = 2 if pretty_print else None
    separators = None if pretty_print else (',', ':')
    json.dump(data, f, indent=indent, separators=separators)
  print('wrote %s' % outfile)
Example #21
0
def _build_text(name_map, initial_text=''):
    """Interactively build a string by choosing characters by name.

    Reads lines from the prompt until 'quit'.  The words 'help', 'names',
    'dump', 'clear' and 'write' are commands; any other non-empty line is a
    search term matched as a substring against the keys of name_map, and the
    character for the selected key is appended to the text.

    Args:
        name_map: dict from name string to code point.
        initial_text: text to start from.

    Returns:
        The text built so far when the user quits.
    """
    text = initial_text
    print 'build text using map of length %d' % len(name_map)
    while True:
        line = raw_input('> ')
        if not line:
            continue
        if line == 'quit':
            break
        if line == 'help':
            _help()
            continue
        if line == 'names':
            print 'names:\n  ' + '\n  '.join(sorted(name_map.keys()))
            continue
        if line == 'dump':
            # Show the text, then one line per character with its name.
            print 'dump: \'%s\'' % text
            for cp in text:
                print '%06x %s' % (ord(cp), unicode_data.name(ord(cp)))
            continue
        if line == 'clear':
            text = ''
            continue
        if line == 'write':
            # An empty file name cancels the write.
            line = raw_input('file name> ')
            if line:
                _write_text(line, text)
            continue

        # Not a command: collect every name that contains the typed text.
        matches = []
        for name, cp in sorted(name_map.iteritems()):
            if line in name:
                matches.append(name)
        if not matches:
            print 'no match for "%s"' % line
            continue

        if len(matches) == 1:
            print matches[0]
            text += unichr(name_map[matches[0]])
            continue

        # if we match a full line, then use that
        if line in matches:
            print line
            text += unichr(name_map[line])
            continue

        new_matches = []
        for m in matches:
            if line in m.split(' '):
                new_matches.append(m)

        # if we match a full word, and only one line has this full word, use that
        if len(new_matches) == 1:
            print new_matches[0]
            text += unichr(name_map[new_matches[0]])
            continue

        # Still ambiguous: list all substring matches and let the user pick
        # one by index.
        select_multiple = True
        while select_multiple:
            print 'multiple matches:\n  ' + '\n  '.join(
                '[%2d] %s' % (i, n) for i, n in enumerate(matches))
            # Re-prompt until the reply is 'q' or parses as an integer.
            while True:
                line = raw_input('0-%d or q to skip> ' % (len(matches) - 1))
                if line == 'q':
                    select_multiple = False
                    break
                try:
                    n = int(line)
                    break
                except ValueError:
                    continue

            if not select_multiple:  # q
                break

            # An out-of-range index shows the list again.
            if n < 0 or n >= len(matches):
                print '%d out of range' % n
                continue

            text += unichr(name_map[matches[n]])
            select_multiple = False

    print 'done.'
    return text
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
    font as of this version.

    Adds an 'alias:<filename>' entry to seq_to_filepath for every default
    alias whose target has an image, then prints one 'coverage: ...' line for
    each single emoji, special character, combining / flag / modifier / zwj
    sequence and the unknown-flag PUA code point missing from seq_to_filepath.

    Args:
        seq_to_filepath: dict from code point tuple to file path; modified in
            place by the alias step.
        unicode_version: passed as the 'age' argument to the unicode_data
            emoji queries.
    """

    age = unicode_version

    # Map each sequence with the variation selector stripped back to the key
    # actually present in seq_to_filepath.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' %
                  (alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' %
                  (alias_str, target_str, seq_name(v)))
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' %
                  (cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # list(range(...)) keeps the concatenation valid where range() is lazy.
    special_cps = (
        [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)))
    for cp in special_cps:
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' %
                  (cp, unicode_data.name(cp)))

    # combining sequences
    # .items() (not .iteritems()) matches the alias loop above and works on
    # both Python 2 and Python 3 dicts.
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' %
                      (unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences.  normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    for seq, name in sorted(
            unicode_data.get_emoji_zwj_sequences(age=age).items()):
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
    # don't start with our prefix so 'unknown_flag' would be excluded by default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
 def test_name(self):
     """Checks that name() returns the expected name for U+118FF."""
     expected = 'WARANG CITI OM'
     actual = unicode_data.name(0x118FF)
     self.assertEqual(expected, actual)
Example #24
0
def _keycap_sequence_name(cps):
    """Return 'Keycap ' plus the title-cased name of the first code point.

    The name comes from _KEYCAP_NAMES when the code point is listed there,
    otherwise from unicode_data.name.
    """
    base_cp = cps[0]
    # Evaluated unconditionally, exactly like a dict.get default argument.
    fallback = unicode_data.name(base_cp)
    label = _KEYCAP_NAMES.get(base_cp, fallback)
    return 'Keycap ' + label.title()
def generate_names(src_dir,
                   dst_dir,
                   skip_limit=20,
                   omit_groups=None,
                   pretty_print=False,
                   verbose=False):
    """Write data.json, listing the emoji images in src_dir by emoji group.

    Args:
        src_dir: directory holding the 'emoji_u*.png' images.  An error is
            printed to stderr and nothing is written if it is not a directory.
        dst_dir: directory that receives data.json; created if needed.
        skip_limit: when >= 0, an Exception is raised as soon as more than
            this many emoji have no image.  A negative value disables the
            limit.
        omit_groups: optional collection of emoji group names to leave out.
            An unrecognized name prints an error to stderr and aborts.
        pretty_print: indent the JSON by 2 instead of using compact
            separators.
        verbose: print each emoji that has no image, under its group heading.

    Raises:
        Exception: when the number of emoji without an image exceeds
            skip_limit.
    """
    srcdir = tool_utils.resolve_path(src_dir)
    if not path.isdir(srcdir):
        print('%s is not a directory' % src_dir, file=sys.stderr)
        return

    if omit_groups:
        unknown_groups = set(omit_groups) - set(
            unicode_data.get_emoji_groups())
        if unknown_groups:
            # Report the unknown names in the order the caller gave them.
            print(
                'did not recognize %d group%s: %s' %
                (len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
                 ', '.join('"%s"' % g
                           for g in omit_groups if g in unknown_groups)),
                file=sys.stderr)
            print('valid groups are:\n  %s' %
                  ('\n  '.join(g for g in unicode_data.get_emoji_groups())),
                  file=sys.stderr)
            return
        print('omitting %d group%s: %s' %
              (len(omit_groups), '' if len(omit_groups) == 1 else 's',
               ', '.join('"%s"' % g for g in omit_groups)))
    else:
        # might be None
        print('keeping all groups')
        omit_groups = []

    # make sure the destination exists
    dstdir = tool_utils.ensure_dir_exists(tool_utils.resolve_path(dst_dir))

    # _get_image_data returns canonical cp sequences
    print('src dir:', srcdir)
    seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
    print('seq to file has %d sequences' % len(seq_to_file))

    # Aliases add non-gendered versions using gendered images for the most part.
    # But when we display the images, we don't distinguish genders in the
    # naming, we rely on the images-- so these look redundant. So we
    # intentionally don't generate images for these.
    # However, the alias file also includes the flag aliases, which we do want,
    # and it also fails to exclude the unknown flag pua (since it doesn't
    # map to anything), so we need to adjust for this.
    canonical_aliases = generate_emoji_html._get_canonical_aliases()

    aliases = set([
        cps for cps in canonical_aliases.keys()
        if not unicode_data.is_regional_indicator_seq(cps)
    ])
    aliases.add((0xfe82b, ))  # unknown flag PUA
    excluded = aliases | generate_emoji_html._get_canonical_excluded()

    # The flag aliases have distinct names, so we _do_ want to show them
    # multiple times.
    to_add = {}
    for seq in canonical_aliases:
        if unicode_data.is_regional_indicator_seq(seq):
            replace_seq = canonical_aliases[seq]
            if seq in seq_to_file:
                print('warning, alias %s has file %s' %
                      (unicode_data.regional_indicator_seq_to_string(seq),
                       seq_to_file[seq]))
                continue
            # Reuse the alias target's image, if the target has one.
            replace_file = seq_to_file.get(replace_seq)
            if replace_file:
                to_add[seq] = replace_file
    seq_to_file.update(to_add)

    data = []
    last_skipped_group = None
    # skipcount accumulates across all groups, not per group.
    skipcount = 0
    for group in unicode_data.get_emoji_groups():
        if group in omit_groups:
            continue
        name_data = []
        for seq in unicode_data.get_emoji_in_group(group):
            if seq in excluded:
                continue
            seq_file = seq_to_file.get(seq, None)
            if seq_file is None:
                skipcount += 1
                if verbose:
                    if group != last_skipped_group:
                        print('group %s' % group)
                        last_skipped_group = group
                    print('  %s (%s)' %
                          (unicode_data.seq_to_string(seq), ', '.join(
                              unicode_data.name(cp, 'x') for cp in seq)))
                if skip_limit >= 0 and skipcount > skip_limit:
                    raise Exception('skipped too many items')
            else:
                name_data.append(_name_data(seq, seq_file))
        data.append({'category': group, 'emojis': name_data})

    outfile = path.join(dstdir, 'data.json')
    with open(outfile, 'w') as f:
        indent = 2 if pretty_print else None
        separators = None if pretty_print else (',', ':')
        json.dump(data, f, indent=indent, separators=separators)
    print('wrote %s' % outfile)
Example #26
0
 def test_name(self):
     """Checks that name() returns the expected name for U+118FF."""
     expected = 'WARANG CITI OM'
     actual = unicode_data.name(0x118FF)
     self.assertEqual(expected, actual)
Example #27
0
def _character_name(code):
    """Return the name of code point `code`, or '<Unassigned>' as fallback."""
    char = unichr(code)
    return unicode_data.name(char, '<Unassigned>')
def _keycap_sequence_name(cps):
  """Return 'Keycap ' plus the title-cased name of the first code point.

  The name comes from _KEYCAP_NAMES when the code point is listed there,
  otherwise from unicode_data.name.
  """
  base_cp = cps[0]
  # Evaluated unconditionally, exactly like a dict.get default argument.
  fallback = unicode_data.name(base_cp)
  label = _KEYCAP_NAMES.get(base_cp, fallback)
  return 'Keycap ' + label.title()
def _keycap_sequence_name(cps):
    """Return 'Keycap ' plus the name of the sequence's first code point.

    The name comes from _KEYCAP_NAMES when the code point is listed there,
    otherwise from unicode_data.name.
    """
    base_cp = cps[0]
    # Evaluated unconditionally, exactly like a dict.get default argument.
    fallback = unicode_data.name(base_cp)
    label = _KEYCAP_NAMES.get(base_cp, fallback)
    return 'Keycap ' + label
Example #30
0
def _build_text(name_map, initial_text=''):
  """Interactively build a string by choosing characters by name.

  Reads lines from the prompt until 'quit'.  The words 'help', 'names',
  'dump', 'clear' and 'write' are commands; any other non-empty line is a
  search term matched as a substring against the keys of name_map, and the
  character for the selected key is appended to the text.

  Args:
    name_map: dict from name string to code point.
    initial_text: text to start from.

  Returns:
    The text built so far when the user quits.
  """
  text = initial_text
  print 'build text using map of length %d' % len(name_map)
  while True:
    line = raw_input('> ')
    if not line:
      continue
    if line == 'quit':
      break
    if line == 'help':
      _help()
      continue
    if line == 'names':
      print 'names:\n  ' + '\n  '.join(sorted(name_map.keys()))
      continue
    if line == 'dump':
      # Show the text, then one line per character with its name.
      print 'dump: \'%s\'' % text
      for cp in text:
        print '%06x %s' % (ord(cp), unicode_data.name(ord(cp)))
      continue
    if line == 'clear':
      text = ''
      continue
    if line == 'write':
      # An empty file name cancels the write.
      line = raw_input('file name> ')
      if line:
        _write_text(line, text)
      continue

    # Not a command: collect every name that contains the typed text.
    matches = []
    for name, cp in sorted(name_map.iteritems()):
      if line in name:
        matches.append(name)
    if not matches:
      print 'no match for "%s"'% line
      continue

    if len(matches) == 1:
      print matches[0]
      text += unichr(name_map[matches[0]])
      continue

    # if we match a full line, then use that
    if line in matches:
      print line
      text += unichr(name_map[line])
      continue

    new_matches = []
    for m in matches:
      if line in m.split(' '):
        new_matches.append(m)

    # if we match a full word, and only one line has this full word, use that
    if len(new_matches) == 1:
      print new_matches[0]
      text += unichr(name_map[new_matches[0]])
      continue

    # Still ambiguous: list all substring matches and let the user pick one
    # by index.
    select_multiple = True
    while select_multiple:
      print 'multiple matches:\n  ' + '\n  '.join(
          '[%2d] %s' % (i, n) for i, n in enumerate(matches))
      # Re-prompt until the reply is 'q' or parses as an integer.
      while True:
        line = raw_input('0-%d or q to skip> ' % (len(matches) - 1))
        if line == 'q':
          select_multiple = False
          break
        try:
          n = int(line)
          break
        except ValueError:
          continue

      if not select_multiple: # q
        break

      # An out-of-range index shows the list again.
      if n < 0 or n >= len(matches):
        print '%d out of range' % n
        continue

      text += unichr(name_map[matches[n]])
      select_multiple = False

  print 'done.'
  return text